Better, harder, faster!
This commit is contained in:
@@ -8,10 +8,12 @@ use criterion::Criterion;
|
|||||||
use byte_ngram::*;
|
use byte_ngram::*;
|
||||||
|
|
||||||
fn criterion_benchmark(c: &mut Criterion) {
|
fn criterion_benchmark(c: &mut Criterion) {
|
||||||
let data = b"Blackmail is such an ugly word. I prefer extortion. The 'x' makes it sound cool.";
|
let data = "Blackmail is such an ugly word. I prefer extortion. The 'x' makes it sound cool.";
|
||||||
|
|
||||||
c.bench_function("from_slice", |b| b.iter(|| for _ in from_slice(&data[..]) {}));
|
let len = from_slice(&data[..]).len();
|
||||||
c.bench_function("ByteNgramReader", |b| b.iter(|| for _ in ByteNgramReader::new(&data[..]) {}));
|
|
||||||
|
c.bench_function("from_slice", |b| b.iter(|| assert_eq!(len, from_slice(&data[..]).len())));
|
||||||
|
c.bench_function("ByteNgramReader", |b| b.iter(|| assert_eq!(len, ByteNgramReader::new(&data).count())));
|
||||||
}
|
}
|
||||||
|
|
||||||
criterion_group!(benches, criterion_benchmark);
|
criterion_group!(benches, criterion_benchmark);
|
||||||
|
|||||||
20
src/lib.rs
20
src/lib.rs
@@ -1,11 +1,11 @@
|
|||||||
#[macro_use]
|
#[macro_use]
|
||||||
extern crate serde_derive;
|
extern crate serde_derive;
|
||||||
|
|
||||||
use std::io::{Bytes, Read};
|
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::cmp;
|
use std::cmp;
|
||||||
use std::ops;
|
use std::ops;
|
||||||
use std::convert;
|
use std::convert;
|
||||||
|
use std::slice::Iter;
|
||||||
|
|
||||||
#[derive(Hash, PartialEq, Eq, Clone, Copy, PartialOrd, Ord, Serialize, Deserialize)]
|
#[derive(Hash, PartialEq, Eq, Clone, Copy, PartialOrd, Ord, Serialize, Deserialize)]
|
||||||
pub struct ByteNgram(u64);
|
pub struct ByteNgram(u64);
|
||||||
@@ -90,18 +90,18 @@ impl convert::From<u64> for ByteNgram {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct ByteNgramReader<R: Read> {
|
pub struct ByteNgramReader<'a> {
|
||||||
inner: Bytes<R>,
|
inner: Iter<'a, u8>,
|
||||||
count: u64,
|
count: u64,
|
||||||
token: u64,
|
token: u64,
|
||||||
mask: u64,
|
mask: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<R: Read> ByteNgramReader<R> {
|
impl<'a> ByteNgramReader<'a> {
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn new(inner: R) -> Self {
|
pub fn new<T: 'a>(inner: &'a T) -> Self where T: AsRef<[u8]> {
|
||||||
ByteNgramReader {
|
ByteNgramReader {
|
||||||
inner: inner.bytes(),
|
inner: inner.as_ref().iter(),
|
||||||
count: 0,
|
count: 0,
|
||||||
token: 0,
|
token: 0,
|
||||||
mask: 0,
|
mask: 0,
|
||||||
@@ -109,14 +109,14 @@ impl<R: Read> ByteNgramReader<R> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<R: Read> Iterator for ByteNgramReader<R> {
|
impl<'a> Iterator for ByteNgramReader<'a> {
|
||||||
type Item = ByteNgram;
|
type Item = ByteNgram;
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
if self.mask == 0 {
|
if self.mask == 0 {
|
||||||
if let Some(Ok(byte)) = self.inner.next() {
|
if let Some(byte) = self.inner.next() {
|
||||||
self.token += u64::from(byte);
|
self.token += u64::from(*byte);
|
||||||
self.token <<= 8;
|
self.token <<= 8;
|
||||||
|
|
||||||
if self.count < 6 {
|
if self.count < 6 {
|
||||||
@@ -253,7 +253,7 @@ mod tests {
|
|||||||
let data = b"abc";
|
let data = b"abc";
|
||||||
|
|
||||||
let mut a = from_slice(&data);
|
let mut a = from_slice(&data);
|
||||||
let mut b = ByteNgramReader::new(&data[..]).collect::<Vec<_>>();
|
let mut b = ByteNgramReader::new(&data).collect::<Vec<_>>();
|
||||||
|
|
||||||
assert_eq!(a.len(), b.len());
|
assert_eq!(a.len(), b.len());
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user