Better, harder, faster!

This commit is contained in:
2018-01-12 13:23:24 +01:00
parent 70fda1c649
commit 0c8a017152
2 changed files with 15 additions and 13 deletions

View File

@@ -8,10 +8,12 @@ use criterion::Criterion;
use byte_ngram::*; use byte_ngram::*;
fn criterion_benchmark(c: &mut Criterion) { fn criterion_benchmark(c: &mut Criterion) {
let data = b"Blackmail is such an ugly word. I prefer extortion. The 'x' makes it sound cool."; let data = "Blackmail is such an ugly word. I prefer extortion. The 'x' makes it sound cool.";
c.bench_function("from_slice", |b| b.iter(|| for _ in from_slice(&data[..]) {})); let len = from_slice(&data[..]).len();
c.bench_function("ByteNgramReader", |b| b.iter(|| for _ in ByteNgramReader::new(&data[..]) {}));
c.bench_function("from_slice", |b| b.iter(|| assert_eq!(len, from_slice(&data[..]).len())));
c.bench_function("ByteNgramReader", |b| b.iter(|| assert_eq!(len, ByteNgramReader::new(&data).count())));
} }
criterion_group!(benches, criterion_benchmark); criterion_group!(benches, criterion_benchmark);

View File

@@ -1,11 +1,11 @@
#[macro_use] #[macro_use]
extern crate serde_derive; extern crate serde_derive;
use std::io::{Bytes, Read};
use std::fmt; use std::fmt;
use std::cmp; use std::cmp;
use std::ops; use std::ops;
use std::convert; use std::convert;
use std::slice::Iter;
#[derive(Hash, PartialEq, Eq, Clone, Copy, PartialOrd, Ord, Serialize, Deserialize)] #[derive(Hash, PartialEq, Eq, Clone, Copy, PartialOrd, Ord, Serialize, Deserialize)]
pub struct ByteNgram(u64); pub struct ByteNgram(u64);
@@ -90,18 +90,18 @@ impl convert::From<u64> for ByteNgram {
} }
} }
pub struct ByteNgramReader<R: Read> { pub struct ByteNgramReader<'a> {
inner: Bytes<R>, inner: Iter<'a, u8>,
count: u64, count: u64,
token: u64, token: u64,
mask: u64, mask: u64,
} }
impl<R: Read> ByteNgramReader<R> { impl<'a> ByteNgramReader<'a> {
#[inline] #[inline]
pub fn new(inner: R) -> Self { pub fn new<T: 'a>(inner: &'a T) -> Self where T: AsRef<[u8]> {
ByteNgramReader { ByteNgramReader {
inner: inner.bytes(), inner: inner.as_ref().iter(),
count: 0, count: 0,
token: 0, token: 0,
mask: 0, mask: 0,
@@ -109,14 +109,14 @@ impl<R: Read> ByteNgramReader<R> {
} }
} }
impl<R: Read> Iterator for ByteNgramReader<R> { impl<'a> Iterator for ByteNgramReader<'a> {
type Item = ByteNgram; type Item = ByteNgram;
#[inline] #[inline]
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
if self.mask == 0 { if self.mask == 0 {
if let Some(Ok(byte)) = self.inner.next() { if let Some(byte) = self.inner.next() {
self.token += u64::from(byte); self.token += u64::from(*byte);
self.token <<= 8; self.token <<= 8;
if self.count < 6 { if self.count < 6 {
@@ -253,7 +253,7 @@ mod tests {
let data = b"abc"; let data = b"abc";
let mut a = from_slice(&data); let mut a = from_slice(&data);
let mut b = ByteNgramReader::new(&data[..]).collect::<Vec<_>>(); let mut b = ByteNgramReader::new(&data).collect::<Vec<_>>();
assert_eq!(a.len(), b.len()); assert_eq!(a.len(), b.len());