Remove the `index` field from `ByteNgramReader`; the remaining n-gram length is now stored in the low byte of `mask`.
This commit is contained in:
15
src/lib.rs
15
src/lib.rs
@@ -94,7 +94,6 @@ pub struct ByteNgramReader<R: Read> {
|
||||
inner: Bytes<R>,
|
||||
token: u64,
|
||||
count: u64,
|
||||
index: u64,
|
||||
mask: u64,
|
||||
}
|
||||
|
||||
@@ -104,7 +103,6 @@ impl<R: Read> ByteNgramReader<R> {
|
||||
inner: inner.bytes(),
|
||||
token: 0,
|
||||
count: 0,
|
||||
index: 0,
|
||||
mask: 0,
|
||||
}
|
||||
}
|
||||
@@ -114,7 +112,7 @@ impl<R: Read> Iterator for ByteNgramReader<R> {
|
||||
type Item = ByteNgram;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if self.index == 0 {
|
||||
if self.mask == 0 {
|
||||
if let Some(Ok(byte)) = self.inner.next() {
|
||||
self.token += u64::from(byte);
|
||||
self.token <<= 8;
|
||||
@@ -122,21 +120,18 @@ impl<R: Read> Iterator for ByteNgramReader<R> {
|
||||
if self.count < 6 {
|
||||
self.count += 1;
|
||||
|
||||
self.index = self.count;
|
||||
self.mask = (256u64.pow(self.count as u32) - 1) << 8;
|
||||
self.mask = ((256u64.pow(self.count as u32) - 1) << 8) + self.count;
|
||||
} else {
|
||||
self.index = 7;
|
||||
self.mask = 0xFFFF_FFFF_FFFF_FF00;
|
||||
self.mask = 0xFFFF_FFFF_FFFF_FF07;
|
||||
}
|
||||
} else {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
let token = ByteNgram((self.token & self.mask & !0xFF) + self.index);
|
||||
let token = ByteNgram((self.token & self.mask & !0xFF) + (self.mask & 0xFF));
|
||||
|
||||
self.index -= 1;
|
||||
self.mask >>= 8;
|
||||
self.mask = ((self.mask >> 8) & !0xFF) + (self.mask & 0xFF) - 1;
|
||||
|
||||
Some(token)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user