Better packing for u-sort
This commit is contained in:
84
crates/u-norm/src/buffer.rs
Normal file
84
crates/u-norm/src/buffer.rs
Normal file
@@ -0,0 +1,84 @@
|
||||
use std::ops::Range;
|
||||
|
||||
use tinyvec::TinyVec;
|
||||
|
||||
use crate::table;
|
||||
|
||||
pub(crate) struct Buffer {
|
||||
buffer: TinyVec<[(u8, char); 4]>,
|
||||
ready: Range<usize>,
|
||||
}
|
||||
|
||||
impl Buffer {
|
||||
pub(crate) fn new() -> Self {
|
||||
Self {
|
||||
buffer: TinyVec::new(),
|
||||
ready: 0..0,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub(crate) fn is_ready(&self) -> bool {
|
||||
self.ready.end != 0
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub(crate) fn is_empty(&self) -> bool {
|
||||
self.buffer.is_empty()
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub(crate) fn finish(&mut self) {
|
||||
self.sort_pending();
|
||||
self.ready.end = self.buffer.len();
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub(crate) fn push(&mut self, ch: char, class: u8) {
|
||||
if class == 0 {
|
||||
self.sort_pending();
|
||||
|
||||
self.buffer.push((class, ch));
|
||||
self.ready.end = self.buffer.len();
|
||||
} else {
|
||||
self.buffer.push((class, ch));
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub(crate) fn push_back(&mut self, ch: char) {
|
||||
self.push(ch, table::lookup(ch).combining_class());
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub(crate) fn pop(&mut self) -> char {
|
||||
let (_, ch) = self.buffer[self.ready.start];
|
||||
|
||||
let next = self.ready.start + 1;
|
||||
|
||||
if next == self.ready.end {
|
||||
self.reset();
|
||||
} else {
|
||||
self.ready.start = next;
|
||||
}
|
||||
|
||||
ch
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn sort_pending(&mut self) {
|
||||
self.buffer[self.ready.end..].sort_by_key(|k| k.0);
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn reset(&mut self) {
|
||||
let pending = self.buffer.len() - self.ready.end;
|
||||
|
||||
for i in 0..pending {
|
||||
self.buffer[i] = self.buffer[i + self.ready.end];
|
||||
}
|
||||
|
||||
self.buffer.truncate(pending);
|
||||
self.ready = 0..0;
|
||||
}
|
||||
}
|
||||
@@ -1,11 +1,11 @@
|
||||
use std::iter::Fuse;
|
||||
use std::ops::Range;
|
||||
use std::str::Chars;
|
||||
|
||||
use tinyvec::TinyVec;
|
||||
|
||||
mod buffer;
|
||||
pub mod table;
|
||||
|
||||
use buffer::Buffer;
|
||||
|
||||
pub fn nfd(s: &str) -> Decompositions<Chars<'_>> {
|
||||
Decompositions {
|
||||
iter: s.chars().fuse(),
|
||||
@@ -13,65 +13,6 @@ pub fn nfd(s: &str) -> Decompositions<Chars<'_>> {
|
||||
}
|
||||
}
|
||||
|
||||
struct Buffer {
|
||||
buffer: TinyVec<[(u8, char); 4]>,
|
||||
ready: Range<usize>,
|
||||
}
|
||||
|
||||
impl Buffer {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
buffer: TinyVec::new(),
|
||||
ready: 0..0,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn push(&mut self, ch: char, class: u8) {
|
||||
if class == 0 {
|
||||
self.sort_pending();
|
||||
|
||||
self.buffer.push((class, ch));
|
||||
self.ready.end = self.buffer.len();
|
||||
} else {
|
||||
self.buffer.push((class, ch));
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn push_back(&mut self, ch: char) {
|
||||
self.push(ch, table::lookup(ch).combining_class());
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn sort_pending(&mut self) {
|
||||
self.buffer[self.ready.end..].sort_by_key(|k| k.0);
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn reset(&mut self) {
|
||||
let pending = self.buffer.len() - self.ready.end;
|
||||
|
||||
for i in 0..pending {
|
||||
self.buffer[i] = self.buffer[i + self.ready.end];
|
||||
}
|
||||
|
||||
self.buffer.truncate(pending);
|
||||
self.ready = 0..0;
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn increment_next_ready(&mut self) {
|
||||
let next = self.ready.start + 1;
|
||||
|
||||
if next == self.ready.end {
|
||||
self.reset();
|
||||
} else {
|
||||
self.ready.start = next;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Decompositions<I> {
|
||||
iter: Fuse<I>,
|
||||
buffer: Buffer,
|
||||
@@ -82,17 +23,16 @@ impl<I: Iterator<Item = char>> Iterator for Decompositions<I> {
|
||||
|
||||
#[inline(always)]
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
while self.buffer.ready.end == 0 {
|
||||
while !self.buffer.is_ready() {
|
||||
match self.iter.next() {
|
||||
Some(ch) => {
|
||||
decompose(ch, &mut self.buffer);
|
||||
}
|
||||
None => {
|
||||
if self.buffer.buffer.is_empty() {
|
||||
if self.buffer.is_empty() {
|
||||
return None;
|
||||
} else {
|
||||
self.buffer.sort_pending();
|
||||
self.buffer.ready.end = self.buffer.buffer.len();
|
||||
self.buffer.finish();
|
||||
|
||||
break;
|
||||
}
|
||||
@@ -100,11 +40,7 @@ impl<I: Iterator<Item = char>> Iterator for Decompositions<I> {
|
||||
}
|
||||
}
|
||||
|
||||
let (_, ch) = self.buffer.buffer[self.buffer.ready.start];
|
||||
|
||||
self.buffer.increment_next_ready();
|
||||
|
||||
Some(ch)
|
||||
Some(self.buffer.pop())
|
||||
}
|
||||
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
|
||||
Reference in New Issue
Block a user