Better packing for u-sort

This commit is contained in:
2022-05-27 07:29:28 +02:00
parent cb74158d5d
commit e9935c8b54
7 changed files with 367 additions and 241 deletions

View File

@@ -0,0 +1,84 @@
use std::ops::Range;
use tinyvec::TinyVec;
use crate::table;
/// Small buffer of `(class, char)` pairs that hands characters back out
/// through `pop` once they have been marked ready.
pub(crate) struct Buffer {
/// Buffered characters, each stored next to the `u8` class it was pushed with.
buffer: TinyVec<[(u8, char); 4]>,
/// Index range into `buffer` of the characters that may be popped;
/// everything from `ready.end` onwards is still pending.
ready: Range<usize>,
}
impl Buffer {
    /// Creates an empty buffer with no ready characters.
    pub(crate) fn new() -> Self {
        Self {
            buffer: TinyVec::new(),
            ready: 0..0,
        }
    }

    /// Returns `true` when at least one character can be taken with `pop`.
    ///
    /// `ready.end` is zero exactly when the ready range is empty: `reset`
    /// puts it back to zero as soon as the last ready character is popped.
    #[inline(always)]
    pub(crate) fn is_ready(&self) -> bool {
        self.ready.end != 0
    }

    /// Returns `true` when the buffer holds no characters at all, neither
    /// ready nor pending.
    #[inline(always)]
    pub(crate) fn is_empty(&self) -> bool {
        self.buffer.is_empty()
    }

    /// Sorts whatever is still pending and marks the whole buffer as ready.
    ///
    /// On an empty buffer this leaves `ready.end` at zero, so `is_ready`
    /// keeps returning `false`.
    #[inline(always)]
    pub(crate) fn finish(&mut self) {
        self.sort_pending();
        self.ready.end = self.buffer.len();
    }

    /// Appends `ch` with the given `class`.
    ///
    /// A character of class 0 closes the current pending run: the pending
    /// characters are sorted, `ch` is appended after them, and everything up
    /// to and including `ch` becomes ready. Any other class is only appended
    /// to the pending tail.
    #[inline(always)]
    pub(crate) fn push(&mut self, ch: char, class: u8) {
        if class == 0 {
            self.sort_pending();
            self.buffer.push((class, ch));
            self.ready.end = self.buffer.len();
        } else {
            self.buffer.push((class, ch));
        }
    }

    /// Appends `ch`, taking its class from `table::lookup(ch).combining_class()`.
    #[inline(always)]
    pub(crate) fn push_back(&mut self, ch: char) {
        self.push(ch, table::lookup(ch).combining_class());
    }

    /// Removes and returns the next ready character.
    ///
    /// Callers must only call this while `is_ready()` is `true`. Once the
    /// last ready character has been taken, the pending tail is moved to the
    /// front of the buffer and the ready range is cleared.
    #[inline(always)]
    pub(crate) fn pop(&mut self) -> char {
        debug_assert!(
            self.ready.start < self.ready.end,
            "Buffer::pop called with no ready characters"
        );
        let (_, ch) = self.buffer[self.ready.start];
        let next = self.ready.start + 1;
        if next == self.ready.end {
            self.reset();
        } else {
            self.ready.start = next;
        }
        ch
    }

    /// Sorts the pending tail (everything from `ready.end` onwards) by class.
    ///
    /// `sort_by_key` is a stable sort, so characters with equal class keep
    /// the order in which they were pushed; do not swap it for an unstable
    /// sort.
    #[inline(always)]
    fn sort_pending(&mut self) {
        self.buffer[self.ready.end..].sort_by_key(|&(class, _)| class);
    }

    /// Drops the consumed ready prefix: moves the pending tail to the front
    /// of the buffer, truncates to its length, and clears the ready range.
    #[inline(always)]
    fn reset(&mut self) {
        let consumed = self.ready.end;
        let pending = self.buffer.len() - consumed;
        // `(u8, char)` is `Copy`, so the tail can be shifted down in one call
        // instead of an element-by-element indexed loop.
        self.buffer.copy_within(consumed.., 0);
        self.buffer.truncate(pending);
        self.ready = 0..0;
    }
}

View File

@@ -1,11 +1,11 @@
use std::iter::Fuse;
use std::ops::Range;
use std::str::Chars;
use tinyvec::TinyVec;
mod buffer;
pub mod table;
use buffer::Buffer;
pub fn nfd(s: &str) -> Decompositions<Chars<'_>> {
Decompositions {
iter: s.chars().fuse(),
@@ -13,65 +13,6 @@ pub fn nfd(s: &str) -> Decompositions<Chars<'_>> {
}
}
/// Small buffer of `(class, char)` pairs; characters inside the `ready`
/// range can be read out, the rest are still pending.
struct Buffer {
/// Buffered characters, each stored next to the `u8` class it was pushed with.
buffer: TinyVec<[(u8, char); 4]>,
/// Index range into `buffer` of the characters that are ready;
/// everything from `ready.end` onwards is still pending.
ready: Range<usize>,
}
impl Buffer {
/// Creates an empty buffer with an empty ready range.
fn new() -> Self {
Self {
buffer: TinyVec::new(),
ready: 0..0,
}
}
/// Appends `ch` with the given `class`.
///
/// Class 0 sorts the pending tail, appends `ch`, and extends the ready
/// range to the end of the buffer; any other class is only appended.
#[inline(always)]
fn push(&mut self, ch: char, class: u8) {
if class == 0 {
self.sort_pending();
self.buffer.push((class, ch));
self.ready.end = self.buffer.len();
} else {
self.buffer.push((class, ch));
}
}
/// Appends `ch`, taking its class from `table::lookup(ch).combining_class()`.
#[inline(always)]
fn push_back(&mut self, ch: char) {
self.push(ch, table::lookup(ch).combining_class());
}
/// Sorts the pending tail (from `ready.end` onwards) by class.
/// `sort_by_key` is stable, so equal classes keep their push order.
#[inline(always)]
fn sort_pending(&mut self) {
self.buffer[self.ready.end..].sort_by_key(|k| k.0);
}
/// Moves the pending tail to the front of the buffer, truncates to its
/// length, and clears the ready range.
#[inline(always)]
fn reset(&mut self) {
let pending = self.buffer.len() - self.ready.end;
for i in 0..pending {
self.buffer[i] = self.buffer[i + self.ready.end];
}
self.buffer.truncate(pending);
self.ready = 0..0;
}
/// Advances `ready.start` by one; when that reaches `ready.end` the buffer
/// is reset instead, since every ready character has been consumed.
#[inline(always)]
fn increment_next_ready(&mut self) {
let next = self.ready.start + 1;
if next == self.ready.end {
self.reset();
} else {
self.ready.start = next;
}
}
}
pub struct Decompositions<I> {
iter: Fuse<I>,
buffer: Buffer,
@@ -82,17 +23,16 @@ impl<I: Iterator<Item = char>> Iterator for Decompositions<I> {
#[inline(always)]
fn next(&mut self) -> Option<Self::Item> {
while self.buffer.ready.end == 0 {
while !self.buffer.is_ready() {
match self.iter.next() {
Some(ch) => {
decompose(ch, &mut self.buffer);
}
None => {
if self.buffer.buffer.is_empty() {
if self.buffer.is_empty() {
return None;
} else {
self.buffer.sort_pending();
self.buffer.ready.end = self.buffer.buffer.len();
self.buffer.finish();
break;
}
@@ -100,11 +40,7 @@ impl<I: Iterator<Item = char>> Iterator for Decompositions<I> {
}
}
let (_, ch) = self.buffer.buffer[self.buffer.ready.start];
self.buffer.increment_next_ready();
Some(ch)
Some(self.buffer.pop())
}
fn size_hint(&self) -> (usize, Option<usize>) {