Improve ASCII handling, improve performance

2022-05-24 21:13:43 +02:00
parent 9f44196e6c
commit 17ea7bc60c
3 changed files with 26 additions and 6 deletions
--- a/crates/u-norm/Cargo.toml
+++ b/crates/u-norm/Cargo.toml
@@ -11,7 +11,7 @@ name = "bench"
 harness = false
 [dependencies]
-fst = "0.4.7"
+# fst = "0.4.7"
 tinyvec = { version = "1.6.0", features = ["alloc"] }
 u-fst = { path = "../u-fst" }
@@ -22,4 +22,5 @@ u-fst = { path = "../u-fst" }
 criterion = "0.3.5"
 proptest = "1.0.0"
 similar-asserts = "1.2.0"
 unic-normal = "0.9.0"
 unicode-normalization = "0.1.19"
--- a/crates/u-norm/benches/bench.rs
+++ b/crates/u-norm/benches/bench.rs
@@ -3,6 +3,7 @@ use std::fs;
 use criterion::{criterion_group, criterion_main, Criterion};
 use u_norm::nfd;
 use unic_normal::StrNormalForm;
 use unicode_normalization::UnicodeNormalization;
 const ASCII: &str = "all types of normalized";
@@ -10,8 +11,13 @@ const ASCII: &str = "all types of normalized";
 fn criterion_benchmark(c: &mut Criterion) {
    let mut group = c.benchmark_group("ASCII");
-    group.bench_function("unf", |b| b.iter(|| nfd(ASCII).count()));
+    group.bench_function("u-norm", |b| b.iter(|| nfd(ASCII).count()));
-    group.bench_function("unicode-normalization", |b| b.iter(|| ASCII.nfd().count()));
+    group.bench_function("unicode-normalization", |b| {
        b.iter(|| UnicodeNormalization::nfd(ASCII).count())
    });
    group.bench_function("unic-normal", |b| {
        b.iter(|| StrNormalForm::nfd(ASCII).count())
    });
    group.finish();
@@ -19,8 +25,13 @@ fn criterion_benchmark(c: &mut Criterion) {
    let mut group = c.benchmark_group("Long");
-    group.bench_function("unf", |b| b.iter(|| nfd(&long).count()));
+    group.bench_function("u-norm", |b| b.iter(|| nfd(&long).count()));
-    group.bench_function("unicode-normalization", |b| b.iter(|| long.nfd().count()));
+    group.bench_function("unicode-normalization", |b| {
        b.iter(|| UnicodeNormalization::nfd(long.as_str()).count())
    });
    group.bench_function("unic-normal", |b| {
        b.iter(|| StrNormalForm::nfd(long.as_str()).count())
    });
    group.finish();
 }
--- a/crates/u-norm/src/lib.rs
+++ b/crates/u-norm/src/lib.rs
@@ -26,6 +26,14 @@ impl Buffer {
        }
    }
    /// Appends `ch` to the buffer with a hard-coded class of `0`, skipping the
    /// table lookup that `push_back` performs.
    ///
    /// NOTE(review): the `0` is presumably the canonical combining class, so
    /// callers must only pass characters whose class really is 0 — confirm at
    /// each call site.
    #[inline(always)]
    fn push_zero(&mut self, ch: char) {
        // Run `sort_pending` before appending, so the new entry is not part of
        // whatever it processes (presumably the previously pushed marks).
        self.sort_pending();
        self.buffer.push((0, ch));
        // Extend `ready` to the end of the buffer, including the entry just pushed.
        self.ready.end = self.buffer.len();
    }
    #[inline(always)]
    fn push_back(&mut self, ch: char) {
        let class = table::lookup(ch).combining_class();
@@ -125,7 +133,7 @@ const S_COUNT: u32 = L_COUNT * N_COUNT;
 fn decompose(c: char, buffer: &mut Buffer) {
    // 7-bit ASCII never decomposes
    if c <= '\x7f' {
-        buffer.push_back(c);
+        buffer.push_zero(c);
        return;
    }