Improve ASCII handling, improve performance
This commit is contained in:
@@ -11,7 +11,7 @@ name = "bench"
|
||||
harness = false
|
||||
|
||||
[dependencies]
|
||||
fst = "0.4.7"
|
||||
# fst = "0.4.7"
|
||||
tinyvec = { version = "1.6.0", features = ["alloc"] }
|
||||
u-fst = { path = "../u-fst" }
|
||||
|
||||
@@ -22,4 +22,5 @@ u-fst = { path = "../u-fst" }
|
||||
criterion = "0.3.5"
|
||||
proptest = "1.0.0"
|
||||
similar-asserts = "1.2.0"
|
||||
unic-normal = "0.9.0"
|
||||
unicode-normalization = "0.1.19"
|
||||
|
||||
@@ -3,6 +3,7 @@ use std::fs;
|
||||
use criterion::{criterion_group, criterion_main, Criterion};
|
||||
|
||||
use u_norm::nfd;
|
||||
use unic_normal::StrNormalForm;
|
||||
use unicode_normalization::UnicodeNormalization;
|
||||
|
||||
const ASCII: &str = "all types of normalized";
|
||||
@@ -10,8 +11,13 @@ const ASCII: &str = "all types of normalized";
|
||||
fn criterion_benchmark(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("ASCII");
|
||||
|
||||
group.bench_function("unf", |b| b.iter(|| nfd(ASCII).count()));
|
||||
group.bench_function("unicode-normalization", |b| b.iter(|| ASCII.nfd().count()));
|
||||
group.bench_function("u-norm", |b| b.iter(|| nfd(ASCII).count()));
|
||||
group.bench_function("unicode-normalization", |b| {
|
||||
b.iter(|| UnicodeNormalization::nfd(ASCII).count())
|
||||
});
|
||||
group.bench_function("unic-normal", |b| {
|
||||
b.iter(|| StrNormalForm::nfd(ASCII).count())
|
||||
});
|
||||
|
||||
group.finish();
|
||||
|
||||
@@ -19,8 +25,13 @@ fn criterion_benchmark(c: &mut Criterion) {
|
||||
|
||||
let mut group = c.benchmark_group("Long");
|
||||
|
||||
group.bench_function("unf", |b| b.iter(|| nfd(&long).count()));
|
||||
group.bench_function("unicode-normalization", |b| b.iter(|| long.nfd().count()));
|
||||
group.bench_function("u-norm", |b| b.iter(|| nfd(&long).count()));
|
||||
group.bench_function("unicode-normalization", |b| {
|
||||
b.iter(|| UnicodeNormalization::nfd(long.as_str()).count())
|
||||
});
|
||||
group.bench_function("unic-normal", |b| {
|
||||
b.iter(|| StrNormalForm::nfd(long.as_str()).count())
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
@@ -26,6 +26,14 @@ impl Buffer {
|
||||
}
|
||||
}
|
||||
|
||||
/// Appends `ch` to the buffer tagged with `0`, without the `table::lookup`
/// call that `push_back` makes to obtain a combining class.
///
/// `decompose` uses this for 7-bit ASCII input (`c <= '\x7f'`).
/// NOTE(review): the `0` is presumably the canonical combining class of a
/// starter — confirm against the element type of `self.buffer`.
#[inline(always)]
|
||||
fn push_zero(&mut self, ch: char) {
|
||||
// Runs before the new entry is appended; presumably this orders any
// entries still awaiting sorting — confirm against `sort_pending`.
self.sort_pending();
|
||||
|
||||
self.buffer.push((0, ch));
|
||||
// After the push, `ready.end` equals the buffer length, so it covers
// every entry currently in the buffer, including the one just added.
self.ready.end = self.buffer.len();
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn push_back(&mut self, ch: char) {
|
||||
let class = table::lookup(ch).combining_class();
|
||||
@@ -125,7 +133,7 @@ const S_COUNT: u32 = L_COUNT * N_COUNT;
|
||||
fn decompose(c: char, buffer: &mut Buffer) {
|
||||
// 7-bit ASCII never decomposes
|
||||
if c <= '\x7f' {
|
||||
buffer.push_back(c);
|
||||
buffer.push_zero(c);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user