Improve ASCII handling, improve performance
This commit is contained in:
@@ -11,7 +11,7 @@ name = "bench"
|
|||||||
harness = false
|
harness = false
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
fst = "0.4.7"
|
# fst = "0.4.7"
|
||||||
tinyvec = { version = "1.6.0", features = ["alloc"] }
|
tinyvec = { version = "1.6.0", features = ["alloc"] }
|
||||||
u-fst = { path = "../u-fst" }
|
u-fst = { path = "../u-fst" }
|
||||||
|
|
||||||
@@ -22,4 +22,5 @@ u-fst = { path = "../u-fst" }
|
|||||||
criterion = "0.3.5"
|
criterion = "0.3.5"
|
||||||
proptest = "1.0.0"
|
proptest = "1.0.0"
|
||||||
similar-asserts = "1.2.0"
|
similar-asserts = "1.2.0"
|
||||||
|
unic-normal = "0.9.0"
|
||||||
unicode-normalization = "0.1.19"
|
unicode-normalization = "0.1.19"
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ use std::fs;
|
|||||||
use criterion::{criterion_group, criterion_main, Criterion};
|
use criterion::{criterion_group, criterion_main, Criterion};
|
||||||
|
|
||||||
use u_norm::nfd;
|
use u_norm::nfd;
|
||||||
|
use unic_normal::StrNormalForm;
|
||||||
use unicode_normalization::UnicodeNormalization;
|
use unicode_normalization::UnicodeNormalization;
|
||||||
|
|
||||||
const ASCII: &str = "all types of normalized";
|
const ASCII: &str = "all types of normalized";
|
||||||
@@ -10,8 +11,13 @@ const ASCII: &str = "all types of normalized";
|
|||||||
fn criterion_benchmark(c: &mut Criterion) {
|
fn criterion_benchmark(c: &mut Criterion) {
|
||||||
let mut group = c.benchmark_group("ASCII");
|
let mut group = c.benchmark_group("ASCII");
|
||||||
|
|
||||||
group.bench_function("unf", |b| b.iter(|| nfd(ASCII).count()));
|
group.bench_function("u-norm", |b| b.iter(|| nfd(ASCII).count()));
|
||||||
group.bench_function("unicode-normalization", |b| b.iter(|| ASCII.nfd().count()));
|
group.bench_function("unicode-normalization", |b| {
|
||||||
|
b.iter(|| UnicodeNormalization::nfd(ASCII).count())
|
||||||
|
});
|
||||||
|
group.bench_function("unic-normal", |b| {
|
||||||
|
b.iter(|| StrNormalForm::nfd(ASCII).count())
|
||||||
|
});
|
||||||
|
|
||||||
group.finish();
|
group.finish();
|
||||||
|
|
||||||
@@ -19,8 +25,13 @@ fn criterion_benchmark(c: &mut Criterion) {
|
|||||||
|
|
||||||
let mut group = c.benchmark_group("Long");
|
let mut group = c.benchmark_group("Long");
|
||||||
|
|
||||||
group.bench_function("unf", |b| b.iter(|| nfd(&long).count()));
|
group.bench_function("u-norm", |b| b.iter(|| nfd(&long).count()));
|
||||||
group.bench_function("unicode-normalization", |b| b.iter(|| long.nfd().count()));
|
group.bench_function("unicode-normalization", |b| {
|
||||||
|
b.iter(|| UnicodeNormalization::nfd(long.as_str()).count())
|
||||||
|
});
|
||||||
|
group.bench_function("unic-normal", |b| {
|
||||||
|
b.iter(|| StrNormalForm::nfd(long.as_str()).count())
|
||||||
|
});
|
||||||
|
|
||||||
group.finish();
|
group.finish();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -26,6 +26,14 @@ impl Buffer {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn push_zero(&mut self, ch: char) {
|
||||||
|
self.sort_pending();
|
||||||
|
|
||||||
|
self.buffer.push((0, ch));
|
||||||
|
self.ready.end = self.buffer.len();
|
||||||
|
}
|
||||||
|
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn push_back(&mut self, ch: char) {
|
fn push_back(&mut self, ch: char) {
|
||||||
let class = table::lookup(ch).combining_class();
|
let class = table::lookup(ch).combining_class();
|
||||||
@@ -125,7 +133,7 @@ const S_COUNT: u32 = L_COUNT * N_COUNT;
|
|||||||
fn decompose(c: char, buffer: &mut Buffer) {
|
fn decompose(c: char, buffer: &mut Buffer) {
|
||||||
// 7-bit ASCII never decomposes
|
// 7-bit ASCII never decomposes
|
||||||
if c <= '\x7f' {
|
if c <= '\x7f' {
|
||||||
buffer.push_back(c);
|
buffer.push_zero(c);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user