Initial commit

This commit is contained in:
2022-05-19 23:26:00 +02:00
commit 8a8baffba8
53 changed files with 761345 additions and 0 deletions

108
crates/smol-uca/build.rs Normal file
View File

@@ -0,0 +1,108 @@
use std::env;
use std::fs;
use std::path::Path;
use parse::uca::allkeys;
use ufst::raw::Builder;
/// Build script entry point: converts `data/allkeys.txt` into generated artifacts.
///
/// Three files are written into the directory named by the `OUT_DIR` environment variable:
///
/// - `implicit_weights.rs` — a `pub const IMPLICIT_WEIGHTS` array holding one
///   `(start, end, base)` tuple per implicit-weight range returned by the parser.
/// - `table.fst` — the serialized FST mapping each entry's code points (as raw bytes)
///   to a packed `u64` value (layout described inline below).
/// - `explicit_weights.rs` — a `pub const EXPLICIT_WEIGHTS` array holding the elements
///   of every entry that has two or more elements; the FST value points into it.
///
/// # Panics
///
/// Panics (failing the build) if the input file cannot be read, `OUT_DIR` is unset,
/// an output file cannot be written, an entry has no elements, an entry does not fit
/// the packed encoding, or the FST builder rejects a key.
fn main() {
    println!("cargo:rerun-if-changed=data/allkeys.txt");
    println!("cargo:rerun-if-changed=build.rs");

    let all_keys = {
        let data = fs::read_to_string("data/allkeys.txt")
            .expect("failed to read data/allkeys.txt");
        allkeys::parse(&data)
    };

    let out_dir_os = env::var_os("OUT_DIR")
        .expect("OUT_DIR is not set; this file must run as a cargo build script");
    let out_dir = Path::new(&out_dir_os);

    // --- implicit_weights.rs -------------------------------------------------
    let mut implicit_weights = format!(
        "pub const IMPLICIT_WEIGHTS: [(u32, u32, u32); {}] = [\n",
        all_keys.implicit_weights.len(),
    );
    for implicit_weight in all_keys.implicit_weights {
        implicit_weights.push_str(&format!(
            " ({}, {}, {}),\n",
            implicit_weight.start, implicit_weight.end, implicit_weight.base
        ));
    }
    implicit_weights.push_str("];\n");
    fs::write(out_dir.join("implicit_weights.rs"), implicit_weights)
        .expect("failed to write implicit_weights.rs");

    // --- table.fst -----------------------------------------------------------
    // Keys are the code points reinterpreted as bytes in the build host's native
    // byte order (that is what `bytemuck::cast_slice` yields).
    // NOTE(review): the runtime lookup presumably has to build keys the same way;
    // confirm this holds when cross-compiling to a target of different endianness.
    let mut entries = all_keys
        .entries
        .into_iter()
        .map(|entry| {
            (
                bytemuck::cast_slice::<u32, u8>(&entry.chars).to_vec(),
                entry.elements,
            )
        })
        .collect::<Vec<_>>();
    // Keys are inserted in ascending byte order.
    // NOTE(review): presumably required by the FST builder; confirm against `ufst`.
    entries.sort_unstable_by(|(a, _), (b, _)| a.cmp(b));

    let mut builder = Builder::memory();
    // Elements of multi-element entries, stored contiguously; becomes EXPLICIT_WEIGHTS.
    let mut overflow = Vec::new();
    for (chars, mut elements) in entries {
        let value = match elements.len() {
            1 => {
                // Inline encoding, tag bit 0 = 0:
                //   bit 1       variable flag
                //   bits 2..    l1
                //   bits 18..   l2
                //   bits 34..   l3
                //   bits 42..   l4
                // The fields do not overlap provided l1/l2/l4 are 16-bit and l3 is
                // 8-bit, as the EXPLICIT_WEIGHTS tuple type below suggests.
                let element = elements.pop().expect("length was just matched as 1");
                ((element.l4 as u64) << 42)
                    | ((element.l3 as u64) << 34)
                    | ((element.l2 as u64) << 18)
                    | ((element.l1 as u64) << 2)
                    | (u64::from(element.variable) << 1)
            }
            2.. => {
                // Overflow encoding, tag bit 0 = 1:
                //   bits 1..=8  number of elements
                //   bits 9..    index of the first element in `overflow`
                let idx = overflow.len();
                let len = elements.len();
                // Without these checks an oversized entry would silently spill
                // into the neighbouring bit field and corrupt the table.
                assert!(
                    len <= 0xFF,
                    "entry {:?} has {} elements; the packed length field holds at most 255",
                    chars,
                    len,
                );
                assert!(
                    (idx as u64) < (1u64 << 55),
                    "overflow index {} does not fit in the packed index field",
                    idx,
                );
                overflow.extend(elements);
                ((idx as u64) << 9) | ((len as u64) << 1) | 1
            }
            // Zero elements: there is nothing to encode for this key.
            _ => panic!("this shouldn't happen!"),
        };
        builder
            .insert(chars, value)
            .expect("failed to insert key into the FST");
    }
    let data = builder.into_fst().into_inner();
    fs::write(out_dir.join("table.fst"), data).expect("failed to write table.fst");

    // --- explicit_weights.rs -------------------------------------------------
    let mut explicit_weights = format!(
        "pub const EXPLICIT_WEIGHTS: [(u16, u16, u8, u16, bool); {}] = [\n",
        overflow.len(),
    );
    for element in overflow {
        explicit_weights.push_str(&format!(
            " ({}, {}, {}, {}, {}),\n",
            element.l1, element.l2, element.l3, element.l4, element.variable,
        ));
    }
    explicit_weights.push_str("];\n");
    fs::write(out_dir.join("explicit_weights.rs"), explicit_weights)
        .expect("failed to write explicit_weights.rs");
}