//! Build script: parses `data/allkeys.txt` and writes `implicit_weights.rs`, `table.fst` and `explicit_weights.rs` into `OUT_DIR`.
use std::env;
use std::fmt::Write as _;
use std::fs;
use std::path::Path;

use parse::uca::allkeys;
use ufst::raw::Builder;

fn main() {
|
|
println!("cargo:rerun-if-changed=data/allkeys.txt");
|
|
println!("cargo:rerun-if-changed=build.rs");
|
|
|
|
let all_keys = {
|
|
let data = std::fs::read_to_string("data/allkeys.txt").unwrap();
|
|
|
|
allkeys::parse(&data)
|
|
};
|
|
|
|
let out_dir = env::var_os("OUT_DIR").unwrap();
|
|
|
|
let mut implicit_weights = String::new();
|
|
|
|
implicit_weights.push_str(&format!(
|
|
"pub const IMPLICIT_WEIGHTS: [(u32, u32, u32); {}] = [\n",
|
|
all_keys.implicit_weights.len(),
|
|
));
|
|
|
|
for implicit_weight in all_keys.implicit_weights {
|
|
implicit_weights.push_str(&format!(
|
|
" ({}, {}, {}),\n",
|
|
implicit_weight.start, implicit_weight.end, implicit_weight.base
|
|
));
|
|
}
|
|
|
|
implicit_weights.push_str("];\n");
|
|
|
|
fs::write(
|
|
&Path::new(&out_dir).join("implicit_weights.rs"),
|
|
implicit_weights,
|
|
)
|
|
.unwrap();
|
|
|
|
let mut entries = all_keys
|
|
.entries
|
|
.into_iter()
|
|
.map(|entry| {
|
|
(
|
|
bytemuck::cast_slice::<u32, u8>(&entry.chars).to_vec(),
|
|
entry.elements,
|
|
)
|
|
})
|
|
.collect::<Vec<_>>();
|
|
|
|
entries.sort_unstable_by(|(a, _), (b, _)| a.cmp(b));
|
|
|
|
let mut builder = Builder::memory();
|
|
let mut overflow = Vec::new();
|
|
|
|
for (chars, mut elements) in entries.into_iter() {
|
|
let value = match elements.len() {
|
|
1 => {
|
|
let element = elements.pop().unwrap();
|
|
|
|
((element.l4 as u64) << 42)
|
|
| ((element.l3 as u64) << 34)
|
|
| ((element.l2 as u64) << 18)
|
|
| ((element.l1 as u64) << 2)
|
|
| (if element.variable { 1 } else { 0 } << 1)
|
|
}
|
|
2.. => {
|
|
let idx = overflow.len();
|
|
let len = elements.len();
|
|
|
|
overflow.extend(elements.into_iter());
|
|
|
|
((idx as u64) << 9) | ((len as u64) << 1) | 1
|
|
}
|
|
_ => panic!("this shouldn't happen!"),
|
|
};
|
|
|
|
builder.insert(chars, value).unwrap();
|
|
}
|
|
|
|
let data = builder.into_fst().into_inner();
|
|
|
|
fs::write(&Path::new(&out_dir).join("table.fst"), data).unwrap();
|
|
|
|
let mut explicit_weights = String::new();
|
|
|
|
explicit_weights.push_str(&format!(
|
|
"pub const EXPLICIT_WEIGHTS: [(u16, u16, u8, u16, bool); {}] = [\n",
|
|
overflow.len(),
|
|
));
|
|
|
|
for element in overflow {
|
|
explicit_weights.push_str(&format!(
|
|
" ({}, {}, {}, {}, {}),\n",
|
|
element.l1, element.l2, element.l3, element.l4, element.variable,
|
|
));
|
|
}
|
|
|
|
explicit_weights.push_str("];\n");
|
|
|
|
fs::write(
|
|
&Path::new(&out_dir).join("explicit_weights.rs"),
|
|
explicit_weights,
|
|
)
|
|
.unwrap();
|
|
}
|