Files
u-code/crates/u-sort/build.rs

120 lines
3.0 KiB
Rust

use std::env;
use std::fs;
use std::path::Path;
use parse::uca::allkeys;
use u_fst::raw::Builder;
#[path = "src/element.rs"]
mod element;
use element::{Element, Range};
fn main() {
println!("cargo:rerun-if-changed=data/allkeys.txt");
println!("cargo:rerun-if-changed=build.rs");
let all_keys = {
let data = std::fs::read_to_string("data/allkeys.txt").unwrap();
allkeys::parse(&data)
};
let out_dir = env::var_os("OUT_DIR").unwrap();
let mut implicit_weights = String::new();
implicit_weights.push_str(&format!(
"pub const IMPLICIT_WEIGHTS: [(u32, u32, u32); {}] = [\n",
all_keys.implicit_weights.len(),
));
for implicit_weight in all_keys.implicit_weights {
implicit_weights.push_str(&format!(
" ({}, {}, {}),\n",
implicit_weight.start, implicit_weight.end, implicit_weight.base
));
}
implicit_weights.push_str("];\n");
fs::write(
&Path::new(&out_dir).join("implicit_weights.rs"),
implicit_weights,
)
.unwrap();
let mut entries = all_keys
.entries
.into_iter()
.map(|entry| {
(
bytemuck::cast_slice::<u32, u8>(&entry.chars).to_vec(),
entry.elements,
)
})
.collect::<Vec<_>>();
entries.sort_unstable_by(|(a, _), (b, _)| a.cmp(b));
let mut builder = Builder::memory();
let mut overflow = Vec::new();
for (chars, elements) in entries.into_iter() {
let value = if elements.len() <= 2 {
let chunks = elements
.iter()
.map(|e| Element::new(e.l1, e.l2, e.l3, e.l4, e.variable))
.collect::<Vec<_>>();
element::pack(&chunks)
} else {
let elements = elements
.chunks(2)
.map(|chunks| {
let chunks = chunks
.iter()
.map(|e| Element::new(e.l1, e.l2, e.l3, e.l4, e.variable))
.collect::<Vec<_>>();
element::pack(&chunks)
})
.collect::<Vec<_>>();
let start = overflow.len() as u32;
let len = elements.len() as u8;
overflow.extend(elements.into_iter());
let data = Range::new(start, len).as_u64();
data << 1
};
builder.insert(chars, value).unwrap();
}
let data = builder.into_fst().into_inner();
fs::write(&Path::new(&out_dir).join("table.fst"), data).unwrap();
let mut explicit_weights = String::new();
explicit_weights.push_str(&format!(
"pub const EXPLICIT_WEIGHTS: [u64; {}] = [\n",
overflow.len(),
));
for elements in overflow {
explicit_weights.push_str(&format!(" {},\n", elements));
}
explicit_weights.push_str("];\n");
fs::write(
&Path::new(&out_dir).join("explicit_weights.rs"),
explicit_weights,
)
.unwrap();
}