Initial commit
This commit is contained in:
66
crates/unf/build.rs
Normal file
66
crates/unf/build.rs
Normal file
@@ -0,0 +1,66 @@
|
||||
use std::env;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
use ufst::raw::Fst;
|
||||
|
||||
fn main() {
|
||||
let data = fs::read_to_string("data/UnicodeData.txt").unwrap();
|
||||
|
||||
let out_dir = env::var_os("OUT_DIR").unwrap();
|
||||
let dest_path = Path::new(&out_dir).join("table.fst");
|
||||
|
||||
let mut entries = parse(&data)
|
||||
.into_iter()
|
||||
.map(|(code_value, entry)| (code_value.to_ne_bytes(), entry))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
entries.sort_unstable_by_key(|(k, _)| *k);
|
||||
|
||||
let data = Fst::from_iter_map(entries).unwrap().into_inner();
|
||||
|
||||
fs::write(&dest_path, data).unwrap();
|
||||
|
||||
println!("cargo:rerun-if-changed=data/UnicodeData.txt");
|
||||
println!("cargo:rerun-if-changed=build.rs");
|
||||
}
|
||||
|
||||
/// Parses the text of `UnicodeData.txt` into `(code point, packed entry)` pairs.
///
/// Every non-blank line is split on `;`. Field 0 is the code point (hex),
/// field 3 the canonical combining class (decimal) and field 5 the
/// decomposition mapping (space-separated hex code points).
///
/// The returned `u64` entry is packed as follows:
///
/// * bits `0..8`   - canonical combining class
/// * bits `8..29`  - first code point of the decomposition mapping, if any
/// * bits `29..50` - second code point of the decomposition mapping, if any
///
/// Mappings that start with `<` (a tag such as `<noBreak>`) are not stored;
/// such entries carry only the combining class. Entries are returned in the
/// order the lines appear in `data`. Blank lines are ignored.
///
/// # Panics
///
/// Panics if a non-blank line is missing one of the fields above, if a field
/// does not parse as a number, if an untagged mapping has more than two code
/// points, or if a mapped code point exceeds U+10FFFF.
fn parse(data: &str) -> Vec<(u32, u64)> {
    /// Width of the combining-class slot at the bottom of the entry.
    const CLASS_BITS: usize = 8;
    /// Width of each decomposition code point slot (U+10FFFF fits in 21 bits).
    const CODE_POINT_BITS: usize = 21;
    /// Largest valid Unicode code point.
    const MAX_CODE_POINT: u32 = 0x10_FFFF;

    let mut entries = Vec::new();

    for line in data.lines() {
        // Tolerate blank lines (e.g. a stray trailing newline) instead of
        // failing with an unhelpful hex-parse panic.
        if line.trim().is_empty() {
            continue;
        }

        let mut fields = line.split(';');

        // Field 0: the code point itself.
        let code_point = fields
            .next()
            .map(|s| u32::from_str_radix(s, 16).expect("valid u32"))
            .expect("code value");

        // Field 3: skip name (1) and general category (2).
        let combining_class = fields
            .nth(2)
            .map(|s| s.parse::<u8>().expect("valid u8"))
            .expect("canonical combining classes");

        let mut entry = u64::from(combining_class);

        // Field 5: skip field 4, then take the decomposition mapping.
        let decomposition_mapping = fields.nth(1).expect("decomposition mapping");

        if !decomposition_mapping.starts_with('<') {
            let mappings = decomposition_mapping
                .split(' ')
                .filter(|s| !s.is_empty())
                .map(|s| u32::from_str_radix(s, 16).expect("valid u32"))
                .collect::<Vec<_>>();

            // Only two 21-bit slots are available in the packed entry.
            assert!(mappings.len() <= 2);

            for (i, mapping) in mappings.into_iter().enumerate() {
                // A larger value would spill into the neighbouring slot and
                // silently corrupt the entry.
                assert!(
                    mapping <= MAX_CODE_POINT,
                    "decomposition code point {:#X} out of range",
                    mapping
                );
                entry |= u64::from(mapping) << (CODE_POINT_BITS * i + CLASS_BITS);
            }
        }

        entries.push((code_point, entry));
    }

    entries
}
|
||||
Reference in New Issue
Block a user