Initial commit

This commit is contained in:
2022-05-19 23:26:00 +02:00
commit 8a8baffba8
53 changed files with 761345 additions and 0 deletions

66
crates/unf/build.rs Normal file
View File

@@ -0,0 +1,66 @@
use std::env;
use std::fs;
use std::path::Path;
use ufst::raw::Fst;
/// Build-script entry point: turns `data/UnicodeData.txt` into `table.fst`.
///
/// The data file is parsed into `(code value, packed entry)` pairs, each code
/// value is converted to its native-endian byte representation to serve as a
/// key, the pairs are sorted by key, and the result of `Fst::from_iter_map`
/// (unwrapped via `into_inner`) is written to `$OUT_DIR/table.fst`.
///
/// # Panics
///
/// Panics if the data file cannot be read, `OUT_DIR` is not set, the table
/// cannot be built, or the output file cannot be written.
fn main() {
    let unicode_data = fs::read_to_string("data/UnicodeData.txt").unwrap();
    let out_dir = env::var_os("OUT_DIR").unwrap();
    let table_path = Path::new(&out_dir).join("table.fst");

    // NOTE(review): native-endian keys make the generated table depend on the
    // endianness of the machine running this script — confirm that the code
    // reading the table encodes its lookups the same way.
    let mut pairs = Vec::new();
    for (code_value, entry) in parse(&unicode_data) {
        pairs.push((code_value.to_ne_bytes(), entry));
    }
    // Sort by key bytes before handing the pairs to the table builder.
    pairs.sort_unstable_by_key(|pair| pair.0);

    let table = Fst::from_iter_map(pairs).unwrap();
    fs::write(&table_path, table.into_inner()).unwrap();

    // Tell Cargo to re-run this script only when its inputs change.
    println!("cargo:rerun-if-changed=data/UnicodeData.txt");
    println!("cargo:rerun-if-changed=build.rs");
}
/// Parses `UnicodeData.txt`-style text into `(code value, packed entry)` pairs.
///
/// Every non-blank line is a `;`-separated record of which three fields are
/// read: field 0 (code value, hexadecimal), field 3 (canonical combining
/// class, decimal) and field 5 (decomposition mapping, space-separated
/// hexadecimal code points). Blank or whitespace-only lines are skipped.
///
/// The returned `u64` entry is packed as follows:
///
/// * bits 0..8   — canonical combining class
/// * bits 8..29  — first code point of the decomposition mapping, if any
/// * bits 29..50 — second code point of the decomposition mapping, if any
///
/// A mapping that starts with `<` (a tagged mapping) is ignored, leaving only
/// the combining class in the entry.
///
/// # Panics
///
/// Panics if a record lacks one of the three fields, if a numeric field does
/// not parse, if a mapping has more than two code points, or if a mapping
/// code point exceeds `0x10FFFF` (it would not fit its 21-bit slot).
fn parse(data: &str) -> Vec<(u32, u64)> {
    /// Width of the combining-class field at the bottom of the entry.
    const CLASS_BITS: usize = 8;
    /// Width of each decomposition code point slot.
    const CODE_POINT_BITS: usize = 21;
    /// Largest value that is allowed in a code point slot.
    const MAX_CODE_POINT: u32 = 0x10_FFFF;
    /// Number of code point slots available in an entry.
    const MAX_MAPPING_LEN: usize = 2;

    let mut entries = Vec::new();
    for line in data.lines().filter(|line| !line.trim().is_empty()) {
        let mut fields = line.split(';');
        let code_point = fields
            .next()
            .map(|s| u32::from_str_radix(s, 16).expect("valid u32"))
            .expect("code value");
        // Skips fields 1 and 2 (consumed, unused) and yields field 3.
        let combining_class = fields
            .nth(2)
            .map(|s| s.parse::<u8>().expect("valid u8"))
            .expect("canonical combining classes");
        // Skips field 4 and yields field 5.
        let decomposition_mapping = fields.nth(1).expect("decomposition mapping");

        let mut entry = u64::from(combining_class);
        if !decomposition_mapping.starts_with('<') {
            let code_points = decomposition_mapping.split(' ').filter(|s| !s.is_empty());
            for (slot, text) in code_points.enumerate() {
                assert!(
                    slot < MAX_MAPPING_LEN,
                    "decomposition mapping has more than {MAX_MAPPING_LEN} code points"
                );
                let mapping = u32::from_str_radix(text, 16).expect("valid u32");
                // A wider value would bleed into the neighbouring slot.
                assert!(
                    mapping <= MAX_CODE_POINT,
                    "decomposition code point out of range"
                );
                entry |= u64::from(mapping) << (CODE_POINT_BITS * slot + CLASS_BITS);
            }
        }
        entries.push((code_point, entry));
    }
    entries
}