use std::env; use std::fs; use std::path::Path; use ufst::raw::Fst; fn main() { let data = fs::read_to_string("data/UnicodeData.txt").unwrap(); let out_dir = env::var_os("OUT_DIR").unwrap(); let dest_path = Path::new(&out_dir).join("table.fst"); let mut entries = parse(&data) .into_iter() .map(|(code_value, entry)| (code_value.to_ne_bytes(), entry)) .collect::>(); entries.sort_unstable_by_key(|(k, _)| *k); let data = Fst::from_iter_map(entries).unwrap().into_inner(); fs::write(&dest_path, data).unwrap(); println!("cargo:rerun-if-changed=data/UnicodeData.txt"); println!("cargo:rerun-if-changed=build.rs"); } fn parse(data: &str) -> Vec<(u32, u64)> { let mut entries = Vec::new(); for line in data.lines() { let mut iter = line.split(';'); let code_point = iter .next() .map(|s| u32::from_str_radix(s, 16).expect("valid u32")) .expect("code value"); let combining_class = iter .nth(2) .map(|s| s.parse::().expect("valid u8")) .expect("canonical combining classes"); let mut entry = combining_class as u64; let decomposition_mapping = iter.nth(1).unwrap(); if !decomposition_mapping.starts_with('<') { let mappings = decomposition_mapping .split(' ') .filter(|s| !s.is_empty()) .map(|s| u32::from_str_radix(s, 16).expect("valid u32")) .collect::>(); assert!(mappings.len() <= 2); for (i, mapping) in mappings.into_iter().enumerate() { entry |= (mapping as u64) << ((21 * i) + 8); } } entries.push((code_point, entry)); } entries }