use u_fst::raw::Fst; const TABLE: Fst<&'static [u8]> = Fst::new_unchecked(include_bytes!(concat!(env!("OUT_DIR"), "/table.fst"))); #[inline(always)] pub fn lookup(ch: char) -> Entry { Entry::new( TABLE .get((ch as u32).to_ne_bytes()) .map(|output| output.value()) .unwrap_or(0), ) } #[derive(Clone, Copy, PartialEq, Debug)] pub struct Decomposition(u64); impl Iterator for Decomposition { type Item = char; #[inline(always)] fn next(&mut self) -> Option { let d = (self.0 & 0x1FFFFF) as u32; if d > 0 { self.0 >>= 21; Some(unsafe { char::from_u32_unchecked(d) }) } else { None } } } #[derive(Clone, Copy, PartialEq, Debug)] pub struct Entry(u64); impl Entry { pub(crate) fn new(data: u64) -> Self { Self(data) } #[inline(always)] pub fn combining_class(&self) -> u8 { (self.0 & 0xFF) as u8 } #[inline(always)] pub fn decomposition(&self) -> Option { let data = self.0 >> 8; if data > 0 { Some(Decomposition(data)) } else { None } } } #[cfg(test)] mod tests { use proptest::prelude::*; use super::*; fn entry_strategy() -> impl Strategy { ( any::(), (0u8..2), any::().prop_filter("", |c| *c != '\u{0}'), any::().prop_filter("", |c| *c != '\u{0}'), ) .prop_map( |(combining_class, mapping_count, decomposition_first, decomposition_second)| { let mut entry = combining_class as u64; if mapping_count > 0 { entry |= (decomposition_first as u64) << 8; } if mapping_count > 1 { entry |= (decomposition_second as u64) << (21 + 8); } ( entry, ( combining_class, mapping_count, decomposition_first, decomposition_second, ), ) }, ) } proptest! { #[test] fn proptest_entry_serialize_and_deserialize(a in entry_strategy()) { let (data, (combining_class, mapping_count, decomposition_first, decomposition_second)) = a; let b = Entry::new(data); prop_assert_eq!(b.combining_class(), combining_class, "data = {:064b}", data); let c = b.decomposition().map(|i| i.collect::>()); match mapping_count { 0 => prop_assert_eq!(c, None, "data = {:064b}", data), 1 => prop_assert_eq!(c, Some(vec![decomposition_first]), "data = {:064b}", data), 2 => prop_assert_eq!(c, Some(vec![decomposition_first, decomposition_second]), "data = {:064b}", data), _ => unreachable!(), } } } }