//! Parse allkeys.txt //! //! See http://unicode.org/reports/tr10/#File_Format for information about file format. #[derive(Debug, Default)] pub struct AllKeys { pub version: Option, pub implicit_weights: Vec, pub entries: Vec, } #[derive(Debug)] pub struct Version { pub major: u16, pub minor: u16, pub variant: u16, } #[derive(Debug)] pub struct ImplicitWeight { pub start: u32, pub end: u32, pub base: u32, pub comment: Option, } #[derive(Debug)] pub struct Entry { pub chars: Vec, pub elements: Vec, pub comment: Option, } #[derive(Debug, PartialEq)] pub struct Element { pub l1: u16, pub l2: u16, pub l3: u8, pub l4: u16, pub variable: bool, } pub fn parse(input: &str) -> AllKeys { let mut all_keys = AllKeys::default(); for line in input.lines() { let line = line.trim(); // If the line is empty, there is nothing to do wight it if line.is_empty() { continue; } // If a line starts with '#', it is a comment, so skip it if line.starts_with('#') { continue; } if line.starts_with("@version") { let mut iter = line.trim_start_matches("@version").trim().splitn(3, '.'); all_keys.version = Some(Version { major: iter.next().unwrap().parse().unwrap(), minor: iter.next().unwrap().parse().unwrap(), variant: iter.next().unwrap().parse().unwrap(), }); } else if line.starts_with("@implicitweights") { let (range, base) = line .trim_start_matches("@implicitweights") .trim() .split_once(';') .unwrap(); let (start, end) = range.split_once("..").unwrap(); let (base, comment) = base.split_once('#').unwrap(); let comment = comment.trim(); all_keys.implicit_weights.push(ImplicitWeight { start: u32::from_str_radix(start.trim(), 16).unwrap(), end: u32::from_str_radix(end.trim(), 16).unwrap(), base: u32::from_str_radix(base.trim(), 16).unwrap(), comment: if !comment.is_empty() { Some(comment.to_string()) } else { None }, }); } else { let (chars, rest) = line.split_once(';').unwrap(); let chars = chars .trim() .split(' ') .map(|x| u32::from_str_radix(x, 16).unwrap()) .collect::>(); let (elements, comment) = rest.split_once('#').unwrap(); let comment = comment.trim(); let elements = elements .split("][") .map(|coll_element| { let coll_element = coll_element .trim() .trim_start_matches('[') .trim_end_matches(']'); let variable = coll_element.starts_with('*'); let mut iter = coll_element .trim_start_matches(['.', '*']) .split(['.', '*']); Element { l1: iter .next() .and_then(|x| u16::from_str_radix(x, 16).ok()) .expect("valid l1 value"), l2: iter .next() .and_then(|x| u16::from_str_radix(x, 16).ok()) .expect("valid l2 value"), l3: iter .next() .and_then(|x| u8::from_str_radix(x, 16).ok()) .expect("valid l3 value"), l4: iter .next() .map(|x| u16::from_str_radix(x, 16).expect("valid l4 value")) .unwrap_or(0), variable, } }) .collect::>(); all_keys.entries.push(Entry { chars, elements, comment: if !comment.is_empty() { Some(comment.to_string()) } else { None }, }); } } all_keys } #[cfg(test)] mod tests { use super::*; #[test] fn test_allkeys() { let data = std::fs::read_to_string("data/allkeys.txt").unwrap(); let all_keys = parse(&data); similar_asserts::assert_eq!( all_keys .entries .iter() .find(|entry| entry.chars[..] == [0x1abc]) .map(|entry| &entry.elements), Some(&vec![Element { l1: 0, l2: 51, l3: 2, l4: 0, variable: false }]) ); similar_asserts::assert_eq!( all_keys .entries .iter() .find(|entry| entry.chars[..] == [0x1ac1]) .map(|entry| &entry.elements), Some(&vec![Element { l1: 0, l2: 51, l3: 2, l4: 0, variable: false }]) ); } }