193 lines
5.4 KiB
Rust
193 lines
5.4 KiB
Rust
//! Parser for `allkeys.txt`, the Unicode collation element table data file.
//!
//! See <https://www.unicode.org/reports/tr10/#File_Format> for a description of the file format.
|
|
|
|
#[derive(Debug, Default)]
|
|
pub struct AllKeys {
|
|
pub version: Option<Version>,
|
|
pub implicit_weights: Vec<ImplicitWeight>,
|
|
pub entries: Vec<Entry>,
|
|
}
|
|
|
|
/// Version declared by the `@version major.minor.variant` line.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Version {
    /// First dot-separated component of the version string.
    pub major: u16,
    /// Second dot-separated component of the version string.
    pub minor: u16,
    /// Third dot-separated component of the version string.
    pub variant: u16,
}
|
|
|
|
/// One `@implicitweights start..end; base # comment` line.
///
/// All three numbers are written in hexadecimal in the input.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ImplicitWeight {
    /// Value written before the `..` of the range.
    pub start: u32,
    /// Value written after the `..` of the range.
    pub end: u32,
    /// Value written after the `;`.
    pub base: u32,
    /// Trimmed text of the trailing `#` comment; `None` if it is empty.
    pub comment: Option<String>,
}
|
|
|
|
#[derive(Debug)]
|
|
pub struct Entry {
|
|
pub chars: Vec<u32>,
|
|
pub elements: Vec<Element>,
|
|
pub comment: Option<String>,
|
|
}
|
|
|
|
/// One collation element, written in the input as `[.l1.l2.l3]` or
/// `[*l1.l2.l3]` with every weight in hexadecimal.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Element {
    /// Level-1 (primary) weight.
    pub l1: u16,
    /// Level-2 (secondary) weight.
    pub l2: u16,
    /// Level-3 (tertiary) weight.
    pub l3: u8,
    /// Optional fourth weight; `0` when the element lists only three weights.
    pub l4: u16,
    /// `true` when the element is written with a leading `*` instead of `.`.
    pub variable: bool,
}
|
|
|
|
pub fn parse(input: &str) -> AllKeys {
|
|
let mut all_keys = AllKeys::default();
|
|
|
|
for line in input.lines() {
|
|
let line = line.trim();
|
|
|
|
// If the line is empty, there is nothing to do wight it
|
|
if line.is_empty() {
|
|
continue;
|
|
}
|
|
|
|
// If a line starts with '#', it is a comment, so skip it
|
|
if line.starts_with('#') {
|
|
continue;
|
|
}
|
|
|
|
if line.starts_with("@version") {
|
|
let mut iter = line.trim_start_matches("@version").trim().splitn(3, '.');
|
|
|
|
all_keys.version = Some(Version {
|
|
major: iter.next().unwrap().parse().unwrap(),
|
|
minor: iter.next().unwrap().parse().unwrap(),
|
|
variant: iter.next().unwrap().parse().unwrap(),
|
|
});
|
|
} else if line.starts_with("@implicitweights") {
|
|
let (range, base) = line
|
|
.trim_start_matches("@implicitweights")
|
|
.trim()
|
|
.split_once(';')
|
|
.unwrap();
|
|
|
|
let (start, end) = range.split_once("..").unwrap();
|
|
let (base, comment) = base.split_once('#').unwrap();
|
|
|
|
let comment = comment.trim();
|
|
|
|
all_keys.implicit_weights.push(ImplicitWeight {
|
|
start: u32::from_str_radix(start.trim(), 16).unwrap(),
|
|
end: u32::from_str_radix(end.trim(), 16).unwrap(),
|
|
base: u32::from_str_radix(base.trim(), 16).unwrap(),
|
|
comment: if !comment.is_empty() {
|
|
Some(comment.to_string())
|
|
} else {
|
|
None
|
|
},
|
|
});
|
|
} else {
|
|
let (chars, rest) = line.split_once(';').unwrap();
|
|
|
|
let chars = chars
|
|
.trim()
|
|
.split(' ')
|
|
.map(|x| u32::from_str_radix(x, 16).unwrap())
|
|
.collect::<Vec<_>>();
|
|
|
|
let (elements, comment) = rest.split_once('#').unwrap();
|
|
let comment = comment.trim();
|
|
|
|
let elements = elements
|
|
.split("][")
|
|
.map(|coll_element| {
|
|
let coll_element = coll_element
|
|
.trim()
|
|
.trim_start_matches('[')
|
|
.trim_end_matches(']');
|
|
|
|
let variable = coll_element.starts_with('*');
|
|
|
|
let mut iter = coll_element
|
|
.trim_start_matches(['.', '*'])
|
|
.split(['.', '*']);
|
|
|
|
Element {
|
|
l1: iter
|
|
.next()
|
|
.and_then(|x| u16::from_str_radix(x, 16).ok())
|
|
.expect("valid l1 value"),
|
|
l2: iter
|
|
.next()
|
|
.and_then(|x| u16::from_str_radix(x, 16).ok())
|
|
.expect("valid l2 value"),
|
|
l3: iter
|
|
.next()
|
|
.and_then(|x| u8::from_str_radix(x, 16).ok())
|
|
.expect("valid l3 value"),
|
|
l4: iter
|
|
.next()
|
|
.map(|x| u16::from_str_radix(x, 16).expect("valid l4 value"))
|
|
.unwrap_or(0),
|
|
variable,
|
|
}
|
|
})
|
|
.collect::<Vec<_>>();
|
|
|
|
all_keys.entries.push(Entry {
|
|
chars,
|
|
elements,
|
|
comment: if !comment.is_empty() {
|
|
Some(comment.to_string())
|
|
} else {
|
|
None
|
|
},
|
|
});
|
|
}
|
|
}
|
|
|
|
all_keys
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns the collation elements of the first entry whose code point
    /// sequence is exactly `chars`, or `None` if there is no such entry.
    fn elements_of<'a>(all_keys: &'a AllKeys, chars: &[u32]) -> Option<&'a Vec<Element>> {
        all_keys
            .entries
            .iter()
            .find(|entry| entry.chars.as_slice() == chars)
            .map(|entry| &entry.elements)
    }

    /// Parses the `data/allkeys.txt` file and spot-checks two single code
    /// point entries that are expected to share the same collation element.
    #[test]
    fn test_allkeys() {
        // NOTE(review): the path is relative to the test's working directory
        // (presumably the crate root) — confirm.
        let data = std::fs::read_to_string("data/allkeys.txt").unwrap();

        let all_keys = parse(&data);

        let expected = vec![Element {
            l1: 0,
            l2: 51,
            l3: 2,
            l4: 0,
            variable: false,
        }];

        similar_asserts::assert_eq!(elements_of(&all_keys, &[0x1abc]), Some(&expected));
        similar_asserts::assert_eq!(elements_of(&all_keys, &[0x1ac1]), Some(&expected));
    }
}
|