Initial commit

This commit is contained in:
2022-05-19 23:26:00 +02:00
commit 8a8baffba8
53 changed files with 761345 additions and 0 deletions

1
crates/parse/src/lib.rs Normal file
View File

@@ -0,0 +1 @@
pub mod uca;

1
crates/parse/src/uca.rs Normal file
View File

@@ -0,0 +1 @@
pub mod allkeys;

View File

@@ -0,0 +1,192 @@
//! Parse allkeys.txt
//!
//! See http://unicode.org/reports/tr10/#File_Format for a description of the file format.
/// Everything collected by [`parse`] from the text of an `allkeys.txt` file.
#[derive(Debug, Default)]
pub struct AllKeys {
/// Version read from the `@version` line; `None` when the input has no such line.
pub version: Option<Version>,
/// One item per `@implicitweights` line, in the order the lines appear.
pub implicit_weights: Vec<ImplicitWeight>,
/// One item per mapping line (any line that is neither blank, a comment,
/// nor one of the two `@` directives above), in the order the lines appear.
pub entries: Vec<Entry>,
}
/// The three dot-separated numbers of an `@version` line.
///
/// The field order (`major`, `minor`, `variant`) is significant: the derived
/// ordering compares versions component by component in that order, so
/// `13.1.0 < 14.0.0`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Version {
    /// First component of the version string.
    pub major: u16,
    /// Second component of the version string.
    pub minor: u16,
    /// Third component of the version string.
    pub variant: u16,
}
/// One `@implicitweights` line: a code point range and the base weight assigned to it.
///
/// All three numbers are written in hexadecimal in the file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ImplicitWeight {
    /// Start of the range (the value before `..`).
    pub start: u32,
    /// End of the range (the value after `..`), as written in the file.
    pub end: u32,
    /// Base weight given after the `;`.
    pub base: u32,
    /// Trailing `#` comment with surrounding whitespace removed; `None` when empty.
    pub comment: Option<String>,
}
/// One mapping line of the file: a sequence of code points and the
/// collation elements assigned to it.
#[derive(Debug)]
pub struct Entry {
/// Code points listed before the `;`, parsed from hexadecimal, in file order.
pub chars: Vec<u32>,
/// Collation elements listed after the `;`, in file order.
pub elements: Vec<Element>,
/// Text following `#` with surrounding whitespace removed; `None` when that text is empty.
pub comment: Option<String>,
}
/// A single collation element, written in the file as `[.l1.l2.l3]` or `[*l1.l2.l3]`
/// (optionally followed by a fourth weight).
///
/// The type is small plain data, so it is `Copy` and can be used as a hash key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Element {
    /// First weight, parsed from hexadecimal.
    pub l1: u16,
    /// Second weight, parsed from hexadecimal.
    pub l2: u16,
    /// Third weight, parsed from hexadecimal.
    pub l3: u8,
    /// Fourth weight; `0` when the element lists only three weights.
    pub l4: u16,
    /// `true` when the element is introduced by `*` instead of `.`.
    pub variable: bool,
}
pub fn parse(input: &str) -> AllKeys {
let mut all_keys = AllKeys::default();
for line in input.lines() {
let line = line.trim();
// If the line is empty, there is nothing to do wight it
if line.is_empty() {
continue;
}
// If a line starts with '#', it is a comment, so skip it
if line.starts_with('#') {
continue;
}
if line.starts_with("@version") {
let mut iter = line.trim_start_matches("@version").trim().splitn(3, '.');
all_keys.version = Some(Version {
major: iter.next().unwrap().parse().unwrap(),
minor: iter.next().unwrap().parse().unwrap(),
variant: iter.next().unwrap().parse().unwrap(),
});
} else if line.starts_with("@implicitweights") {
let (range, base) = line
.trim_start_matches("@implicitweights")
.trim()
.split_once(';')
.unwrap();
let (start, end) = range.split_once("..").unwrap();
let (base, comment) = base.split_once('#').unwrap();
let comment = comment.trim();
all_keys.implicit_weights.push(ImplicitWeight {
start: u32::from_str_radix(start.trim(), 16).unwrap(),
end: u32::from_str_radix(end.trim(), 16).unwrap(),
base: u32::from_str_radix(base.trim(), 16).unwrap(),
comment: if !comment.is_empty() {
Some(comment.to_string())
} else {
None
},
});
} else {
let (chars, rest) = line.split_once(';').unwrap();
let chars = chars
.trim()
.split(' ')
.map(|x| u32::from_str_radix(x, 16).unwrap())
.collect::<Vec<_>>();
let (elements, comment) = rest.split_once('#').unwrap();
let comment = comment.trim();
let elements = elements
.split("][")
.map(|coll_element| {
let coll_element = coll_element
.trim()
.trim_start_matches('[')
.trim_end_matches(']');
let variable = coll_element.starts_with('*');
let mut iter = coll_element
.trim_start_matches(['.', '*'])
.split(['.', '*']);
Element {
l1: iter
.next()
.and_then(|x| u16::from_str_radix(x, 16).ok())
.expect("valid l1 value"),
l2: iter
.next()
.and_then(|x| u16::from_str_radix(x, 16).ok())
.expect("valid l2 value"),
l3: iter
.next()
.and_then(|x| u8::from_str_radix(x, 16).ok())
.expect("valid l3 value"),
l4: iter
.next()
.map(|x| u16::from_str_radix(x, 16).expect("valid l4 value"))
.unwrap_or(0),
variable,
}
})
.collect::<Vec<_>>();
all_keys.entries.push(Entry {
chars,
elements,
comment: if !comment.is_empty() {
Some(comment.to_string())
} else {
None
},
});
}
}
all_keys
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns the collation elements of the entry whose code point sequence
    /// is exactly `[code_point]`, if the parsed data contains one.
    fn elements_of(all_keys: &AllKeys, code_point: u32) -> Option<&Vec<Element>> {
        all_keys
            .entries
            .iter()
            .find(|entry| entry.chars[..] == [code_point])
            .map(|entry| &entry.elements)
    }

    /// Parses the `allkeys.txt` data file and spot-checks two single-code-point
    /// entries that are both expected to map to the same single element.
    #[test]
    fn test_allkeys() {
        let data = std::fs::read_to_string("data/allkeys.txt").unwrap();
        let all_keys = parse(&data);

        let expected = vec![Element {
            l1: 0,
            l2: 51,
            l3: 2,
            l4: 0,
            variable: false,
        }];

        // Checked in the same order as before: U+1ABC first, then U+1AC1.
        for code_point in [0x1abc, 0x1ac1] {
            similar_asserts::assert_eq!(elements_of(&all_keys, code_point), Some(&expected));
        }
    }
}