Initial commit
This commit is contained in:
7
crates/parse/Cargo.toml
Normal file
7
crates/parse/Cargo.toml
Normal file
@@ -0,0 +1,7 @@
|
||||
[package]
|
||||
name = "parse"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dev-dependencies]
|
||||
similar-asserts = "1.2.0"
|
||||
1
crates/parse/src/lib.rs
Normal file
1
crates/parse/src/lib.rs
Normal file
@@ -0,0 +1 @@
|
||||
pub mod uca;
|
||||
1
crates/parse/src/uca.rs
Normal file
1
crates/parse/src/uca.rs
Normal file
@@ -0,0 +1 @@
|
||||
pub mod allkeys;
|
||||
192
crates/parse/src/uca/allkeys.rs
Normal file
192
crates/parse/src/uca/allkeys.rs
Normal file
@@ -0,0 +1,192 @@
|
||||
//! Parser for the Unicode Collation Algorithm `allkeys.txt` data file.
|
||||
//!
|
||||
//! See <https://www.unicode.org/reports/tr10/#File_Format> for a description of the file format.
|
||||
|
||||
/// Everything extracted from one `allkeys.txt` input.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct AllKeys {
    /// Value of the `@version` directive, or `None` if the input had none.
    pub version: Option<Version>,
    /// One item per `@implicitweights` directive, in input order.
    pub implicit_weights: Vec<ImplicitWeight>,
    /// One item per mapping line, in input order.
    pub entries: Vec<Entry>,
}

/// The three dot-separated numbers of an `@version` directive.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Version {
    pub major: u16,
    pub minor: u16,
    pub variant: u16,
}

/// An `@implicitweights start..end; base` directive.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ImplicitWeight {
    /// Hexadecimal value written before the `..`.
    pub start: u32,
    /// Hexadecimal value written after the `..`.
    pub end: u32,
    /// Hexadecimal value written after the `;`.
    pub base: u32,
    /// Trimmed text after `#`, or `None` when absent or blank.
    pub comment: Option<String>,
}

/// A mapping line: `<code points> ; <collation elements> # <comment>`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Entry {
    /// Hexadecimal code points listed before the `;`.
    pub chars: Vec<u32>,
    /// Collation elements listed after the `;`, in order.
    pub elements: Vec<Element>,
    /// Trimmed text after `#`, or `None` when absent or blank.
    pub comment: Option<String>,
}

/// A single bracketed collation element such as `[.0000.0033.0002]`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Element {
    /// First weight.
    pub l1: u16,
    /// Second weight.
    pub l2: u16,
    /// Third weight.
    pub l3: u8,
    /// Fourth weight; `0` when the element lists only three weights.
    pub l4: u16,
    /// `true` when the element starts with `*` instead of `.`.
    pub variable: bool,
}

/// Parses the text of an `allkeys.txt` file.
///
/// Blank lines and lines starting with `#` are skipped. `@version` and
/// `@implicitweights` directives are recorded, and every other line is
/// treated as a mapping entry. A trailing `# comment` is optional on all
/// data lines.
///
/// # Panics
///
/// Panics on malformed input: an unknown `@` directive, a missing `;` or
/// `..` separator, a mapping line without code points or collation elements,
/// or a field that is not a valid number of the expected width.
pub fn parse(input: &str) -> AllKeys {
    let mut all_keys = AllKeys::default();

    for line in input.lines() {
        let line = line.trim();

        // Blank lines and whole-line comments carry no data.
        if line.is_empty() || line.starts_with('#') {
            continue;
        }

        if let Some(rest) = line.strip_prefix("@version") {
            all_keys.version = Some(parse_version(rest));
        } else if let Some(rest) = line.strip_prefix("@implicitweights") {
            all_keys.implicit_weights.push(parse_implicit_weight(rest));
        } else if line.starts_with('@') {
            panic!("unsupported directive: {:?}", line);
        } else {
            all_keys.entries.push(parse_entry(line));
        }
    }

    all_keys
}

/// Splits `text` at the first `#` into trimmed data and an optional, non-blank comment.
fn split_comment(text: &str) -> (&str, Option<String>) {
    match text.split_once('#') {
        Some((data, comment)) => {
            let comment = comment.trim();
            (
                data.trim(),
                (!comment.is_empty()).then(|| comment.to_string()),
            )
        }
        None => (text.trim(), None),
    }
}

/// Parses a hexadecimal `u32`, naming `what` in the panic message on failure.
fn parse_hex_u32(text: &str, what: &str) -> u32 {
    let text = text.trim();
    u32::from_str_radix(text, 16)
        .unwrap_or_else(|err| panic!("invalid {} {:?}: {}", what, text, err))
}

/// Parses the remainder of an `@version` line (`major.minor.variant`).
fn parse_version(text: &str) -> Version {
    let (numbers, _comment) = split_comment(text);
    let mut parts = numbers.splitn(3, '.');

    let mut component = |name: &str| -> u16 {
        let part = parts
            .next()
            .unwrap_or_else(|| panic!("@version is missing its {} component", name))
            .trim();
        part.parse::<u16>()
            .unwrap_or_else(|err| panic!("invalid @version {} {:?}: {}", name, part, err))
    };

    // Struct fields are evaluated in source order, so the components are
    // consumed as major, minor, variant.
    Version {
        major: component("major"),
        minor: component("minor"),
        variant: component("variant"),
    }
}

/// Parses the remainder of an `@implicitweights` line (`start..end; base`).
fn parse_implicit_weight(text: &str) -> ImplicitWeight {
    let (data, comment) = split_comment(text);

    let (range, base) = data
        .split_once(';')
        .unwrap_or_else(|| panic!("@implicitweights is missing ';': {:?}", text));
    let (start, end) = range
        .split_once("..")
        .unwrap_or_else(|| panic!("@implicitweights range is missing '..': {:?}", range));

    ImplicitWeight {
        start: parse_hex_u32(start, "implicit weight range start"),
        end: parse_hex_u32(end, "implicit weight range end"),
        base: parse_hex_u32(base, "implicit weight base"),
        comment,
    }
}

/// Parses one mapping line: `<code points> ; <collation elements> [# comment]`.
fn parse_entry(line: &str) -> Entry {
    let (data, comment) = split_comment(line);

    let (chars, elements) = data
        .split_once(';')
        .unwrap_or_else(|| panic!("mapping line is missing ';': {:?}", line));

    // `split_whitespace` tolerates any amount of spacing between code points.
    let chars: Vec<u32> = chars
        .split_whitespace()
        .map(|code_point| parse_hex_u32(code_point, "code point"))
        .collect();
    assert!(!chars.is_empty(), "mapping line has no code points: {:?}", line);

    // Every element ends with ']', so splitting there works whether or not
    // whitespace separates consecutive elements.
    let elements: Vec<Element> = elements
        .split(']')
        .map(str::trim)
        .filter(|segment| !segment.is_empty())
        .map(parse_element)
        .collect();
    assert!(
        !elements.is_empty(),
        "mapping line has no collation elements: {:?}",
        line
    );

    Entry {
        chars,
        elements,
        comment,
    }
}

/// Parses one collation element whose closing `]` has already been removed,
/// e.g. `[.0000.0033.0002` or `[*0209.0020.0002`.
fn parse_element(segment: &str) -> Element {
    fn hex_u16(field: Option<&str>) -> Option<u16> {
        field.and_then(|weight| u16::from_str_radix(weight, 16).ok())
    }

    let body = segment.trim().trim_start_matches('[');

    // A leading '*' (instead of '.') marks the element as variable.
    let variable = body.starts_with('*');

    let mut weights = body.trim_start_matches(['.', '*']).split(['.', '*']);

    Element {
        l1: hex_u16(weights.next()).expect("valid l1 value"),
        l2: hex_u16(weights.next()).expect("valid l2 value"),
        l3: weights
            .next()
            .and_then(|weight| u8::from_str_radix(weight, 16).ok())
            .expect("valid l3 value"),
        // The fourth weight is optional; it must still be valid when present.
        l4: weights
            .next()
            .map(|weight| u16::from_str_radix(weight, 16).expect("valid l4 value"))
            .unwrap_or(0),
        variable,
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Small inline input shaped like `allkeys.txt`, so the parser can be
    /// exercised without the external data file.
    const SAMPLE: &str = "\
# sample data
@version 14.0.0
@implicitweights 17000..18AFF; FB00 # Tangut and Tangut Components

0009 ; [*0201.0020.0002] # variable element
1ABC ; [.0000.0033.0002] # single element
0041 030A ; [.1FA2.0020.0008][.0000.0029.0002] # two code points, two elements
";

    /// Element both file-based assertions expect: `[.0000.0033.0002]`.
    const EXPECTED_ELEMENT: Element = Element {
        l1: 0,
        l2: 51,
        l3: 2,
        l4: 0,
        variable: false,
    };

    /// Returns the collation elements of the first entry whose character
    /// sequence is exactly `chars`.
    fn elements_of<'a>(all_keys: &'a AllKeys, chars: &[u32]) -> Option<&'a Vec<Element>> {
        all_keys
            .entries
            .iter()
            .find(|entry| entry.chars[..] == *chars)
            .map(|entry| &entry.elements)
    }

    #[test]
    fn test_allkeys() {
        // Relative path: resolved against the directory the tests are run from.
        let data = std::fs::read_to_string("data/allkeys.txt")
            .expect("data/allkeys.txt should be readable from the test working directory");

        let all_keys = parse(&data);

        similar_asserts::assert_eq!(
            elements_of(&all_keys, &[0x1abc]),
            Some(&vec![EXPECTED_ELEMENT])
        );

        similar_asserts::assert_eq!(
            elements_of(&all_keys, &[0x1ac1]),
            Some(&vec![EXPECTED_ELEMENT])
        );
    }

    #[test]
    fn test_inline_sample() {
        let all_keys = parse(SAMPLE);

        let version = all_keys.version.as_ref().expect("sample has @version");
        assert_eq!(
            (version.major, version.minor, version.variant),
            (14, 0, 0)
        );

        assert_eq!(all_keys.implicit_weights.len(), 1);
        let implicit = &all_keys.implicit_weights[0];
        assert_eq!(
            (implicit.start, implicit.end, implicit.base),
            (0x17000, 0x18AFF, 0xFB00)
        );
        assert_eq!(
            implicit.comment.as_deref(),
            Some("Tangut and Tangut Components")
        );

        assert_eq!(all_keys.entries.len(), 3);

        similar_asserts::assert_eq!(
            elements_of(&all_keys, &[0x0009]),
            Some(&vec![Element {
                l1: 0x0201,
                l2: 0x0020,
                l3: 0x02,
                l4: 0,
                variable: true
            }])
        );

        similar_asserts::assert_eq!(
            elements_of(&all_keys, &[0x1abc]),
            Some(&vec![EXPECTED_ELEMENT])
        );

        similar_asserts::assert_eq!(
            elements_of(&all_keys, &[0x0041, 0x030A]),
            Some(&vec![
                Element {
                    l1: 0x1FA2,
                    l2: 0x0020,
                    l3: 0x08,
                    l4: 0,
                    variable: false
                },
                Element {
                    l1: 0,
                    l2: 0x0029,
                    l3: 0x02,
                    l4: 0,
                    variable: false
                },
            ])
        );

        // A code point that is not in the sample is simply not found.
        similar_asserts::assert_eq!(elements_of(&all_keys, &[0x1ac1]), None);
    }
}
|
||||
Reference in New Issue
Block a user