Rename crates

This commit is contained in:
2022-05-24 20:58:27 +02:00
parent 8a8baffba8
commit 9f44196e6c
51 changed files with 2531 additions and 54 deletions

161
crates/u-sort/src/table.rs Normal file
View File

@@ -0,0 +1,161 @@
use std::fmt::Display;
use u_fst::raw::{Fst, Output};
const TABLE: Fst<&'static [u8]> =
Fst::new_unchecked(include_bytes!(concat!(env!("OUT_DIR"), "/table.fst")));
pub fn lookup(value: &[char]) -> Option<(Vec<Element>, usize)> {
let mut node = TABLE.root();
let mut out = Output::zero();
let mut last_match = None;
'char: for (i, &c) in value.iter().enumerate() {
for b in (c as u32).to_ne_bytes() {
if let Some(trans_index) = node.find_input(b) {
let t = node.transition(trans_index);
node = TABLE.node(t.addr);
out = out.cat(t.out);
if node.is_final() {
last_match = Some((out.cat(node.final_output()).value(), i));
}
} else {
break 'char;
}
}
}
last_match.map(|(data, idx)| {
(
match Value::from_u64(data) {
Value::Entry(element) => vec![element],
Value::Index(idx, len) => {
let start = idx as usize;
let end = start + len as usize;
crate::weights::EXPLICIT_WEIGHTS[start..end]
.iter()
.map(|(l1, l2, l3, l4, variable)| Element {
l1: *l1,
l2: *l2,
l3: *l3,
l4: *l4,
variable: *variable,
})
.collect()
}
},
idx,
)
})
}
#[derive(Clone, Copy, PartialEq, Debug)]
pub struct Element {
pub l1: u16,
pub l2: u16,
pub l3: u8,
pub l4: u16,
pub variable: bool,
}
impl Display for Element {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"{}{:04X}.{:04X}.{:04X}.{:04X}",
if self.variable { "*" } else { "." },
self.l1,
self.l2,
self.l3,
self.l4
)?;
Ok(())
}
}
#[derive(Clone, Copy, PartialEq, Debug)]
enum Value {
Entry(Element),
Index(u32, u8),
}
impl Value {
fn to_u64(self) -> u64 {
match self {
Self::Entry(element) => {
((element.l4 as u64) << 42)
| ((element.l3 as u64) << 34)
| ((element.l2 as u64) << 18)
| ((element.l1 as u64) << 2)
| (if element.variable { 1 } else { 0 } << 1)
}
Self::Index(idx, len) => ((idx as u64) << 9) | ((len as u64) << 1) | 1,
}
}
fn from_u64(data: u64) -> Self {
if (data & 1) == 0 {
let variable = ((data >> 1) & 1) == 1;
let l1 = ((data >> 2) & 0xFFFF) as u16;
let l2 = ((data >> 18) & 0xFFFF) as u16;
let l3 = ((data >> 34) & 0xFF) as u8;
let l4 = ((data >> 42) & 0xFFFF) as u16;
Self::Entry(Element {
l1,
l2,
l3,
l4,
variable,
})
} else {
let len = ((data >> 1) & 0xFF) as u8;
let idx = ((data >> 9) & 0xFFFFFFFF) as u32;
Self::Index(idx, len)
}
}
}
#[cfg(test)]
mod tests {
use proptest::prelude::*;
use super::*;
fn value_strategy() -> impl Strategy<Value = Value> {
prop_oneof![
(any::<u32>(), any::<u8>()).prop_map(|(idx, len)| Value::Index(idx, len)),
(
any::<u16>(),
any::<u16>(),
any::<u8>(),
any::<u16>(),
any::<bool>()
)
.prop_map(|(l1, l2, l3, l4, variable)| Value::Entry(Element {
l1,
l2,
l3,
l4,
variable
})),
]
}
proptest! {
#[test]
fn proptest_serialize_and_deserialize(a in value_strategy()) {
let data = a.to_u64();
let b = Value::from_u64(data);
prop_assert_eq!(a, b);
}
}
}