Better packing for u-sort
This commit is contained in:
@@ -1,11 +1,12 @@
|
||||
use std::fmt::Display;
|
||||
|
||||
use u_fst::raw::{Fst, Output};
|
||||
|
||||
use crate::element::{Elements, Range};
|
||||
use crate::weights::EXPLICIT_WEIGHTS;
|
||||
|
||||
const TABLE: Fst<&'static [u8]> =
|
||||
Fst::new_unchecked(include_bytes!(concat!(env!("OUT_DIR"), "/table.fst")));
|
||||
|
||||
pub fn lookup(value: &[char]) -> Option<(Vec<Element>, usize)> {
|
||||
pub(crate) fn lookup(value: &[char]) -> Option<(Elements, usize)> {
|
||||
let mut node = TABLE.root();
|
||||
let mut out = Output::zero();
|
||||
|
||||
@@ -30,132 +31,19 @@ pub fn lookup(value: &[char]) -> Option<(Vec<Element>, usize)> {
|
||||
|
||||
last_match.map(|(data, idx)| {
|
||||
(
|
||||
match Value::from_u64(data) {
|
||||
Value::Entry(element) => vec![element],
|
||||
Value::Index(idx, len) => {
|
||||
let start = idx as usize;
|
||||
let end = start + len as usize;
|
||||
|
||||
crate::weights::EXPLICIT_WEIGHTS[start..end]
|
||||
.iter()
|
||||
.map(|(l1, l2, l3, l4, variable)| Element {
|
||||
l1: *l1,
|
||||
l2: *l2,
|
||||
l3: *l3,
|
||||
l4: *l4,
|
||||
variable: *variable,
|
||||
})
|
||||
.collect()
|
||||
if data & 0x1 == 1 {
|
||||
Elements {
|
||||
buf: &[],
|
||||
cur: data,
|
||||
}
|
||||
} else {
|
||||
let r = Range(data >> 1);
|
||||
|
||||
let (first, buf) = EXPLICIT_WEIGHTS[r.start()..r.end()].split_at(1);
|
||||
|
||||
Elements { buf, cur: first[0] }
|
||||
},
|
||||
idx,
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
/// One weight record made of four levels (`l1`..`l4`) and a `variable` flag.
///
/// NOTE(review): presumably a Unicode collation element as used by the
/// collation weight tables — confirm against the crate documentation.
#[derive(Debug, PartialEq, Clone, Copy)]
pub struct Element {
    /// First-level weight (16 bits).
    pub l1: u16,
    /// Second-level weight (16 bits).
    pub l2: u16,
    /// Third-level weight; the only level stored in 8 bits.
    pub l3: u8,
    /// Fourth-level weight (16 bits).
    pub l4: u16,
    /// Flag rendered as `*` (set) or `.` (unset) by the `Display` impl.
    pub variable: bool,
}
|
||||
|
||||
impl Display for Element {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"{}{:04X}.{:04X}.{:04X}.{:04X}",
|
||||
if self.variable { "*" } else { "." },
|
||||
self.l1,
|
||||
self.l2,
|
||||
self.l3,
|
||||
self.l4
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, PartialEq, Debug)]
|
||||
enum Value {
|
||||
Entry(Element),
|
||||
Index(u32, u8),
|
||||
}
|
||||
|
||||
impl Value {
|
||||
fn to_u64(self) -> u64 {
|
||||
match self {
|
||||
Self::Entry(element) => {
|
||||
((element.l4 as u64) << 42)
|
||||
| ((element.l3 as u64) << 34)
|
||||
| ((element.l2 as u64) << 18)
|
||||
| ((element.l1 as u64) << 2)
|
||||
| (if element.variable { 1 } else { 0 } << 1)
|
||||
}
|
||||
Self::Index(idx, len) => ((idx as u64) << 9) | ((len as u64) << 1) | 1,
|
||||
}
|
||||
}
|
||||
|
||||
fn from_u64(data: u64) -> Self {
|
||||
if (data & 1) == 0 {
|
||||
let variable = ((data >> 1) & 1) == 1;
|
||||
|
||||
let l1 = ((data >> 2) & 0xFFFF) as u16;
|
||||
let l2 = ((data >> 18) & 0xFFFF) as u16;
|
||||
let l3 = ((data >> 34) & 0xFF) as u8;
|
||||
let l4 = ((data >> 42) & 0xFFFF) as u16;
|
||||
|
||||
Self::Entry(Element {
|
||||
l1,
|
||||
l2,
|
||||
l3,
|
||||
l4,
|
||||
variable,
|
||||
})
|
||||
} else {
|
||||
let len = ((data >> 1) & 0xFF) as u8;
|
||||
let idx = ((data >> 9) & 0xFFFFFFFF) as u32;
|
||||
|
||||
Self::Index(idx, len)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use proptest::prelude::*;

    use super::*;

    /// Generates arbitrary `Value`s covering both variants over the full
    /// range of every field.
    fn value_strategy() -> impl Strategy<Value = Value> {
        let index = (any::<u32>(), any::<u8>()).prop_map(|(idx, len)| Value::Index(idx, len));

        let entry = (
            any::<u16>(),
            any::<u16>(),
            any::<u8>(),
            any::<u16>(),
            any::<bool>(),
        )
            .prop_map(|(l1, l2, l3, l4, variable)| {
                Value::Entry(Element {
                    l1,
                    l2,
                    l3,
                    l4,
                    variable,
                })
            });

        prop_oneof![index, entry]
    }

    proptest! {
        /// Packing a value and unpacking the result must give back the input.
        #[test]
        fn proptest_serialize_and_deserialize(original in value_strategy()) {
            let decoded = Value::from_u64(original.to_u64());

            prop_assert_eq!(original, decoded);
        }
    }
}
|
||||
|
||||
Reference in New Issue
Block a user