Better packing for u-sort

This commit is contained in:
2022-05-27 07:29:28 +02:00
parent cb74158d5d
commit e9935c8b54
7 changed files with 367 additions and 241 deletions

View File

@@ -5,6 +5,11 @@ use std::path::Path;
use parse::uca::allkeys;
use u_fst::raw::Builder;
#[path = "src/element.rs"]
mod element;
use element::{Element, Range};
fn main() {
println!("cargo:rerun-if-changed=data/allkeys.txt");
println!("cargo:rerun-if-changed=build.rs");
@@ -55,26 +60,35 @@ fn main() {
let mut builder = Builder::memory();
let mut overflow = Vec::new();
for (chars, mut elements) in entries.into_iter() {
let value = match elements.len() {
1 => {
let element = elements.pop().unwrap();
for (chars, elements) in entries.into_iter() {
let value = if elements.len() <= 2 {
let chunks = elements
.iter()
.map(|e| Element::new(e.l1, e.l2, e.l3, e.l4, e.variable))
.collect::<Vec<_>>();
((element.l4 as u64) << 42)
| ((element.l3 as u64) << 34)
| ((element.l2 as u64) << 18)
| ((element.l1 as u64) << 2)
| (if element.variable { 1 } else { 0 } << 1)
}
2.. => {
let idx = overflow.len();
let len = elements.len();
element::pack(&chunks)
} else {
let elements = elements
.chunks(2)
.map(|chunks| {
let chunks = chunks
.iter()
.map(|e| Element::new(e.l1, e.l2, e.l3, e.l4, e.variable))
.collect::<Vec<_>>();
overflow.extend(elements.into_iter());
element::pack(&chunks)
})
.collect::<Vec<_>>();
((idx as u64) << 9) | ((len as u64) << 1) | 1
}
_ => panic!("this shouldn't happen!"),
let start = overflow.len() as u32;
let len = elements.len() as u8;
overflow.extend(elements.into_iter());
let data = Range::new(start, len).as_u64();
data << 1
};
builder.insert(chars, value).unwrap();
@@ -87,15 +101,12 @@ fn main() {
let mut explicit_weights = String::new();
explicit_weights.push_str(&format!(
"pub const EXPLICIT_WEIGHTS: [(u16, u16, u8, u16, bool); {}] = [\n",
"pub const EXPLICIT_WEIGHTS: [u64; {}] = [\n",
overflow.len(),
));
for element in overflow {
explicit_weights.push_str(&format!(
" ({}, {}, {}, {}, {}),\n",
element.l1, element.l2, element.l3, element.l4, element.variable,
));
for elements in overflow {
explicit_weights.push_str(&format!(" {},\n", elements));
}
explicit_weights.push_str("];\n");