Better packing for u-sort
This commit is contained in:
@@ -5,6 +5,11 @@ use std::path::Path;
|
||||
use parse::uca::allkeys;
|
||||
use u_fst::raw::Builder;
|
||||
|
||||
#[path = "src/element.rs"]
|
||||
mod element;
|
||||
|
||||
use element::{Element, Range};
|
||||
|
||||
fn main() {
|
||||
println!("cargo:rerun-if-changed=data/allkeys.txt");
|
||||
println!("cargo:rerun-if-changed=build.rs");
|
||||
@@ -55,26 +60,35 @@ fn main() {
|
||||
let mut builder = Builder::memory();
|
||||
let mut overflow = Vec::new();
|
||||
|
||||
for (chars, mut elements) in entries.into_iter() {
|
||||
let value = match elements.len() {
|
||||
1 => {
|
||||
let element = elements.pop().unwrap();
|
||||
for (chars, elements) in entries.into_iter() {
|
||||
let value = if elements.len() <= 2 {
|
||||
let chunks = elements
|
||||
.iter()
|
||||
.map(|e| Element::new(e.l1, e.l2, e.l3, e.l4, e.variable))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
((element.l4 as u64) << 42)
|
||||
| ((element.l3 as u64) << 34)
|
||||
| ((element.l2 as u64) << 18)
|
||||
| ((element.l1 as u64) << 2)
|
||||
| (if element.variable { 1 } else { 0 } << 1)
|
||||
}
|
||||
2.. => {
|
||||
let idx = overflow.len();
|
||||
let len = elements.len();
|
||||
element::pack(&chunks)
|
||||
} else {
|
||||
let elements = elements
|
||||
.chunks(2)
|
||||
.map(|chunks| {
|
||||
let chunks = chunks
|
||||
.iter()
|
||||
.map(|e| Element::new(e.l1, e.l2, e.l3, e.l4, e.variable))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
overflow.extend(elements.into_iter());
|
||||
element::pack(&chunks)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
((idx as u64) << 9) | ((len as u64) << 1) | 1
|
||||
}
|
||||
_ => panic!("this shouldn't happen!"),
|
||||
let start = overflow.len() as u32;
|
||||
let len = elements.len() as u8;
|
||||
|
||||
overflow.extend(elements.into_iter());
|
||||
|
||||
let data = Range::new(start, len).as_u64();
|
||||
|
||||
data << 1
|
||||
};
|
||||
|
||||
builder.insert(chars, value).unwrap();
|
||||
@@ -87,15 +101,12 @@ fn main() {
|
||||
let mut explicit_weights = String::new();
|
||||
|
||||
explicit_weights.push_str(&format!(
|
||||
"pub const EXPLICIT_WEIGHTS: [(u16, u16, u8, u16, bool); {}] = [\n",
|
||||
"pub const EXPLICIT_WEIGHTS: [u64; {}] = [\n",
|
||||
overflow.len(),
|
||||
));
|
||||
|
||||
for element in overflow {
|
||||
explicit_weights.push_str(&format!(
|
||||
" ({}, {}, {}, {}, {}),\n",
|
||||
element.l1, element.l2, element.l3, element.l4, element.variable,
|
||||
));
|
||||
for elements in overflow {
|
||||
explicit_weights.push_str(&format!(" {},\n", elements));
|
||||
}
|
||||
|
||||
explicit_weights.push_str("];\n");
|
||||
|
||||
Reference in New Issue
Block a user