diff --git a/Cargo.toml b/Cargo.toml
index 9b393a2..2359d30 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,10 @@
 [package]
-name = "bit_set"
+name = "bit-set"
 version = "0.1.0"
 authors = ["logaritmisk "]
 
 [dependencies]
+fnv = "1.0"
+
+[profile.release]
+lto = true
diff --git a/src/lib.rs b/src/lib.rs
index 331680f..92099bd 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,48 +1,176 @@
+//! A sparse bit set whose 64-bit blocks live in an FNV-hashed map.
+
+extern crate fnv;
+
+use std::ops;
+use std::iter::FromIterator;
+
+use fnv::FnvHashMap;
+
+/// Number of bits stored in one `Block`.
+const BITS: usize = 64;
+
+type Block = u64;
+/// Maps a block index to the bits stored in that block.
+type Storage = FnvHashMap<usize, Block>;
+
+/// Splits `x` into `(x / d, x % d)`: the block index and the bit offset inside it.
+#[inline]
+fn block_bit(x: usize, d: usize) -> (usize, usize) {
+    (x / d, x % d)
+}
+
+/// A set of `usize` values; a 64-bit block is created the first time a value lands in it.
+#[derive(Debug)]
 pub struct BitSet {
-    data: Box<[u64]>,
+    blocks: Storage,
+    nbits: usize,
 }
 
 impl BitSet {
-    pub fn with_capacity(max: usize) -> BitSet {
+    /// Creates an empty set without reserving any blocks.
+    pub fn new() -> BitSet {
         BitSet {
-            data: vec![0; max / 64 + if max % 64 == 0 { 0 } else { 1 }].into_boxed_slice()
+            blocks: Storage::default(),
+            nbits: 0,
         }
     }
 
-    pub fn capacity(&self) -> u64 {
-        self.data.len() as u64 * 64
+    /// Creates an empty set with room reserved for `nbits` bits, rounded up to whole blocks.
+    pub fn with_capacity(nbits: usize) -> BitSet {
+        let (mut block, bit) = block_bit(nbits, BITS);
+        block += (bit > 0) as usize;
+
+        BitSet {
+            blocks: Storage::with_capacity_and_hasher(block, Default::default()),
+            nbits: 0,
+        }
     }
 
-    pub fn insert(&mut self, value: usize) {
-        let block = value / 64 as usize;
-        let bit = 2u64.pow((value - (block * 64)) as u32);
-
-        self.data[block] |= bit;
+    /// Returns the number of bits covered by the blocks currently stored in the map.
+    pub fn capacity(&self) -> usize {
+        self.blocks.len() * BITS
     }
 
-    pub fn remove(&mut self, value: usize) {
-        let block = value / 64 as usize;
-        let bit = 2u64.pow((value - (block * 64)) as u32);
+    /// Returns the number of values in the set.
+    pub fn len(&self) -> usize {
+        self.nbits
+    }
 
-        self.data[block] &= !bit;
+    /// Returns `true` if the set holds no values.
+    pub fn is_empty(&self) -> bool {
+        self.nbits == 0
+    }
+
+    /// Adds `value`; returns `true` if it was not already present.
+    pub fn insert(&mut self, value: usize) -> bool {
+        let (block, bit) = block_bit(value, BITS);
+        let block = self.blocks.entry(block).or_insert(0);
+
+        if (*block & (1 << bit)) == 0 {
+            *block |= 1 << bit;
+            self.nbits += 1;
+
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Removes `value`; returns `true` if it was present.
+    pub fn remove(&mut self, value: usize) -> bool {
+        let (block, bit) = block_bit(value, BITS);
+
+        // Look the block up instead of going through `entry`, so removing an
+        // absent value never inserts an empty block and inflates `capacity`.
+        if let Some(block) = self.blocks.get_mut(&block) {
+            if (*block & (1 << bit)) != 0 {
+                *block &= !(1 << bit);
+                self.nbits -= 1;
+
+                return true;
+            }
+        }
+
+        false
     }
 
+    /// Returns `true` if `value` is in the set.
     pub fn contains(&self, value: usize) -> bool {
-        let block = value / 64 as usize;
-        let bit = 2u64.pow((value - (block * 64)) as u32);
+        let (block, bit) = block_bit(value, BITS);
 
-        self.data[block] & bit == bit
+        match self.blocks.get(&block) {
+            None => false,
+            Some(block) => (block & (1 << bit)) != 0,
+        }
     }
 }
 
+impl Default for BitSet {
+    #[inline]
+    fn default() -> BitSet {
+        BitSet::new()
+    }
+}
+
+impl FromIterator<usize> for BitSet {
+    fn from_iter<I: IntoIterator<Item = usize>>(iter: I) -> BitSet {
+        let mut set = BitSet::new();
+        set.extend(iter);
+        set
+    }
+}
+
+impl Extend<usize> for BitSet {
+    #[inline]
+    fn extend<I: IntoIterator<Item = usize>>(&mut self, iter: I) {
+        for i in iter {
+            self.insert(i);
+        }
+    }
+}
+
+impl ops::BitOr for BitSet {
+    type Output = Self;
+
+    /// Returns the union of both sets.
+    fn bitor(self, rhs: Self) -> Self {
+        // Share the by-reference implementation instead of duplicating it.
+        self | &rhs
+    }
+}
+
+impl<'a> ops::BitOr<&'a Self> for BitSet {
+    type Output = Self;
+
+    /// Returns the union of `self` and `rhs`, reusing the storage of `self`.
+    fn bitor(self, rhs: &'a Self) -> Self {
+        // `self` is owned, so its map can be extended in place without a clone.
+        let mut blocks = self.blocks;
+
+        for (key, value) in &rhs.blocks {
+            *blocks.entry(*key).or_insert(0) |= value;
+        }
+
+        let nbits = blocks
+            .values()
+            .map(|block| block.count_ones() as usize)
+            .sum();
+
+        BitSet {
+            blocks: blocks,
+            nbits: nbits,
+        }
+    }
+}
 
 #[cfg(test)]
 mod tests {
     use super::*;
 
     #[test]
-    fn test() {
-        let mut set = BitSet::with_capacity(100);
+    fn insert() {
+        let mut set = BitSet::with_capacity(10);
 
         assert_eq!(set.contains(0), false);
         assert_eq!(set.contains(10), false);
@@ -52,5 +180,55 @@ mod tests {
 
         assert_eq!(set.contains(0), true);
         assert_eq!(set.contains(10), true);
+
+        assert_eq!(set.contains(100), false);
+
+        set.insert(100);
+
+        assert_eq!(set.contains(100), true);
+    }
+
+    #[test]
+    fn from_iter() {
+        let set = [1, 2, 3, 10, 100].iter().cloned().collect::<BitSet>();
+
+        assert_eq!(set.len(), 5);
+
+        assert_eq!(set.contains(1), true);
+        assert_eq!(set.contains(2), true);
+        assert_eq!(set.contains(3), true);
+        assert_eq!(set.contains(10), true);
+        assert_eq!(set.contains(100), true);
+    }
+
+    #[test]
+    fn bitor() {
+        let set_a = [1, 2, 3].iter().cloned().collect::<BitSet>();
+        let set_b = [3, 4, 5].iter().cloned().collect::<BitSet>();
+
+        let set = set_a | set_b;
+
+        assert_eq!(set.len(), 5);
+
+        assert_eq!(set.contains(1), true);
+        assert_eq!(set.contains(2), true);
+        assert_eq!(set.contains(3), true);
+        assert_eq!(set.contains(4), true);
+        assert_eq!(set.contains(5), true);
+    }
+
+    #[test]
+    fn remove() {
+        let mut set = [1, 100].iter().cloned().collect::<BitSet>();
+
+        assert_eq!(set.remove(1), true);
+        assert_eq!(set.remove(1), false);
+        assert_eq!(set.len(), 1);
+
+        let capacity = set.capacity();
+
+        assert_eq!(set.remove(1000), false);
+        assert_eq!(set.capacity(), capacity);
+        assert_eq!(set.contains(100), true);
     }
 }