From 54d93d62499645eeb4d3056dde636af4c72ea7e2 Mon Sep 17 00:00:00 2001 From: logaritmisk Date: Sat, 6 Jan 2018 16:45:25 +0100 Subject: [PATCH] Implement more methods and added tests from std::collections::HashSet. Fixed len for serde implementation. --- Cargo.toml | 2 +- src/lib.rs | 593 ++++++++++++++++++++++++++++++++--------------------- 2 files changed, 355 insertions(+), 240 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ba7b59c..658ab28 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,9 +4,9 @@ version = "0.1.0" authors = ["logaritmisk "] [dependencies] -unreachable = "1.0" serde = "1.0" serde_test = "1.0" +unreachable = "1.0" [profile.release] lto = true diff --git a/src/lib.rs b/src/lib.rs index 0b6cb8e..1899977 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,8 +8,9 @@ use std::iter::{FromIterator, IntoIterator, Iterator}; use std::default::Default; use std::collections::HashMap; -use serde::ser::{self, Serialize}; -use serde::de::{self, Deserialize, SeqAccess, Visitor}; +use serde::{Serialize, Serializer, Deserialize, Deserializer}; +use serde::ser::SerializeSeq; +use serde::de::{SeqAccess, Visitor}; use serde_test::{assert_tokens, Token}; mod hasher; @@ -24,11 +25,11 @@ type Storage = HashMap; pub type BitHashMap = HashMap; #[inline] -fn block_bit(x: u64, d: u64) -> (u64, u64) { +fn block_bit(x: &u64, d: &u64) -> (u64, u64) { (x / d, x % d) } -#[derive(PartialEq, Debug)] +#[derive(PartialEq)] pub struct BitSet { blocks: Storage, nbits: usize, @@ -46,22 +47,38 @@ impl BitSet { #[inline] pub fn with_capacity(capacity: usize) -> BitSet { BitSet { - blocks: Storage::with_capacity_and_hasher(capacity, Default::default()), + blocks: Storage::with_capacity_and_hasher(capacity / BITS as usize, Default::default()), nbits: 0, } } #[inline] pub fn capacity(&self) -> usize { - self.blocks.len() * BITS as usize + self.blocks.capacity() * BITS as usize + } + + pub fn reserve(&mut self, additional: usize) { + self.blocks.reserve(additional / BITS as usize) + } + + pub fn shrink_to_fit(&mut self) { + self.blocks.retain(|_, block| *block != 0); + self.blocks.shrink_to_fit() + } + + pub fn iter(&self) -> Iter { + Iter { + iter: self.blocks.iter(), + block: 0, + bits: 0, + bit: BITS, + } } - #[inline] pub fn len(&self) -> usize { self.nbits } - #[inline] pub fn is_empty(&self) -> bool { self.nbits == 0 } @@ -71,43 +88,8 @@ impl BitSet { self.nbits = 0; } - #[inline] - pub fn insert(&mut self, value: u64) -> bool { - let (block, bit) = block_bit(value, BITS); - let block = self.blocks.entry(block).or_insert(0); - - let n = 1 << bit; - - if (*block & n) == 0 { - *block |= n; - self.nbits += 1; - - true - } else { - false - } - } - - #[inline] - pub fn remove(&mut self, value: u64) -> bool { - let (block, bit) = block_bit(value, BITS); - let block = self.blocks.entry(block).or_insert(0); - - let n = 1 << bit; - - if (*block & n) != 0 { - *block &= !n; - self.nbits -= 1; - - true - } else { - false - } - } - - #[inline] - pub fn contains(&self, value: u64) -> bool { - let (block, bit) = block_bit(value, BITS); + pub fn contains(&self, value: &u64) -> bool { + let (block, bit) = block_bit(value, &BITS); match self.blocks.get(&block) { Some(block) => (block & (1 << bit)) != 0, @@ -115,18 +97,6 @@ impl BitSet { } } - #[inline] - pub fn extend_from_bitset(&mut self, other: &Self) { - for (key, value) in &other.blocks { - *self.blocks.entry(*key).or_insert(0) |= value; - } - - self.nbits = self.blocks - .values() - .map(|block| block.count_ones() as usize) - .sum(); - } - pub fn is_subset(&self, other: &BitSet) -> bool { if self.len() > other.len() { false @@ -145,117 +115,54 @@ impl BitSet { other.is_subset(self) } - pub fn iter(&self) -> Iter { - Iter { - iter: self.blocks.iter(), - block: 0, - bits: 0, - bit: BITS, + pub fn insert(&mut self, value: u64) -> bool { + let (block, bit) = block_bit(&value, &BITS); + let block = self.blocks.entry(block).or_insert(0); + + let n = 1 << bit; + + if (*block & n) == 0 { + *block |= n; + self.nbits += 1; + + true + } else { + false } } -} -pub struct Iter<'a> { - iter: std::collections::hash_map::Iter<'a, u64, u64>, - block: u64, - bits: u64, - bit: u64, -} + pub fn remove(&mut self, value: &u64) -> bool { + let (block, bit) = block_bit(value, &BITS); + let block = self.blocks.entry(block).or_insert(0); -impl<'a> Iterator for Iter<'a> { - type Item = u64; + let n = 1 << bit; + + if (*block & n) != 0 { + *block &= !n; + self.nbits -= 1; + + true + } else { + false + } + } #[inline] - fn next(&mut self) -> Option { - loop { - if self.bits == 0 || self.bit == BITS { - match self.iter.next() { - Some((block, bits)) => { - self.block = *block; - self.bits = *bits; - self.bit = 0; - } - None => return None, - } - } - - for i in self.bit..BITS { - if self.bits & (1 << i) != 0 { - self.bit = i + 1; - - return Some((self.block * BITS) + i); - } - } - - self.bit = BITS; + pub fn extend_from_bitset(&mut self, other: &Self) { + for (key, value) in &other.blocks { + *self.blocks.entry(*key).or_insert(0) |= value; } + + self.nbits = self.blocks + .values() + .map(|block| block.count_ones() as usize) + .sum(); } } -pub struct IntoIter { - iter: std::collections::hash_map::IntoIter, - block: u64, - bits: u64, - bit: u64, -} - -impl Iterator for IntoIter { - type Item = u64; - - #[inline] - fn next(&mut self) -> Option { - loop { - if self.bits == 0 || self.bit == BITS { - match self.iter.next() { - Some((block, bits)) => { - self.block = block; - self.bits = bits; - self.bit = 0; - } - None => return None, - } - } - - for i in self.bit..BITS { - if self.bits & (1 << i) != 0 { - self.bit = i + 1; - - return Some((self.block * BITS) + i); - } - } - - self.bit = BITS; - } - } -} - -impl<'a> IntoIterator for &'a BitSet { - type Item = u64; - type IntoIter = Iter<'a>; - - fn into_iter(self) -> Iter<'a> { - self.iter() - } -} - -impl IntoIterator for BitSet { - type Item = u64; - type IntoIter = IntoIter; - - fn into_iter(self) -> IntoIter { - IntoIter { - iter: self.blocks.into_iter(), - block: 0, - bits: 0, - bit: BITS, - } - } -} - -impl Default for BitSet { - #[inline] - fn default() -> BitSet { - BitSet::new() +impl fmt::Debug for BitSet{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.debug_set().entries(self.iter()).finish() } } @@ -286,6 +193,13 @@ impl<'a> Extend<&'a u64> for BitSet { } } +impl Default for BitSet { + #[inline] + fn default() -> BitSet { + BitSet::new() + } +} + impl ops::BitOr for BitSet { type Output = Self; @@ -322,20 +236,121 @@ impl<'a> ops::BitOr<&'a Self> for BitSet { } } +pub struct Iter<'a> { + iter: std::collections::hash_map::Iter<'a, u64, u64>, + block: u64, + bits: u64, + bit: u64, +} + +pub struct IntoIter { + iter: std::collections::hash_map::IntoIter, + block: u64, + bits: u64, + bit: u64, +} + +impl<'a> IntoIterator for &'a BitSet { + type Item = u64; + type IntoIter = Iter<'a>; + + fn into_iter(self) -> Iter<'a> { + self.iter() + } +} + +impl IntoIterator for BitSet { + type Item = u64; + type IntoIter = IntoIter; + + fn into_iter(self) -> IntoIter { + IntoIter { + iter: self.blocks.into_iter(), + block: 0, + bits: 0, + bit: BITS, + } + } +} + +impl<'a> Iterator for Iter<'a> { + type Item = u64; + + #[inline] + fn next(&mut self) -> Option { + loop { + if self.bits == 0 || self.bit == BITS { + match self.iter.next() { + Some((block, bits)) => { + self.block = *block; + self.bits = *bits; + self.bit = 0; + } + None => return None, + } + } + + for i in self.bit..BITS { + if self.bits & (1 << i) != 0 { + self.bit = i + 1; + + return Some((self.block * BITS) + i); + } + } + + self.bit = BITS; + } + } +} + +impl Iterator for IntoIter { + type Item = u64; + + #[inline] + fn next(&mut self) -> Option { + loop { + if self.bits == 0 || self.bit == BITS { + match self.iter.next() { + Some((block, bits)) => { + self.block = block; + self.bits = bits; + self.bit = 0; + } + None => return None, + } + } + + for i in self.bit..BITS { + if self.bits & (1 << i) != 0 { + self.bit = i + 1; + + return Some((self.block * BITS) + i); + } + } + + self.bit = BITS; + } + } +} + impl Serialize for BitSet { #[inline] fn serialize(&self, serializer: S) -> Result where - S: ser::Serializer, + S: Serializer, { - serializer.collect_seq(self) + let mut seq = serializer.serialize_seq(Some(self.len()))?; + for element in self { + seq.serialize_element(&element)?; + } + seq.end() } } impl<'de> Deserialize<'de> for BitSet { fn deserialize(deserializer: D) -> Result where - D: de::Deserializer<'de>, + D: Deserializer<'de>, { struct SeqVisitor; @@ -368,7 +383,7 @@ impl<'de> Deserialize<'de> for BitSet { fn deserialize_in_place(deserializer: D, place: &mut Self) -> Result<(), D::Error> where - D: de::Deserializer<'de>, + D: Deserializer<'de>, { struct SeqInPlaceVisitor<'a>(&'a mut BitSet); @@ -403,40 +418,183 @@ mod tests { use super::*; #[test] - fn insert() { + fn test_zero_capacities() { + let s = BitSet::new(); + assert_eq!(s.capacity(), 0); + + let s = BitSet::default(); + assert_eq!(s.capacity(), 0); + + let s = BitSet::with_capacity(0); + assert_eq!(s.capacity(), 0); + + let mut s = BitSet::new(); + s.insert(1); + s.insert(2); + s.remove(&1); + s.remove(&2); + s.shrink_to_fit(); + assert_eq!(s.capacity(), 0); + + let mut s = BitSet::new(); + s.reserve(0); + assert_eq!(s.capacity(), 0); + } + + #[test] + fn test_subset_and_superset() { + let mut a = BitSet::new(); + assert!(a.insert(0)); + assert!(a.insert(5)); + assert!(a.insert(11)); + assert!(a.insert(7)); + + let mut b = BitSet::new(); + assert!(b.insert(0)); + assert!(b.insert(7)); + assert!(b.insert(19)); + assert!(b.insert(250)); + assert!(b.insert(11)); + assert!(b.insert(200)); + + assert!(!a.is_subset(&b)); + assert!(!a.is_superset(&b)); + assert!(!b.is_subset(&a)); + assert!(!b.is_superset(&a)); + + assert!(b.insert(5)); + + assert!(a.is_subset(&b)); + assert!(!a.is_superset(&b)); + assert!(!b.is_subset(&a)); + assert!(b.is_superset(&a)); + } + + #[test] + fn test_iterate() { + let mut a = BitSet::new(); + for i in 0..32 { + assert!(a.insert(i)); + } + let mut observed: u32 = 0; + for k in &a { + observed |= 1 << k; + } + assert_eq!(observed, 0xFFFF_FFFF); + } + + #[test] + fn test_from_iter() { + let xs = [1, 2, 3, 4, 5, 6, 7, 8, 9]; + + let set: BitSet = xs.iter().cloned().collect(); + + for x in &xs { + assert!(set.contains(x)); + } + } + + #[test] + fn test_move_iter() { + let hs = { + let mut hs = BitSet::new(); + + hs.insert(1); + hs.insert(2); + + hs + }; + + let v = hs.into_iter().collect::>(); + assert_eq!(v, [1, 2]); + } + + #[test] + fn test_eq() { + let mut s1 = BitSet::new(); + + s1.insert(1); + s1.insert(2); + s1.insert(3); + + let mut s2 = BitSet::new(); + + s2.insert(1); + s2.insert(2); + + assert!(s1 != s2); + + s2.insert(3); + + assert_eq!(s1, s2); + } + + #[test] + fn test_show() { + let mut set = BitSet::new(); + let empty = BitSet::new(); + + set.insert(1); + set.insert(2); + + let set_str = format!("{:?}", set); + + assert_eq!(set_str, "{1, 2}"); + assert_eq!(format!("{:?}", empty), "{}"); + } + + #[test] + fn test_extend_ref() { + let mut a = BitSet::new(); + a.insert(1); + + a.extend(&[2, 3, 4]); + + assert_eq!(a.len(), 4); + assert!(a.contains(&1)); + assert!(a.contains(&2)); + assert!(a.contains(&3)); + assert!(a.contains(&4)); + + let mut b = BitSet::new(); + b.insert(5); + b.insert(6); + + a.extend(&b); + + assert_eq!(a.len(), 6); + assert!(a.contains(&1)); + assert!(a.contains(&2)); + assert!(a.contains(&3)); + assert!(a.contains(&4)); + assert!(a.contains(&5)); + assert!(a.contains(&6)); + } + + // ------------------------- + + #[test] + fn test_insert() { let mut set = BitSet::with_capacity(10); - assert_eq!(set.contains(0), false); - assert_eq!(set.contains(10), false); + assert_eq!(set.contains(&0), false); + assert_eq!(set.contains(&10), false); set.insert(0); set.insert(10); - assert_eq!(set.contains(0), true); - assert_eq!(set.contains(10), true); + assert_eq!(set.contains(&0), true); + assert_eq!(set.contains(&10), true); - assert_eq!(set.contains(100), false); + assert_eq!(set.contains(&100), false); set.insert(100); - assert_eq!(set.contains(100), true); + assert_eq!(set.contains(&100), true); } #[test] - fn from_iter() { - let set = [1, 2, 3, 10, 100].iter().cloned().collect::(); - - assert_eq!(set.len(), 5); - - assert_eq!(set.contains(1), true); - assert_eq!(set.contains(2), true); - assert_eq!(set.contains(3), true); - assert_eq!(set.contains(10), true); - assert_eq!(set.contains(100), true); - } - - #[test] - fn bitor() { + fn test_bitor() { let set_a = [1, 2, 3].iter().cloned().collect::(); let set_b = [3, 4, 5].iter().cloned().collect::(); @@ -444,15 +602,15 @@ mod tests { assert_eq!(set.len(), 5); - assert_eq!(set.contains(1), true); - assert_eq!(set.contains(2), true); - assert_eq!(set.contains(3), true); - assert_eq!(set.contains(4), true); - assert_eq!(set.contains(5), true); + assert_eq!(set.contains(&1), true); + assert_eq!(set.contains(&2), true); + assert_eq!(set.contains(&3), true); + assert_eq!(set.contains(&4), true); + assert_eq!(set.contains(&5), true); } #[test] - fn extend_from_bitset() { + fn test_extend_from_bitset() { let mut set = [1, 2, 3].iter().cloned().collect::(); let other = [3, 4, 5].iter().cloned().collect::(); @@ -460,58 +618,15 @@ mod tests { assert_eq!(set.len(), 5); - assert_eq!(set.contains(1), true); - assert_eq!(set.contains(2), true); - assert_eq!(set.contains(3), true); - assert_eq!(set.contains(4), true); - assert_eq!(set.contains(5), true); + assert_eq!(set.contains(&1), true); + assert_eq!(set.contains(&2), true); + assert_eq!(set.contains(&3), true); + assert_eq!(set.contains(&4), true); + assert_eq!(set.contains(&5), true); } #[test] - fn is_subset() { - let sup: BitSet = [1, 2, 3].iter().cloned().collect(); - let mut set = BitSet::new(); - - assert_eq!(set.is_subset(&sup), true); - set.insert(2); - assert_eq!(set.is_subset(&sup), true); - set.insert(4); - assert_eq!(set.is_subset(&sup), false); - } - - #[test] - fn iter() { - let set = [1, 2, 3].iter().cloned().collect::(); - let mut iter = set.iter(); - - assert_eq!(Some(1), iter.next()); - assert_eq!(Some(2), iter.next()); - assert_eq!(Some(3), iter.next()); - assert_eq!(None, iter.next()); - assert_eq!(None, iter.next()); - } - - #[test] - fn into_iter() { - let set = [1, 2, 3, 65, 66, 129, 130] - .iter() - .cloned() - .collect::(); - let mut iter = set.into_iter(); - - assert_eq!(Some(1), iter.next()); - assert_eq!(Some(2), iter.next()); - assert_eq!(Some(3), iter.next()); - assert_eq!(Some(65), iter.next()); - assert_eq!(Some(66), iter.next()); - assert_eq!(Some(129), iter.next()); - assert_eq!(Some(130), iter.next()); - assert_eq!(None, iter.next()); - assert_eq!(None, iter.next()); - } - - #[test] - fn test_ser_de() { + fn test_serde_serialize() { let mut set = BitSet::new(); set.insert(20); set.insert(10); @@ -520,7 +635,7 @@ mod tests { assert_tokens( &set, &[ - Token::Seq { len: None }, + Token::Seq { len: Some(3) }, Token::U64(10), Token::U64(20), Token::U64(30),