Implemented more methods and added tests from std::collections::HashSet.

Fixed len for serde implementation.
This commit is contained in:
2018-01-06 16:45:25 +01:00
parent 4bbfba26fe
commit 54d93d6249
2 changed files with 355 additions and 240 deletions

View File

@@ -4,9 +4,9 @@ version = "0.1.0"
authors = ["logaritmisk <anders.e.olsson@gmail.com>"] authors = ["logaritmisk <anders.e.olsson@gmail.com>"]
[dependencies] [dependencies]
unreachable = "1.0"
serde = "1.0" serde = "1.0"
serde_test = "1.0" serde_test = "1.0"
unreachable = "1.0"
[profile.release] [profile.release]
lto = true lto = true

View File

@@ -8,8 +8,9 @@ use std::iter::{FromIterator, IntoIterator, Iterator};
use std::default::Default; use std::default::Default;
use std::collections::HashMap; use std::collections::HashMap;
use serde::ser::{self, Serialize}; use serde::{Serialize, Serializer, Deserialize, Deserializer};
use serde::de::{self, Deserialize, SeqAccess, Visitor}; use serde::ser::SerializeSeq;
use serde::de::{SeqAccess, Visitor};
use serde_test::{assert_tokens, Token}; use serde_test::{assert_tokens, Token};
mod hasher; mod hasher;
@@ -24,11 +25,11 @@ type Storage = HashMap<u64, Block, BitBuildHasher>;
pub type BitHashMap<V> = HashMap<u64, V, BitBuildHasher>; pub type BitHashMap<V> = HashMap<u64, V, BitBuildHasher>;
#[inline] #[inline]
fn block_bit(x: u64, d: u64) -> (u64, u64) { fn block_bit(x: &u64, d: &u64) -> (u64, u64) {
(x / d, x % d) (x / d, x % d)
} }
#[derive(PartialEq, Debug)] #[derive(PartialEq)]
pub struct BitSet { pub struct BitSet {
blocks: Storage, blocks: Storage,
nbits: usize, nbits: usize,
@@ -46,22 +47,38 @@ impl BitSet {
#[inline] #[inline]
pub fn with_capacity(capacity: usize) -> BitSet { pub fn with_capacity(capacity: usize) -> BitSet {
BitSet { BitSet {
blocks: Storage::with_capacity_and_hasher(capacity, Default::default()), blocks: Storage::with_capacity_and_hasher(capacity / BITS as usize, Default::default()),
nbits: 0, nbits: 0,
} }
} }
#[inline] #[inline]
pub fn capacity(&self) -> usize { pub fn capacity(&self) -> usize {
self.blocks.len() * BITS as usize self.blocks.capacity() * BITS as usize
}
pub fn reserve(&mut self, additional: usize) {
self.blocks.reserve(additional / BITS as usize)
}
pub fn shrink_to_fit(&mut self) {
self.blocks.retain(|_, block| *block != 0);
self.blocks.shrink_to_fit()
}
pub fn iter(&self) -> Iter {
Iter {
iter: self.blocks.iter(),
block: 0,
bits: 0,
bit: BITS,
}
} }
#[inline]
pub fn len(&self) -> usize { pub fn len(&self) -> usize {
self.nbits self.nbits
} }
#[inline]
pub fn is_empty(&self) -> bool { pub fn is_empty(&self) -> bool {
self.nbits == 0 self.nbits == 0
} }
@@ -71,43 +88,8 @@ impl BitSet {
self.nbits = 0; self.nbits = 0;
} }
#[inline] pub fn contains(&self, value: &u64) -> bool {
pub fn insert(&mut self, value: u64) -> bool { let (block, bit) = block_bit(value, &BITS);
let (block, bit) = block_bit(value, BITS);
let block = self.blocks.entry(block).or_insert(0);
let n = 1 << bit;
if (*block & n) == 0 {
*block |= n;
self.nbits += 1;
true
} else {
false
}
}
#[inline]
pub fn remove(&mut self, value: u64) -> bool {
let (block, bit) = block_bit(value, BITS);
let block = self.blocks.entry(block).or_insert(0);
let n = 1 << bit;
if (*block & n) != 0 {
*block &= !n;
self.nbits -= 1;
true
} else {
false
}
}
#[inline]
pub fn contains(&self, value: u64) -> bool {
let (block, bit) = block_bit(value, BITS);
match self.blocks.get(&block) { match self.blocks.get(&block) {
Some(block) => (block & (1 << bit)) != 0, Some(block) => (block & (1 << bit)) != 0,
@@ -115,18 +97,6 @@ impl BitSet {
} }
} }
#[inline]
pub fn extend_from_bitset(&mut self, other: &Self) {
for (key, value) in &other.blocks {
*self.blocks.entry(*key).or_insert(0) |= value;
}
self.nbits = self.blocks
.values()
.map(|block| block.count_ones() as usize)
.sum();
}
pub fn is_subset(&self, other: &BitSet) -> bool { pub fn is_subset(&self, other: &BitSet) -> bool {
if self.len() > other.len() { if self.len() > other.len() {
false false
@@ -145,117 +115,54 @@ impl BitSet {
other.is_subset(self) other.is_subset(self)
} }
pub fn iter(&self) -> Iter { pub fn insert(&mut self, value: u64) -> bool {
Iter { let (block, bit) = block_bit(&value, &BITS);
iter: self.blocks.iter(), let block = self.blocks.entry(block).or_insert(0);
block: 0,
bits: 0, let n = 1 << bit;
bit: BITS,
if (*block & n) == 0 {
*block |= n;
self.nbits += 1;
true
} else {
false
} }
} }
pub fn remove(&mut self, value: &u64) -> bool {
let (block, bit) = block_bit(value, &BITS);
let block = self.blocks.entry(block).or_insert(0);
let n = 1 << bit;
if (*block & n) != 0 {
*block &= !n;
self.nbits -= 1;
true
} else {
false
}
}
#[inline]
pub fn extend_from_bitset(&mut self, other: &Self) {
for (key, value) in &other.blocks {
*self.blocks.entry(*key).or_insert(0) |= value;
}
self.nbits = self.blocks
.values()
.map(|block| block.count_ones() as usize)
.sum();
}
} }
pub struct Iter<'a> { impl fmt::Debug for BitSet{
iter: std::collections::hash_map::Iter<'a, u64, u64>, fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
block: u64, f.debug_set().entries(self.iter()).finish()
bits: u64,
bit: u64,
}
impl<'a> Iterator for Iter<'a> {
type Item = u64;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
loop {
if self.bits == 0 || self.bit == BITS {
match self.iter.next() {
Some((block, bits)) => {
self.block = *block;
self.bits = *bits;
self.bit = 0;
}
None => return None,
}
}
for i in self.bit..BITS {
if self.bits & (1 << i) != 0 {
self.bit = i + 1;
return Some((self.block * BITS) + i);
}
}
self.bit = BITS;
}
}
}
pub struct IntoIter {
iter: std::collections::hash_map::IntoIter<u64, u64>,
block: u64,
bits: u64,
bit: u64,
}
impl Iterator for IntoIter {
type Item = u64;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
loop {
if self.bits == 0 || self.bit == BITS {
match self.iter.next() {
Some((block, bits)) => {
self.block = block;
self.bits = bits;
self.bit = 0;
}
None => return None,
}
}
for i in self.bit..BITS {
if self.bits & (1 << i) != 0 {
self.bit = i + 1;
return Some((self.block * BITS) + i);
}
}
self.bit = BITS;
}
}
}
impl<'a> IntoIterator for &'a BitSet {
type Item = u64;
type IntoIter = Iter<'a>;
fn into_iter(self) -> Iter<'a> {
self.iter()
}
}
impl IntoIterator for BitSet {
type Item = u64;
type IntoIter = IntoIter;
fn into_iter(self) -> IntoIter {
IntoIter {
iter: self.blocks.into_iter(),
block: 0,
bits: 0,
bit: BITS,
}
}
}
impl Default for BitSet {
#[inline]
fn default() -> BitSet {
BitSet::new()
} }
} }
@@ -286,6 +193,13 @@ impl<'a> Extend<&'a u64> for BitSet {
} }
} }
impl Default for BitSet {
#[inline]
fn default() -> BitSet {
BitSet::new()
}
}
impl ops::BitOr for BitSet { impl ops::BitOr for BitSet {
type Output = Self; type Output = Self;
@@ -322,20 +236,121 @@ impl<'a> ops::BitOr<&'a Self> for BitSet {
} }
} }
pub struct Iter<'a> {
iter: std::collections::hash_map::Iter<'a, u64, u64>,
block: u64,
bits: u64,
bit: u64,
}
pub struct IntoIter {
iter: std::collections::hash_map::IntoIter<u64, u64>,
block: u64,
bits: u64,
bit: u64,
}
impl<'a> IntoIterator for &'a BitSet {
type Item = u64;
type IntoIter = Iter<'a>;
fn into_iter(self) -> Iter<'a> {
self.iter()
}
}
impl IntoIterator for BitSet {
type Item = u64;
type IntoIter = IntoIter;
fn into_iter(self) -> IntoIter {
IntoIter {
iter: self.blocks.into_iter(),
block: 0,
bits: 0,
bit: BITS,
}
}
}
impl<'a> Iterator for Iter<'a> {
type Item = u64;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
loop {
if self.bits == 0 || self.bit == BITS {
match self.iter.next() {
Some((block, bits)) => {
self.block = *block;
self.bits = *bits;
self.bit = 0;
}
None => return None,
}
}
for i in self.bit..BITS {
if self.bits & (1 << i) != 0 {
self.bit = i + 1;
return Some((self.block * BITS) + i);
}
}
self.bit = BITS;
}
}
}
impl Iterator for IntoIter {
type Item = u64;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
loop {
if self.bits == 0 || self.bit == BITS {
match self.iter.next() {
Some((block, bits)) => {
self.block = block;
self.bits = bits;
self.bit = 0;
}
None => return None,
}
}
for i in self.bit..BITS {
if self.bits & (1 << i) != 0 {
self.bit = i + 1;
return Some((self.block * BITS) + i);
}
}
self.bit = BITS;
}
}
}
impl Serialize for BitSet { impl Serialize for BitSet {
#[inline] #[inline]
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where where
S: ser::Serializer, S: Serializer,
{ {
serializer.collect_seq(self) let mut seq = serializer.serialize_seq(Some(self.len()))?;
for element in self {
seq.serialize_element(&element)?;
}
seq.end()
} }
} }
impl<'de> Deserialize<'de> for BitSet { impl<'de> Deserialize<'de> for BitSet {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where where
D: de::Deserializer<'de>, D: Deserializer<'de>,
{ {
struct SeqVisitor; struct SeqVisitor;
@@ -368,7 +383,7 @@ impl<'de> Deserialize<'de> for BitSet {
fn deserialize_in_place<D>(deserializer: D, place: &mut Self) -> Result<(), D::Error> fn deserialize_in_place<D>(deserializer: D, place: &mut Self) -> Result<(), D::Error>
where where
D: de::Deserializer<'de>, D: Deserializer<'de>,
{ {
struct SeqInPlaceVisitor<'a>(&'a mut BitSet); struct SeqInPlaceVisitor<'a>(&'a mut BitSet);
@@ -403,40 +418,183 @@ mod tests {
use super::*; use super::*;
#[test] #[test]
fn insert() { fn test_zero_capacities() {
let s = BitSet::new();
assert_eq!(s.capacity(), 0);
let s = BitSet::default();
assert_eq!(s.capacity(), 0);
let s = BitSet::with_capacity(0);
assert_eq!(s.capacity(), 0);
let mut s = BitSet::new();
s.insert(1);
s.insert(2);
s.remove(&1);
s.remove(&2);
s.shrink_to_fit();
assert_eq!(s.capacity(), 0);
let mut s = BitSet::new();
s.reserve(0);
assert_eq!(s.capacity(), 0);
}
#[test]
fn test_subset_and_superset() {
let mut a = BitSet::new();
assert!(a.insert(0));
assert!(a.insert(5));
assert!(a.insert(11));
assert!(a.insert(7));
let mut b = BitSet::new();
assert!(b.insert(0));
assert!(b.insert(7));
assert!(b.insert(19));
assert!(b.insert(250));
assert!(b.insert(11));
assert!(b.insert(200));
assert!(!a.is_subset(&b));
assert!(!a.is_superset(&b));
assert!(!b.is_subset(&a));
assert!(!b.is_superset(&a));
assert!(b.insert(5));
assert!(a.is_subset(&b));
assert!(!a.is_superset(&b));
assert!(!b.is_subset(&a));
assert!(b.is_superset(&a));
}
#[test]
fn test_iterate() {
let mut a = BitSet::new();
for i in 0..32 {
assert!(a.insert(i));
}
let mut observed: u32 = 0;
for k in &a {
observed |= 1 << k;
}
assert_eq!(observed, 0xFFFF_FFFF);
}
#[test]
fn test_from_iter() {
let xs = [1, 2, 3, 4, 5, 6, 7, 8, 9];
let set: BitSet = xs.iter().cloned().collect();
for x in &xs {
assert!(set.contains(x));
}
}
#[test]
fn test_move_iter() {
let hs = {
let mut hs = BitSet::new();
hs.insert(1);
hs.insert(2);
hs
};
let v = hs.into_iter().collect::<Vec<u64>>();
assert_eq!(v, [1, 2]);
}
#[test]
fn test_eq() {
let mut s1 = BitSet::new();
s1.insert(1);
s1.insert(2);
s1.insert(3);
let mut s2 = BitSet::new();
s2.insert(1);
s2.insert(2);
assert!(s1 != s2);
s2.insert(3);
assert_eq!(s1, s2);
}
#[test]
fn test_show() {
let mut set = BitSet::new();
let empty = BitSet::new();
set.insert(1);
set.insert(2);
let set_str = format!("{:?}", set);
assert_eq!(set_str, "{1, 2}");
assert_eq!(format!("{:?}", empty), "{}");
}
#[test]
fn test_extend_ref() {
let mut a = BitSet::new();
a.insert(1);
a.extend(&[2, 3, 4]);
assert_eq!(a.len(), 4);
assert!(a.contains(&1));
assert!(a.contains(&2));
assert!(a.contains(&3));
assert!(a.contains(&4));
let mut b = BitSet::new();
b.insert(5);
b.insert(6);
a.extend(&b);
assert_eq!(a.len(), 6);
assert!(a.contains(&1));
assert!(a.contains(&2));
assert!(a.contains(&3));
assert!(a.contains(&4));
assert!(a.contains(&5));
assert!(a.contains(&6));
}
// -------------------------
#[test]
fn test_insert() {
let mut set = BitSet::with_capacity(10); let mut set = BitSet::with_capacity(10);
assert_eq!(set.contains(0), false); assert_eq!(set.contains(&0), false);
assert_eq!(set.contains(10), false); assert_eq!(set.contains(&10), false);
set.insert(0); set.insert(0);
set.insert(10); set.insert(10);
assert_eq!(set.contains(0), true); assert_eq!(set.contains(&0), true);
assert_eq!(set.contains(10), true); assert_eq!(set.contains(&10), true);
assert_eq!(set.contains(100), false); assert_eq!(set.contains(&100), false);
set.insert(100); set.insert(100);
assert_eq!(set.contains(100), true); assert_eq!(set.contains(&100), true);
} }
#[test] #[test]
fn from_iter() { fn test_bitor() {
let set = [1, 2, 3, 10, 100].iter().cloned().collect::<BitSet>();
assert_eq!(set.len(), 5);
assert_eq!(set.contains(1), true);
assert_eq!(set.contains(2), true);
assert_eq!(set.contains(3), true);
assert_eq!(set.contains(10), true);
assert_eq!(set.contains(100), true);
}
#[test]
fn bitor() {
let set_a = [1, 2, 3].iter().cloned().collect::<BitSet>(); let set_a = [1, 2, 3].iter().cloned().collect::<BitSet>();
let set_b = [3, 4, 5].iter().cloned().collect::<BitSet>(); let set_b = [3, 4, 5].iter().cloned().collect::<BitSet>();
@@ -444,15 +602,15 @@ mod tests {
assert_eq!(set.len(), 5); assert_eq!(set.len(), 5);
assert_eq!(set.contains(1), true); assert_eq!(set.contains(&1), true);
assert_eq!(set.contains(2), true); assert_eq!(set.contains(&2), true);
assert_eq!(set.contains(3), true); assert_eq!(set.contains(&3), true);
assert_eq!(set.contains(4), true); assert_eq!(set.contains(&4), true);
assert_eq!(set.contains(5), true); assert_eq!(set.contains(&5), true);
} }
#[test] #[test]
fn extend_from_bitset() { fn test_extend_from_bitset() {
let mut set = [1, 2, 3].iter().cloned().collect::<BitSet>(); let mut set = [1, 2, 3].iter().cloned().collect::<BitSet>();
let other = [3, 4, 5].iter().cloned().collect::<BitSet>(); let other = [3, 4, 5].iter().cloned().collect::<BitSet>();
@@ -460,58 +618,15 @@ mod tests {
assert_eq!(set.len(), 5); assert_eq!(set.len(), 5);
assert_eq!(set.contains(1), true); assert_eq!(set.contains(&1), true);
assert_eq!(set.contains(2), true); assert_eq!(set.contains(&2), true);
assert_eq!(set.contains(3), true); assert_eq!(set.contains(&3), true);
assert_eq!(set.contains(4), true); assert_eq!(set.contains(&4), true);
assert_eq!(set.contains(5), true); assert_eq!(set.contains(&5), true);
} }
#[test] #[test]
fn is_subset() { fn test_serde_serialize() {
let sup: BitSet = [1, 2, 3].iter().cloned().collect();
let mut set = BitSet::new();
assert_eq!(set.is_subset(&sup), true);
set.insert(2);
assert_eq!(set.is_subset(&sup), true);
set.insert(4);
assert_eq!(set.is_subset(&sup), false);
}
#[test]
fn iter() {
let set = [1, 2, 3].iter().cloned().collect::<BitSet>();
let mut iter = set.iter();
assert_eq!(Some(1), iter.next());
assert_eq!(Some(2), iter.next());
assert_eq!(Some(3), iter.next());
assert_eq!(None, iter.next());
assert_eq!(None, iter.next());
}
#[test]
fn into_iter() {
let set = [1, 2, 3, 65, 66, 129, 130]
.iter()
.cloned()
.collect::<BitSet>();
let mut iter = set.into_iter();
assert_eq!(Some(1), iter.next());
assert_eq!(Some(2), iter.next());
assert_eq!(Some(3), iter.next());
assert_eq!(Some(65), iter.next());
assert_eq!(Some(66), iter.next());
assert_eq!(Some(129), iter.next());
assert_eq!(Some(130), iter.next());
assert_eq!(None, iter.next());
assert_eq!(None, iter.next());
}
#[test]
fn test_ser_de() {
let mut set = BitSet::new(); let mut set = BitSet::new();
set.insert(20); set.insert(20);
set.insert(10); set.insert(10);
@@ -520,7 +635,7 @@ mod tests {
assert_tokens( assert_tokens(
&set, &set,
&[ &[
Token::Seq { len: None }, Token::Seq { len: Some(3) },
Token::U64(10), Token::U64(10),
Token::U64(20), Token::U64(20),
Token::U64(30), Token::U64(30),