diff --git a/src/hashmaps/mod.rs b/src/hashmaps/mod.rs new file mode 100644 index 0000000..6266272 --- /dev/null +++ b/src/hashmaps/mod.rs @@ -0,0 +1,94 @@ +use std::hash::{BuildHasher, Hash}; + +pub mod simple_open_addressing; + +trait HashMapFamily { + type Map: HashMap; +} + +trait HashMap: IntoIterator { + fn with_hasher(state: S) -> Self; + + fn len(&self) -> usize; + + fn is_empty(&self) -> bool { + self.len() == 0 + } + + fn get(&self, key: &K) -> Option<&V> + where + K: Eq + Hash, + S: BuildHasher; + + fn insert(&mut self, key: K, value: V) -> Option + where + K: Eq + Hash, + S: BuildHasher; +} + +#[cfg(test)] +mod tests { + use std::hash::{BuildHasher, BuildHasherDefault, Hasher, RandomState}; + + use super::{HashMap, HashMapFamily}; + + #[derive(Default)] + struct CollidingHasher; + impl Hasher for CollidingHasher { + fn finish(&self) -> u64 { + 0 + } + fn write(&mut self, _bytes: &[u8]) {} + } + + pub(super) fn run_tests() + where + M: HashMapFamily, + { + let mk_str = || M::Map::<&str, &str, _>::with_hasher(RandomState::new()); + + let m = mk_str(); + assert_eq!(m.get(&"uwu"), None); + assert_eq!(m.get(&"uwu"), None); + + let mut m = mk_str(); + m.insert("hello", "world"); + assert_eq!(m.get(&"hello"), Some(&"world")); + assert_eq!(m.len(), 1); + m.insert("aaa", "yes"); + assert_eq!(m.get(&"hello"), Some(&"world")); + assert_eq!(m.get(&"aaa"), Some(&"yes")); + assert_eq!(m.len(), 2); + + let mut m = mk_str(); + m.insert("hello", "world"); + assert_eq!(m.get(&"hello"), Some(&"world")); + assert_eq!(m.len(), 1); + m.insert("hello", "no"); + assert_eq!(m.get(&"hello"), Some(&"no")); + assert_eq!(m.len(), 1); + + for count in [1, 10, 100, 1000, 10_000, 100_000] { + test_many::(count, RandomState::new()); + } + test_many::(5000, BuildHasherDefault::::default()); + } + + fn test_many(count: usize, h: H) { + let mut m = M::Map::with_hasher(h); + + for i in 0..count { + m.insert(i, i); + } + + let mut found = vec![false; count]; + for (k, v) in m.into_iter() { + assert_eq!(k, v); + assert!(!found[k], "duplicate element"); + found[k] = true; + } + for (i, found) in found.iter().enumerate() { + assert!(found, "element {i} was lost"); + } + } +} diff --git a/src/hashmap.rs b/src/hashmaps/simple_open_addressing.rs similarity index 51% rename from src/hashmap.rs rename to src/hashmaps/simple_open_addressing.rs index 098d528..a23a8d7 100644 --- a/src/hashmap.rs +++ b/src/hashmaps/simple_open_addressing.rs @@ -1,3 +1,4 @@ +use super::{HashMap, HashMapFamily}; use std::{ hash::{BuildHasher, Hash, RandomState}, vec, @@ -5,20 +6,44 @@ use std::{ type Entry = Option<(K, V)>; -pub struct HashMap { +pub struct SimpleOAHashMap { buckets: Vec>, filled: usize, s: S, } -impl HashMap { +impl SimpleOAHashMap { pub fn new() -> Self { Self::with_hasher(RandomState::new()) } } -impl HashMap { - pub fn with_hasher(state: S) -> Self { +impl SimpleOAHashMap { + fn bucket_of_elem(&self, key: &K) -> usize { + assert_ne!(self.buckets.len(), 0, "cannot compute bucket of empty map"); + let hash = self.s.hash_one(&key) as usize; + hash % self.buckets.len() + } + + fn grow(&mut self) { + let len = self.buckets.len(); + let new = if len == 0 { 8 } else { len * 2 }; + let old = IntoIter::new(std::mem::take(&mut self.buckets)); + let new_buckets = (0..new).map(|_| None).collect(); + self.buckets = new_buckets; + self.extend(old); + } +} + +impl Extend<(K, V)> for SimpleOAHashMap { + fn extend>(&mut self, iter: T) { + iter.into_iter() + .for_each(|(key, value)| drop(self.insert(key, value))); + } +} + +impl super::HashMap for SimpleOAHashMap { + fn with_hasher(state: S) -> Self { Self { buckets: Vec::new(), filled: 0, @@ -26,15 +51,19 @@ impl HashMap { } } - pub fn len(&self) -> usize { + fn len(&self) -> usize { self.filled } - pub fn is_empty(&self) -> bool { - self.buckets.len() == 0 + fn is_empty(&self) -> bool { + self.len() == 0 } - pub fn get(&self, key: &K) -> Option<&V> { + fn get(&self, key: &K) -> Option<&V> + where + K: Eq + Hash, + S: BuildHasher, + { if self.is_empty() { return None; } @@ -52,7 +81,11 @@ impl HashMap { } } - pub fn insert(&mut self, key: K, value: V) { + fn insert(&mut self, key: K, value: V) -> Option + where + K: Eq + Hash, + S: BuildHasher, + { if self.filled >= self.buckets.len() { self.grow(); } @@ -65,35 +98,13 @@ impl HashMap { if bucket.is_none() { self.filled += 1; } - *bucket = Some((key, value)); - return; + let before = std::mem::replace(bucket, Some((key, value))); + return before.map(|(_, v)| v); } else { self.grow(); } } } - - fn bucket_of_elem(&self, key: &K) -> usize { - assert_ne!(self.buckets.len(), 0, "cannot compute bucket of empty map"); - let hash = self.s.hash_one(&key) as usize; - hash % self.buckets.len() - } - - fn grow(&mut self) { - let len = self.buckets.len(); - let new = if len == 0 { 8 } else { len * 2 }; - let old = IntoIter::new(std::mem::take(&mut self.buckets)); - let new_buckets = (0..new).map(|_| None).collect(); - self.buckets = new_buckets; - self.extend(old); - } -} - -impl Extend<(K, V)> for HashMap { - fn extend>(&mut self, iter: T) { - iter.into_iter() - .for_each(|(key, value)| self.insert(key, value)); - } } pub struct IntoIter { @@ -116,7 +127,7 @@ impl Iterator for IntoIter { } } -impl IntoIterator for HashMap { +impl IntoIterator for SimpleOAHashMap { type Item = (K, V); type IntoIter = IntoIter; @@ -126,78 +137,15 @@ impl IntoIterator for HashMap { } } +pub struct SimpleOAHashMapFamily; +impl HashMapFamily for SimpleOAHashMapFamily { + type Map = SimpleOAHashMap; +} + #[cfg(test)] mod tests { - use std::hash::{BuildHasher, BuildHasherDefault, Hasher, RandomState}; - - use super::HashMap; - #[test] - fn get_empty() { - let m = HashMap::<&str, ()>::new(); - assert_eq!(m.get(&"uwu"), None); - assert_eq!(m.get(&"uwu"), None); - } - - #[test] - fn insert() { - let mut m = HashMap::new(); - m.insert("hello", "world"); - assert_eq!(m.get(&"hello"), Some(&"world")); - assert_eq!(m.len(), 1); - m.insert("aaa", "yes"); - assert_eq!(m.get(&"hello"), Some(&"world")); - assert_eq!(m.get(&"aaa"), Some(&"yes")); - assert_eq!(m.len(), 2); - } - - #[test] - fn overriding() { - let mut m = HashMap::new(); - m.insert("hello", "world"); - assert_eq!(m.get(&"hello"), Some(&"world")); - assert_eq!(m.len(), 1); - m.insert("hello", "no"); - assert_eq!(m.get(&"hello"), Some(&"no")); - assert_eq!(m.len(), 1); - } - - #[derive(Default)] - struct CollidingHasher; - impl Hasher for CollidingHasher { - fn finish(&self) -> u64 { - 0 - } - fn write(&mut self, _bytes: &[u8]) {} - } - - fn test_many(count: usize, h: H) { - let mut m = HashMap::with_hasher(h); - - for i in 0..count { - m.insert(i, i); - } - - let mut found = vec![false; count]; - for (k, v) in m.into_iter() { - assert_eq!(k, v); - assert!(!found[k], "duplicate element"); - found[k] = true; - } - for (i, found) in found.iter().enumerate() { - assert!(found, "element {i} was lost"); - } - } - - #[test] - fn many_elements() { - for count in [1, 10, 100, 1000, 10_000, 100_000] { - test_many(count, RandomState::new()); - } - } - - #[test] - fn many_many_collisions() { - test_many(5000, BuildHasherDefault::::default()); + fn do_tests() { + crate::hashmaps::tests::run_tests::(); } } diff --git a/src/lib.rs b/src/lib.rs index 4a66f44..53bd25b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,7 +5,7 @@ #![feature(strict_provenance)] pub mod cfg_match; -pub mod hashmap; +pub mod hashmaps; pub mod innocent_linked_list; pub mod scratch; pub mod sendsync;