This commit is contained in:
nora 2023-12-30 16:03:19 +01:00
parent 9e4255f239
commit 71521a5235
3 changed files with 147 additions and 105 deletions

94
src/hashmaps/mod.rs Normal file
View file

@ -0,0 +1,94 @@
use std::hash::{BuildHasher, Hash};
pub mod simple_open_addressing;
/// A family of hash map implementations, generic over key, value and hasher state.
///
/// An implementor names a concrete map type through the generic associated type
/// `Map`, which lets generic code (such as the shared tests in this module) be
/// written once and instantiated for any key/value/hasher combination.
trait HashMapFamily {
/// The concrete map type for key type `K`, value type `V` and hasher state `S`.
type Map<K, V, S>: HashMap<K, V, S>;
}
/// Common interface of the hash maps in this module.
///
/// `K` is the key type, `V` the value type and `S` the hasher state handed to
/// `with_hasher`. Implementors must also be consumable into an iterator over
/// owned `(K, V)` pairs.
trait HashMap<K, V, S>: IntoIterator<Item = (K, V)> {
/// Builds a map from the hasher state `state`.
fn with_hasher(state: S) -> Self;
/// Returns the number of elements the map reports as stored.
fn len(&self) -> usize;
/// Returns `true` if `len()` is `0`.
fn is_empty(&self) -> bool {
self.len() == 0
}
/// Returns a reference to the value stored under `key`, or `None` if there is none.
fn get(&self, key: &K) -> Option<&V>
where
K: Eq + Hash,
S: BuildHasher;
/// Stores `value` under `key`.
///
/// NOTE(review): the returned `Option<V>` is presumably the value previously
/// stored under an equal key, as with `std::collections::HashMap::insert` —
/// confirm against each implementation.
fn insert(&mut self, key: K, value: V) -> Option<V>
where
K: Eq + Hash,
S: BuildHasher;
}
#[cfg(test)]
mod tests {
use std::hash::{BuildHasher, BuildHasherDefault, Hasher, RandomState};
use super::{HashMap, HashMapFamily};
/// A [`Hasher`] that throws away everything written to it and always
/// finishes with the hash `0`, so that every key collides with every other key.
#[derive(Default)]
struct CollidingHasher;

impl Hasher for CollidingHasher {
    /// Ignores the input; this hasher keeps no state.
    fn write(&mut self, _input: &[u8]) {}

    /// Always reports the same hash value.
    fn finish(&self) -> u64 {
        0
    }
}
/// Runs the shared behavioural test-suite against the map family `M`.
///
/// Covers, in order: lookups on a fresh map, inserting distinct keys,
/// overriding an existing key (including the value handed back by `insert`),
/// and bulk insertion with both a random and an always-colliding hasher.
///
/// # Panics
///
/// Panics (and thereby fails the calling test) as soon as one expectation is violated.
pub(super) fn run_tests<M>()
where
    M: HashMapFamily,
{
    let mk_str = || M::Map::<&str, &str, _>::with_hasher(RandomState::new());

    // A fresh map is empty and misses every lookup, also when asked repeatedly.
    {
        let m = mk_str();
        assert!(m.is_empty());
        assert_eq!(m.len(), 0);
        assert_eq!(m.get(&"uwu"), None);
        assert_eq!(m.get(&"uwu"), None);
    }

    // Distinct keys are stored side by side; inserting a new key returns `None`.
    {
        let mut m = mk_str();
        assert_eq!(m.insert("hello", "world"), None);
        assert!(!m.is_empty());
        assert_eq!(m.get(&"hello"), Some(&"world"));
        assert_eq!(m.len(), 1);
        assert_eq!(m.insert("aaa", "yes"), None);
        assert_eq!(m.get(&"hello"), Some(&"world"));
        assert_eq!(m.get(&"aaa"), Some(&"yes"));
        assert_eq!(m.len(), 2);
    }

    // Re-inserting a key replaces the value, hands back the old one and
    // leaves the element count untouched.
    {
        let mut m = mk_str();
        assert_eq!(m.insert("hello", "world"), None);
        assert_eq!(m.get(&"hello"), Some(&"world"));
        assert_eq!(m.len(), 1);
        assert_eq!(m.insert("hello", "no"), Some("world"));
        assert_eq!(m.get(&"hello"), Some(&"no"));
        assert_eq!(m.len(), 1);
    }

    // Bulk insertion across several growth steps with a well-distributed hasher ...
    for count in [1, 10, 100, 1000, 10_000, 100_000] {
        test_many::<M, _>(count, RandomState::new());
    }
    // ... and with a hasher that sends every key to the same hash value.
    test_many::<M, _>(5000, BuildHasherDefault::<CollidingHasher>::default());
}
/// Inserts the pairs `(0, 0)` to `(count - 1, count - 1)` into a map of family `M`
/// built with hasher state `h`, then consumes the map and checks that every
/// key comes back exactly once, paired with its own value.
///
/// # Panics
///
/// Panics if a key/value pair does not match, a key is yielded twice, or a key is missing.
fn test_many<M: HashMapFamily, H: BuildHasher>(count: usize, h: H) {
    let mut map = M::Map::<usize, usize, H>::with_hasher(h);
    (0..count).for_each(|n| {
        map.insert(n, n);
    });

    // One flag per expected key, flipped when the key is yielded by the iterator.
    let mut seen = vec![false; count];
    for (key, value) in map {
        assert_eq!(key, value);
        assert!(!seen[key], "duplicate element");
        seen[key] = true;
    }

    for (i, present) in seen.into_iter().enumerate() {
        assert!(present, "element {i} was lost");
    }
}
}

View file

@ -1,3 +1,4 @@
use super::{HashMap, HashMapFamily};
use std::{ use std::{
hash::{BuildHasher, Hash, RandomState}, hash::{BuildHasher, Hash, RandomState},
vec, vec,
@ -5,20 +6,44 @@ use std::{
type Entry<K, V> = Option<(K, V)>; type Entry<K, V> = Option<(K, V)>;
pub struct HashMap<K, V, S = RandomState> { pub struct SimpleOAHashMap<K, V, S = RandomState> {
buckets: Vec<Entry<K, V>>, buckets: Vec<Entry<K, V>>,
filled: usize, filled: usize,
s: S, s: S,
} }
impl<K: Eq + Hash, V> HashMap<K, V, RandomState> { impl<K: Eq + Hash, V> SimpleOAHashMap<K, V, RandomState> {
pub fn new() -> Self { pub fn new() -> Self {
Self::with_hasher(RandomState::new()) Self::with_hasher(RandomState::new())
} }
} }
impl<K: Eq + Hash, V, S: BuildHasher> HashMap<K, V, S> { impl<K: Eq + Hash, V, S: BuildHasher> SimpleOAHashMap<K, V, S> {
pub fn with_hasher(state: S) -> Self { fn bucket_of_elem(&self, key: &K) -> usize {
assert_ne!(self.buckets.len(), 0, "cannot compute bucket of empty map");
let hash = self.s.hash_one(&key) as usize;
hash % self.buckets.len()
}
/// Replaces the bucket array with a larger one and re-inserts every stored element.
///
/// A map without buckets gets 8 buckets; otherwise the bucket count doubles.
fn grow(&mut self) {
    let new_len = match self.buckets.len() {
        0 => 8,
        len => len * 2,
    };
    let old = IntoIter::new(std::mem::take(&mut self.buckets));
    self.buckets = (0..new_len).map(|_| None).collect();
    // Re-insertion goes through `insert`, which bumps `filled` for every
    // element that lands in an empty bucket. Start counting from zero again,
    // otherwise each element is counted twice and `len()` over-reports after growth.
    self.filled = 0;
    self.extend(old);
}
}
/// Feeds every `(key, value)` pair of an iterator into the map via `insert`.
impl<K: Eq + Hash, V, S: BuildHasher> Extend<(K, V)> for SimpleOAHashMap<K, V, S> {
    fn extend<T: IntoIterator<Item = (K, V)>>(&mut self, iter: T) {
        for (key, value) in iter {
            // The value `insert` hands back is of no interest here.
            let _previous = self.insert(key, value);
        }
    }
}
impl<K, V, S> super::HashMap<K, V, S> for SimpleOAHashMap<K, V, S> {
fn with_hasher(state: S) -> Self {
Self { Self {
buckets: Vec::new(), buckets: Vec::new(),
filled: 0, filled: 0,
@ -26,15 +51,19 @@ impl<K: Eq + Hash, V, S: BuildHasher> HashMap<K, V, S> {
} }
} }
pub fn len(&self) -> usize { fn len(&self) -> usize {
self.filled self.filled
} }
pub fn is_empty(&self) -> bool { fn is_empty(&self) -> bool {
self.buckets.len() == 0 self.len() == 0
} }
pub fn get(&self, key: &K) -> Option<&V> { fn get(&self, key: &K) -> Option<&V>
where
K: Eq + Hash,
S: BuildHasher,
{
if self.is_empty() { if self.is_empty() {
return None; return None;
} }
@ -52,7 +81,11 @@ impl<K: Eq + Hash, V, S: BuildHasher> HashMap<K, V, S> {
} }
} }
pub fn insert(&mut self, key: K, value: V) { fn insert(&mut self, key: K, value: V) -> Option<V>
where
K: Eq + Hash,
S: BuildHasher,
{
if self.filled >= self.buckets.len() { if self.filled >= self.buckets.len() {
self.grow(); self.grow();
} }
@ -65,35 +98,13 @@ impl<K: Eq + Hash, V, S: BuildHasher> HashMap<K, V, S> {
if bucket.is_none() { if bucket.is_none() {
self.filled += 1; self.filled += 1;
} }
*bucket = Some((key, value)); let before = std::mem::replace(bucket, Some((key, value)));
return; return before.map(|(_, v)| v);
} else { } else {
self.grow(); self.grow();
} }
} }
} }
fn bucket_of_elem(&self, key: &K) -> usize {
assert_ne!(self.buckets.len(), 0, "cannot compute bucket of empty map");
let hash = self.s.hash_one(&key) as usize;
hash % self.buckets.len()
}
fn grow(&mut self) {
let len = self.buckets.len();
let new = if len == 0 { 8 } else { len * 2 };
let old = IntoIter::new(std::mem::take(&mut self.buckets));
let new_buckets = (0..new).map(|_| None).collect();
self.buckets = new_buckets;
self.extend(old);
}
}
impl<K: Eq + Hash, V, S: BuildHasher> Extend<(K, V)> for HashMap<K, V, S> {
fn extend<T: IntoIterator<Item = (K, V)>>(&mut self, iter: T) {
iter.into_iter()
.for_each(|(key, value)| self.insert(key, value));
}
} }
pub struct IntoIter<K, V> { pub struct IntoIter<K, V> {
@ -116,7 +127,7 @@ impl<K, V> Iterator for IntoIter<K, V> {
} }
} }
impl<K, V, S> IntoIterator for HashMap<K, V, S> { impl<K, V, S> IntoIterator for SimpleOAHashMap<K, V, S> {
type Item = (K, V); type Item = (K, V);
type IntoIter = IntoIter<K, V>; type IntoIter = IntoIter<K, V>;
@ -126,78 +137,15 @@ impl<K, V, S> IntoIterator for HashMap<K, V, S> {
} }
} }
/// Marker type that selects `SimpleOAHashMap` as the map of a `HashMapFamily`.
pub struct SimpleOAHashMapFamily;
impl HashMapFamily for SimpleOAHashMapFamily {
/// Every `(K, V, S)` combination maps to the matching `SimpleOAHashMap`.
type Map<K, V, S> = SimpleOAHashMap<K, V, S>;
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use std::hash::{BuildHasher, BuildHasherDefault, Hasher, RandomState};
use super::HashMap;
#[test] #[test]
fn get_empty() { fn do_tests() {
let m = HashMap::<&str, ()>::new(); crate::hashmaps::tests::run_tests::<super::SimpleOAHashMapFamily>();
assert_eq!(m.get(&"uwu"), None);
assert_eq!(m.get(&"uwu"), None);
}
#[test]
fn insert() {
let mut m = HashMap::new();
m.insert("hello", "world");
assert_eq!(m.get(&"hello"), Some(&"world"));
assert_eq!(m.len(), 1);
m.insert("aaa", "yes");
assert_eq!(m.get(&"hello"), Some(&"world"));
assert_eq!(m.get(&"aaa"), Some(&"yes"));
assert_eq!(m.len(), 2);
}
#[test]
fn overriding() {
let mut m = HashMap::new();
m.insert("hello", "world");
assert_eq!(m.get(&"hello"), Some(&"world"));
assert_eq!(m.len(), 1);
m.insert("hello", "no");
assert_eq!(m.get(&"hello"), Some(&"no"));
assert_eq!(m.len(), 1);
}
#[derive(Default)]
struct CollidingHasher;
impl Hasher for CollidingHasher {
fn finish(&self) -> u64 {
0
}
fn write(&mut self, _bytes: &[u8]) {}
}
fn test_many<H: BuildHasher>(count: usize, h: H) {
let mut m = HashMap::with_hasher(h);
for i in 0..count {
m.insert(i, i);
}
let mut found = vec![false; count];
for (k, v) in m.into_iter() {
assert_eq!(k, v);
assert!(!found[k], "duplicate element");
found[k] = true;
}
for (i, found) in found.iter().enumerate() {
assert!(found, "element {i} was lost");
}
}
#[test]
fn many_elements() {
for count in [1, 10, 100, 1000, 10_000, 100_000] {
test_many(count, RandomState::new());
}
}
#[test]
fn many_many_collisions() {
test_many(5000, BuildHasherDefault::<CollidingHasher>::default());
} }
} }

View file

@ -5,7 +5,7 @@
#![feature(strict_provenance)] #![feature(strict_provenance)]
pub mod cfg_match; pub mod cfg_match;
pub mod hashmap; pub mod hashmaps;
pub mod innocent_linked_list; pub mod innocent_linked_list;
pub mod scratch; pub mod scratch;
pub mod sendsync; pub mod sendsync;