This commit is contained in:
nora 2023-12-30 16:03:19 +01:00
parent 9e4255f239
commit 71521a5235
3 changed files with 147 additions and 105 deletions

94
src/hashmaps/mod.rs Normal file
View file

@ -0,0 +1,94 @@
use std::hash::{BuildHasher, Hash};
pub mod simple_open_addressing;
/// A family of hash map types: names a map implementation without fixing its
/// key, value, or hasher-state types.
///
/// The generic associated type lets generic code (such as the shared test
/// suite in this module) instantiate one implementation as `Map<K, V, S>` for
/// whichever `K`, `V`, and `S` it needs.
trait HashMapFamily {
/// The concrete map type of this family for the given key type `K`, value
/// type `V`, and hasher state `S`.
type Map<K, V, S>: HashMap<K, V, S>;
}
/// Common interface implemented by the hash maps in this module.
///
/// `K` is the key type, `V` the value type, and `S` the hasher state.
/// Consuming a map yields its `(K, V)` pairs through the `IntoIterator`
/// supertrait.
trait HashMap<K, V, S>: IntoIterator<Item = (K, V)> {
/// Creates a map that uses `state` as its hasher state.
fn with_hasher(state: S) -> Self;
/// Returns the number of entries the map reports as stored.
fn len(&self) -> usize;
/// Returns `true` when `len()` is zero.
fn is_empty(&self) -> bool {
self.len() == 0
}
/// Looks up `key`, returning a reference to its value, or `None` when the
/// key is not present.
fn get(&self, key: &K) -> Option<&V>
where
K: Eq + Hash,
S: BuildHasher;
/// Stores `value` under `key`.
///
/// The `Option<V>` result carries the value that was replaced, if any; the
/// `SimpleOAHashMap` implementation returns the previous contents of the
/// bucket it writes to.
fn insert(&mut self, key: K, value: V) -> Option<V>
where
K: Eq + Hash,
S: BuildHasher;
}
#[cfg(test)]
mod tests {
use std::hash::{BuildHasher, BuildHasherDefault, Hasher, RandomState};
use super::{HashMap, HashMapFamily};
/// Hasher that produces the same hash for every input, forcing all keys to
/// collide.
#[derive(Default)]
struct CollidingHasher;

impl Hasher for CollidingHasher {
    /// Discards the bytes it is fed; they never influence the result.
    fn write(&mut self, _: &[u8]) {}

    /// Always reports `0`, no matter what was written beforehand.
    fn finish(&self) -> u64 {
        0_u64
    }
}
/// Runs the shared behavioral test suite against the map family `M`.
///
/// Covers lookups in an empty map, inserting distinct keys, overriding an
/// existing key (including the value `insert` hands back), bulk insertion at
/// several sizes, and bulk insertion where every key hashes to the same value.
///
/// # Panics
///
/// Panics as soon as any expectation about the map's behavior is violated.
pub(super) fn run_tests<M>()
where
    M: HashMapFamily,
{
    let mk_str = || M::Map::<&str, &str, _>::with_hasher(RandomState::new());

    // A fresh map is empty and lookups miss; the lookup is repeated to make
    // sure a miss leaves the map usable.
    let m = mk_str();
    assert!(m.is_empty());
    assert_eq!(m.len(), 0);
    assert_eq!(m.get(&"uwu"), None);
    assert_eq!(m.get(&"uwu"), None);

    // Distinct keys: each insert reports that nothing was replaced.
    let mut m = mk_str();
    assert_eq!(m.insert("hello", "world"), None);
    assert!(!m.is_empty());
    assert_eq!(m.get(&"hello"), Some(&"world"));
    assert_eq!(m.len(), 1);
    assert_eq!(m.insert("aaa", "yes"), None);
    assert_eq!(m.get(&"hello"), Some(&"world"));
    assert_eq!(m.get(&"aaa"), Some(&"yes"));
    assert_eq!(m.len(), 2);

    // Overriding: the second insert hands back the replaced value and the
    // entry count stays the same.
    let mut m = mk_str();
    assert_eq!(m.insert("hello", "world"), None);
    assert_eq!(m.get(&"hello"), Some(&"world"));
    assert_eq!(m.len(), 1);
    assert_eq!(m.insert("hello", "no"), Some("world"));
    assert_eq!(m.get(&"hello"), Some(&"no"));
    assert_eq!(m.len(), 1);

    // Bulk insertion with a regular hasher at increasing sizes.
    for count in [1, 10, 100, 1000, 10_000, 100_000] {
        test_many::<M, _>(count, RandomState::new());
    }

    // Bulk insertion where every key collides.
    test_many::<M, _>(5000, BuildHasherDefault::<CollidingHasher>::default());
}
/// Inserts the keys `0..count` (each mapped to itself) into a map of family
/// `M` built with hasher state `h`, then consumes the map and checks that
/// every key comes back exactly once with its own value.
fn test_many<M: HashMapFamily, H: BuildHasher>(count: usize, h: H) {
    let mut map = M::Map::with_hasher(h);
    for n in 0..count {
        map.insert(n, n);
    }

    // `seen[k]` flips to true the first time key `k` is yielded.
    let mut seen = vec![false; count];
    for (key, value) in map {
        assert_eq!(key, value);
        let already_seen = std::mem::replace(&mut seen[key], true);
        assert!(!already_seen, "duplicate element");
    }

    // Report the lowest key that never showed up, if there is one.
    if let Some(i) = seen.iter().position(|&was_seen| !was_seen) {
        panic!("element {i} was lost");
    }
}
}

View file

@ -1,3 +1,4 @@
use super::{HashMap, HashMapFamily};
use std::{
hash::{BuildHasher, Hash, RandomState},
vec,
@ -5,20 +6,44 @@ use std::{
type Entry<K, V> = Option<(K, V)>;
pub struct HashMap<K, V, S = RandomState> {
/// Hash map that keeps its entries in a single flat vector of buckets.
///
/// `S` is the hasher state used to hash keys and defaults to `RandomState`.
pub struct SimpleOAHashMap<K, V, S = RandomState> {
/// Bucket storage; each bucket is either `None` (empty) or `Some((key, value))`.
buckets: Vec<Entry<K, V>>,
/// Entry counter reported by `len()`; bumped whenever `insert` writes into
/// an empty bucket.
filled: usize,
/// Hasher state used to compute the bucket of a key.
s: S,
}
impl<K: Eq + Hash, V> HashMap<K, V, RandomState> {
impl<K: Eq + Hash, V> SimpleOAHashMap<K, V, RandomState> {
    /// Creates an empty map whose keys are hashed with a freshly created
    /// `RandomState`.
    pub fn new() -> Self {
        let state = RandomState::new();
        Self::with_hasher(state)
    }
}
impl<K: Eq + Hash, V, S: BuildHasher> HashMap<K, V, S> {
pub fn with_hasher(state: S) -> Self {
impl<K: Eq + Hash, V, S: BuildHasher> SimpleOAHashMap<K, V, S> {
/// Returns the index of the bucket that `key` hashes to.
///
/// # Panics
///
/// Panics when the map has no buckets, since there is nothing to index into.
fn bucket_of_elem(&self, key: &K) -> usize {
    let bucket_count = self.buckets.len();
    assert_ne!(bucket_count, 0, "cannot compute bucket of empty map");
    // Hashing through a reference yields the same hash as hashing the key.
    (self.s.hash_one(key) as usize) % bucket_count
}
/// Enlarges the bucket array and moves every existing entry into it.
///
/// An empty map grows to 8 buckets; otherwise the bucket count doubles. The
/// old entries are re-inserted through `extend`, so each one is hashed again
/// against the new bucket count.
fn grow(&mut self) {
    let len = self.buckets.len();
    let new = if len == 0 { 8 } else { len * 2 };
    let old = IntoIter::new(std::mem::take(&mut self.buckets));
    self.buckets = (0..new).map(|_| None).collect();
    // The re-insertion below bumps `filled` once per moved entry, so the
    // counter has to restart from zero. Without this every growth would count
    // the existing entries a second time and `len()` would over-report.
    self.filled = 0;
    self.extend(old);
}
}
impl<K: Eq + Hash, V, S: BuildHasher> Extend<(K, V)> for SimpleOAHashMap<K, V, S> {
    /// Inserts every `(key, value)` pair produced by `iter`, discarding
    /// whatever each individual `insert` returns.
    fn extend<T: IntoIterator<Item = (K, V)>>(&mut self, iter: T) {
        for (key, value) in iter {
            // The result of `insert` is intentionally thrown away right here.
            let _ = self.insert(key, value);
        }
    }
}
impl<K, V, S> super::HashMap<K, V, S> for SimpleOAHashMap<K, V, S> {
fn with_hasher(state: S) -> Self {
Self {
buckets: Vec::new(),
filled: 0,
@ -26,15 +51,19 @@ impl<K: Eq + Hash, V, S: BuildHasher> HashMap<K, V, S> {
}
}
pub fn len(&self) -> usize {
fn len(&self) -> usize {
self.filled
}
pub fn is_empty(&self) -> bool {
self.buckets.len() == 0
fn is_empty(&self) -> bool {
self.len() == 0
}
pub fn get(&self, key: &K) -> Option<&V> {
fn get(&self, key: &K) -> Option<&V>
where
K: Eq + Hash,
S: BuildHasher,
{
if self.is_empty() {
return None;
}
@ -52,7 +81,11 @@ impl<K: Eq + Hash, V, S: BuildHasher> HashMap<K, V, S> {
}
}
pub fn insert(&mut self, key: K, value: V) {
fn insert(&mut self, key: K, value: V) -> Option<V>
where
K: Eq + Hash,
S: BuildHasher,
{
if self.filled >= self.buckets.len() {
self.grow();
}
@ -65,35 +98,13 @@ impl<K: Eq + Hash, V, S: BuildHasher> HashMap<K, V, S> {
if bucket.is_none() {
self.filled += 1;
}
*bucket = Some((key, value));
return;
let before = std::mem::replace(bucket, Some((key, value)));
return before.map(|(_, v)| v);
} else {
self.grow();
}
}
}
fn bucket_of_elem(&self, key: &K) -> usize {
assert_ne!(self.buckets.len(), 0, "cannot compute bucket of empty map");
let hash = self.s.hash_one(&key) as usize;
hash % self.buckets.len()
}
fn grow(&mut self) {
let len = self.buckets.len();
let new = if len == 0 { 8 } else { len * 2 };
let old = IntoIter::new(std::mem::take(&mut self.buckets));
let new_buckets = (0..new).map(|_| None).collect();
self.buckets = new_buckets;
self.extend(old);
}
}
impl<K: Eq + Hash, V, S: BuildHasher> Extend<(K, V)> for HashMap<K, V, S> {
fn extend<T: IntoIterator<Item = (K, V)>>(&mut self, iter: T) {
iter.into_iter()
.for_each(|(key, value)| self.insert(key, value));
}
}
pub struct IntoIter<K, V> {
@ -116,7 +127,7 @@ impl<K, V> Iterator for IntoIter<K, V> {
}
}
impl<K, V, S> IntoIterator for HashMap<K, V, S> {
impl<K, V, S> IntoIterator for SimpleOAHashMap<K, V, S> {
type Item = (K, V);
type IntoIter = IntoIter<K, V>;
@ -126,78 +137,15 @@ impl<K, V, S> IntoIterator for HashMap<K, V, S> {
}
}
/// Marker type that names the `SimpleOAHashMap` implementation as a
/// `HashMapFamily`, so generic code can pick it by type parameter.
pub struct SimpleOAHashMapFamily;
impl HashMapFamily for SimpleOAHashMapFamily {
/// Every `K`, `V`, `S` combination maps to `SimpleOAHashMap<K, V, S>`.
type Map<K, V, S> = SimpleOAHashMap<K, V, S>;
}
#[cfg(test)]
mod tests {
use std::hash::{BuildHasher, BuildHasherDefault, Hasher, RandomState};
use super::HashMap;
#[test]
fn get_empty() {
let m = HashMap::<&str, ()>::new();
assert_eq!(m.get(&"uwu"), None);
assert_eq!(m.get(&"uwu"), None);
}
#[test]
fn insert() {
let mut m = HashMap::new();
m.insert("hello", "world");
assert_eq!(m.get(&"hello"), Some(&"world"));
assert_eq!(m.len(), 1);
m.insert("aaa", "yes");
assert_eq!(m.get(&"hello"), Some(&"world"));
assert_eq!(m.get(&"aaa"), Some(&"yes"));
assert_eq!(m.len(), 2);
}
#[test]
fn overriding() {
let mut m = HashMap::new();
m.insert("hello", "world");
assert_eq!(m.get(&"hello"), Some(&"world"));
assert_eq!(m.len(), 1);
m.insert("hello", "no");
assert_eq!(m.get(&"hello"), Some(&"no"));
assert_eq!(m.len(), 1);
}
#[derive(Default)]
struct CollidingHasher;
impl Hasher for CollidingHasher {
fn finish(&self) -> u64 {
0
}
fn write(&mut self, _bytes: &[u8]) {}
}
fn test_many<H: BuildHasher>(count: usize, h: H) {
let mut m = HashMap::with_hasher(h);
for i in 0..count {
m.insert(i, i);
}
let mut found = vec![false; count];
for (k, v) in m.into_iter() {
assert_eq!(k, v);
assert!(!found[k], "duplicate element");
found[k] = true;
}
for (i, found) in found.iter().enumerate() {
assert!(found, "element {i} was lost");
}
}
#[test]
fn many_elements() {
for count in [1, 10, 100, 1000, 10_000, 100_000] {
test_many(count, RandomState::new());
}
}
#[test]
fn many_many_collisions() {
test_many(5000, BuildHasherDefault::<CollidingHasher>::default());
fn do_tests() {
crate::hashmaps::tests::run_tests::<super::SimpleOAHashMapFamily>();
}
}

View file

@ -5,7 +5,7 @@
#![feature(strict_provenance)]
pub mod cfg_match;
pub mod hashmap;
pub mod hashmaps;
pub mod innocent_linked_list;
pub mod scratch;
pub mod sendsync;