From 9e4255f239993b5ed5ca17d5f34d47d354b56837 Mon Sep 17 00:00:00 2001 From: Nilstrieb <48135649+Nilstrieb@users.noreply.github.com> Date: Sat, 30 Dec 2023 15:38:58 +0100 Subject: [PATCH] hashmap --- Cargo.lock | 16 ++-- src/hashmap.rs | 203 ++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 1 + src/sendsync.rs | 1 + 4 files changed, 213 insertions(+), 8 deletions(-) create mode 100644 src/hashmap.rs diff --git a/Cargo.lock b/Cargo.lock index a863193..786c8b0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13,27 +13,27 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.49" +version = "1.0.71" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57a8eca9f9c4ffde41714334dee777596264c7825420f521abc92b5b5deb63a5" +checksum = "75cb1540fadbd5b8fbccc4dddad2734eba435053f725621c070711a14bb5f4b8" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.23" +version = "1.0.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" dependencies = [ "proc-macro2", ] [[package]] name = "syn" -version = "1.0.107" +version = "1.0.109" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" dependencies = [ "proc-macro2", "quote", @@ -42,9 +42,9 @@ dependencies = [ [[package]] name = "unicode-ident" -version = "1.0.6" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] name = "uwu" diff --git a/src/hashmap.rs b/src/hashmap.rs new file mode 100644 index 0000000..098d528 --- /dev/null 
// Contents of `src/hashmap.rs`, the file added by the originating patch
// (`+++ b/src/hashmap.rs @@ -0,0 +1,203 @@`), restored to plain Rust source.

use std::{
    hash::{BuildHasher, Hash, RandomState},
    vec,
};

/// One slot of the table: `None` while vacant, `Some((key, value))` once occupied.
type Entry<K, V> = Option<(K, V)>;

/// A hash map stored in one flat vector of slots and resolved by linear probing.
///
/// A key's home slot is `hash(key) % buckets.len()`. Probing walks forward from
/// the home slot to the end of the vector and never wraps around; when no usable
/// slot is found in that range the table is grown and the search is retried.
/// There is no removal operation, so every key sits in an unbroken run of
/// occupied slots that starts at its home slot.
pub struct HashMap<K, V, S = RandomState> {
    /// Slot storage; empty until the first insertion.
    buckets: Vec<Entry<K, V>>,
    /// Number of occupied slots, i.e. the number of stored key/value pairs.
    filled: usize,
    /// Builds the hasher used to turn keys into slot indices.
    s: S,
}

impl<K: Eq + Hash, V> HashMap<K, V, RandomState> {
    /// Creates an empty map that hashes with a freshly seeded [`RandomState`].
    pub fn new() -> Self {
        Self::with_hasher(RandomState::new())
    }
}

impl<K: Eq + Hash, V> Default for HashMap<K, V, RandomState> {
    /// Equivalent to [`HashMap::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl<K: Eq + Hash, V, S: BuildHasher> HashMap<K, V, S> {
    /// Creates an empty map that hashes keys with `state`.
    ///
    /// No slots are allocated until the first insertion.
    pub fn with_hasher(state: S) -> Self {
        Self {
            buckets: Vec::new(),
            filled: 0,
            s: state,
        }
    }

    /// Returns the number of key/value pairs stored in the map.
    pub fn len(&self) -> usize {
        self.filled
    }

    /// Returns `true` when the map holds no key/value pairs.
    pub fn is_empty(&self) -> bool {
        self.filled == 0
    }

    /// Returns a reference to the value stored under `key`, or `None` if the
    /// key is not present.
    pub fn get(&self, key: &K) -> Option<&V> {
        // `bucket_of_elem` divides by the slot count, so it must not run on an
        // unallocated table.
        if self.buckets.is_empty() {
            return None;
        }
        let start = self.bucket_of_elem(key);

        // Walk the run of occupied slots starting at the home slot; the first
        // vacant slot proves the key is absent.
        self.buckets[start..]
            .iter()
            .map_while(|slot| slot.as_ref())
            .find(|(candidate, _)| candidate == key)
            .map(|(_, value)| value)
    }

    /// Stores `value` under `key`, replacing any value previously stored under
    /// an equal key.
    pub fn insert(&mut self, key: K, value: V) {
        if self.filled >= self.buckets.len() {
            self.grow();
        }
        loop {
            let start = self.bucket_of_elem(&key);
            // First slot at or after the home slot that is vacant or already
            // holds this key.
            let slot = self.buckets[start..].iter_mut().find(|slot| match slot {
                None => true,
                Some((existing, _)) => *existing == key,
            });
            match slot {
                Some(slot) => {
                    if slot.is_none() {
                        self.filled += 1;
                    }
                    *slot = Some((key, value));
                    return;
                }
                // Everything from the home slot to the end is taken by other
                // keys: enlarge the table and try again.
                None => self.grow(),
            }
        }
    }

    /// Returns the index of the home slot for `key`.
    ///
    /// # Panics
    ///
    /// Panics if the table has no slots yet.
    fn bucket_of_elem(&self, key: &K) -> usize {
        assert_ne!(self.buckets.len(), 0, "cannot compute bucket of empty map");
        // Truncating the 64-bit hash on narrower targets is fine: only the
        // remainder modulo the slot count is used.
        let hash = self.s.hash_one(key) as usize;
        hash % self.buckets.len()
    }

    /// Replaces the slot vector with one twice as large (8 slots for a table
    /// that has none yet) and re-inserts every stored pair.
    fn grow(&mut self) {
        let new_len = match self.buckets.len() {
            0 => 8,
            len => len * 2,
        };
        let old = IntoIter::new(std::mem::take(&mut self.buckets));
        self.buckets = (0..new_len).map(|_| None).collect();
        // Re-insertion goes through `insert`, which counts every pair it
        // places; without this reset each resize would count the existing
        // pairs a second time and `len()` would over-report.
        self.filled = 0;
        self.extend(old);
    }
}

impl<K: Eq + Hash, V, S: BuildHasher> Extend<(K, V)> for HashMap<K, V, S> {
    /// Inserts every pair yielded by `iter`; later pairs overwrite earlier
    /// ones that have an equal key.
    fn extend<T: IntoIterator<Item = (K, V)>>(&mut self, iter: T) {
        for (key, value) in iter {
            self.insert(key, value);
        }
    }
}

/// Owning iterator over the key/value pairs of a [`HashMap`].
///
/// Pairs are yielded in slot order; vacant slots are skipped.
pub struct IntoIter<K, V> {
    buckets: std::iter::FilterMap<vec::IntoIter<Entry<K, V>>, fn(Entry<K, V>) -> Option<(K, V)>>,
}

impl<K, V> IntoIter<K, V> {
    /// Wraps a raw slot vector, dropping the vacant slots while iterating.
    fn new(buckets: Vec<Entry<K, V>>) -> Self {
        // Name the function-pointer type explicitly so it matches the field type.
        let occupied_only: fn(Entry<K, V>) -> Option<(K, V)> = std::convert::identity;
        IntoIter {
            buckets: buckets.into_iter().filter_map(occupied_only),
        }
    }
}

impl<K, V> Iterator for IntoIter<K, V> {
    type Item = (K, V);

    fn next(&mut self) -> Option<Self::Item> {
        self.buckets.next()
    }
}

impl<K, V, S> IntoIterator for HashMap<K, V, S> {
    type Item = (K, V);

    type IntoIter = IntoIter<K, V>;

    fn into_iter(self) -> Self::IntoIter {
        IntoIter::new(self.buckets)
    }
}

#[cfg(test)]
mod tests {
    use std::hash::{BuildHasher, BuildHasherDefault, Hasher, RandomState};

    use super::HashMap;

    #[test]
    fn get_empty() {
        let m = HashMap::<&str, ()>::new();
        assert_eq!(m.get(&"uwu"), None);
        assert_eq!(m.get(&"uwu"), None);
    }

    #[test]
    fn insert() {
        let mut m = HashMap::new();
        m.insert("hello", "world");
        assert_eq!(m.get(&"hello"), Some(&"world"));
        assert_eq!(m.len(), 1);
        m.insert("aaa", "yes");
        assert_eq!(m.get(&"hello"), Some(&"world"));
        assert_eq!(m.get(&"aaa"), Some(&"yes"));
        assert_eq!(m.len(), 2);
    }

    #[test]
    fn overriding() {
        let mut m = HashMap::new();
        m.insert("hello", "world");
        assert_eq!(m.get(&"hello"), Some(&"world"));
        assert_eq!(m.len(), 1);
        m.insert("hello", "no");
        assert_eq!(m.get(&"hello"), Some(&"no"));
        assert_eq!(m.len(), 1);
    }

    /// Hasher that maps every key to hash 0, forcing maximal collisions.
    #[derive(Default)]
    struct CollidingHasher;
    impl Hasher for CollidingHasher {
        fn finish(&self) -> u64 {
            0
        }
        fn write(&mut self, _bytes: &[u8]) {}
    }

    fn test_many<H: BuildHasher>(count: usize, h: H) {
        let mut m = HashMap::with_hasher(h);

        for i in 0..count {
            m.insert(i, i);
        }
        // Resizing must not disturb the element count.
        assert_eq!(m.len(), count);

        let mut found = vec![false; count];
        for (k, v) in m.into_iter() {
            assert_eq!(k, v);
            assert!(!found[k], "duplicate element");
            found[k] = true;
        }
        for (i, found) in found.iter().enumerate() {
            assert!(*found, "element {} was lost", i);
        }
    }

    #[test]
    fn many_elements() {
        for count in [1, 10, 100, 1000, 10_000, 100_000] {
            test_many(count, RandomState::new());
        }
    }

    #[test]
    fn many_many_collisions() {
        test_many(5000, BuildHasherDefault::<CollidingHasher>::default());
    }
}

// ---------------------------------------------------------------------------
// Remaining hunks of the originating patch. They modify other files and are
// reproduced verbatim here so that no part of the patch is lost.
//
// diff --git a/src/lib.rs b/src/lib.rs
// index 301a8c3..4a66f44 100644
// --- a/src/lib.rs
// +++ b/src/lib.rs
// @@ -5,6 +5,7 @@
//  #![feature(strict_provenance)]
//
//  pub mod cfg_match;
// +pub mod hashmap;
//  pub mod innocent_linked_list;
//  pub mod scratch;
//  pub mod sendsync;
// diff --git a/src/sendsync.rs b/src/sendsync.rs
// index 62c5043..98e2a61 100644
// --- a/src/sendsync.rs
// +++ b/src/sendsync.rs
// @@ -1,4 +1,5 @@
//  #![cfg_attr(not(test), allow(unused))]
// +#![allow(dropping_copy_types)]
//
//  use std::{
//      cell::{Cell, UnsafeCell},