/// Cursor over a byte slice that hands out bits, starting at the
/// least-significant bit of each byte.
struct Bitstream<'a> {
    /// Bytes that are not fully consumed yet; `data[0]` is the byte being read.
    data: &'a [u8],
    /// Number of bits of `data[0]` that were already consumed (always `0..8`).
    pos_bit: usize,
}

impl Bitstream<'_> {
    /// Reads `len` bits and returns them as an integer, or `None` when the
    /// input runs out first (bits consumed up to that point stay consumed).
    ///
    /// Within one byte the bits are taken from `pos_bit` upwards, so the first
    /// bit read ends up in the least-significant position. When a read spans
    /// several bytes, the chunk taken from the *earlier* byte is placed in the
    /// *higher* bits of the result.
    ///
    /// NOTE(review): RFC 1951 packs non-Huffman fields least-significant bit
    /// first, which would put the earlier chunk in the low bits instead. The
    /// existing unit test pins the current order, so it is kept here; use
    /// [`Bitstream::read_bits_lsb`] for fields that may straddle a byte.
    ///
    /// # Panics
    ///
    /// Panics if `len` is 64 or more.
    fn read_bits_normal(&mut self, mut len: usize) -> Option<u64> {
        assert!(
            len < u64::BITS as usize,
            "cannot read {len} bits into a u64"
        );

        let mut result = 0u64;
        while len > 0 {
            let &current = self.data.first()?;

            // Take as much as the current byte still offers, but never more
            // than requested. (Using `len % 8` here would yield 0 for whole
            // bytes and spin forever.)
            let take = len.min(8 - self.pos_bit);

            // Build the mask in u64: `1u8 << 8` would overflow for a full byte.
            let mask = (1u64 << take) - 1;

            result <<= take;
            result |= (u64::from(current) >> self.pos_bit) & mask;

            len -= take;
            self.pos_bit += take;
            if self.pos_bit == 8 {
                self.data = &self.data[1..];
                self.pos_bit = 0;
            }
        }
        Some(result)
    }

    /// Reads `count` bits one at a time and assembles them with the first bit
    /// read as the least-significant bit, independent of byte boundaries.
    ///
    /// `count` must not exceed 16. Returns `None` when the input runs out.
    fn read_bits_lsb(&mut self, count: u32) -> Option<u16> {
        let mut value = 0u16;
        for shift in 0..count {
            let bit = self.read_bits_normal(1)?;
            value |= (bit as u16) << shift;
        }
        Some(value)
    }

    /// Reads a 5-bit fixed-length distance code. Like every Huffman code it is
    /// stored most-significant bit first. Returns `None` when the input runs out.
    fn read_fixed_distance_code(&mut self) -> Option<usize> {
        let mut code = 0usize;
        for _ in 0..5 {
            let bit = self.read_bits_normal(1)?;
            code = (code << 1) | (bit as usize);
        }
        Some(code)
    }
}

/// Binary decoding tree for a canonical Huffman code.
struct HuffmanTree {
    /// Flat node storage. Index 0 is the root; because the root can never be
    /// anybody's child, a child index of 0 doubles as "no child yet".
    nodes: Vec<HuffmanNode>,
}

/// One node of a [`HuffmanTree`].
#[derive(Debug)]
enum HuffmanNode {
    /// A decoded symbol.
    Leaf(u16),
    /// An inner node holding the indices of the children for bit 0 and bit 1.
    Cont { zero: usize, one: usize },
}

/// Outcome of feeding a single bit into [`HuffmanTree::lookup_with_state`].
enum HuffmanLookupResult {
    /// A complete code was consumed and decodes to this symbol.
    Done(u16),
    /// More bits are needed; pass `next_state` to the next lookup.
    Incomplete { next_state: usize },
}

impl HuffmanTree {
    /// Code lengths of the fixed literal/length alphabet (RFC 1951, 3.2.6).
    fn fixed_code_lengths() -> [u8; 288] {
        let mut codes = [0; 288];
        codes[0..144].fill(8);
        codes[144..256].fill(9);
        codes[256..280].fill(7);
        codes[280..].fill(8);
        codes
    }

    /// Builds the decoding tree for the canonical Huffman code described by
    /// `lengths`, where `lengths[symbol]` is the code length in bits and 0
    /// means the symbol is unused (RFC 1951, 3.2.2).
    ///
    /// # Panics
    ///
    /// Panics if a length exceeds 15 bits or if the lengths are
    /// over-subscribed in a way that makes one code a prefix of another.
    fn from_lengths(lengths: &[u8]) -> Self {
        // Longest code length DEFLATE permits.
        const MAX_BITS: usize = 15;

        // Step 1: count the number of codes for each length.
        let mut bl_count = [0u32; MAX_BITS + 1];
        for &l in lengths {
            bl_count[usize::from(l)] += 1;
        }
        // Unused symbols take no code space.
        bl_count[0] = 0;

        // Step 2: smallest code value for each length.
        let mut next_code = [0u32; MAX_BITS + 1];
        let mut code = 0u32;
        for bits in 1..=MAX_BITS {
            code = (code + bl_count[bits - 1]) << 1;
            next_code[bits] = code;
        }

        // Step 3: hand out consecutive codes to the symbols of each length.
        let mut codes = vec![0u32; lengths.len()];
        for (slot, &l) in codes.iter_mut().zip(lengths) {
            let len = usize::from(l);
            if len != 0 {
                *slot = next_code[len];
                next_code[len] += 1;
            }
        }

        // Insert every code into the tree, most-significant bit first.
        let mut nodes = vec![HuffmanNode::Cont { zero: 0, one: 0 }];
        for (symbol, (&len, &code)) in lengths.iter().zip(&codes).enumerate() {
            if len == 0 {
                continue;
            }
            let mut parent_node_idx = 0;
            for j in 0..len {
                let bit = (code >> (len - 1 - j)) & 0b1;
                let next_node_idx = nodes.len();
                let HuffmanNode::Cont { zero, one } = &mut nodes[parent_node_idx] else {
                    panic!("over-subscribed Huffman code lengths: a code is a prefix of another");
                };
                let this_ref = if bit == 0 { zero } else { one };

                if j == len - 1 {
                    // Last bit of the code: attach the leaf.
                    *this_ref = next_node_idx;
                    nodes.push(HuffmanNode::Leaf(symbol as u16));
                } else if *this_ref > 0 {
                    // The inner node already exists: just walk down.
                    parent_node_idx = *this_ref;
                } else {
                    // Create the inner node, then walk down.
                    *this_ref = next_node_idx;
                    nodes.push(HuffmanNode::Cont { zero: 0, one: 0 });
                    parent_node_idx = next_node_idx;
                }
            }
        }

        Self { nodes }
    }

    /// Advances the decoder by one `bit` starting from inner node `state`
    /// (0 is the root).
    ///
    /// # Panics
    ///
    /// Panics if `state` does not refer to an inner node.
    fn lookup_with_state(&self, state: usize, bit: u64) -> HuffmanLookupResult {
        let HuffmanNode::Cont { zero, one } = self.nodes[state] else {
            unreachable!("invalid state, should point at continuation node");
        };

        let next_state = if bit == 0 { zero } else { one };

        match self.nodes[next_state] {
            HuffmanNode::Leaf(leaf) => HuffmanLookupResult::Done(leaf),
            HuffmanNode::Cont { .. } => HuffmanLookupResult::Incomplete { next_state },
        }
    }

    /// Renders the tree as a Graphviz `digraph`. Inner nodes are named by the
    /// bit path leading to them (prefixed with `_`); leaves by their symbol.
    // Debugging aid; the decoder itself does not call it.
    #[allow(dead_code)]
    fn to_dot(&self) -> String {
        use std::fmt::Write;

        fn print(
            tree: &HuffmanTree,
            out: &mut String,
            parent: Option<&str>,
            choice: &str,
            this_node: &str,
            node: &HuffmanNode,
        ) -> std::fmt::Result {
            match node {
                HuffmanNode::Leaf(number) => {
                    writeln!(
                        out,
                        "{parent} -> {number} [label={choice}]",
                        parent = parent.unwrap(),
                    )?;
                }
                HuffmanNode::Cont { zero, one } => {
                    if let Some(parent) = parent {
                        writeln!(out, "{parent} -> {this_node} [label={choice}]")?;
                    }
                    for (choice, child) in [("0", *zero), ("1", *one)] {
                        // Index 0 marks an absent child; following it would
                        // recurse into the root forever.
                        if child == 0 {
                            continue;
                        }
                        print(
                            tree,
                            out,
                            Some(this_node),
                            choice,
                            &format!("{this_node}{choice}"),
                            &tree.nodes[child],
                        )?;
                    }
                }
            }
            Ok(())
        }

        let mut out = String::new();
        writeln!(out, "digraph huffman_tree {{").expect("writing to a String cannot fail");
        print(self, &mut out, None, "", "_", &self.nodes[0])
            .expect("writing to a String cannot fail");
        writeln!(out, "}}").expect("writing to a String cannot fail");
        out
    }
}

// https://datatracker.ietf.org/doc/html/rfc1951
/// Decompresses the raw DEFLATE stream `data` and appends the result to `out`.
///
/// Only blocks compressed with the fixed Huffman code (`BTYPE == 1`) are
/// supported. Back-references may only reach bytes produced by this call, not
/// bytes that were already in `out`.
///
/// # Panics
///
/// Panics if the stream is truncated, uses a block type other than fixed
/// Huffman, or contains an invalid length code, distance code or distance.
pub fn inflate(data: &[u8], out: &mut Vec<u8>) {
    // Base length and number of extra bits for length codes 257..=285.
    const LENGTH_BASE: [u16; 29] = [
        3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115,
        131, 163, 195, 227, 258,
    ];
    const LENGTH_EXTRA_BITS: [u32; 29] = [
        0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0,
    ];
    // Base distance and number of extra bits for distance codes 0..=29.
    const DIST_BASE: [u16; 30] = [
        1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537,
        2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577,
    ];
    const DIST_EXTRA_BITS: [u32; 30] = [
        0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12,
        13, 13,
    ];

    // The fixed code is the same for every block, so build its tree once.
    let tree = HuffmanTree::from_lengths(&HuffmanTree::fixed_code_lengths());
    let mut stream = Bitstream { data, pos_bit: 0 };
    // Back-references must not reach into whatever the caller already stored.
    let origin = out.len();

    loop {
        let bfinal = stream
            .read_bits_normal(1)
            .expect("truncated stream: missing BFINAL");
        // BTYPE may straddle a byte boundary, so read it bit by bit.
        let btype = stream
            .read_bits_lsb(2)
            .expect("truncated stream: missing BTYPE");
        assert_eq!(btype, 1, "not a static huffman tree construction");

        let mut node_state = 0;
        loop {
            let bit = stream
                .read_bits_normal(1)
                .expect("truncated stream: block is missing its end-of-block code");

            let value = match tree.lookup_with_state(node_state, bit) {
                HuffmanLookupResult::Incomplete { next_state } => {
                    node_state = next_state;
                    continue;
                }
                HuffmanLookupResult::Done(value) => value,
            };
            node_state = 0;

            match value {
                // Literal byte.
                0..=255 => out.push(value as u8),
                // End of block.
                256 => break,
                // <length, distance> back-reference.
                257..=285 => {
                    let length_idx = usize::from(value - 257);
                    let length_extra = stream
                        .read_bits_lsb(LENGTH_EXTRA_BITS[length_idx])
                        .expect("truncated stream: missing length extra bits");
                    let length = usize::from(LENGTH_BASE[length_idx] + length_extra);

                    let dist_code = stream
                        .read_fixed_distance_code()
                        .expect("truncated stream: missing distance code");
                    assert!(dist_code < 30, "invalid distance code {dist_code}");
                    let dist_extra = stream
                        .read_bits_lsb(DIST_EXTRA_BITS[dist_code])
                        .expect("truncated stream: missing distance extra bits");
                    let distance = usize::from(DIST_BASE[dist_code]) + usize::from(dist_extra);

                    let produced = out.len() - origin;
                    assert!(
                        distance <= produced,
                        "back-reference distance {distance} exceeds the {produced} bytes produced so far"
                    );

                    // Copy byte by byte: source and destination overlap
                    // whenever `length > distance`.
                    let start = out.len() - distance;
                    for offset in 0..length {
                        let byte = out[start + offset];
                        out.push(byte);
                    }
                }
                // 286 and 287 take part in the fixed code but never occur in
                // valid data.
                _ => panic!("invalid literal/length symbol {value}"),
            }
        }

        if bfinal == 1 {
            break;
        }
    }
}

#[cfg(test)]
mod tests {
    use super::{inflate, Bitstream, HuffmanTree};

    #[test]
    fn bitstream() {
        let bytes = [0b110_010_01_u8, 0b010_111_01];
        let mut stream = Bitstream {
            data: &bytes,
            pos_bit: 0,
        };
        assert_eq!(stream.read_bits_normal(2).unwrap(), 0b01);
        assert_eq!(stream.read_bits_normal(3).unwrap(), 0b010);
        assert_eq!(stream.read_bits_normal(5).unwrap(), 0b11001);
        assert_eq!(stream.read_bits_normal(3).unwrap(), 0b111);
        assert_eq!(stream.read_bits_normal(3).unwrap(), 0b010);
    }

    #[test]
    fn decode() {
        let lengths = HuffmanTree::fixed_code_lengths();
        HuffmanTree::from_lengths(&lengths);
    }

    #[test]
    fn inflate_literal_only() {
        let mut out = Vec::new();
        inflate(&[0x4b, 0x04, 0x00], &mut out);
        assert_eq!(out, b"a");
    }

    #[test]
    fn inflate_back_reference() {
        let mut out = Vec::new();
        inflate(&[0x4b, 0x84, 0x03, 0x00], &mut out);
        assert_eq!(out, b"aaaaaaaaaa");
    }
}