This commit is contained in:
nora 2025-08-31 16:28:58 +02:00
commit 00a411a728
5 changed files with 311 additions and 0 deletions

262
src/lib.rs Normal file
View file

@ -0,0 +1,262 @@
/// Cursor over a byte slice that hands out a few bits at a time.
struct Bitstream<'a> {
    /// Bytes that are not yet fully consumed; `data[0]` is the byte being read.
    data: &'a [u8],
    /// How many bits of `data[0]` have been consumed, counted from the
    /// least-significant bit. Expected to stay in `0..8`; `read_bits_normal`
    /// resets it to 0 whenever it reaches 8.
    pos_bit: usize,
}

impl Bitstream<'_> {
    /// Reads the next `len` bits and returns them as an integer.
    ///
    /// Within a single byte, bits are taken from the least-significant end
    /// upward and keep their relative order. When a read spans several bytes,
    /// the bits taken from the earlier byte end up in the more significant
    /// positions of the result.
    ///
    /// NOTE(review): RFC 1951 packs non-Huffman values least-significant bit
    /// first across byte boundaries as well, so a multi-bit read that straddles
    /// bytes may not match the DEFLATE convention. The ordering is kept because
    /// the existing unit test pins it; confirm before using this for such fields.
    ///
    /// Returns `None` when the input runs out before `len` bits were read. Bits
    /// consumed up to that point stay consumed.
    ///
    /// # Panics
    ///
    /// Panics if `len` is 64 or more.
    fn read_bits_normal(&mut self, mut len: usize) -> Option<u64> {
        assert!(
            len < u64::BITS as usize,
            "cannot read {len} bits into a u64 at once"
        );
        let mut result = 0u64;
        while len > 0 {
            let &current = self.data.first()?;
            // Take as much as this byte still offers, but never more than is
            // wanted. (`len % 8` here would yield 0 for multiples of 8 and the
            // loop would never make progress.)
            let take = len.min(8 - self.pos_bit);
            // Build the mask in u64: `take` can be 8, which overflows `1u8 << take`.
            let mask = (1u64 << take) - 1;
            result <<= take;
            result |= (u64::from(current) >> self.pos_bit) & mask;
            len -= take;
            self.pos_bit += take;
            if self.pos_bit == 8 {
                self.data = &self.data[1..];
                self.pos_bit = 0;
            }
        }
        Some(result)
    }
}
/// Binary decoding tree for a canonical Huffman code.
///
/// Nodes live in a flat vector and refer to each other by index. Index 0 is
/// always the root.
struct HuffmanTree {
    nodes: Vec<HuffmanNode>,
}

/// One node of a [`HuffmanTree`].
#[derive(Debug)]
enum HuffmanNode {
    /// A decoded symbol.
    Leaf(u16),
    /// An inner node holding the child index for a 0 bit and for a 1 bit.
    /// Index 0 (the root, which can never be a child) marks a missing child.
    Cont { zero: usize, one: usize },
}

/// Outcome of feeding one bit to [`HuffmanTree::lookup_with_state`].
enum HuffmanLookupResult {
    /// The bit completed a code; the payload is the decoded symbol.
    Done(u16),
    /// More bits are needed; pass `next_state` to the next lookup.
    Incomplete { next_state: usize },
}

impl HuffmanTree {
    /// Code lengths of the fixed literal/length alphabet (RFC 1951 section 3.2.6).
    fn fixed_code_lengths() -> [u8; 288] {
        let mut lengths = [0; 288];
        lengths[0..144].fill(8);
        lengths[144..256].fill(9);
        lengths[256..280].fill(7);
        lengths[280..].fill(8);
        lengths
    }

    /// Builds the decoding tree for the canonical Huffman code described by
    /// `lengths`, where `lengths[symbol]` is the code length of `symbol` and 0
    /// means the symbol is unused (RFC 1951 section 3.2.2).
    ///
    /// # Panics
    ///
    /// Panics if a length exceeds 15, if there are more than `u16::MAX + 1`
    /// symbols, or if the lengths are over-subscribed so that one code would
    /// have to continue through another code's leaf.
    fn from_lengths(lengths: &[u8]) -> Self {
        // Longest code length DEFLATE allows.
        const MAX_BITS: usize = 15;

        // Step 1: count the codes of each length.
        let mut bl_count = [0u32; MAX_BITS + 1];
        for &len in lengths {
            let len = usize::from(len);
            assert!(
                len <= MAX_BITS,
                "code length {len} exceeds the maximum of {MAX_BITS}"
            );
            bl_count[len] += 1;
        }
        // Unused symbols occupy no code space.
        bl_count[0] = 0;

        // Step 2: find the numerically smallest code of each length.
        let mut next_code = [0u32; MAX_BITS + 1];
        let mut code = 0u32;
        for bits in 1..=MAX_BITS {
            code = (code + bl_count[bits - 1]) << 1;
            next_code[bits] = code;
        }

        // Step 3: hand out consecutive codes in symbol order and insert each
        // one into the tree, most-significant bit first.
        let mut nodes = vec![HuffmanNode::Cont { zero: 0, one: 0 }];
        for (symbol, &len) in lengths.iter().enumerate() {
            if len == 0 {
                continue;
            }
            let symbol = u16::try_from(symbol).expect("symbol does not fit in u16");
            let code = next_code[usize::from(len)];
            next_code[usize::from(len)] += 1;

            let mut parent_idx = 0;
            for depth in 0..len {
                let remaining = len - 1 - depth;
                let bit = (code >> remaining) & 1;
                let new_idx = nodes.len();
                let HuffmanNode::Cont { zero, one } = &mut nodes[parent_idx] else {
                    unreachable!("over-subscribed code lengths: a code runs through a leaf")
                };
                let child = if bit == 0 { zero } else { one };
                if remaining == 0 {
                    // Last bit of the code: attach the leaf.
                    *child = new_idx;
                    nodes.push(HuffmanNode::Leaf(symbol));
                } else if *child > 0 {
                    // An earlier code already created this inner node.
                    parent_idx = *child;
                } else {
                    *child = new_idx;
                    nodes.push(HuffmanNode::Cont { zero: 0, one: 0 });
                    parent_idx = new_idx;
                }
            }
        }
        Self { nodes }
    }

    /// Advances the decoder by one `bit` (0 selects the zero child, anything
    /// else the one child) starting from inner node `state`; use 0 to start at
    /// the root.
    ///
    /// A missing child is stored as index 0, which is also the root, so a bit
    /// sequence that is not a valid code yields `Incomplete { next_state: 0 }`.
    ///
    /// # Panics
    ///
    /// Panics if `state` does not refer to an inner node.
    fn lookup_with_state(&self, state: usize, bit: u64) -> HuffmanLookupResult {
        let HuffmanNode::Cont { zero, one } = self.nodes[state] else {
            unreachable!("invalid state, should point at continuation node");
        };
        let next_state = if bit == 0 { zero } else { one };
        match self.nodes[next_state] {
            HuffmanNode::Leaf(leaf) => HuffmanLookupResult::Done(leaf),
            HuffmanNode::Cont { .. } => HuffmanLookupResult::Incomplete { next_state },
        }
    }

    /// Renders the tree as a Graphviz `digraph`.
    ///
    /// Inner nodes are named after the path that leads to them (`_` for the
    /// root, then one `0`/`1` per edge), leaves after their symbol number, and
    /// every edge is labelled with the bit it represents.
    fn to_dot(&self) -> String {
        let mut out = String::from("digraph huffman_tree {\n");
        self.write_dot_edges(&mut out, 0, None, "", "_");
        out.push_str("}\n");
        out
    }

    /// Appends the edge from `parent` to the node at `node_idx`, followed by
    /// the edges of that node's whole subtree, to `out`.
    fn write_dot_edges(
        &self,
        out: &mut String,
        node_idx: usize,
        parent: Option<&str>,
        choice: &str,
        name: &str,
    ) {
        use std::fmt::Write;
        const INFALLIBLE: &str = "writing to a String cannot fail";

        match &self.nodes[node_idx] {
            HuffmanNode::Leaf(symbol) => {
                let parent = parent.expect("a leaf is never the root");
                writeln!(out, "{parent} -> {symbol} [label={choice}]").expect(INFALLIBLE);
            }
            HuffmanNode::Cont { zero, one } => {
                if let Some(parent) = parent {
                    writeln!(out, "{parent} -> {name} [label={choice}]").expect(INFALLIBLE);
                }
                for (child, label) in [(*zero, "0"), (*one, "1")] {
                    // Index 0 means "no child"; following it would re-enter the
                    // root and recurse forever.
                    if child != 0 {
                        self.write_dot_edges(
                            out,
                            child,
                            Some(name),
                            label,
                            &format!("{name}{label}"),
                        );
                    }
                }
            }
        }
    }
}
// https://datatracker.ietf.org/doc/html/rfc1951
pub fn inflate(data: &[u8], out: &mut Vec<u8>) {
std::fs::write(
"output.dot",
HuffmanTree::from_lengths(&HuffmanTree::fixed_code_lengths()).to_dot(),
)
.unwrap();
let mut data = Bitstream { data, pos_bit: 0 };
loop {
let bfinal = data.read_bits_normal(1).unwrap();
let btype = data.read_bits_normal(2).unwrap();
assert_eq!(btype, 1, "not a static huffman tree construction");
let tree = HuffmanTree::from_lengths(&HuffmanTree::fixed_code_lengths());
let mut node_state = 0;
loop {
let bit = data.read_bits_normal(1).unwrap();
let result = tree.lookup_with_state(node_state, bit);
match result {
HuffmanLookupResult::Done(value) => {
dbg!(value);
node_state = 0;
match value {
0..256 => {
out.push(value as u8);
}
256 => break,
257..286 => {
let length = match value {
257..265 => value - (257 - 3),
_ => todo!("lz77 more"),
};
}
286.. => unreachable!("invalid byte"),
}
}
HuffmanLookupResult::Incomplete { next_state } => node_state = next_state,
}
}
if bfinal == 1 {
break;
}
}
}
#[cfg(test)]
mod tests {
    use super::{Bitstream, HuffmanLookupResult, HuffmanTree};

    /// Feeds `bits` (a string of `0`/`1`, first bit first) through `tree` and
    /// returns the symbol reached by the last bit, if any.
    fn decode_code(tree: &HuffmanTree, bits: &str) -> Option<u16> {
        let mut state = 0;
        let mut symbol = None;
        for ch in bits.chars() {
            assert!(symbol.is_none(), "code finished before all bits were used");
            match tree.lookup_with_state(state, u64::from(ch == '1')) {
                HuffmanLookupResult::Done(value) => symbol = Some(value),
                HuffmanLookupResult::Incomplete { next_state } => state = next_state,
            }
        }
        symbol
    }

    #[test]
    fn bitstream() {
        let bytes = [0b110_010_01_u8, 0b010_111_01];
        let mut stream = Bitstream {
            data: &bytes,
            pos_bit: 0,
        };
        assert_eq!(stream.read_bits_normal(2).unwrap(), 0b01);
        assert_eq!(stream.read_bits_normal(3).unwrap(), 0b010);
        assert_eq!(stream.read_bits_normal(5).unwrap(), 0b11001);
        assert_eq!(stream.read_bits_normal(3).unwrap(), 0b111);
        assert_eq!(stream.read_bits_normal(3).unwrap(), 0b010);
    }

    /// The fixed tree must reproduce the first and last code of each range in
    /// the table of RFC 1951 section 3.2.6.
    #[test]
    fn decode() {
        let lengths = HuffmanTree::fixed_code_lengths();
        let tree = HuffmanTree::from_lengths(&lengths);
        assert_eq!(decode_code(&tree, "00110000"), Some(0));
        assert_eq!(decode_code(&tree, "10111111"), Some(143));
        assert_eq!(decode_code(&tree, "110010000"), Some(144));
        assert_eq!(decode_code(&tree, "111111111"), Some(255));
        assert_eq!(decode_code(&tree, "0000000"), Some(256));
        assert_eq!(decode_code(&tree, "0010111"), Some(279));
        assert_eq!(decode_code(&tree, "11000000"), Some(280));
        assert_eq!(decode_code(&tree, "11000111"), Some(287));
    }
}

32
src/main.rs Normal file
View file

@ -0,0 +1,32 @@
use std::ffi::CStr;
/// Decompresses the gzip file named by the first command-line argument and
/// dumps the result with `dbg!`, both as raw bytes and as UTF-8 text.
///
/// Only the subset of gzip this tool needs is handled: compression method 8
/// and a header whose flags are either empty or `FNAME` alone. The CRC32 and
/// ISIZE trailer is stripped but not verified.
///
/// # Panics
///
/// Panics if the argument is missing, the file cannot be read, or the file is
/// not a gzip member of the supported shape.
fn main() {
    /// Size of the fixed part of a gzip header.
    const HEADER_LEN: usize = 10;
    /// CRC32 plus ISIZE at the end of a gzip member.
    const TRAILER_LEN: usize = 8;

    let path = std::env::args()
        .nth(1)
        .expect("usage: pass the path of a .gz file as the first argument");
    let gz = std::fs::read(&path).unwrap_or_else(|err| panic!("cannot read {path}: {err}"));

    // Check the size up front so the indexing below cannot go out of bounds.
    assert!(
        gz.len() >= HEADER_LEN + TRAILER_LEN,
        "file is too short to be a gzip member"
    );
    assert_eq!(gz[0], 31, "ID");
    assert_eq!(gz[1], 139, "ID");
    assert_eq!(gz[2], 8, "compression method");
    let flg = gz[3];
    assert!(flg == 8 || flg == 0); // only FLG.FNAME

    let mut data_start = HEADER_LEN;
    if flg & 0b1000 != 0 {
        let fname = CStr::from_bytes_until_nul(&gz[HEADER_LEN..])
            .expect("FNAME field is not NUL-terminated");
        dbg!(fname);
        // Skip the name and its terminating NUL.
        data_start += fname.count_bytes() + 1;
    }

    // Everything between the header and the trailer is the DEFLATE stream.
    let data_end = gz
        .len()
        .checked_sub(TRAILER_LEN)
        .filter(|&end| end >= data_start)
        .expect("gzip member is missing its crc32 and isize trailer");
    let blocks = &gz[data_start..data_end];

    let mut out = Vec::new();
    zwergli::inflate(blocks, &mut out);
    dbg!(&out);
    dbg!(String::from_utf8(out)).ok();
}