// Mirror of https://github.com/Noratrieb/zwergli.git
// Synced 2026-01-14 10:25:00 +01:00
// 262 lines, 7.9 KiB, Rust
/// Cursor over a byte slice that is consumed a few bits at a time.
struct Bitstream<'a> {
    /// Input that has not been fully consumed yet; `data[0]` is the byte
    /// currently being read.
    data: &'a [u8],
    /// How many bits of `data[0]` have already been handed out, counted from
    /// the least significant bit. Stays in `0..8`.
    pos_bit: usize,
}
impl Bitstream<'_> {
|
|
fn read_bits_normal(&mut self, mut len: usize) -> Option<u64> {
|
|
assert!((len as u32) < u64::BITS);
|
|
|
|
let mut result = 0;
|
|
|
|
while len > 0 {
|
|
if self.data.is_empty() {
|
|
return None;
|
|
}
|
|
|
|
let to_read_from_current_byte = std::cmp::min(len % 8, 8 - self.pos_bit);
|
|
result <<= to_read_from_current_byte;
|
|
result |=
|
|
((self.data[0] >> self.pos_bit) & ((1 << to_read_from_current_byte) - 1)) as u64;
|
|
len -= to_read_from_current_byte;
|
|
self.pos_bit += to_read_from_current_byte;
|
|
|
|
if self.pos_bit == 8 {
|
|
self.data = &self.data[1..];
|
|
self.pos_bit = 0;
|
|
}
|
|
}
|
|
|
|
Some(result)
|
|
}
|
|
}
struct HuffmanTree {
|
|
nodes: Vec<HuffmanNode>,
|
|
}
/// One node of a [`HuffmanTree`].
#[derive(Debug)]
enum HuffmanNode {
    /// End of a code: carries the decoded symbol.
    Leaf(u16),
    /// Interior node. `zero` and `one` are the indices of the children
    /// reached by a 0 bit and a 1 bit. Index 0 (the root, which can never be
    /// a child) stands for "no child yet".
    Cont { zero: usize, one: usize },
}
/// Outcome of feeding a single bit into a Huffman tree lookup.
enum HuffmanLookupResult {
    /// The bit completed a code; the payload is the decoded symbol.
    Done(u16),
    /// More bits are needed; continue the lookup from `next_state`.
    Incomplete { next_state: usize },
}
impl HuffmanTree {
|
|
fn fixed_code_lengths() -> [u8; 288] {
|
|
let mut codes = [0; 288];
|
|
codes[0..144].fill(8);
|
|
codes[144..256].fill(9);
|
|
codes[256..280].fill(7);
|
|
codes[280..].fill(8);
|
|
codes
|
|
}
|
|
|
|
fn from_lengths(lengths: &[u8]) -> Self {
|
|
const MAX_BITS: usize = 10;
|
|
|
|
let mut codes = vec![0; lengths.len()];
|
|
|
|
let mut bl_count = [0; MAX_BITS];
|
|
for l in lengths {
|
|
bl_count[*l as usize] += 1;
|
|
}
|
|
|
|
let mut next_code = [0; MAX_BITS];
|
|
let mut code = 0;
|
|
for bits in 1..MAX_BITS {
|
|
code = (code + bl_count[bits - 1]) << 1;
|
|
next_code[bits] = code;
|
|
}
|
|
|
|
for n in 0..lengths.len() {
|
|
let len = lengths[n] as usize;
|
|
if len != 0 {
|
|
codes[n] = next_code[len];
|
|
next_code[len] += 1;
|
|
}
|
|
}
|
|
|
|
let mut nodes = vec![HuffmanNode::Cont { zero: 0, one: 0 }];
|
|
for (i, &l) in lengths.iter().enumerate().filter(|(_, l)| **l != 0) {
|
|
let code = codes[i];
|
|
let mut parent_node_idx = 0;
|
|
for j in 0..l {
|
|
let bit = (code >> (l - 1 - j)) & 0b1;
|
|
let next_node_idx = nodes.len();
|
|
let HuffmanNode::Cont { zero, one } = &mut nodes[parent_node_idx] else {
|
|
unreachable!()
|
|
};
|
|
let this_ref = if bit == 0 { zero } else { one };
|
|
// are we done?
|
|
if j == l - 1 {
|
|
*this_ref = next_node_idx;
|
|
nodes.push(HuffmanNode::Leaf(i as u16));
|
|
} else {
|
|
// has this parent node been inserted already?
|
|
if *this_ref > 0 {
|
|
// just walk
|
|
parent_node_idx = *this_ref;
|
|
} else {
|
|
// insert and walk
|
|
*this_ref = next_node_idx;
|
|
nodes.push(HuffmanNode::Cont { zero: 0, one: 0 });
|
|
parent_node_idx = next_node_idx;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
Self { nodes }
|
|
}
|
|
|
|
fn lookup_with_state(&self, state: usize, bit: u64) -> HuffmanLookupResult {
|
|
let HuffmanNode::Cont { zero, one } = self.nodes[state] else {
|
|
unreachable!("invalid state, should point at continuation node");
|
|
};
|
|
let next_state = if bit == 0 { zero } else { one };
|
|
match self.nodes[next_state] {
|
|
HuffmanNode::Leaf(leaf) => HuffmanLookupResult::Done(leaf),
|
|
HuffmanNode::Cont { .. } => HuffmanLookupResult::Incomplete { next_state },
|
|
}
|
|
}
|
|
|
|
fn to_dot(&self) -> String {
|
|
use std::fmt::Write;
|
|
|
|
let mut out = String::new();
|
|
let mut inner = |tree: &Self| {
|
|
writeln!(out, "digraph huffman_tree {{")?;
|
|
|
|
let root = &tree.nodes[0];
|
|
|
|
fn print(
|
|
tree: &HuffmanTree,
|
|
out: &mut String,
|
|
parent: Option<&str>,
|
|
choice: &str,
|
|
this_node: &str,
|
|
node: &HuffmanNode,
|
|
) -> std::fmt::Result {
|
|
match node {
|
|
HuffmanNode::Leaf(number) => {
|
|
writeln!(
|
|
out,
|
|
"{parent} -> {number} [label={choice}]",
|
|
parent = parent.unwrap(),
|
|
)?;
|
|
}
|
|
HuffmanNode::Cont { zero, one } => {
|
|
if let Some(parent) = parent {
|
|
writeln!(out, "{parent} -> {this_node} [label={choice}]")?;
|
|
}
|
|
print(
|
|
tree,
|
|
out,
|
|
Some(this_node),
|
|
"0",
|
|
&format!("{this_node}0"),
|
|
&tree.nodes[*zero],
|
|
)?;
|
|
print(
|
|
tree,
|
|
out,
|
|
Some(this_node),
|
|
"1",
|
|
&format!("{this_node}1"),
|
|
&tree.nodes[*one],
|
|
)?;
|
|
}
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
print(tree, &mut out, None, "", "_", root)?;
|
|
|
|
writeln!(out, "}}")
|
|
};
|
|
inner(self).unwrap();
|
|
out
|
|
}
|
|
}
// https://datatracker.ietf.org/doc/html/rfc1951
|
|
pub fn inflate(data: &[u8], out: &mut Vec<u8>) {
|
|
std::fs::write(
|
|
"output.dot",
|
|
HuffmanTree::from_lengths(&HuffmanTree::fixed_code_lengths()).to_dot(),
|
|
)
|
|
.unwrap();
|
|
|
|
let mut data = Bitstream { data, pos_bit: 0 };
|
|
|
|
loop {
|
|
let bfinal = data.read_bits_normal(1).unwrap();
|
|
|
|
let btype = data.read_bits_normal(2).unwrap();
|
|
|
|
assert_eq!(btype, 1, "not a static huffman tree construction");
|
|
|
|
let tree = HuffmanTree::from_lengths(&HuffmanTree::fixed_code_lengths());
|
|
|
|
let mut node_state = 0;
|
|
loop {
|
|
let bit = data.read_bits_normal(1).unwrap();
|
|
let result = tree.lookup_with_state(node_state, bit);
|
|
match result {
|
|
HuffmanLookupResult::Done(value) => {
|
|
dbg!(value);
|
|
node_state = 0;
|
|
|
|
match value {
|
|
0..256 => {
|
|
out.push(value as u8);
|
|
}
|
|
256 => break,
|
|
257..286 => {
|
|
let length = match value {
|
|
257..265 => value - (257 - 3),
|
|
_ => todo!("lz77 more"),
|
|
};
|
|
}
|
|
286.. => unreachable!("invalid byte"),
|
|
}
|
|
}
|
|
HuffmanLookupResult::Incomplete { next_state } => node_state = next_state,
|
|
}
|
|
}
|
|
|
|
if bfinal == 1 {
|
|
break;
|
|
}
|
|
}
|
|
}
#[cfg(test)]
mod tests {
    use crate::HuffmanTree;

    /// Reads a two-byte input in chunks of varying width, including one read
    /// that crosses the byte boundary, and checks every chunk.
    #[test]
    fn bitstream() {
        let bytes = [0b110_010_01_u8, 0b010_111_01];
        let mut stream = super::Bitstream {
            data: &bytes,
            pos_bit: 0,
        };

        // (number of bits to read, expected value)
        let reads = [
            (2, 0b01),
            (3, 0b010),
            (5, 0b11001),
            (3, 0b111),
            (3, 0b010),
        ];
        for (len, expected) in reads {
            assert_eq!(stream.read_bits_normal(len).unwrap(), expected);
        }
    }

    /// Building the tree for the fixed code lengths must not panic.
    #[test]
    fn decode() {
        let fixed = HuffmanTree::fixed_code_lengths();
        let _tree = HuffmanTree::from_lengths(&fixed);
    }
}