mirror of
https://github.com/Noratrieb/zwergli.git
synced 2026-01-14 10:25:00 +01:00
init
This commit is contained in:
commit
00a411a728
5 changed files with 311 additions and 0 deletions
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
/target
|
||||
*.gz
|
||||
*.dot
|
||||
*.svg
|
||||
7
Cargo.lock
generated
Normal file
7
Cargo.lock
generated
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 4
|
||||
|
||||
[[package]]
|
||||
name = "zwergli"
|
||||
version = "0.1.0"
|
||||
6
Cargo.toml
Normal file
6
Cargo.toml
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
[package]
|
||||
name = "zwergli"
|
||||
version = "0.1.0"
|
||||
edition = "2024"
|
||||
|
||||
[dependencies]
|
||||
262
src/lib.rs
Normal file
262
src/lib.rs
Normal file
|
|
@ -0,0 +1,262 @@
|
|||
/// Cursor over a byte slice that hands out bits, starting at the least-significant bit of
/// each byte.
///
/// `data` always begins at the byte currently being read, and `pos_bit` is the index
/// (`0..8`) of the next unread bit inside `data[0]`.
struct Bitstream<'a> {
    data: &'a [u8],
    pos_bit: usize,
}

impl Bitstream<'_> {
    /// Reads `len` bits and returns them as an integer, or `None` if the input runs out
    /// before `len` bits were read (bits consumed up to that point stay consumed).
    ///
    /// Bits taken from a single byte keep their relative significance. When a read spans
    /// several bytes, the chunk taken from the earlier byte lands in the more significant
    /// bits of the result.
    ///
    /// NOTE(review): RFC 1951 packs multi-bit fields least-significant bit first, which
    /// would put the earlier chunk in the *low* bits. The unit test in this file pins the
    /// current order, so it is kept here; confirm before relying on reads that straddle a
    /// byte boundary.
    ///
    /// # Panics
    ///
    /// Panics if `len` is 64 or more.
    fn read_bits_normal(&mut self, mut len: usize) -> Option<u64> {
        assert!(len < u64::BITS as usize);

        let mut result = 0u64;

        while len > 0 {
            let current = u64::from(*self.data.first()?);

            // Take whatever is left of the request, capped by what is left of this byte.
            // (Using `len % 8` here would yield 0 for whole-byte requests and never finish.)
            let take = std::cmp::min(len, 8 - self.pos_bit);
            // Built in u64 so that `take == 8` does not overflow the shift.
            let mask = (1u64 << take) - 1;

            result <<= take;
            result |= (current >> self.pos_bit) & mask;

            len -= take;
            self.pos_bit += take;

            if self.pos_bit == 8 {
                self.data = &self.data[1..];
                self.pos_bit = 0;
            }
        }

        Some(result)
    }
}
|
||||
|
||||
/// Binary decoding tree for a canonical Huffman code.
///
/// `nodes[0]` is always the root. Children are referred to by index into `nodes`; because
/// the root can never be anyone's child, a child index of `0` means "no node here".
struct HuffmanTree {
    nodes: Vec<HuffmanNode>,
}

/// One node of a [`HuffmanTree`].
#[derive(Debug)]
enum HuffmanNode {
    /// A decoded symbol.
    Leaf(u16),
    /// An inner node; `zero` / `one` are the indices of the children reached by that bit
    /// (`0` if the branch is unpopulated).
    Cont { zero: usize, one: usize },
}

/// Outcome of feeding one bit into [`HuffmanTree::lookup_with_state`].
#[derive(Debug)]
enum HuffmanLookupResult {
    /// The bit completed a code; the payload is the decoded symbol.
    Done(u16),
    /// More bits are needed; pass `next_state` to the next lookup.
    Incomplete { next_state: usize },
}

impl HuffmanTree {
    /// Code lengths of the fixed literal/length alphabet from RFC 1951, section 3.2.6.
    fn fixed_code_lengths() -> [u8; 288] {
        let mut codes = [0; 288];
        codes[0..144].fill(8);
        codes[144..256].fill(9);
        codes[256..280].fill(7);
        codes[280..].fill(8);
        codes
    }

    /// Builds the decoding tree for the canonical Huffman code described by `lengths`,
    /// where `lengths[symbol]` is the code length of `symbol` and `0` marks an unused
    /// symbol (RFC 1951, section 3.2.2).
    ///
    /// # Panics
    ///
    /// Panics if a length exceeds 15 bits, or if the lengths are over-subscribed in a way
    /// that makes one code run through another symbol's leaf.
    fn from_lengths(lengths: &[u8]) -> Self {
        /// Longest code length DEFLATE permits.
        const MAX_BITS: usize = 15;

        // Step 1: count how many codes exist per length.
        let mut bl_count = [0u32; MAX_BITS + 1];
        for &len in lengths {
            let len = usize::from(len);
            assert!(
                len <= MAX_BITS,
                "code length {len} exceeds the maximum of {MAX_BITS} bits"
            );
            bl_count[len] += 1;
        }
        // Unused symbols must not take part in the code assignment.
        bl_count[0] = 0;

        // Step 2: smallest code value for every length.
        let mut next_code = [0u32; MAX_BITS + 1];
        let mut code = 0u32;
        for bits in 1..=MAX_BITS {
            code = (code + bl_count[bits - 1]) << 1;
            next_code[bits] = code;
        }

        // Step 3: hand out consecutive codes in symbol order and thread each one into the
        // tree, most-significant bit first.
        let mut nodes = vec![HuffmanNode::Cont { zero: 0, one: 0 }];
        for (symbol, &len) in lengths.iter().enumerate() {
            if len == 0 {
                continue;
            }
            let code = next_code[usize::from(len)];
            next_code[usize::from(len)] += 1;

            let mut parent = 0;
            for shift in (0..len).rev() {
                let bit = (code >> shift) & 1;
                let fresh = nodes.len();
                let HuffmanNode::Cont { zero, one } = &mut nodes[parent] else {
                    panic!("over-subscribed code lengths: a code runs through an existing leaf");
                };
                let slot = if bit == 0 { zero } else { one };

                if shift == 0 {
                    // Last bit of the code: hang the symbol here.
                    *slot = fresh;
                    // Symbols are stored as u16; alphabets in this file have at most 288 entries.
                    nodes.push(HuffmanNode::Leaf(symbol as u16));
                } else if *slot > 0 {
                    // Inner node already exists: just walk.
                    parent = *slot;
                } else {
                    // Create the inner node, then walk.
                    *slot = fresh;
                    nodes.push(HuffmanNode::Cont { zero: 0, one: 0 });
                    parent = fresh;
                }
            }
        }

        Self { nodes }
    }

    /// Advances the decoder by one `bit` from inner node `state` (use `0` to start a new
    /// code at the root).
    ///
    /// Following an unpopulated branch yields `Incomplete { next_state: 0 }`, i.e. the
    /// decoder is back at the root without any error being reported.
    ///
    /// # Panics
    ///
    /// Panics if `state` is out of range or refers to a leaf.
    fn lookup_with_state(&self, state: usize, bit: u64) -> HuffmanLookupResult {
        let HuffmanNode::Cont { zero, one } = &self.nodes[state] else {
            unreachable!("invalid state, should point at continuation node");
        };
        let next_state = if bit == 0 { *zero } else { *one };
        match self.nodes[next_state] {
            HuffmanNode::Leaf(leaf) => HuffmanLookupResult::Done(leaf),
            HuffmanNode::Cont { .. } => HuffmanLookupResult::Incomplete { next_state },
        }
    }

    /// Renders the tree as a Graphviz `digraph`.
    ///
    /// Inner nodes are named after the bit path leading to them (`_` is the root, `_01`
    /// is reached via 0 then 1); leaves are named after their symbol. Unpopulated
    /// branches are omitted.
    fn to_dot(&self) -> String {
        use std::fmt::Write;

        /// Writes the edge into `node` (if it has a parent) and then its whole subtree.
        fn emit(
            tree: &HuffmanTree,
            out: &mut String,
            parent: Option<&str>,
            choice: &str,
            this_node: &str,
            node: &HuffmanNode,
        ) -> std::fmt::Result {
            match node {
                HuffmanNode::Leaf(number) => {
                    let parent = parent.expect("the root is never a leaf");
                    writeln!(out, "{parent} -> {number} [label={choice}]")?;
                }
                HuffmanNode::Cont { zero, one } => {
                    if let Some(parent) = parent {
                        writeln!(out, "{parent} -> {this_node} [label={choice}]")?;
                    }
                    for (label, &child) in [("0", zero), ("1", one)] {
                        // Index 0 is the "no child" marker; descending into it would
                        // re-enter the root and never terminate.
                        if child == 0 {
                            continue;
                        }
                        emit(
                            tree,
                            out,
                            Some(this_node),
                            label,
                            &format!("{this_node}{label}"),
                            &tree.nodes[child],
                        )?;
                    }
                }
            }
            Ok(())
        }

        let mut out = String::new();
        out.push_str("digraph huffman_tree {\n");
        emit(self, &mut out, None, "", "_", &self.nodes[0])
            .expect("writing into a String cannot fail");
        out.push_str("}\n");
        out
    }
}
|
||||
|
||||
// https://datatracker.ietf.org/doc/html/rfc1951
|
||||
pub fn inflate(data: &[u8], out: &mut Vec<u8>) {
|
||||
std::fs::write(
|
||||
"output.dot",
|
||||
HuffmanTree::from_lengths(&HuffmanTree::fixed_code_lengths()).to_dot(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let mut data = Bitstream { data, pos_bit: 0 };
|
||||
|
||||
loop {
|
||||
let bfinal = data.read_bits_normal(1).unwrap();
|
||||
|
||||
let btype = data.read_bits_normal(2).unwrap();
|
||||
|
||||
assert_eq!(btype, 1, "not a static huffman tree construction");
|
||||
|
||||
let tree = HuffmanTree::from_lengths(&HuffmanTree::fixed_code_lengths());
|
||||
|
||||
let mut node_state = 0;
|
||||
loop {
|
||||
let bit = data.read_bits_normal(1).unwrap();
|
||||
let result = tree.lookup_with_state(node_state, bit);
|
||||
match result {
|
||||
HuffmanLookupResult::Done(value) => {
|
||||
dbg!(value);
|
||||
node_state = 0;
|
||||
|
||||
match value {
|
||||
0..256 => {
|
||||
out.push(value as u8);
|
||||
}
|
||||
256 => break,
|
||||
257..286 => {
|
||||
let length = match value {
|
||||
257..265 => value - (257 - 3),
|
||||
_ => todo!("lz77 more"),
|
||||
};
|
||||
}
|
||||
286.. => unreachable!("invalid byte"),
|
||||
}
|
||||
}
|
||||
HuffmanLookupResult::Incomplete { next_state } => node_state = next_state,
|
||||
}
|
||||
}
|
||||
|
||||
if bfinal == 1 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::HuffmanTree;

    /// Feeds `bits` (first bit first) through `tree` and returns the symbol decoded by
    /// the final bit; panics if the code ends early or is not complete after all bits.
    fn decode_code(tree: &HuffmanTree, bits: &[u64]) -> u16 {
        let mut state = 0;
        for (i, &bit) in bits.iter().enumerate() {
            match tree.lookup_with_state(state, bit) {
                super::HuffmanLookupResult::Done(symbol) => {
                    assert_eq!(i + 1, bits.len(), "code ended after {} bits", i + 1);
                    return symbol;
                }
                super::HuffmanLookupResult::Incomplete { next_state } => state = next_state,
            }
        }
        panic!("bits did not form a complete code");
    }

    /// Sub-byte reads come out of each byte starting at its least-significant bit.
    #[test]
    fn bitstream() {
        let bytes = [0b110_010_01_u8, 0b010_111_01];
        let mut stream = super::Bitstream {
            data: &bytes,
            pos_bit: 0,
        };
        assert_eq!(stream.read_bits_normal(2).unwrap(), 0b01);
        assert_eq!(stream.read_bits_normal(3).unwrap(), 0b010);
        assert_eq!(stream.read_bits_normal(5).unwrap(), 0b11001);
        assert_eq!(stream.read_bits_normal(3).unwrap(), 0b111);
        assert_eq!(stream.read_bits_normal(3).unwrap(), 0b010);
    }

    /// The fixed tree decodes the first and last code of every range in the table of
    /// RFC 1951, section 3.2.6.
    #[test]
    fn decode() {
        let lengths = HuffmanTree::fixed_code_lengths();
        let tree = HuffmanTree::from_lengths(&lengths);

        // 256..=279: 7 bits, 0000000 ..= 0010111
        assert_eq!(decode_code(&tree, &[0, 0, 0, 0, 0, 0, 0]), 256);
        assert_eq!(decode_code(&tree, &[0, 0, 1, 0, 1, 1, 1]), 279);
        // 0..=143: 8 bits, 00110000 ..= 10111111
        assert_eq!(decode_code(&tree, &[0, 0, 1, 1, 0, 0, 0, 0]), 0);
        assert_eq!(decode_code(&tree, &[1, 0, 1, 1, 1, 1, 1, 1]), 143);
        // 280..=287: 8 bits, 11000000 ..= 11000111
        assert_eq!(decode_code(&tree, &[1, 1, 0, 0, 0, 0, 0, 0]), 280);
        assert_eq!(decode_code(&tree, &[1, 1, 0, 0, 0, 1, 1, 1]), 287);
        // 144..=255: 9 bits, 110010000 ..= 111111111
        assert_eq!(decode_code(&tree, &[1, 1, 0, 0, 1, 0, 0, 0, 0]), 144);
        assert_eq!(decode_code(&tree, &[1, 1, 1, 1, 1, 1, 1, 1, 1]), 255);
    }
}
|
||||
32
src/main.rs
Normal file
32
src/main.rs
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
use std::ffi::CStr;
|
||||
|
||||
fn main() {
|
||||
let gz = std::env::args().nth(1).unwrap();
|
||||
let gz = std::fs::read(gz).unwrap();
|
||||
|
||||
assert_eq!(gz[0], 31, "ID");
|
||||
assert_eq!(gz[1], 139, "ID");
|
||||
assert_eq!(gz[2], 8, "compression method");
|
||||
|
||||
let flg = gz[3];
|
||||
|
||||
assert!(flg == 8 || flg == 0); // only FLG.FNAME
|
||||
|
||||
let mut data_start = 10;
|
||||
|
||||
if flg & 0b1000 != 0 {
|
||||
let fname = CStr::from_bytes_until_nul(&gz[10..]).unwrap();
|
||||
dbg!(fname);
|
||||
data_start += fname.count_bytes() + 1;
|
||||
}
|
||||
|
||||
let blocks = &gz[(data_start)..];
|
||||
let blocks = &blocks[..(blocks.len() - 8)]; // crc32 and isize
|
||||
|
||||
let mut out = Vec::new();
|
||||
|
||||
zwergli::inflate(blocks, &mut out);
|
||||
|
||||
dbg!(&out);
|
||||
dbg!(String::from_utf8(out)).ok();
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue