add non-compressed roundtrip tests

This commit is contained in:
nora 2025-03-23 11:34:51 +01:00
parent fd10c93034
commit d369850db1
5 changed files with 322 additions and 25 deletions

87
Cargo.lock generated
View file

@ -2,6 +2,12 @@
# It is not intended for manual editing.
version = 4
[[package]]
name = "adler2"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627"
[[package]]
name = "bitflags"
version = "2.9.0"
@ -14,6 +20,15 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "crc32fast"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3"
dependencies = [
"cfg-if",
]
[[package]]
name = "errno"
version = "0.3.10"
@ -40,6 +55,16 @@ version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]]
name = "flate2"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "11faaf5a5236997af9848be0bef4db95824b1d534ebc64d0f0c6cf3e67bd38dc"
dependencies = [
"crc32fast",
"miniz_oxide",
]
[[package]]
name = "getrandom"
version = "0.3.1"
@ -70,6 +95,32 @@ version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6db9c683daf087dc577b7506e9695b3d556a9f3849903fa28186283afd6809e9"
[[package]]
name = "memchr"
version = "2.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
[[package]]
name = "miniz_oxide"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e3e04debbb59698c15bacbb6d93584a8c0ca9cc3213cb423d31f760d8843ce5"
dependencies = [
"adler2",
]
[[package]]
name = "object"
version = "0.36.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87"
dependencies = [
"flate2",
"memchr",
"ruzstd",
]
[[package]]
name = "once_cell"
version = "1.20.3"
@ -106,17 +157,35 @@ dependencies = [
"tempfile",
]
[[package]]
name = "ruzstd"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fad02996bfc73da3e301efe90b1837be9ed8f4a462b6ed410aa35d00381de89f"
dependencies = [
"twox-hash",
]
[[package]]
name = "rvdc"
version = "0.1.0"
version = "0.1.1"
dependencies = [
"object",
"tempfile",
]
[[package]]
name = "static_assertions"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "tempfile"
version = "3.18.0"
version = "3.19.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c317e0a526ee6120d8dabad239c8dadca62b24b6f168914bbbc8e2fb1f0e567"
checksum = "7437ac7763b9b123ccf33c338a5cc1bac6f69b45a136c19bdd8a65e3916435bf"
dependencies = [
"cfg-if",
"fastrand",
"getrandom",
"once_cell",
@ -124,6 +193,16 @@ dependencies = [
"windows-sys",
]
[[package]]
name = "twox-hash"
version = "1.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675"
dependencies = [
"cfg-if",
"static_assertions",
]
[[package]]
name = "wasi"
version = "0.13.3+wasi-0.2.2"

View file

@ -1,3 +1,11 @@
## 0.1.1
- Add `Fence::is_tso`
- Several fixes to disassembly
- Forbid overflow for `slli` immediates
- Add `Zihintpause` extension to README (it was already implemented in disassembly)
- Add exhaustive tests
## 0.1.0
Initial release.

View file

@ -1,6 +1,6 @@
[package]
name = "rvdc"
version = "0.1.0"
version = "0.1.1"
description = "RISC-V instruction decoder"
repository = "https://github.com/Noratrieb/rustv32i"
edition = "2024"
@ -12,3 +12,7 @@ license = "MIT OR Apache-2.0"
[lints.rust]
unexpected_cfgs = { level = "warn", check-cfg = ['cfg(slow_tests)'] }
[dev-dependencies]
object = "0.36.7"
tempfile = "3.19.1"

View file

@ -1,7 +1,7 @@
RISC-V instruction decoder.
The main function is [`Inst::decode`], which will decode an instruction into the [`Inst`] enum.
The [`std::fmt::Display`] impl of [`Inst`] provides disassembly functionality
The [`core::fmt::Display`] impl of [`Inst`] provides disassembly functionality
(note that the precise output of that implementation is not considered stable).
# Register size support
@ -25,6 +25,7 @@ The decoder currently supports the following instructions:
- [x] Zalrsc standard extension
- [x] Zaamo standard extension
- [x] C standard extension
- [x] Zihintpause standard extension
More extensions may be implemented in the future.
@ -62,8 +63,9 @@ This crate supports `no_std` without the `alloc` crate.
# Testing
This crate is currently tested as part of an emulator, which tests many different kinds of instructions.
In the future, more tests of the decoder specifically may be added.
This crate is tested by exhaustively going through all 32 bit values that are valid instructions and roundtripping the disassembly through the clang assembler, ensuring it remains the same.
Additionally, it's also tested as part of an emulator, which tests many different kinds of instructions.
# MSRV

View file

@ -317,6 +317,29 @@ pub struct DecodeError {
}
impl Fence {
/// Whether this is a `fence.tso`.
/// `fm=0b1000` and `RW,RW`
pub fn is_tso(&self) -> bool {
matches!(
self,
Self {
fm: 0b1000,
pred: FenceSet {
device_input: _,
device_output: _,
memory_read: true,
memory_write: true
},
succ: FenceSet {
device_input: _,
device_output: _,
memory_read: true,
memory_write: true
},
..
}
)
}
/// Whether this fence indicates a `pause` assembler pseudoinstruction.
pub fn is_pause(&self) -> bool {
self.pred
@ -397,10 +420,12 @@ impl Display for Inst {
Inst::Sh { offset, src, base } => write!(f, "sh {src}, {}({base})", offset as i32),
Inst::Sw { offset, src, base } => write!(f, "sw {src}, {}({base})", offset as i32),
Inst::Addi { imm, dest, src1 } => {
if dest.0 == 0 && src1.0 == 0 {
if dest.0 == 0 && src1.0 == 0 && imm == 0 {
write!(f, "nop")
} else if src1.0 == 0 {
write!(f, "li {dest}, {}", imm as i32)
} else if imm == 0 {
write!(f, "mv {dest}, {src1}")
} else {
write!(f, "addi {dest}, {src1}, {}", imm as i32)
}
@ -446,12 +471,8 @@ impl Display for Inst {
src1: rs1,
} => write!(f, "srai {dest}, {rs1}, {}", imm as i32),
Inst::Add { dest, src1, src2 } => {
if src1.0 == 0 {
write!(f, "mv {dest}, {src2}")
} else {
write!(f, "add {dest}, {src1}, {src2}")
}
}
Inst::Sub { dest, src1, src2 } => write!(f, "sub {dest}, {src1}, {src2}"),
Inst::Sll { dest, src1, src2 } => write!(f, "sll {dest}, {src1}, {src2}"),
Inst::Slt { dest, src1, src2 } => write!(f, "slt {dest}, {src1}, {src2}"),
@ -462,7 +483,7 @@ impl Display for Inst {
Inst::Or { dest, src1, src2 } => write!(f, "or {dest}, {src1}, {src2}"),
Inst::And { dest, src1, src2 } => write!(f, "and {dest}, {src1}, {src2}"),
Inst::Fence { fence } => match fence.fm {
0b1000 => write!(f, "fence.TSO"),
_ if fence.is_tso() => write!(f, "fence.tso"),
0b0000 if fence.is_pause() => {
write!(f, "pause")
}
@ -491,25 +512,33 @@ impl Display for Inst {
dest,
addr,
src,
} => write!(f, "am{op}.w{order} {dest}, {src}, ({addr})",),
} => write!(f, "amo{op}.w{order} {dest}, {src}, ({addr})",),
}
}
}
impl Display for FenceSet {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut has = false;
if self.device_input {
has = true;
write!(f, "i")?;
}
if self.device_output {
has = true;
write!(f, "o")?;
}
if self.memory_read {
has = true;
write!(f, "r")?;
}
if self.memory_write {
has = true;
write!(f, "w")?;
}
if !has {
write!(f, "0")?;
}
Ok(())
}
}
@ -1163,11 +1192,16 @@ impl Inst {
dest: code.rd(),
src1: code.rs1(),
},
0b001 => Inst::Slli {
0b001 => {
if code.funct7() != 0 {
return Err(decode_error(code, "slli shift overflow"));
}
Inst::Slli {
imm: code.imm_i(),
dest: code.rd(),
src1: code.rs1(),
},
}
}
0b101 => match code.funct7() {
0b0000000 => Inst::Srli {
imm: code.rs2_imm(),
@ -1323,12 +1357,21 @@ impl Inst {
#[cfg(test)]
mod tests {
extern crate std;
use std::io::Write;
use std::prelude::rust_2024::*;
use std::fmt::Write as _;
use std::io::Write as _;
use object::Object;
use object::ObjectSection;
use crate::Fence;
use crate::FenceSet;
use crate::Inst;
use crate::Reg;
#[test]
#[cfg_attr(not(slow_tests), ignore)]
#[cfg_attr(not(slow_tests), ignore = "cfg(slow_tests) not enabled")]
fn exhaustive_decode_no_panic() {
for i in 0..u32::MAX {
if (i % (2 << 25)) == 0 {
@ -1337,13 +1380,174 @@ mod tests {
std::print!("\r{}{}", "#".repeat(done), "-".repeat(100 - done));
std::io::stdout().flush().unwrap();
}
let _ = super::Inst::decode(i);
let _ = Inst::decode(i);
}
let _ = super::Inst::decode(u32::MAX);
let _ = Inst::decode(u32::MAX);
}
#[test]
fn size_of_instruction() {
assert!(size_of::<Inst>() <= 12);
}
const TEST_SECTION_NAME: &str = ".text.rvdctest";
/// Some instruction fields are reserved and not printed in the assembly,
/// but should be ignored instead of rejected by the decoder.
/// We filter out non-canonical forms of these instructions as they do not roundtrip.
fn is_inst_supposed_to_roundtrip(inst: &Inst) -> bool {
match inst {
// Canonical fence.tso
Inst::Fence {
fence:
Fence {
fm: 0b1000,
pred:
FenceSet {
device_input: false,
device_output: false,
memory_read: true,
memory_write: true,
},
succ:
FenceSet {
device_input: false,
device_output: false,
memory_read: true,
memory_write: true,
},
src: Reg::ZERO,
dest: Reg::ZERO,
},
} => true,
// Only canonical normal fence with x0 and x0
Inst::Fence {
fence:
Fence {
fm: 0b0000,
pred: _,
succ: _,
src: Reg::ZERO,
dest: Reg::ZERO,
},
} => true,
// All other fences are reserved
Inst::Fence { .. } => false,
_ => true,
}
}
// turn this up for debugging, only run the test directly in these cases
const SKIP_CHUNKS: u32 = 0;
#[test]
fn ensure_no_chunks_are_skipped() {
assert_eq!(SKIP_CHUNKS, 0);
}
#[test]
#[cfg_attr(not(slow_tests), ignore = "cfg(slow_tests) not enabled")]
fn normal_clang_roundtrip() {
const CHUNKS: u32 = 128;
const CHUNK_SIZE: u32 = u32::MAX / CHUNKS;
for (chunk_idx, start) in ((SKIP_CHUNKS * CHUNK_SIZE)..u32::MAX)
.step_by(CHUNK_SIZE as usize)
.enumerate()
{
let chunk_idx = chunk_idx + SKIP_CHUNKS as usize;
let start_time = std::time::Instant::now();
let insts = (start..=(start + CHUNK_SIZE))
.filter_map(|code| Some((code, Inst::decode_normal(code).ok()?)))
.filter(|(_, inst)| is_inst_supposed_to_roundtrip(inst))
.collect::<Vec<_>>();
let mut text = std::format!(".section {TEST_SECTION_NAME}\n.globl _start\n_start:\n");
for (_, inst) in &insts {
writeln!(text, " {inst}").unwrap();
}
let data = clang_assemble(&text, "-march=rv32ima_zihintpause");
for (i, result_code) in data.chunks(4).enumerate() {
let result_code = u32::from_le_bytes(result_code.try_into().unwrap());
assert_eq!(
insts[i].0, result_code,
"failed to rountrip!\n\
instruction `{:0>32b}` failed to rountrip\n\
resulted in `{:0>32b}` instead.\n\
disassembly of original instruction: `{}`",
insts[i].0, result_code, insts[i].1
);
}
let elapsed = start_time.elapsed();
let chunk_number = chunk_idx + 1;
let remaining = elapsed * (CHUNKS - (chunk_number as u32));
writeln!(
std::io::stdout(),
"Completed chunk {chunk_number}/{CHUNKS} (estimated {remaining:?} remaining)",
)
.unwrap();
}
}
#[test]
#[ignore = "clang doesn't always generate compressed instructions?"]
fn compressed_clang_roundtrip() {
let insts = (0..=u16::MAX)
.filter_map(|code| Some((code, Inst::decode_compressed(code).ok()?)))
.collect::<Vec<_>>();
let mut text = std::format!(".section {TEST_SECTION_NAME}\n.globl _start\n_start:\n");
for (_, inst) in &insts {
writeln!(text, " {inst}").unwrap();
}
let data = clang_assemble(&text, "-march=rv32imac");
for (i, result_code) in data.chunks(2).enumerate() {
let result_code = u16::from_le_bytes(result_code.try_into().unwrap());
assert_eq!(
insts[i].0, result_code,
"failed to rountrip!\n\
instruction `{:0>32b}` failed to rountrip\n\
resulted in `{:0>32b}` instead.\n\
disassembly of original instruction: `{}`",
insts[i].0, result_code, insts[i].1
);
}
}
fn clang_assemble(text: &str, march_flag: &str) -> Vec<u8> {
let tmp = tempfile::tempdir().unwrap();
let path = tmp.path().join("16.s");
let bin_path = tmp.path().join("16.o");
std::fs::write(&path, text).unwrap();
let mut clang = std::process::Command::new("clang");
clang.args(["-target", "riscv32-unknown-none-elf", march_flag, "-c"]);
clang.arg(path);
clang.arg("-o");
clang.arg(&bin_path);
let out = clang.output().unwrap();
if !out.status.success() {
panic!(
"failed to run clang:\n{}",
String::from_utf8_lossy(&out.stderr)
);
}
let obj = std::fs::read(bin_path).unwrap();
let obj = object::File::parse(&*obj).unwrap();
let section = obj.section_by_name(TEST_SECTION_NAME).unwrap();
let data = section.data().unwrap();
data.to_owned()
}
}