Compare commits

...

4 commits

Author SHA1 Message Date
92dc3cb349 try to fix
Some checks failed
Rust / build (push) Has been cancelled
2025-03-23 12:21:39 +01:00
218968acb0 try compressed 2025-03-23 12:19:27 +01:00
16bd9072df fix overflow 2025-03-23 11:43:04 +01:00
d369850db1 add non-compressed roundtrip tests 2025-03-23 11:34:51 +01:00
5 changed files with 398 additions and 32 deletions

87
Cargo.lock generated
View file

@ -2,6 +2,12 @@
# It is not intended for manual editing.
version = 4
[[package]]
name = "adler2"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627"
[[package]]
name = "bitflags"
version = "2.9.0"
@ -14,6 +20,15 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "crc32fast"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3"
dependencies = [
"cfg-if",
]
[[package]]
name = "errno"
version = "0.3.10"
@ -40,6 +55,16 @@ version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]]
name = "flate2"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "11faaf5a5236997af9848be0bef4db95824b1d534ebc64d0f0c6cf3e67bd38dc"
dependencies = [
"crc32fast",
"miniz_oxide",
]
[[package]]
name = "getrandom"
version = "0.3.1"
@ -70,6 +95,32 @@ version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6db9c683daf087dc577b7506e9695b3d556a9f3849903fa28186283afd6809e9"
[[package]]
name = "memchr"
version = "2.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
[[package]]
name = "miniz_oxide"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e3e04debbb59698c15bacbb6d93584a8c0ca9cc3213cb423d31f760d8843ce5"
dependencies = [
"adler2",
]
[[package]]
name = "object"
version = "0.36.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87"
dependencies = [
"flate2",
"memchr",
"ruzstd",
]
[[package]]
name = "once_cell"
version = "1.20.3"
@ -106,17 +157,35 @@ dependencies = [
"tempfile",
]
[[package]]
name = "ruzstd"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fad02996bfc73da3e301efe90b1837be9ed8f4a462b6ed410aa35d00381de89f"
dependencies = [
"twox-hash",
]
[[package]]
name = "rvdc"
version = "0.1.0"
version = "0.1.1"
dependencies = [
"object",
"tempfile",
]
[[package]]
name = "static_assertions"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "tempfile"
version = "3.18.0"
version = "3.19.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c317e0a526ee6120d8dabad239c8dadca62b24b6f168914bbbc8e2fb1f0e567"
checksum = "7437ac7763b9b123ccf33c338a5cc1bac6f69b45a136c19bdd8a65e3916435bf"
dependencies = [
"cfg-if",
"fastrand",
"getrandom",
"once_cell",
@ -124,6 +193,16 @@ dependencies = [
"windows-sys",
]
[[package]]
name = "twox-hash"
version = "1.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675"
dependencies = [
"cfg-if",
"static_assertions",
]
[[package]]
name = "wasi"
version = "0.13.3+wasi-0.2.2"

View file

@ -1,3 +1,11 @@
## 0.1.1
- Add `Fence::is_tso`
- Several fixes to disassembly
- Forbid overflow for `slli` immediates
- Add `Zihintpause` extension to README (it was already implemented in disassembly)
- Add exhaustive tests
## 0.1.0
Initial release.

View file

@ -1,6 +1,6 @@
[package]
name = "rvdc"
version = "0.1.0"
version = "0.1.1"
description = "RISC-V instruction decoder"
repository = "https://github.com/Noratrieb/rustv32i"
edition = "2024"
@ -12,3 +12,7 @@ license = "MIT OR Apache-2.0"
[lints.rust]
unexpected_cfgs = { level = "warn", check-cfg = ['cfg(slow_tests)'] }
[dev-dependencies]
object = "0.36.7"
tempfile = "3.19.1"

View file

@ -1,7 +1,7 @@
RISC-V instruction decoder.
The main function is [`Inst::decode`], which will decode an instruction into the [`Inst`] enum.
The [`std::fmt::Display`] impl of [`Inst`] provides disassembly functionality
The [`core::fmt::Display`] impl of [`Inst`] provides disassembly functionality
(note that the precise output of that implementation is not considered stable).
# Register size support
@ -25,6 +25,7 @@ The decoder currently supports the following instructions:
- [x] Zalrsc standard extension
- [x] Zaamo standard extension
- [x] C standard extension
- [x] Zihintpause standard extension
More extensions may be implemented in the future.
@ -62,8 +63,10 @@ This crate supports `no_std` without the `alloc` crate.
# Testing
This crate is currently tested as part of an emulator, which tests many different kinds of instructions.
In the future, more tests of the decoder specifically may be added.
This crate is tested by exhaustively going through all 32 bit values that are valid instructions and roundtripping the disassembly through the clang assembler, ensuring it remains the same.
This is not yet done for compressed instructions.
Additionally, it's also tested as part of an emulator, which tests many different kinds of instructions.
# MSRV

View file

@ -317,6 +317,29 @@ pub struct DecodeError {
}
impl Fence {
/// Whether this is a `fence.tso`.
/// `fm=0b1000` and `RW,RW`
pub fn is_tso(&self) -> bool {
matches!(
self,
Self {
fm: 0b1000,
pred: FenceSet {
device_input: _,
device_output: _,
memory_read: true,
memory_write: true
},
succ: FenceSet {
device_input: _,
device_output: _,
memory_read: true,
memory_write: true
},
..
}
)
}
/// Whether this fence indicates a `pause` assembler pseudoinstruction.
pub fn is_pause(&self) -> bool {
self.pred
@ -397,10 +420,12 @@ impl Display for Inst {
Inst::Sh { offset, src, base } => write!(f, "sh {src}, {}({base})", offset as i32),
Inst::Sw { offset, src, base } => write!(f, "sw {src}, {}({base})", offset as i32),
Inst::Addi { imm, dest, src1 } => {
if dest.0 == 0 && src1.0 == 0 {
if dest.0 == 0 && src1.0 == 0 && imm == 0 {
write!(f, "nop")
} else if src1.0 == 0 {
write!(f, "li {dest}, {}", imm as i32)
} else if imm == 0 {
write!(f, "mv {dest}, {src1}")
} else {
write!(f, "addi {dest}, {src1}, {}", imm as i32)
}
@ -446,11 +471,7 @@ impl Display for Inst {
src1: rs1,
} => write!(f, "srai {dest}, {rs1}, {}", imm as i32),
Inst::Add { dest, src1, src2 } => {
if src1.0 == 0 {
write!(f, "mv {dest}, {src2}")
} else {
write!(f, "add {dest}, {src1}, {src2}")
}
write!(f, "add {dest}, {src1}, {src2}")
}
Inst::Sub { dest, src1, src2 } => write!(f, "sub {dest}, {src1}, {src2}"),
Inst::Sll { dest, src1, src2 } => write!(f, "sll {dest}, {src1}, {src2}"),
@ -462,7 +483,7 @@ impl Display for Inst {
Inst::Or { dest, src1, src2 } => write!(f, "or {dest}, {src1}, {src2}"),
Inst::And { dest, src1, src2 } => write!(f, "and {dest}, {src1}, {src2}"),
Inst::Fence { fence } => match fence.fm {
0b1000 => write!(f, "fence.TSO"),
_ if fence.is_tso() => write!(f, "fence.tso"),
0b0000 if fence.is_pause() => {
write!(f, "pause")
}
@ -491,25 +512,33 @@ impl Display for Inst {
dest,
addr,
src,
} => write!(f, "am{op}.w{order} {dest}, {src}, ({addr})",),
} => write!(f, "amo{op}.w{order} {dest}, {src}, ({addr})",),
}
}
}
impl Display for FenceSet {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut has = false;
if self.device_input {
has = true;
write!(f, "i")?;
}
if self.device_output {
has = true;
write!(f, "o")?;
}
if self.memory_read {
has = true;
write!(f, "r")?;
}
if self.memory_write {
has = true;
write!(f, "w")?;
}
if !has {
write!(f, "0")?;
}
Ok(())
}
}
@ -718,6 +747,22 @@ fn decode_error(instruction: impl Into<InstCode>, unexpected_field: &'static str
impl Inst {
/// Whether the first byte of an instruction indicates a compressed or uncompressed instruction.
///
/// # Examples
///
/// ```rust
/// // addi sp, sp, -0x20 (compressed)
/// let x = 0x1101_u32;
/// assert!(rvdc::Inst::first_byte_is_compressed(x.to_le_bytes()[0]));
/// let x = 0x1101_u16;
/// assert!(rvdc::Inst::first_byte_is_compressed(x.to_le_bytes()[0]));
/// ```
///
/// ```rust
/// // auipc t1, 0xa
/// let x = 0x0000a317_u32;
/// assert!(!rvdc::Inst::first_byte_is_compressed(x.to_le_bytes()[0]));
/// ```
pub fn first_byte_is_compressed(byte: u8) -> bool {
(byte & 0b11) != 0b11
}
@ -759,11 +804,18 @@ impl Inst {
// C0
0b00 => match code.funct3() {
// C.ADDI4SPN -> addi \rd', sp, \imm
0b000 => Inst::Addi {
imm: code.immediate_u(&[(5..=5, 3), (6..=6, 2), (7..=10, 6), (11..=12, 4)]),
dest: code.rs2_short(),
src1: Reg::SP,
},
0b000 => {
let imm =
code.immediate_u(&[(5..=5, 3), (6..=6, 2), (7..=10, 6), (11..=12, 4)]);
if imm == 0 {
return Err(decode_error(code, "uimm=0 for C.ADDISPN is reserved"));
}
Inst::Addi {
imm,
dest: code.rs2_short(),
src1: Reg::SP,
}
}
// C.LW -> lw \dest \offset(\base)
0b010 => Inst::Lw {
offset: code.immediate_u(&[(10..=12, 3), (5..=5, 6), (6..=6, 2)]),
@ -948,7 +1000,7 @@ impl Inst {
// C.SLLI -> slli \rd, \rd, \imm
0b000 => {
if code.extract(12..=12) != 0 {
return Err(decode_error(code, "imm"));
return Err(decode_error(code, "C.SLLI shift amount must be zero"));
}
Inst::Slli {
imm: code.immediate_u(&[(2..=6, 0), (12..=12, 5)]),
@ -1016,7 +1068,7 @@ impl Inst {
},
_ => return Err(decode_error(code, "funct3")),
},
_ => return Err(decode_error(code, "op")),
_ => return Err(decode_error(code, "instruction is not compressed")),
};
Ok(inst)
}
@ -1163,11 +1215,16 @@ impl Inst {
dest: code.rd(),
src1: code.rs1(),
},
0b001 => Inst::Slli {
imm: code.imm_i(),
dest: code.rd(),
src1: code.rs1(),
},
0b001 => {
if code.funct7() != 0 {
return Err(decode_error(code, "slli shift overflow"));
}
Inst::Slli {
imm: code.imm_i(),
dest: code.rd(),
src1: code.rs1(),
}
}
0b101 => match code.funct7() {
0b0000000 => Inst::Srli {
imm: code.rs2_imm(),
@ -1323,12 +1380,21 @@ impl Inst {
#[cfg(test)]
mod tests {
extern crate std;
use std::io::Write;
use std::prelude::rust_2024::*;
use std::fmt::Write as _;
use std::io::Write as _;
use object::Object;
use object::ObjectSection;
use crate::Fence;
use crate::FenceSet;
use crate::Inst;
use crate::Reg;
#[test]
#[cfg_attr(not(slow_tests), ignore)]
#[cfg_attr(not(slow_tests), ignore = "cfg(slow_tests) not enabled")]
fn exhaustive_decode_no_panic() {
for i in 0..u32::MAX {
if (i % (2 << 25)) == 0 {
@ -1337,13 +1403,219 @@ mod tests {
std::print!("\r{}{}", "#".repeat(done), "-".repeat(100 - done));
std::io::stdout().flush().unwrap();
}
let _ = super::Inst::decode(i);
let _ = Inst::decode(i);
}
let _ = super::Inst::decode(u32::MAX);
let _ = Inst::decode(u32::MAX);
}
#[test]
fn size_of_instruction() {
assert!(size_of::<Inst>() <= 12);
}
const TEST_SECTION_NAME: &str = ".text.rvdctest";
/// Some instruction fields are reserved and not printed in the assembly,
/// but should be ignored instead of rejected by the decoder.
/// We filter out non-canonical forms of these instructions as they do not roundtrip.
fn is_inst_supposed_to_roundtrip(inst: &Inst) -> bool {
match inst {
// Canonical fence.tso
Inst::Fence {
fence:
Fence {
fm: 0b1000,
pred:
FenceSet {
device_input: false,
device_output: false,
memory_read: true,
memory_write: true,
},
succ:
FenceSet {
device_input: false,
device_output: false,
memory_read: true,
memory_write: true,
},
src: Reg::ZERO,
dest: Reg::ZERO,
},
} => true,
// Only canonical normal fence with x0 and x0
Inst::Fence {
fence:
Fence {
fm: 0b0000,
pred: _,
succ: _,
src: Reg::ZERO,
dest: Reg::ZERO,
},
} => true,
// All other fences are reserved
Inst::Fence { .. } => false,
_ => true,
}
}
fn is_compressed_inst_supposed_to_roundtrip(inst: &Inst) -> bool {
match inst {
// HINT
Inst::Addi {
dest: Reg::ZERO, ..
} => false,
// This does roundtrip, but only through C.MV, not C.ADDI
Inst::Addi { imm: 0, .. } => false,
// This does rountrip, but not through C.ADDI
Inst::Addi {
dest: Reg::SP,
src1: Reg::SP,
..
} => false,
// HINT
Inst::Slli {
dest: Reg::ZERO, ..
}
| Inst::Slli { imm: 0, .. }
| Inst::Srli {
dest: Reg::ZERO, ..
}
| Inst::Srli { imm: 0, .. }
| Inst::Srai {
dest: Reg::ZERO, ..
}
| Inst::Srai { imm: 0, .. } => false,
// HINT
Inst::Lui {
dest: Reg::ZERO, ..
} => false,
_ => true,
}
}
// turn this up for debugging, only run the test directly in these cases
const SKIP_CHUNKS: u32 = 0;
#[test]
fn ensure_no_chunks_are_skipped() {
assert_eq!(SKIP_CHUNKS, 0);
}
#[test]
#[cfg_attr(not(slow_tests), ignore = "cfg(slow_tests) not enabled")]
fn normal_clang_roundtrip() {
const CHUNKS: u32 = 128;
const CHUNK_SIZE: u32 = u32::MAX / CHUNKS;
for (chunk_idx, start) in ((SKIP_CHUNKS * CHUNK_SIZE)..u32::MAX)
.step_by(CHUNK_SIZE as usize)
.enumerate()
{
let chunk_idx = chunk_idx + SKIP_CHUNKS as usize;
let start_time = std::time::Instant::now();
let insts = (start..=start.saturating_add(CHUNK_SIZE))
.filter_map(|code| Some((code, Inst::decode_normal(code).ok()?)))
.filter(|(_, inst)| is_inst_supposed_to_roundtrip(inst))
.collect::<Vec<_>>();
let mut text = std::format!(".section {TEST_SECTION_NAME}\n.globl _start\n_start:\n");
for (_, inst) in &insts {
writeln!(text, " {inst}").unwrap();
}
let data = clang_assemble(&text, "-march=rv32ima_zihintpause");
for (i, result_code) in data.chunks(4).enumerate() {
let result_code = u32::from_le_bytes(result_code.try_into().unwrap());
assert_eq!(
insts[i].0, result_code,
"failed to rountrip!\n\
instruction `{:0>32b}` failed to rountrip\n\
resulted in `{:0>32b}` instead.\n\
disassembly of original instruction: `{}`",
insts[i].0, result_code, insts[i].1
);
}
let elapsed = start_time.elapsed();
let chunk_number = chunk_idx + 1;
let remaining = elapsed * (CHUNKS.saturating_sub(chunk_number as u32));
writeln!(
std::io::stdout(),
"Completed chunk {chunk_number}/{CHUNKS} (estimated {remaining:?} remaining)",
)
.unwrap();
}
}
#[test]
#[ignore = "this doesn't quite work yet because there is often a non-canonical encoding"]
fn compressed_clang_roundtrip() {
let insts = (0..=u16::MAX)
.filter_map(|code| Some((code, Inst::decode_compressed(code).ok()?)))
.filter(|(_, inst)| is_compressed_inst_supposed_to_roundtrip(inst))
.collect::<Vec<_>>();
let mut text = std::format!(".section {TEST_SECTION_NAME}\n.globl _start\n_start:\n");
for (_, inst) in &insts {
writeln!(text, " {inst}").unwrap();
}
let data = clang_assemble(&text, "-march=rv32imac");
for (i, result_code) in data.chunks(2).enumerate() {
assert!(
Inst::first_byte_is_compressed(result_code[0]),
"failed to roundtrip {i}th instruction!\n\
instruction `{:0>16b}` resulted in an uncompressed instruction from clang\n\
disassembly of original instruction: `{}`",
insts[i].0,
insts[i].1
);
let result_code = u16::from_le_bytes(result_code.try_into().unwrap());
assert_eq!(
insts[i].0, result_code,
"failed to rountrip {i}th instruction!\n\
instruction `{:0>16b}` failed to rountrip\n\
resulted in `{:0>16b}` instead.\n\
disassembly of original instruction: `{}`",
insts[i].0, result_code, insts[i].1
);
}
}
fn clang_assemble(text: &str, march_flag: &str) -> Vec<u8> {
let tmp = tempfile::tempdir().unwrap();
let path = tmp.path().join("16.s");
let bin_path = tmp.path().join("16.o");
std::fs::write(&path, text).unwrap();
let mut clang = std::process::Command::new("clang");
clang.args(["-target", "riscv32-unknown-none-elf", march_flag, "-c"]);
clang.arg(path);
clang.arg("-o");
clang.arg(&bin_path);
let out = clang.output().unwrap();
if !out.status.success() {
panic!(
"failed to run clang:\n{}",
String::from_utf8_lossy(&out.stderr)
);
}
let obj = std::fs::read(bin_path).unwrap();
let obj = object::File::parse(&*obj).unwrap();
let section = obj.section_by_name(TEST_SECTION_NAME).unwrap();
let data = section.data().unwrap();
data.to_owned()
}
}