diff --git a/Cargo.lock b/Cargo.lock index 0c37f38..237b555 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,12 +2,6 @@ # It is not intended for manual editing. version = 4 -[[package]] -name = "adler2" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" - [[package]] name = "bitflags" version = "2.9.0" @@ -20,15 +14,6 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" -[[package]] -name = "crc32fast" -version = "1.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" -dependencies = [ - "cfg-if", -] - [[package]] name = "errno" version = "0.3.10" @@ -55,16 +40,6 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" -[[package]] -name = "flate2" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11faaf5a5236997af9848be0bef4db95824b1d534ebc64d0f0c6cf3e67bd38dc" -dependencies = [ - "crc32fast", - "miniz_oxide", -] - [[package]] name = "getrandom" version = "0.3.1" @@ -95,32 +70,6 @@ version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db9c683daf087dc577b7506e9695b3d556a9f3849903fa28186283afd6809e9" -[[package]] -name = "memchr" -version = "2.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" - -[[package]] -name = "miniz_oxide" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e3e04debbb59698c15bacbb6d93584a8c0ca9cc3213cb423d31f760d8843ce5" -dependencies = [ - "adler2", -] - -[[package]] -name = "object" -version = "0.36.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" -dependencies = [ - "flate2", - "memchr", - "ruzstd", -] - [[package]] name = "once_cell" version = "1.20.3" @@ -157,35 +106,17 @@ dependencies = [ "tempfile", ] -[[package]] -name = "ruzstd" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fad02996bfc73da3e301efe90b1837be9ed8f4a462b6ed410aa35d00381de89f" -dependencies = [ - "twox-hash", -] - [[package]] name = "rvdc" -version = "0.1.1" -dependencies = [ - "object", - "tempfile", -] - -[[package]] -name = "static_assertions" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +version = "0.1.0" [[package]] name = "tempfile" -version = "3.19.1" +version = "3.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7437ac7763b9b123ccf33c338a5cc1bac6f69b45a136c19bdd8a65e3916435bf" +checksum = "2c317e0a526ee6120d8dabad239c8dadca62b24b6f168914bbbc8e2fb1f0e567" dependencies = [ + "cfg-if", "fastrand", "getrandom", "once_cell", @@ -193,16 +124,6 @@ dependencies = [ "windows-sys", ] -[[package]] -name = "twox-hash" -version = "1.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" -dependencies = [ - "cfg-if", - "static_assertions", -] - [[package]] name = "wasi" version = "0.13.3+wasi-0.2.2" diff --git a/rvdc/CHANGELOG.md b/rvdc/CHANGELOG.md index 90efab4..bd05a0f 100644 --- a/rvdc/CHANGELOG.md +++ b/rvdc/CHANGELOG.md @@ -1,11 +1,3 @@ -## 0.1.1 - -- Add `Fence::is_tso` -- Several fixes to disassembly -- Forbid overflow for `slli` immediates -- Add `Zihintpause` extension to README (it was already implemented in disassembly) -- Add exhaustive tests - ## 0.1.0 Initial release. diff --git a/rvdc/Cargo.toml b/rvdc/Cargo.toml index de87c41..b3bdb0e 100644 --- a/rvdc/Cargo.toml +++ b/rvdc/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "rvdc" -version = "0.1.1" +version = "0.1.0" description = "RISC-V instruction decoder" repository = "https://github.com/Noratrieb/rustv32i" edition = "2024" @@ -12,7 +12,3 @@ license = "MIT OR Apache-2.0" [lints.rust] unexpected_cfgs = { level = "warn", check-cfg = ['cfg(slow_tests)'] } - -[dev-dependencies] -object = "0.36.7" -tempfile = "3.19.1" diff --git a/rvdc/README.md b/rvdc/README.md index 3462ecd..f7093de 100644 --- a/rvdc/README.md +++ b/rvdc/README.md @@ -1,7 +1,7 @@ RISC-V instruction decoder. The main function is [`Inst::decode`], which will decode an instruction into the [`Inst`] enum. -The [`core::fmt::Display`] impl of [`Inst`] provides disassembly functionality +The [`std::fmt::Display`] impl of [`Inst`] provides disassembly functionality (note that the precise output of that implementation is not considered stable). # Register size support @@ -25,7 +25,6 @@ The decoder currently supports the following instructions: - [x] Zalrsc standard extension - [x] Zaamo standard extension - [x] C standard extension -- [x] Zihintpause standard extension More extensions may be implemented in the future. @@ -63,10 +62,8 @@ This crate supports `no_std` without the `alloc` crate. # Testing -This crate is tested by exhaustively going through all 32 bit values that are valid instructions and roundtripping the disassembly through the clang assembler, ensuring it remains the same. -This is not yet done for compressed instructions. - -Additionally, it's also tested as part of an emulator, which tests many different kinds of instructions. +This crate is currently tested as part of an emulator, which tests many different kinds of instructions. +In the future, more tests of the decoder specifically may be added. # MSRV diff --git a/rvdc/src/lib.rs b/rvdc/src/lib.rs index f097254..2f82845 100644 --- a/rvdc/src/lib.rs +++ b/rvdc/src/lib.rs @@ -317,29 +317,6 @@ pub struct DecodeError { } impl Fence { - /// Whether this is a `fence.tso`. - /// `fm=0b1000` and `RW,RW` - pub fn is_tso(&self) -> bool { - matches!( - self, - Self { - fm: 0b1000, - pred: FenceSet { - device_input: _, - device_output: _, - memory_read: true, - memory_write: true - }, - succ: FenceSet { - device_input: _, - device_output: _, - memory_read: true, - memory_write: true - }, - .. - } - ) - } /// Whether this fence indicates a `pause` assembler pseudoinstruction. pub fn is_pause(&self) -> bool { self.pred @@ -420,12 +397,10 @@ impl Display for Inst { Inst::Sh { offset, src, base } => write!(f, "sh {src}, {}({base})", offset as i32), Inst::Sw { offset, src, base } => write!(f, "sw {src}, {}({base})", offset as i32), Inst::Addi { imm, dest, src1 } => { - if dest.0 == 0 && src1.0 == 0 && imm == 0 { + if dest.0 == 0 && src1.0 == 0 { write!(f, "nop") } else if src1.0 == 0 { write!(f, "li {dest}, {}", imm as i32) - } else if imm == 0 { - write!(f, "mv {dest}, {src1}") } else { write!(f, "addi {dest}, {src1}, {}", imm as i32) } @@ -471,7 +446,11 @@ impl Display for Inst { src1: rs1, } => write!(f, "srai {dest}, {rs1}, {}", imm as i32), Inst::Add { dest, src1, src2 } => { - write!(f, "add {dest}, {src1}, {src2}") + if src1.0 == 0 { + write!(f, "mv {dest}, {src2}") + } else { + write!(f, "add {dest}, {src1}, {src2}") + } } Inst::Sub { dest, src1, src2 } => write!(f, "sub {dest}, {src1}, {src2}"), Inst::Sll { dest, src1, src2 } => write!(f, "sll {dest}, {src1}, {src2}"), @@ -483,7 +462,7 @@ impl Display for Inst { Inst::Or { dest, src1, src2 } => write!(f, "or {dest}, {src1}, {src2}"), Inst::And { dest, src1, src2 } => write!(f, "and {dest}, {src1}, {src2}"), Inst::Fence { fence } => match fence.fm { - _ if fence.is_tso() => write!(f, "fence.tso"), + 0b1000 => write!(f, "fence.TSO"), 0b0000 if fence.is_pause() => { write!(f, "pause") } @@ -512,33 +491,25 @@ impl Display for Inst { dest, addr, src, - } => write!(f, "amo{op}.w{order} {dest}, {src}, ({addr})",), + } => write!(f, "am{op}.w{order} {dest}, {src}, ({addr})",), } } } impl Display for FenceSet { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let mut has = false; if self.device_input { - has = true; write!(f, "i")?; } if self.device_output { - has = true; write!(f, "o")?; } if self.memory_read { - has = true; write!(f, "r")?; } if self.memory_write { - has = true; write!(f, "w")?; } - if !has { - write!(f, "0")?; - } Ok(()) } } @@ -747,22 +718,6 @@ fn decode_error(instruction: impl Into, unexpected_field: &'static str impl Inst { /// Whether the first byte of an instruction indicates a compressed or uncompressed instruction. - /// - /// # Examples - /// - /// ```rust - /// // addi sp, sp, -0x20 (compressed) - /// let x = 0x1101_u32; - /// assert!(rvdc::Inst::first_byte_is_compressed(x.to_le_bytes()[0])); - /// let x = 0x1101_u16; - /// assert!(rvdc::Inst::first_byte_is_compressed(x.to_le_bytes()[0])); - /// ``` - /// - /// ```rust - /// // auipc t1, 0xa - /// let x = 0x0000a317_u32; - /// assert!(!rvdc::Inst::first_byte_is_compressed(x.to_le_bytes()[0])); - /// ``` pub fn first_byte_is_compressed(byte: u8) -> bool { (byte & 0b11) != 0b11 } @@ -804,18 +759,11 @@ impl Inst { // C0 0b00 => match code.funct3() { // C.ADDI4SPN -> addi \rd', sp, \imm - 0b000 => { - let imm = - code.immediate_u(&[(5..=5, 3), (6..=6, 2), (7..=10, 6), (11..=12, 4)]); - if imm == 0 { - return Err(decode_error(code, "uimm=0 for C.ADDISPN is reserved")); - } - Inst::Addi { - imm, - dest: code.rs2_short(), - src1: Reg::SP, - } - } + 0b000 => Inst::Addi { + imm: code.immediate_u(&[(5..=5, 3), (6..=6, 2), (7..=10, 6), (11..=12, 4)]), + dest: code.rs2_short(), + src1: Reg::SP, + }, // C.LW -> lw \dest \offset(\base) 0b010 => Inst::Lw { offset: code.immediate_u(&[(10..=12, 3), (5..=5, 6), (6..=6, 2)]), @@ -1000,7 +948,7 @@ impl Inst { // C.SLLI -> slli \rd, \rd, \imm 0b000 => { if code.extract(12..=12) != 0 { - return Err(decode_error(code, "C.SLLI shift amount must be zero")); + return Err(decode_error(code, "imm")); } Inst::Slli { imm: code.immediate_u(&[(2..=6, 0), (12..=12, 5)]), @@ -1068,7 +1016,7 @@ impl Inst { }, _ => return Err(decode_error(code, "funct3")), }, - _ => return Err(decode_error(code, "instruction is not compressed")), + _ => return Err(decode_error(code, "op")), }; Ok(inst) } @@ -1215,16 +1163,11 @@ impl Inst { dest: code.rd(), src1: code.rs1(), }, - 0b001 => { - if code.funct7() != 0 { - return Err(decode_error(code, "slli shift overflow")); - } - Inst::Slli { - imm: code.imm_i(), - dest: code.rd(), - src1: code.rs1(), - } - } + 0b001 => Inst::Slli { + imm: code.imm_i(), + dest: code.rd(), + src1: code.rs1(), + }, 0b101 => match code.funct7() { 0b0000000 => Inst::Srli { imm: code.rs2_imm(), @@ -1380,21 +1323,12 @@ impl Inst { #[cfg(test)] mod tests { extern crate std; - use std::prelude::rust_2024::*; + use std::io::Write; - use std::fmt::Write as _; - use std::io::Write as _; - - use object::Object; - use object::ObjectSection; - - use crate::Fence; - use crate::FenceSet; use crate::Inst; - use crate::Reg; #[test] - #[cfg_attr(not(slow_tests), ignore = "cfg(slow_tests) not enabled")] + #[cfg_attr(not(slow_tests), ignore)] fn exhaustive_decode_no_panic() { for i in 0..u32::MAX { if (i % (2 << 25)) == 0 { @@ -1403,219 +1337,13 @@ mod tests { std::print!("\r{}{}", "#".repeat(done), "-".repeat(100 - done)); std::io::stdout().flush().unwrap(); } - let _ = Inst::decode(i); + let _ = super::Inst::decode(i); } - let _ = Inst::decode(u32::MAX); + let _ = super::Inst::decode(u32::MAX); } #[test] fn size_of_instruction() { assert!(size_of::() <= 12); } - - const TEST_SECTION_NAME: &str = ".text.rvdctest"; - - /// Some instruction fields are reserved and not printed in the assembly, - /// but should be ignored instead of rejected by the decoder. - /// We filter out non-canonical forms of these instructions as they do not roundtrip. - fn is_inst_supposed_to_roundtrip(inst: &Inst) -> bool { - match inst { - // Canonical fence.tso - Inst::Fence { - fence: - Fence { - fm: 0b1000, - pred: - FenceSet { - device_input: false, - device_output: false, - memory_read: true, - memory_write: true, - }, - succ: - FenceSet { - device_input: false, - device_output: false, - memory_read: true, - memory_write: true, - }, - src: Reg::ZERO, - dest: Reg::ZERO, - }, - } => true, - // Only canonical normal fence with x0 and x0 - Inst::Fence { - fence: - Fence { - fm: 0b0000, - pred: _, - succ: _, - src: Reg::ZERO, - dest: Reg::ZERO, - }, - } => true, - // All other fences are reserved - Inst::Fence { .. } => false, - _ => true, - } - } - - fn is_compressed_inst_supposed_to_roundtrip(inst: &Inst) -> bool { - match inst { - // HINT - Inst::Addi { - dest: Reg::ZERO, .. - } => false, - // This does roundtrip, but only through C.MV, not C.ADDI - Inst::Addi { imm: 0, .. } => false, - // This does rountrip, but not through C.ADDI - Inst::Addi { - dest: Reg::SP, - src1: Reg::SP, - .. - } => false, - // HINT - Inst::Slli { - dest: Reg::ZERO, .. - } - | Inst::Slli { imm: 0, .. } - | Inst::Srli { - dest: Reg::ZERO, .. - } - | Inst::Srli { imm: 0, .. } - | Inst::Srai { - dest: Reg::ZERO, .. - } - | Inst::Srai { imm: 0, .. } => false, - // HINT - Inst::Lui { - dest: Reg::ZERO, .. - } => false, - _ => true, - } - } - - // turn this up for debugging, only run the test directly in these cases - const SKIP_CHUNKS: u32 = 0; - - #[test] - fn ensure_no_chunks_are_skipped() { - assert_eq!(SKIP_CHUNKS, 0); - } - - #[test] - #[cfg_attr(not(slow_tests), ignore = "cfg(slow_tests) not enabled")] - fn normal_clang_roundtrip() { - const CHUNKS: u32 = 128; - const CHUNK_SIZE: u32 = u32::MAX / CHUNKS; - - for (chunk_idx, start) in ((SKIP_CHUNKS * CHUNK_SIZE)..u32::MAX) - .step_by(CHUNK_SIZE as usize) - .enumerate() - { - let chunk_idx = chunk_idx + SKIP_CHUNKS as usize; - - let start_time = std::time::Instant::now(); - - let insts = (start..=start.saturating_add(CHUNK_SIZE)) - .filter_map(|code| Some((code, Inst::decode_normal(code).ok()?))) - .filter(|(_, inst)| is_inst_supposed_to_roundtrip(inst)) - .collect::>(); - - let mut text = std::format!(".section {TEST_SECTION_NAME}\n.globl _start\n_start:\n"); - for (_, inst) in &insts { - writeln!(text, " {inst}").unwrap(); - } - - let data = clang_assemble(&text, "-march=rv32ima_zihintpause"); - - for (i, result_code) in data.chunks(4).enumerate() { - let result_code = u32::from_le_bytes(result_code.try_into().unwrap()); - - assert_eq!( - insts[i].0, result_code, - "failed to rountrip!\n\ - instruction `{:0>32b}` failed to rountrip\n\ - resulted in `{:0>32b}` instead.\n\ - disassembly of original instruction: `{}`", - insts[i].0, result_code, insts[i].1 - ); - } - - let elapsed = start_time.elapsed(); - let chunk_number = chunk_idx + 1; - let remaining = elapsed * (CHUNKS.saturating_sub(chunk_number as u32)); - - writeln!( - std::io::stdout(), - "Completed chunk {chunk_number}/{CHUNKS} (estimated {remaining:?} remaining)", - ) - .unwrap(); - } - } - - #[test] - #[ignore = "this doesn't quite work yet because there is often a non-canonical encoding"] - fn compressed_clang_roundtrip() { - let insts = (0..=u16::MAX) - .filter_map(|code| Some((code, Inst::decode_compressed(code).ok()?))) - .filter(|(_, inst)| is_compressed_inst_supposed_to_roundtrip(inst)) - .collect::>(); - - let mut text = std::format!(".section {TEST_SECTION_NAME}\n.globl _start\n_start:\n"); - for (_, inst) in &insts { - writeln!(text, " {inst}").unwrap(); - } - - let data = clang_assemble(&text, "-march=rv32imac"); - - for (i, result_code) in data.chunks(2).enumerate() { - assert!( - Inst::first_byte_is_compressed(result_code[0]), - "failed to roundtrip {i}th instruction!\n\ - instruction `{:0>16b}` resulted in an uncompressed instruction from clang\n\ - disassembly of original instruction: `{}`", - insts[i].0, - insts[i].1 - ); - - let result_code = u16::from_le_bytes(result_code.try_into().unwrap()); - - assert_eq!( - insts[i].0, result_code, - "failed to rountrip {i}th instruction!\n\ - instruction `{:0>16b}` failed to rountrip\n\ - resulted in `{:0>16b}` instead.\n\ - disassembly of original instruction: `{}`", - insts[i].0, result_code, insts[i].1 - ); - } - } - - fn clang_assemble(text: &str, march_flag: &str) -> Vec { - let tmp = tempfile::tempdir().unwrap(); - - let path = tmp.path().join("16.s"); - let bin_path = tmp.path().join("16.o"); - std::fs::write(&path, text).unwrap(); - - let mut clang = std::process::Command::new("clang"); - clang.args(["-target", "riscv32-unknown-none-elf", march_flag, "-c"]); - clang.arg(path); - clang.arg("-o"); - clang.arg(&bin_path); - let out = clang.output().unwrap(); - if !out.status.success() { - panic!( - "failed to run clang:\n{}", - String::from_utf8_lossy(&out.stderr) - ); - } - - let obj = std::fs::read(bin_path).unwrap(); - let obj = object::File::parse(&*obj).unwrap(); - let section = obj.section_by_name(TEST_SECTION_NAME).unwrap(); - let data = section.data().unwrap(); - data.to_owned() - } }