From ee1a18f7c3d67a00a0b680d8337197fbf0ccee19 Mon Sep 17 00:00:00 2001 From: Nilstrieb <48135649+Nilstrieb@users.noreply.github.com> Date: Sat, 11 Feb 2023 18:33:34 +0100 Subject: [PATCH] parse header --- Cargo.lock | 97 +++++++++++++++++++++++++++++ elven-parser/Cargo.toml | 3 + elven-parser/src/lib.rs | 2 +- elven-parser/src/raw.rs | 114 ++++++++++++++++++++++++++++++++++ test_data/.gitignore | 1 + test_data/create_test_data.sh | 8 +++ test_data/hello_world.rs | 3 + 7 files changed, 227 insertions(+), 1 deletion(-) create mode 100644 elven-parser/src/raw.rs create mode 100644 test_data/.gitignore create mode 100755 test_data/create_test_data.sh create mode 100644 test_data/hello_world.rs diff --git a/Cargo.lock b/Cargo.lock index 3db960a..312547c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5,3 +5,100 @@ version = 3 [[package]] name = "elven-parser" version = "0.1.0" +dependencies = [ + "memmap2", + "nom", + "thiserror", +] + +[[package]] +name = "libc" +version = "0.2.139" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79" + +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + +[[package]] +name = "memmap2" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b182332558b18d807c4ce1ca8ca983b34c3ee32765e47b3f0f69b90355cc1dc" +dependencies = [ + "libc", +] + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "proc-macro2" +version = "1.0.51" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d727cae5b39d21da60fa540906919ad737832fe0b1c165da3a34d6548c849d6" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "syn" +version = "1.0.107" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unicode-ident" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" diff --git a/elven-parser/Cargo.toml b/elven-parser/Cargo.toml index ba8d4ab..ee797ba 100644 --- a/elven-parser/Cargo.toml +++ b/elven-parser/Cargo.toml @@ -6,3 +6,6 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +memmap2 = "0.5.8" +nom = "7.1.3" +thiserror = "1.0.38" diff --git a/elven-parser/src/lib.rs b/elven-parser/src/lib.rs index 4c70946..1a3f766 100644 --- a/elven-parser/src/lib.rs +++ b/elven-parser/src/lib.rs @@ -1 +1 @@ -pub fn yeet() {} +pub mod raw; diff --git a/elven-parser/src/raw.rs b/elven-parser/src/raw.rs new file mode 100644 index 0000000..620974c --- /dev/null +++ b/elven-parser/src/raw.rs @@ -0,0 +1,114 @@ +//! Structures and parsers for ELF64. ELF32 can knock itself out. + +use std::mem; + +#[derive(Debug)] +pub struct Addr(u64); +#[derive(Debug)] +pub struct Offset(u64); +#[derive(Debug)] +pub struct Section(u16); +#[derive(Debug)] +pub struct Versym(u16); + +#[derive(Debug, Clone, thiserror::Error)] +pub enum ElfParseError { + #[error("The file is too small. Expected at least {0} bytes, found {1} bytes")] + FileTooSmall(usize, usize), + #[error("The input is not aligned in memory. Expected align {0}, found align {1}")] + UnalignedInput(usize, usize), + #[error("The magic of the file did not match. Maybe it's not an ELF file?. Found {0:x?}")] + WrongMagic([u8; 4]), +} + +/// A raw ELF. Does not come with cute ears for now. +#[derive(Debug)] +pub struct Elf<'a> { + pub header: &'a ElfHeader, +} + +#[derive(Debug)] +#[repr(C)] +pub struct ElfHeader { + pub ident: [u8; 16], + pub r#type: u16, + pub machine: u16, + pub version: u32, + pub entry: Addr, + pub phoff: Offset, + pub shoff: Offset, + pub flags: u32, + pub ehsize: u16, + pub phentsize: u16, + pub phnum: u16, + pub shentsize: u16, + pub shnum: u16, + pub shstrndex: u16, +} + +impl<'a> Elf<'a> { + pub fn parse(input: &'a [u8]) -> Result { + const HEADER_SIZE: usize = mem::size_of::(); + const HEADER_ALIGN: usize = mem::align_of::(); + + if input.len() < HEADER_SIZE { + return Err(ElfParseError::FileTooSmall(HEADER_SIZE, input.len())); + } + + let input_ptr = input as *const [u8]; + + let input_addr = input_ptr as *const u8 as usize; + let align = input_addr.trailing_zeros() as usize; + + if align < HEADER_ALIGN { + return Err(ElfParseError::UnalignedInput(HEADER_ALIGN, align)); + } + + let header = input_ptr as *const ElfHeader; + // SAFETY: We checked that the size is enough. We checked that the alignment matches. + // ElfHeader is POD. + let header = unsafe { &*header }; + + let magic = header.ident[..4].try_into().unwrap(); + + if magic != [0x7f, b'E', b'L', b'F'] { + return Err(ElfParseError::WrongMagic(magic)); + } + + Ok(Elf { header }) + } +} + +#[cfg(test)] +mod tests { + use std::{fs, path::Path}; + + use memmap2::Mmap; + + use super::*; + + fn load_test_file(file_name: impl AsRef) -> Mmap { + let name = file_name.as_ref(); + let this_file_path = Path::new(env!("CARGO_MANIFEST_DIR")).join(file!()); + let double_this_crate = this_file_path.parent().unwrap().parent().unwrap(); + let workspace_root = double_this_crate.parent().unwrap().parent().unwrap(); + + let file_path = workspace_root.join("test_data").join("out").join(name); + + let file = fs::File::open(&file_path).expect(&format!( + "Failed to open test file {} at path {}. Consider running `test_data/create_test_data.sh` to create the test data files", + name.display(), + file_path.display() + )); + + unsafe { Mmap::map(&file).unwrap() } + } + + #[test] + fn rust_hello_world_bin() { + let file = load_test_file("hello_world"); + let elf = Elf::parse(&file).unwrap(); + dbg!(elf); + panic!() + } +} diff --git a/test_data/.gitignore b/test_data/.gitignore new file mode 100644 index 0000000..981aeb8 --- /dev/null +++ b/test_data/.gitignore @@ -0,0 +1 @@ +/out \ No newline at end of file diff --git a/test_data/create_test_data.sh b/test_data/create_test_data.sh new file mode 100755 index 0000000..74b54ee --- /dev/null +++ b/test_data/create_test_data.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + +for rust_file in $SCRIPT_DIR/*.rs; do + # Use -Cprefer-dynamic to keep the binary small + rustc --edition 2021 "$rust_file" -Cprefer-dynamic -Copt-level=3 --out-dir="$SCRIPT_DIR/out" +done \ No newline at end of file diff --git a/test_data/hello_world.rs b/test_data/hello_world.rs new file mode 100644 index 0000000..f029519 --- /dev/null +++ b/test_data/hello_world.rs @@ -0,0 +1,3 @@ +fn main() { + println!("Hello, World!"); +} \ No newline at end of file