diff --git a/elven-parser/src/raw.rs b/elven-parser/src/raw.rs index 9e78f70..a134673 100644 --- a/elven-parser/src/raw.rs +++ b/elven-parser/src/raw.rs @@ -5,7 +5,13 @@ pub mod consts; use consts as c; -use std::{ffi::CStr, fmt::Debug, mem, ops, slice::SliceIndex}; +use std::{ + ffi::CStr, + fmt::Debug, + mem, + ops::{self}, + slice::SliceIndex, +}; use bytemuck::{Pod, PodCastError, Zeroable}; @@ -50,6 +56,8 @@ pub enum ElfParseError { IndexOutOfBounds(&'static str, usize), #[error("String in string table does not end with a nul terminator: String offset: {0}")] NoStringNulTerm(usize), + #[error("The SHT_SYMTAB section was not found")] + SymtabNotFound, } type Result<T> = std::result::Result<T, ElfParseError>; @@ -82,13 +90,13 @@ pub struct ElfHeader { #[derive(Debug, Clone, Copy, Zeroable, Pod)] #[repr(C)] pub struct ElfIdent { - magic: [u8; c::SELFMAG], - class: u8, - data: u8, - version: u8, - osabi: u8, - abiversion: u8, - _pad: [u8; 7], + pub magic: [u8; c::SELFMAG], + pub class: c::Class, + pub data: c::Data, + pub version: u8, + pub osabi: c::OsAbi, + pub abiversion: u8, + pub _pad: [u8; 7], } const _: [u8; c::EI_NIDENT] = [0; mem::size_of::<ElfIdent>()]; @@ -121,6 +129,52 @@ pub struct Shdr { pub entsize: u64, } +#[derive(Debug, Clone, Copy, Zeroable, Pod)] +#[repr(C)] +pub struct Sym { + pub name: u32, + pub info: u8, + pub other: u8, + pub shndx: u16, + pub value: Addr, + pub size: u64, +} + +#[derive(Debug, Clone, Copy, Zeroable, Pod)] +#[repr(C)] +pub struct Rel { + pub offset: Addr, + pub info: RelInfo, +} + +#[derive(Debug, Clone, Copy, Zeroable, Pod)] +#[repr(C)] +pub struct Rela { + pub offset: Addr, + pub info: RelInfo, + pub addend: u64, +} + +#[derive(Clone, Copy, Zeroable, Pod)] +#[repr(transparent)] +pub struct RelInfo(u64); + +impl RelInfo { + pub fn sym(&self) -> u32 { + (self.0 >> 32) as u32 + } + + pub fn r#type(&self) -> u32 { + (self.0 & 0xffffffff) as u32 + } +} + +impl Debug for RelInfo { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { +
write!(f, "{:?} @ {}", c::RX86_64(self.r#type()), self.sym()) + } +} + impl<'a> Elf<'a> { pub fn new(data: &'a [u8]) -> Result<Self> { let magic = data[..c::SELFMAG].try_into().unwrap(); @@ -141,6 +195,10 @@ impl<'a> Elf<'a> { pub fn program_headers(&self) -> Result<&[Phdr]> { let header = self.header()?; + if header.phnum == 0 { + return Ok(&[]); + } + let expected_ent_size = mem::size_of::<Phdr>(); let actual_ent_size = usize::from(header.phentsize); if actual_ent_size != expected_ent_size { @@ -160,6 +218,10 @@ impl<'a> Elf<'a> { pub fn section_headers(&self) -> Result<&[Shdr]> { let header = self.header()?; + if header.shnum == 0 { + return Ok(&[]); + } + let expected_ent_size = mem::size_of::<Shdr>(); let actual_ent_size = usize::from(header.shentsize); if actual_ent_size != expected_ent_size { @@ -222,6 +284,33 @@ impl<'a> Elf<'a> { .ok_or(ElfParseError::NoStringNulTerm(idx))?; Ok(CStr::from_bytes_with_nul(&indexed[..=end]).unwrap()) } + + pub fn relas(&self) -> Result<impl Iterator<Item = (&Shdr, &Rela)>> { + Ok(self + .section_headers()? + .iter() + .filter(|sh| sh.r#type == c::SHT_RELA) + .map(|sh| { + let content = self.section_content(sh)?; + let relas = load_slice::<Rela>(content, content.len() / mem::size_of::<Rela>())?; + Ok((sh, relas)) + }) + .collect::<Result<Vec<_>>>()? + .into_iter() + .flat_map(|(sh, relas)| relas.iter().map(move |rela| (sh, rela)))) + } + + pub fn symbols(&self) -> Result<&[Sym]> { + let sh = self + .section_headers()?
+ .iter() + .find(|sh| sh.r#type == c::SHT_SYMTAB) + .ok_or(ElfParseError::SymtabNotFound)?; + + let data = self.section_content(sh)?; + + load_slice(data, data.len() / mem::size_of::<Sym>()) + } } fn load_ref<T: Pod>(data: &[u8]) -> Result<&T> { @@ -334,4 +423,34 @@ mod tests { Ok(()) } + + #[test] + fn c_hello_world_object() -> super::Result<()> { + let file = load_test_file("hello_world_obj"); + let elf = Elf::new(&file)?; + let header = elf.header()?; + + assert_eq!(header.ident.class, c::ELFCLASS64); + assert_eq!(header.ident.data, c::ELFDATA2LSB); + assert_eq!(header.ident.osabi, c::ELFOSABI_SYSV); + assert_eq!(header.r#type, c::ET_REL); + assert_eq!(header.entry, Addr(0)); + + elf.program_headers()?; + elf.section_headers()?; + + for sh in elf.section_headers()? { + let name = elf.string(sh.name as usize)?.to_str().unwrap(); + println!("{name:20} {:5} {:?}", sh.size, sh.r#type); + } + + println!("Relocations:"); + + for (sh, rela) in elf.relas()? { + let section_name = elf.string(sh.name as usize)?.to_str().unwrap(); + println!("{section_name:20} {:?}", rela); + } + + Ok(()) + } } diff --git a/elven-parser/src/raw/consts.rs b/elven-parser/src/raw/consts.rs index 006e6f7..b1947a3 100644 --- a/elven-parser/src/raw/consts.rs +++ b/elven-parser/src/raw/consts.rs @@ -22,10 +22,16 @@ macro_rules! const_group_with_fmt { $( $value => f.write_str(stringify!($name)), )* - a => write!(f, "Invalid {}: {a}", $group_name,) + a => write!(f, "Other {}: {a}", $group_name,) } } } + + impl PartialEq<$ty> for $struct_name { + fn eq(&self, other: &$ty) -> bool { + self.0 == *other + } + } }; } @@ -38,36 +44,48 @@ pub const ELFMAG: &[u8; SELFMAG] = b"\x7fELF"; pub const SELFMAG: usize = 4; pub const EI_CLASS: usize = 4; /* File class byte index */ -pub const ELFCLASSNONE: u8 = 0; /* Invalid class */ -pub const ELFCLASS32: u8 = 1; /* 32-bit objects */ -pub const ELFCLASS64: u8 = 2; /* 64-bit objects */ +const_group_with_fmt!
{ + pub struct Class(u8): "Class" + + pub const ELFCLASSNONE = 0; /* Invalid class */ + pub const ELFCLASS32 = 1; /* 32-bit objects */ + pub const ELFCLASS64 = 2; /* 64-bit objects */ +} pub const ELFCLASSNUM: u8 = 3; pub const EI_DATA: usize = 5; /* Data encoding byte index */ -pub const ELFDATANONE: u8 = 0; /* Invalid data encoding */ -pub const ELFDATA2LSB: u8 = 1; /* 2's complement, little endian */ -pub const ELFDATA2MSB: u8 = 2; /* 2's complement, big endian */ +const_group_with_fmt! { + pub struct Data(u8): "Data" + + pub const ELFDATANONE = 0; /* Invalid data encoding */ + pub const ELFDATA2LSB = 1; /* 2's complement, little endian */ + pub const ELFDATA2MSB = 2; /* 2's complement, big endian */ +} pub const ELFDATANUM: u8 = 3; pub const EI_VERSION: usize = 6; /* File version byte index */ pub const EI_OSABI: usize = 7; /* OS ABI identification */ +const_group_with_fmt! { + pub struct OsAbi(u8): "OS ABI" + + pub const ELFOSABI_SYSV = 0; /* Alias. */ + pub const ELFOSABI_HPUX = 1; /* HP-UX */ + pub const ELFOSABI_NETBSD = 2; /* NetBSD. */ + pub const ELFOSABI_GNU = 3; /* Object uses GNU ELF extensions. */ + pub const ELFOSABI_SOLARIS = 6; /* Sun Solaris. */ + pub const ELFOSABI_AIX = 7; /* IBM AIX. */ + pub const ELFOSABI_IRIX = 8; /* SGI Irix. */ + pub const ELFOSABI_FREEBSD = 9; /* FreeBSD. */ + pub const ELFOSABI_TRU64 = 10; /* Compaq TRU64 UNIX. */ + pub const ELFOSABI_MODESTO = 11; /* Novell Modesto. */ + pub const ELFOSABI_OPENBSD = 12; /* OpenBSD. */ + pub const ELFOSABI_ARM_AEABI = 64; /* ARM EABI */ + pub const ELFOSABI_ARM = 97; /* ARM */ + pub const ELFOSABI_STANDALONE = 255; /* Standalone (embedded) application */ +} pub const ELFOSABI_NONE: u8 = 0; /* UNIX System V ABI */ -pub const ELFOSABI_SYSV: u8 = 0; /* Alias. */ -pub const ELFOSABI_HPUX: u8 = 1; /* HP-UX */ -pub const ELFOSABI_NETBSD: u8 = 2; /* NetBSD. */ -pub const ELFOSABI_GNU: u8 = 3; /* Object uses GNU ELF extensions. 
*/ -pub const ELFOSABI_LINUX: u8 = ELFOSABI_GNU; /* Compatibility alias. */ -pub const ELFOSABI_SOLARIS: u8 = 6; /* Sun Solaris. */ -pub const ELFOSABI_AIX: u8 = 7; /* IBM AIX. */ -pub const ELFOSABI_IRIX: u8 = 8; /* SGI Irix. */ -pub const ELFOSABI_FREEBSD: u8 = 9; /* FreeBSD. */ -pub const ELFOSABI_TRU64: u8 = 10; /* Compaq TRU64 UNIX. */ -pub const ELFOSABI_MODESTO: u8 = 11; /* Novell Modesto. */ -pub const ELFOSABI_OPENBSD: u8 = 12; /* OpenBSD. */ -pub const ELFOSABI_ARM_AEABI: u8 = 64; /* ARM EABI */ -pub const ELFOSABI_ARM: u8 = 97; /* ARM */ -pub const ELFOSABI_STANDALONE: u8 = 255; /* Standalone (embedded) application */ +pub const ELFOSABI_LINUX: u8 = 3; /* Compatibility alias. */ pub const EI_ABIVERSION: usize = 8; /* ABI version */ @@ -144,3 +162,52 @@ pub const SHT_LOOS: u32 = 0x60000000; /* Start OS-specific. */ pub const SHT_LOSUNW: u32 = 0x6ffffffa; /* Sun-specific low bound. */ pub const SHT_HISUNW: u32 = 0x6fffffff; /* Sun-specific high bound. */ pub const SHT_HIOS: u32 = 0x6fffffff; /* End OS-specific type */ + +const_group_with_fmt! 
{ + pub struct RX86_64(u32): "x86_64 Relocation type" + + pub const R_X86_64_NONE = 0; /* No reloc */ + pub const R_X86_64_64 = 1; /* Direct 64 bit */ + pub const R_X86_64_PC32 = 2; /* PC relative 32 bit signed */ + pub const R_X86_64_GOT32 = 3; /* 32 bit GOT entry */ + pub const R_X86_64_PLT32 = 4; /* 32 bit PLT address */ + pub const R_X86_64_COPY = 5; /* Copy symbol at runtime */ + pub const R_X86_64_GLOB_DAT = 6; /* Create GOT entry */ + pub const R_X86_64_JUMP_SLOT = 7; /* Create PLT entry */ + pub const R_X86_64_RELATIVE = 8; /* Adjust by program base */ + pub const R_X86_64_GOTPCREL = 9; /* 32 bit signed PC relative offset to GOT */ + pub const R_X86_64_32 = 10; /* Direct 32 bit zero extended */ + pub const R_X86_64_32S = 11; /* Direct 32 bit sign extended */ + pub const R_X86_64_16 = 12; /* Direct 16 bit zero extended */ + pub const R_X86_64_PC16 = 13; /* 16 bit sign extended pc relative */ + pub const R_X86_64_8 = 14; /* Direct 8 bit sign extended */ + pub const R_X86_64_PC8 = 15; /* 8 bit sign extended pc relative */ + pub const R_X86_64_DTPMOD64 = 16; /* ID of module containing symbol */ + pub const R_X86_64_DTPOFF64 = 17; /* Offset in module's TLS block */ + pub const R_X86_64_TPOFF64 = 18; /* Offset in initial TLS block */ + pub const R_X86_64_TLSGD = 19; /* 32 bit signed PC relative offset to two GOT entries for GD symbol */ + pub const R_X86_64_TLSLD = 20; /* 32 bit signed PC relative offset to two GOT entries for LD symbol */ + pub const R_X86_64_DTPOFF32 = 21; /* Offset in TLS block */ + pub const R_X86_64_GOTTPOFF = 22; /* 32 bit signed PC relative offset to GOT entry for IE symbol */ + pub const R_X86_64_TPOFF32 = 23; /* Offset in initial TLS block */ + pub const R_X86_64_PC64 = 24; /* PC relative 64 bit */ + pub const R_X86_64_GOTOFF64 = 25; /* 64 bit offset to GOT */ + pub const R_X86_64_GOTPC32 = 26; /* 32 bit signed pc relative offset to GOT */ + pub const R_X86_64_GOT64 = 27; /* 64-bit GOT entry offset */ + pub const R_X86_64_GOTPCREL64 = 
28; /* 64-bit PC relative offset to GOT entry */ + pub const R_X86_64_GOTPC64 = 29; /* 64-bit PC relative offset to GOT */ + pub const R_X86_64_GOTPLT64 = 30; /* like GOT64, says PLT entry needed */ + pub const R_X86_64_PLTOFF64 = 31; /* 64-bit GOT relative offset to PLT entry */ + pub const R_X86_64_SIZE32 = 32; /* Size of symbol plus 32-bit addend */ + pub const R_X86_64_SIZE64 = 33; /* Size of symbol plus 64-bit addend */ + pub const R_X86_64_GOTPC32_TLSDESC = 34; /* GOT offset for TLS descriptor. */ + pub const R_X86_64_TLSDESC_CALL = 35; /* Marker for call through TLS descriptor. */ + pub const R_X86_64_TLSDESC = 36; /* TLS descriptor. */ + pub const R_X86_64_IRELATIVE = 37; /* Adjust indirectly by program base */ + pub const R_X86_64_RELATIVE64 = 38; /* 64-bit adjust by program base */ + /* 39 Reserved was R_X86_64_PC32_BND */ + /* 40 Reserved was R_X86_64_PLT32_BND */ + pub const R_X86_64_GOTPCRELX = 41; /* Load from 32 bit signed pc relative offset to GOT entry without REX prefix, relaxable. */ + pub const R_X86_64_REX_GOTPCRELX = 42; /* Load from 32 bit signed pc relative offset to GOT entry with REX prefix, relaxable. 
*/ + pub const R_X86_64_NUM = 43; +} diff --git a/test_data/create_test_data.sh b/test_data/create_test_data.sh index 74b54ee..5718db9 100755 --- a/test_data/create_test_data.sh +++ b/test_data/create_test_data.sh @@ -5,4 +5,9 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) for rust_file in $SCRIPT_DIR/*.rs; do # Use -Cprefer-dynamic to keep the binary small rustc --edition 2021 "$rust_file" -Cprefer-dynamic -Copt-level=3 --out-dir="$SCRIPT_DIR/out" -done \ No newline at end of file +done + +for c_obj_file in $SCRIPT_DIR/*_obj.c; do + echo $c_obj_file + cc "$c_obj_file" -c -o "$SCRIPT_DIR/out/$(basename $c_obj_file .c)" +done diff --git a/test_data/hello_world_obj.c b/test_data/hello_world_obj.c new file mode 100644 index 0000000..c866607 --- /dev/null +++ b/test_data/hello_world_obj.c @@ -0,0 +1,6 @@ +#include <stdio.h> + +int main(int argc, char **argv) { + puts("Hello, World!"); + return 0; +}