diff --git a/Cargo.lock b/Cargo.lock index 82be555..20adbde 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,6 +8,12 @@ version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "224afbd727c3d6e4b90103ece64b8d1b67fbb1973b1046c2281eed3f3803f800" +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + [[package]] name = "bitflags" version = "1.3.2" @@ -115,9 +121,11 @@ dependencies = [ name = "elven-parser" version = "0.1.0" dependencies = [ + "bitflags", "bstr", "bytemuck", "memmap2", + "memoffset", "thiserror", ] @@ -245,6 +253,15 @@ dependencies = [ "libc", ] +[[package]] +name = "memoffset" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d61c719bcfbcf5d62b3a09efa6088de8c54bc0bfcd3ea7ae39fcc186108b8de1" +dependencies = [ + "autocfg", +] + [[package]] name = "nu-ansi-term" version = "0.46.0" diff --git a/elven-forest/src/main.rs b/elven-forest/src/main.rs index b3f9895..d32b48d 100644 --- a/elven-forest/src/main.rs +++ b/elven-forest/src/main.rs @@ -2,7 +2,7 @@ use std::{fmt::Display, fs::File}; use anyhow::Context; use elven_parser::{ - consts::{self as c, DynamicTag, PhType, ShType, SymbolVisibility, RX86_64}, + consts::{self as c, DynamicTag, PhFlags, PhType, ShType, SymbolVisibility, RX86_64}, read::{Addr, ElfReadError, ElfReader, Offset, Sym, SymInfo}, }; use memmap2::Mmap; @@ -34,7 +34,7 @@ struct SectionTable { struct ProgramHeaderTable { #[tabled(rename = "type")] r#type: PhType, - flags: u32, + flags: PhFlags, offset: Addr, virtual_addr: Addr, phys_addr: Addr, diff --git a/elven-parser/Cargo.toml b/elven-parser/Cargo.toml index 17274f4..9917871 100644 --- a/elven-parser/Cargo.toml +++ b/elven-parser/Cargo.toml @@ -6,7 +6,9 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +bitflags = "1.3.2" bstr = "1.2.0" bytemuck = { version = "1.13.0", features = ["derive", "min_const_generics"] } memmap2 = "0.5.8" +memoffset = "0.8.0" thiserror = "1.0.38" diff --git a/elven-parser/src/consts.rs b/elven-parser/src/consts.rs index 5672abd..d546061 100644 --- a/elven-parser/src/consts.rs +++ b/elven-parser/src/consts.rs @@ -1,6 +1,11 @@ #![allow(non_upper_case_globals)] #![allow(clippy::unreadable_literal)] +use std::fmt::Display; + +use bitflags::bitflags; +use bytemuck::{Pod, Zeroable}; + macro_rules! const_group_with_fmt { ( pub struct $struct_name:ident($ty:ty): $group_name:literal @@ -63,6 +68,19 @@ macro_rules! const_group_with_fmt { self.partial_cmp(&other.0) } } + + impl From<$ty> for $struct_name { + fn from(ty: $ty) -> $struct_name { + $struct_name(ty) + } + } + + + impl From<$struct_name> for $ty { + fn from(wrap: $struct_name) -> $ty { + wrap.0 + } + } }; } @@ -238,6 +256,22 @@ pub const PT_HIOS: u32 = 0x6fffffff; /* End of OS-specific */ pub const PT_LOPROC: u32 = 0x70000000; /* Start of processor-specific */ pub const PT_HIPROC: u32 = 0x7fffffff; /* End of processor-specific */ +bitflags! { + #[derive(Zeroable, Pod)] + #[repr(transparent)] + pub struct PhFlags: u32 { + const PF_X = (1 << 0); /* Segment is executable */ + const PF_W = (1 << 1); /* Segment is writable */ + const PF_R = (1 << 2); /* Segment is readable */ + } +} + +impl Display for PhFlags { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self) + } +} + // ------------------ // Symbols // ------------------ diff --git a/elven-parser/src/read.rs b/elven-parser/src/read.rs index 1ae112b..f37b9b5 100644 --- a/elven-parser/src/read.rs +++ b/elven-parser/src/read.rs @@ -107,7 +107,7 @@ pub struct ElfHeader { pub shstrndex: c::SectionIdx, } -pub(crate) const HEADER_ENTRY_OFFSET: usize = 24; +pub const HEADER_ENTRY_OFFSET: usize = 24; #[test] fn elf_header_entry_offset() { @@ -135,7 +135,7 @@ const _: [u8; c::EI_NIDENT] = [0; mem::size_of::()]; #[repr(C)] pub struct Phdr { pub r#type: c::PhType, - pub flags: u32, + pub flags: c::PhFlags, pub offset: Offset, pub vaddr: Addr, pub paddr: Addr, @@ -489,7 +489,7 @@ fn load_ref<'a, T: Pod>(data: &'a [u8], kind: impl Into) -> Result<&'a T load_slice(data, 1, kind).map(|slice| &slice[0]) } -fn load_slice<'a, T: Pod>( +pub(crate) fn load_slice<'a, T: Pod>( data: &'a [u8], amount_of_elems: usize, kind: impl Into, diff --git a/elven-parser/src/write.rs b/elven-parser/src/write.rs index 3c6be68..df515d9 100644 --- a/elven-parser/src/write.rs +++ b/elven-parser/src/write.rs @@ -1,4 +1,4 @@ -use crate::consts::{Machine, SectionIdx, ShType, Type, SHT_NULL, SHT_STRTAB}; +use crate::consts::{Machine, PhFlags, PhType, SectionIdx, ShType, Type, SHT_NULL, SHT_STRTAB}; use crate::read::{self, Addr, ElfIdent, Offset, ShStringIdx}; use std::io; use std::mem::size_of; @@ -17,8 +17,7 @@ pub type Result = std::result::Result; #[derive(Debug, Clone)] pub struct ElfWriter { header: read::ElfHeader, - entry: SectionRelativeAbsoluteAddr, - sections_headers: Vec
, + sections: Vec
, programs_headers: Vec, } @@ -29,7 +28,7 @@ pub struct Header { pub machine: Machine, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Copy)] pub struct SectionRelativeAbsoluteAddr { pub section: SectionIdx, pub rel_offset: Offset, @@ -45,7 +44,16 @@ pub struct Section { } #[derive(Debug, Clone)] -pub struct ProgramHeader {} +pub struct ProgramHeader { + pub r#type: PhType, + pub flags: PhFlags, + pub offset: SectionRelativeAbsoluteAddr, + pub vaddr: Addr, + pub paddr: Addr, + pub filesz: u64, + pub memsz: u64, + pub align: u64, +} const SH_STRTAB: usize = 1; @@ -90,39 +98,45 @@ impl ElfWriter { Self { header, - entry: SectionRelativeAbsoluteAddr { - section: SectionIdx(0), - rel_offset: Offset(0), - }, - sections_headers: vec![null_section, shstrtab], + sections: vec![null_section, shstrtab], programs_headers: Vec::new(), } } - pub fn set_entry(&mut self, entry: SectionRelativeAbsoluteAddr) { - self.entry = entry; + pub fn set_entry(&mut self, entry: Addr) { + self.header.entry = entry; } pub fn add_sh_string(&mut self, content: &[u8]) -> ShStringIdx { - let shstrtab = &mut self.sections_headers[SH_STRTAB]; + let shstrtab = &mut self.sections[SH_STRTAB]; let idx = shstrtab.content.len(); shstrtab.content.extend(content); shstrtab.content.push(0); ShStringIdx(idx as u32) } - pub fn add_section(&mut self, section: Section) { - self.sections_headers.push(section); + pub fn add_section(&mut self, section: Section) -> Result { + let len = self.sections.len(); + self.sections.push(section); + Ok(SectionIdx( + len.try_into() + .map_err(|_| WriteElfError::TooMany("sections"))?, + )) + } + + pub fn add_program_header(&mut self, ph: ProgramHeader) { + self.programs_headers.push(ph); } } mod writing { use bytemuck::Pod; - use crate::read::{Addr, ElfHeader, Offset, Shdr, HEADER_ENTRY_OFFSET}; + use super::{ElfWriter, Result, WriteElfError}; + use crate::read::{Addr, ElfHeader, Offset, Phdr, Shdr}; use std::{io::Write, mem::size_of, num::NonZeroU64}; - use super::{ElfWriter, Result, WriteElfError}; + const SH_OFFSET_OFFSET: usize = memoffset::offset_of!(Shdr, offset); impl ElfWriter { pub fn write(&self) -> Result> { @@ -133,7 +147,7 @@ mod writing { let mut header = self.header; header.shnum = self - .sections_headers + .sections .len() .try_into() .map_err(|_| WriteElfError::TooMany("sections"))?; @@ -147,25 +161,32 @@ mod writing { // We know the size of the header. current_known_position += size_of::() as u64; - // We put the section headers directly after the header. - if !self.sections_headers.is_empty() { - header.shoff = Offset(current_known_position); - } + // ld orderes it ph/sh apparently so we will do the same - // There will be all the section headers right after the header. - current_known_position += (header.shentsize * header.shnum) as u64; - - // We put the program headers directly after the section headers. if !self.programs_headers.is_empty() { header.phoff = Offset(current_known_position); } - // There will be all the program headers right after the section headers. - current_known_position += (header.phentsize * header.phnum) as u64; + // There will be all the program headers right after the header. + let program_headers_start = current_known_position; + let all_ph_size = (header.phentsize as u64) * (header.phnum as u64); + current_known_position += all_ph_size; + + if !self.sections.is_empty() { + header.shoff = Offset(current_known_position); + } + + // There will be all the section headers right after the program headers. + let section_headers_start = current_known_position; + let section_headers_size = header.shentsize as u64 * header.shnum as u64; + current_known_position += section_headers_size; write_pod(&header, &mut output); - for (sh_idx, section) in self.sections_headers.iter().enumerate() { + // Reserve some space for the program headers + output.extend(std::iter::repeat(0).take(all_ph_size as usize)); + + for section in &self.sections { let header = Shdr { name: section.name, r#type: section.r#type, @@ -179,14 +200,6 @@ mod writing { entsize: section.fixed_entsize.map(NonZeroU64::get).unwrap_or(0), }; - if sh_idx == self.entry.section.0 as usize { - let base = current_known_position; - let entry = base + self.entry.rel_offset.0; - let entry_pos = &mut output[HEADER_ENTRY_OFFSET..][..size_of::()]; - let entry_ref = bytemuck::cast_slice_mut::(entry_pos); - entry_ref[0] = entry; - } - // We will write the content for this section at that offset and also make sure to align the next one. // FIXME: Align to the alignment of the next section. current_known_position += align_up(section.content.len() as u64, 8); @@ -194,9 +207,7 @@ mod writing { write_pod(&header, &mut output); } - assert_eq!(self.programs_headers.len(), 0); // FIXME: yeah - - for section in &self.sections_headers { + for section in &self.sections { let section_size = section.content.len() as u64; let aligned_size = align_up(section_size, 8); let padding = aligned_size - section_size; @@ -207,6 +218,42 @@ mod writing { } } + // We know have a few clues about section offsets, so write the program headers. + for (i, program_header) in self.programs_headers.iter().enumerate() { + let rel_offset = program_header.offset; + let section_base_offset = section_headers_start as usize + + header.shentsize as usize * rel_offset.section.0 as usize; + + let section_offset_offset = section_base_offset + SH_OFFSET_OFFSET; + let section_content_offset_bytes = output[section_offset_offset..] + [..size_of::()] + .try_into() + .unwrap(); + let section_content_offset = u64::from_ne_bytes(section_content_offset_bytes); + + let offset = Offset(section_content_offset + rel_offset.rel_offset.0); + + let ph = Phdr { + r#type: program_header.r#type, + flags: program_header.flags, + offset, + vaddr: program_header.vaddr, + paddr: program_header.paddr, + filesz: program_header.filesz, + memsz: program_header.memsz, + align: program_header.align, + }; + + let program_header_start = + program_headers_start as usize + header.phentsize as usize * i as usize; + let space = &mut output[program_header_start..][..header.phentsize as usize]; + let ph_bytes = bytemuck::cast_slice::(std::slice::from_ref(&ph)); + + space.copy_from_slice(ph_bytes); + + write_pod(&ph, &mut output); + } + Ok(output) } } diff --git a/elven-wald/src/lib.rs b/elven-wald/src/lib.rs index 4715714..88ad86f 100644 --- a/elven-wald/src/lib.rs +++ b/elven-wald/src/lib.rs @@ -4,9 +4,9 @@ extern crate tracing; use anyhow::{bail, Context, Result}; use clap::Parser; use elven_parser::{ - consts::{self as c, ShType, SHT_PROGBITS}, - read::{ElfIdent, ElfReader, Offset}, - write::{self, ElfWriter, SectionRelativeAbsoluteAddr, Section}, + consts::{self as c, PhFlags, SectionIdx, ShType, PT_LOAD, SHT_PROGBITS}, + read::{Addr, ElfIdent, ElfReader, Offset}, + write::{self, ElfWriter, ProgramHeader, Section, SectionRelativeAbsoluteAddr}, }; use memmap2::Mmap; use std::{ @@ -58,19 +58,15 @@ pub fn run(opts: Opts) -> Result<()> { let _start_sym = elf.symbol_by_name(b"_start")?; - let section = _start_sym.shndx; - - let entry = SectionRelativeAbsoluteAddr { - section, - rel_offset: Offset(_start_sym.value.0), - }; - - write_output(text_content, entry)?; + write_output(text_content, _start_sym.value.0)?; Ok(()) } -fn write_output(text: &[u8], entry: SectionRelativeAbsoluteAddr) -> Result<()> { +pub const BASE_EXEC_ADDR: Addr = Addr(0x400000); // whatever ld does +pub const DEFAULT_PROGRAM_HEADER_ALIGN_THAT_LD_USES_HERE: u64 = 0x1000; + +fn write_output(text: &[u8], entry_offset_from_text: u64) -> Result<()> { let ident = ElfIdent { magic: *c::ELFMAG, class: c::Class(c::ELFCLASS64), @@ -90,15 +86,51 @@ fn write_output(text: &[u8], entry: SectionRelativeAbsoluteAddr) -> Result<()> { let mut write = ElfWriter::new(header); let text_name = write.add_sh_string(b".text"); - write.add_section(Section { + let text_section = write.add_section(Section { name: text_name, r#type: ShType(SHT_PROGBITS), flags: 0, fixed_entsize: None, content: text.to_vec(), - }); + })?; - write.set_entry(entry); + let elf_header_and_program_headers = ProgramHeader { + r#type: PT_LOAD.into(), + flags: PhFlags::PF_R, + offset: SectionRelativeAbsoluteAddr { + section: SectionIdx(0), + rel_offset: Offset(0), + }, + vaddr: BASE_EXEC_ADDR, + paddr: BASE_EXEC_ADDR, + filesz: 176, // FIXME: Do not hardocde this lol + memsz: 176, + align: DEFAULT_PROGRAM_HEADER_ALIGN_THAT_LD_USES_HERE, + }; + + write.add_program_header(elf_header_and_program_headers); + + let entry_addr = Addr( + BASE_EXEC_ADDR.0 + DEFAULT_PROGRAM_HEADER_ALIGN_THAT_LD_USES_HERE + entry_offset_from_text, + ); + + let text_program_header = ProgramHeader { + r#type: PT_LOAD.into(), + flags: PhFlags::PF_X | PhFlags::PF_R, + offset: SectionRelativeAbsoluteAddr { + section: text_section, + rel_offset: Offset(0), + }, + vaddr: entry_addr, + paddr: entry_addr, + filesz: text.len() as u64, + memsz: text.len() as u64, + align: DEFAULT_PROGRAM_HEADER_ALIGN_THAT_LD_USES_HERE, + }; + + write.add_program_header(text_program_header); + + write.set_entry(entry_addr); let output = write.write().context("writing output file")?;