diff --git a/shell.nix b/shell.nix index 9ea89b7..72c7903 100644 --- a/shell.nix +++ b/shell.nix @@ -1,13 +1,9 @@ { pkgs ? import { } }: pkgs.mkShell { buildInputs = with pkgs; [ - rustup gcc - clang_16 - llvmPackages_16.bintools + clang_21 + llvmPackages_21.bintools ]; - shellHook = '' - export PATH=$PATH:''${CARGO_HOME:-~/.cargo}/bin - ''; packages = (with pkgs; [ gef rust-bindgen diff --git a/src/dwarf/divination.rs b/src/dwarf/divination.rs index 958f5d7..2d8596d 100644 --- a/src/dwarf/divination.rs +++ b/src/dwarf/divination.rs @@ -43,6 +43,7 @@ struct EhFrameHeader { eh_frame_ptr_enc: Encoding, fde_count_enc: Encoding, table_enc: Encoding, + rest: (), } #[instrument] @@ -93,9 +94,9 @@ pub(crate) fn eh_frame(addr: Addr) -> Option<*const u8> { trace!("eh_frame_hdr: {:?}", header); - let (read_size, eh_frame_ptr) = read_encoded(ptr, header.eh_frame_ptr_enc); + let (read_size, eh_frame_ptr) = read_encoded(ptr, header.eh_frame_ptr_enc, None); let ptr = ptr.add(read_size); - let (_read_size, fde_count) = read_encoded(ptr, header.fde_count_enc); + let (_read_size, fde_count) = read_encoded(ptr, header.fde_count_enc, None); trace!("eh_frame: {eh_frame_ptr:?}"); trace!("fde_count: {fde_count:?}"); @@ -108,3 +109,96 @@ pub(crate) fn eh_frame(addr: Addr) -> Option<*const u8> { Some(eh_frame_ptr as *const u8) } } + +#[instrument] +pub(crate) fn frame_info(addr: Addr) -> Option<()> { + unsafe { + let header_ptr = eh_frame_hdr_ptr(addr)?; + let eh_frame_header_addr = header_ptr.addr(); + let header = header_ptr.read(); + + let ptr = (&raw const (*header_ptr).rest).cast::(); + let (eh_frame_ptr_size, eh_frame_ptr) = read_encoded(ptr, header.eh_frame_ptr_enc, None); + let ptr = ptr.add(eh_frame_ptr_size); + + let (fde_count_size, fde_count) = + read_encoded(ptr, header.fde_count_enc, Some(eh_frame_ptr)); + + trace!(?header.table_enc); + + let table_ptr = ptr.add(fde_count_size); + + let mut walk_table_ptr = table_ptr; + for i in 0..fde_count { + let (read, initial_loc) = + 
read_encoded(walk_table_ptr, header.table_enc, Some(eh_frame_header_addr)); + walk_table_ptr = walk_table_ptr.add(read); + let (read, address) = + read_encoded(walk_table_ptr, header.table_enc, Some(eh_frame_header_addr)); + walk_table_ptr = walk_table_ptr.add(read); + + trace!(idx = ?i, "eh_frame_hdr table initial_loc={initial_loc:x} address={address:x}"); + } + + let table_half_entry_size = header.table_enc.size(); + + let mut base = 0; + let mut len = fde_count; + let found_fde; + loop { + if len == 1 { + found_fde = Some(base); + break; + } + + let mid = base + len / 2; + let mid_ptr = table_ptr.byte_add(mid * table_half_entry_size * 2); + + let (_, value) = read_encoded(mid_ptr, header.table_enc, Some(eh_frame_header_addr)); + + debug!( + ?base, + ?len, + ?mid, + "binary searching for {addr:?}: {value:x}" + ); + + match addr.addr().cmp(&value) { + core::cmp::Ordering::Less => { + len = mid - base; + } + core::cmp::Ordering::Equal => { + found_fde = Some(mid); + break; + } + core::cmp::Ordering::Greater => { + len = len - (mid - base); + base = mid; + } + } + } + + debug!("found FDE idx in binary search {found_fde:?}"); + + let fde_table_ptr = table_ptr.byte_add(found_fde.unwrap() * table_half_entry_size * 2); + let (_, fde_address) = read_encoded( + fde_table_ptr.byte_add(table_half_entry_size), + header.table_enc, + Some(eh_frame_header_addr), + ); + + trace!("found FDE at address {fde_address:x}"); + + let fde_ptr = core::ptr::with_exposed_provenance::(fde_address); + + fde_ptr.read_volatile(); + + trace!("ptr is valid"); + + trace!("FDE offset to .eh_frame: {:x}", fde_ptr.addr() - (eh_frame_ptr)); + + let fde = crate::dwarf::parse::parse_fde_from_ptr(fde_ptr, eh_frame_ptr).unwrap(); + + todo!() + } +} diff --git a/src/dwarf/mod.rs b/src/dwarf/mod.rs index 957f146..d3808f7 100644 --- a/src/dwarf/mod.rs +++ b/src/dwarf/mod.rs @@ -5,12 +5,13 @@ //! interpreter (yes, that shit is basically a programming language). //! 
See https://dwarfstd.org/doc/DWARF5.pdf §6.4 for more information if more information is desired. //! Note that https://refspecs.linuxbase.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html -//! contains more details on the precise format, which is slightly different from .debug_frame from DWARF. +//! contains more details on the precise format, which is slightly different +//! from .debug_frame from DWARF. mod divination; mod parse; -pub(crate) use divination::eh_frame; +pub(crate) use divination::{eh_frame, frame_info}; /// The `.eh_frame` section contains a list of call frame information records. /// Each CFI contains a CIE followed be one or more FDE records. diff --git a/src/dwarf/parse.rs b/src/dwarf/parse.rs index 017f56c..1a3b916 100644 --- a/src/dwarf/parse.rs +++ b/src/dwarf/parse.rs @@ -362,23 +362,29 @@ pub unsafe fn parse_cfi(mut ptr: *const u8) { struct Cursor<'a>(&'a [u8]); /// Returns `(read_size, value)` -pub(super) unsafe fn read_encoded(ptr: *const u8, encoding: Encoding) -> (usize, usize) { +pub(super) unsafe fn read_encoded( + ptr: *const u8, + encoding: Encoding, + datarel_base: Option, +) -> (usize, usize) { let (read_size, value) = match encoding.format() { ValueFormat::DW_EH_PE_uleb128 => todo!("uleb128"), ValueFormat::DW_EH_PE_udata2 => (2, ptr.cast::().read_unaligned() as usize), ValueFormat::DW_EH_PE_udata4 => (4, ptr.cast::().read_unaligned() as usize), ValueFormat::DW_EH_PE_udata8 => (8, ptr.cast::().read_unaligned() as usize), ValueFormat::DW_EH_PE_sleb128 => todo!("sleb128"), - ValueFormat::DW_EH_PE_sdata2 => (2, ptr.cast::().read_unaligned() as usize), - ValueFormat::DW_EH_PE_sdata4 => (4, ptr.cast::().read_unaligned() as usize), - ValueFormat::DW_EH_PE_sdata8 => (8, ptr.cast::().read_unaligned() as usize), + ValueFormat::DW_EH_PE_sdata2 => (2, ptr.cast::().read_unaligned() as isize as usize), + ValueFormat::DW_EH_PE_sdata4 => (4, ptr.cast::().read_unaligned() as isize as usize), + ValueFormat::DW_EH_PE_sdata8 => (8, 
ptr.cast::().read_unaligned() as isize as usize), }; let value = match encoding.application() { ValueApplication::DW_EH_PE_absptr => value, ValueApplication::DW_EH_PE_pcrel => ((value as isize) + (ptr as isize)) as usize, ValueApplication::DW_EH_PE_textrel => todo!("textrel"), - ValueApplication::DW_EH_PE_datarel => todo!("datarel"), + ValueApplication::DW_EH_PE_datarel => { + ((value as isize) + (datarel_base.unwrap() as isize)) as usize + } ValueApplication::DW_EH_PE_funcrel => todo!("funcrel"), ValueApplication::DW_EH_PE_aligned => todo!("aligned"), }; @@ -387,6 +393,7 @@ pub(super) unsafe fn read_encoded(ptr: *const u8, encoding: Encoding) -> (usize, } #[derive(PartialEq, Clone, Copy)] +#[repr(transparent)] pub(super) struct Encoding(u8); impl Encoding { fn format(&self) -> ValueFormat { @@ -413,6 +420,18 @@ impl Encoding { v => panic!("invalid header value application: {v}"), } } + pub(crate) fn size(&self) -> usize { + match self.format() { + ValueFormat::DW_EH_PE_uleb128 => panic!("uleb128 has no known size"), + ValueFormat::DW_EH_PE_udata2 => 2, + ValueFormat::DW_EH_PE_udata4 => 4, + ValueFormat::DW_EH_PE_udata8 => 8, + ValueFormat::DW_EH_PE_sleb128 => panic!("sleb128 has no known size"), + ValueFormat::DW_EH_PE_sdata2 => 2, + ValueFormat::DW_EH_PE_sdata4 => 4, + ValueFormat::DW_EH_PE_sdata8 => 8, + } + } } impl fmt::Debug for Encoding { @@ -456,7 +475,8 @@ enum ValueApplication { DW_EH_PE_pcrel = 0x10, /// Value is relative to the beginning of the .text section. DW_EH_PE_textrel = 0x20, - /// Value is relative to the beginning of the .got or .eh_frame_hdr section. + /// Value is relative to the beginning of the .got or .eh_frame_hdr + /// section. DW_EH_PE_datarel = 0x30, /// Value is relative to the beginning of the function. 
DW_EH_PE_funcrel = 0x40, @@ -556,10 +576,11 @@ unsafe fn parse_frame_info<'a>( unsafe fn parse_frame_head<'a>(ptr: *const u8) -> Result<(u32, &'a [u8], *const u8)> { let len = ptr.cast::().read(); if len == 0xffffffff { + // be careful, if you handle this you need to adjust the callers offsets lol lmao todo!("loooong dwarf, cannot handle."); } let data = &mut Cursor(core::slice::from_raw_parts(ptr.add(4), len as usize)); - trace!("frame info entry: {:x?}", data.0); + trace!("frame info entry (without len): {:x?}", data.0); let cie_id = read_u32(data)?; let new_ptr = ptr.add(4).add(len as _); @@ -606,6 +627,37 @@ fn parse_cie<'a>(data: &mut Cursor<'a>) -> Result> { Ok(cie) } +pub(crate) unsafe fn parse_fde_from_ptr<'a>( + ptr: *const u8, + eh_frame_base: usize, +) -> Result> { + let (fde_cie_id, fde_data, _) = parse_frame_head(ptr)?; + let fde_data = &mut Cursor(fde_data); + + if fde_cie_id == 0 { + return Err(Error(format!("FDE's CIE Pointer is 0"))); + } + trace!("FDE's CIE pointer: {fde_cie_id}"); + + let cie_ptr = ptr.byte_add(4 /* length */).byte_sub(fde_cie_id as usize); + + trace!( + "CIE offset to .eh_frame: {:x}", + cie_ptr.addr() - (eh_frame_base) + ); + + let (cie_cie_id, cie_data, _) = parse_frame_head(cie_ptr)?; + if cie_cie_id != 0 { + return Err(Error(format!("CIE must have cie_id=0"))); + } + let cie_data = &mut Cursor(cie_data); + let cie = parse_cie(cie_data)?; + + let fde = parse_fde(fde_data, fde_cie_id, &cie)?; + + Ok(fde) +} + #[instrument(skip(data))] fn parse_fde<'a>(data: &mut Cursor<'a>, cie_id: u32, cie: &Cie<'_>) -> Result> { trace!("FDE {:x?}", data.0); @@ -621,10 +673,10 @@ fn parse_fde<'a>(data: &mut Cursor<'a>, cie_id: u32, cie: &Cie<'_>) -> Result Result { trace!("P"); let encoding = Encoding(read_u8(data)?); - let (read_size, value) = unsafe { read_encoded(data.0.as_ptr(), encoding) }; + let (read_size, value) = unsafe { read_encoded(data.0.as_ptr(), encoding, None) }; data.0 = &data.0[read_size..]; aug_data.personality = 
Some(value); } diff --git a/src/lib.rs b/src/lib.rs index c36fd3a..f4ffe8c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -25,14 +25,20 @@ impl Addr { fn voidptr(self) -> *const ffi::c_void { self.0.cast() } + + fn addr(&self) -> usize { + self.0.addr() + } } #[allow(nonstandard_style)] -pub unsafe extern "C" fn _UnwindRaiseException( +pub unsafe extern "C-unwind" fn _UnwindRaiseException( exception_object: *mut uw::_Unwind_Exception, ) -> uw::_Unwind_Reason_Code { let _span = info_span!("_UnwindRaiseException", ?exception_object).entered(); + let frame = crate::dwarf::frame_info(arch::get_rip()); + let eh_frame = crate::dwarf::eh_frame(arch::get_rip()).unwrap(); crate::dwarf::uwutables(eh_frame); diff --git a/test-program/src/main.rs b/test-program/src/main.rs index d117962..50d7fdf 100644 --- a/test-program/src/main.rs +++ b/test-program/src/main.rs @@ -1,5 +1,4 @@ -use tracing_subscriber::{layer::SubscriberExt, EnvFilter}; -use tracing_subscriber::util::SubscriberInitExt; +use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter}; use uwuwind::uw; #[repr(C)]