diff --git a/Cargo.lock b/Cargo.lock index 3723bc5..ea23a4a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,71 +2,15 @@ # It is not intended for manual editing. version = 3 -[[package]] -name = "autocfg" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" - -[[package]] -name = "fallible-iterator" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" - -[[package]] -name = "gimli" -version = "0.27.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad0a93d233ebf96623465aad4046a8d3aa4da22d4f4beba5388838c8a434bbb4" -dependencies = [ - "fallible-iterator", - "indexmap", - "stable_deref_trait", -] - -[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" - -[[package]] -name = "indexmap" -version = "1.9.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399" -dependencies = [ - "autocfg", - "hashbrown", -] - [[package]] name = "libc" version = "0.2.140" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "99227334921fae1a979cf0bfdfcc6b3e5ce376ef57e16fb6fb3ea2ed6095f80c" -[[package]] -name = "memmap2" -version = "0.5.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327" -dependencies = [ - "libc", -] - -[[package]] -name = "stable_deref_trait" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" - [[package]] name = "uwuwind" version = "0.1.0" dependencies = [ - "gimli", "libc", - "memmap2", ] diff --git a/Cargo.toml b/Cargo.toml index c053f7c..feccdf1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,9 +8,7 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -gimli = "0.27.2" libc = { version = "0.2.140", default-features = false, features = ["extra_traits"] } -memmap2 = "0.5.10" [profile.dev] panic = "abort" diff --git a/README.md b/README.md new file mode 100644 index 0000000..e6e526b --- /dev/null +++ b/README.md @@ -0,0 +1,9 @@ +# uwuwind + +have you ever thrown an exception? caught an exception? panicked? + +then this code is for you. + +## references + +https://gitlab.com/x86-psABIs/x86-64-ABI/-/jobs/artifacts/master/raw/x86-64-ABI/abi.pdf?job=build \ No newline at end of file diff --git a/src/dwarf.rs b/src/dwarf/mod.rs similarity index 54% rename from src/dwarf.rs rename to src/dwarf/mod.rs index 555292b..7fd9445 100644 --- a/src/dwarf.rs +++ b/src/dwarf/mod.rs @@ -1,7 +1,17 @@ -use core::ffi; -use std::{ffi::CStr, fmt::Debug}; +//! this implements the stuff necessary to get the uwutables for actual unwinding +//! +//! # how it works +//! first, we ask the dynamic linker to give us the `.eh_frame` for the current binary using +//! the GNU extension (`_dl_find_object`)[https://www.gnu.org/software/libc/manual/html_node/Dynamic-Linker-Introspection.html]. +//! then, we parse that as beautiful DWARF call frame information, as god (or rather, the x86-64 psABI) intended. +//! +//! for this we need a DWARF parser and a DWARF call frame information interpreter (yes, that shit is basically a programming +//! language). See https://dwarfstd.org/doc/DWARF5.pdf for more information if more information is desired. -use gimli::UnwindTable; +mod parse; + +use core::ffi; +use std::fmt::Debug; #[allow(non_camel_case_types)] #[repr(C)] @@ -13,11 +23,12 @@ struct dl_find_object { dlfo_eh_frame: *const ffi::c_void, } +// extern "C" { fn _dl_find_object(address: *const ffi::c_void, result: *mut dl_find_object) -> ffi::c_int; } -#[derive(Debug)] +#[derive(Debug, Clone, Copy)] pub struct DwarfInfo { /// The text segment map: *const [u8], @@ -31,6 +42,7 @@ pub fn dwarf_info(addr: *const ffi::c_void) -> Option { let ret = _dl_find_object(addr, &mut out); trace!("dl_find_object returned {ret}"); if ret != 0 { + trace!("dl_find_object error: {}", std::io::Error::last_os_error()); return None; } if out.dlfo_eh_frame.is_null() { @@ -38,13 +50,18 @@ pub fn dwarf_info(addr: *const ffi::c_void) -> Option { } let text_len = out.dlfo_map_end as usize - out.dlfo_map_start as usize; - trace!( "dwarf info; map: ({:p}, {:x}), dwarf: {:p}", out.dlfo_map_start, text_len, out.dlfo_eh_frame ); + + if !(out.dlfo_map_start..out.dlfo_map_end).contains(&addr) { + trace!("dl_find_object returned object out of range for addr: {addr:p}"); + return None; + } + Some(DwarfInfo { map: core::ptr::slice_from_raw_parts(out.dlfo_map_start as _, text_len), dwarf: out.dlfo_eh_frame as _, @@ -52,6 +69,4 @@ pub fn dwarf_info(addr: *const ffi::c_void) -> Option { } } -pub fn uwutables() { - //let UnwindTable; -} \ No newline at end of file +pub fn uwutables(_dwarf_info: DwarfInfo) {} diff --git a/src/dwarf/parse.rs b/src/dwarf/parse.rs new file mode 100644 index 0000000..10c025f --- /dev/null +++ b/src/dwarf/parse.rs @@ -0,0 +1,444 @@ +//! Implements parsing and processing of DWARF call frame information. +//! +//! Source: https://dwarfstd.org/doc/DWARF5.pdf §6.4 Call Frame Information +//! +//! The CFI is a very large table of the following structure: +//! ```text +//! LOC CFA R0 R1 ... RN +//! L0 +//! L1 +//! ... +//! LN +//! ``` +//! +//! The first column is the address for every location that contains code in a program +//! (a relative offset in shared object files). The remaining columns contain unwinding rules +//! that are associated with the indicated location. +//! +//! The CFA column defines the rule which computes the Canonical Frame Address +//! value; it may be either a register and a signed offset that are added together, or a +//! DWARF expression that is evaluated. +//! +//! The remaining columns describe register numbers that indicate whether a register has been saved +//! and the rule to find the value for the previous frame. +#![allow(non_upper_case_globals)] + +struct Expr; + +enum RegisterRule { + /// A register that has this rule has no recoverable value in the previous frame. + /// (By convention, it is not preserved by a callee.) + Undefined, + /// This register has not been modified from the previous frame. + /// (By convention, it is preserved by the callee, but the callee has not modified it.) + SameValue, + /// The previous value of this register is saved at the address CFA+N where CFA + /// is the current CFA value and N is a signed offset + Offset(isize), + /// The previous value of this register is the value CFA+N where CFA is the current CFA value + /// and N is a signed offset. + ValOffset(isize), + /// The previous value of this register is stored in another register numbered R. + Register(u16), + /// The previous value of this register is located at the address produced by + /// executing the DWARF expression E (see Section 2.5 on page 26) + Expression(Expr), + /// The previous value of this register is the value produced by executing the DWARF + /// expression E (see Section 2.5 on page 26). + ValExpression(Expr), + /// The rule is defined externally to this specification by the augmenter. + Architectural, +} + +type Id = u32; + +struct ULeb128(u128); +impl ULeb128 { + fn parse() -> Self { + todo!() + } +} +struct ILeb128(i128); +impl ILeb128 { + fn parse() -> Self { + todo!() + } +} + +/// Common Information Entry +struct Cie<'a> { + /// A constant that gives the number of bytes of the CIE structure, not including + /// the length field itself (see Section 7.2.2 on page 184). The size of the length + /// field plus the value of length must be an integral multiple of the address size. + length: usize, + /// A constant that is used to distinguish CIEs from FDEs. + cie_id: Id, + /// A version number (see Section 7.24 on page 238). This number is specific to + /// the call frame information and is independent of the DWARF version number. + version: u8, + /// A null-terminated UTF-8 string that identifies the augmentation to this CIE or + /// to the FDEs that use it. If a reader encounters an augmentation string that is + /// unexpected, then only the following fields can be read: + /// - CIE: length, CIE_id, version, augmentation + /// - FDE: length, CIE_pointer, initial_location, address_range + /// + /// If there is no augmentation, this value is a zero byte. + /// + /// The augmentation string allows users to indicate that there is additional + /// target-specific information in the CIE or FDE which is needed to virtually unwind a + /// stack frame. For example, this might be information about dynamically allocated data + /// which needs to be freed on exit from the routine. + /// + /// Because the .debug_frame section is useful independently of any .debug_info + /// section, the augmentation string always uses UTF-8 encoding. + augmentation: &'a str, + /// The size of a target address in this CIE and any FDEs that use it, in bytes. If a + /// compilation unit exists for this frame, its address size must match the address + /// size here. + address_size: u8, + /// The size of a segment selector in this CIE and any FDEs that use it, in bytes. + segment_selector_size: u8, + /// A constant that is factored out of all advance location instructions (see + /// Section 6.4.2.1 on page 177). The resulting value is + /// (operand * code_alignment_factor). + code_alignment_factor: ULeb128, + /// A constant that is factored out of certain offset instructions (see + /// Sections 6.4.2.2 on page 177 and 6.4.2.3 on page 179). The resulting value is + /// (operand * data_alignment_factor). + data_alignment_factor: ILeb128, + /// An unsigned LEB128 constant that indicates which column in the rule table + /// represents the return address of the function. Note that this column might not + /// correspond to an actual machine register. + return_address_register: ULeb128, + /// A sequence of rules that are interpreted to create the initial setting of each + /// column in the table. + /// The default rule for all columns before interpretation of the initial instructions + /// is the undefined rule. However, an ABI authoring body or a compilation + /// system authoring body may specify an alternate default value for any or all + /// columns. + initial_instructions: &'a [u8], +} + +/// Frame Description Entry +struct Fde<'a> { + /// A constant that gives the number of bytes of the header and instruction + /// stream for this function, not including the length field itself (see Section 7.2.2 + /// on page 184). The size of the length field plus the value of length must be an + /// integral multiple of the address size. + length: usize, + /// A constant offset into the .debug_frame section that denotes the CIE that is + /// associated with this FDE. + cie_pointer: Id, + /// The address of the first location associated with this table entry. If the + /// segment_selector_size field of this FDE’s CIE is non-zero, the initial + /// location is preceded by a segment selector of the given length. + initial_location: usize, + /// The number of bytes of program instructions described by this entry. + address_range: usize, + /// A sequence of table defining instructions that are described in Section 6.4.2. + instructions: &'a [u8], +} + +enum Instruction { + //-------- 6.4.2.1 Row Creation Instructions + // + /// The DW_CFA_set_loc instruction takes a single operand that represents a + /// target address. The required action is to create a new table row using the + /// specified address as the location. All other values in the new row are initially + /// identical to the current row. The new location value is always greater than the + /// current one. If the segment_selector_size field of this FDE’s CIE is non-zero, + /// the initial location is preceded by a segment selector of the given length. + SetLoc(usize), + /// The DW_CFA_advance_loc instruction takes a single operand (encoded with + /// the opcode) that represents a constant delta. The required action is to create a + /// new table row with a location value that is computed by taking the current + /// entry’s location value and adding the value of delta * code_alignment_factor. + /// All other values in the new row are initially identical to the current row + AdvanceLoc(u8), + /// The DW_CFA_advance_loc1 instruction takes a single ubyte operand that + /// represents a constant delta. This instruction is identical to + /// DW_CFA_advance_loc except for the encoding and size of the delta operand + AdvanceLoc1(u8), + /// The DW_CFA_advance_loc2 instruction takes a single uhalf operand that + /// represents a constant delta. This instruction is identical to + /// DW_CFA_advance_loc except for the encoding and size of the delta operand + AdvanceLoc2(u16), + /// The DW_CFA_advance_loc4 instruction takes a single uword operand that + /// represents a constant delta. This instruction is identical to + /// DW_CFA_advance_loc except for the encoding and size of the delta operand + AdvanceLoc4(u32), + // + //-------- 6.4.2.2 CFA Definition Instructions + // + /// The DW_CFA_def_cfa instruction takes two unsigned LEB128 operands + /// representing a register number and a (non-factored) offset. The required + /// action is to define the current CFA rule to use the provided register and offset + DefCfa { + register_number: ULeb128, + offset: ULeb128, + }, + /// The DW_CFA_def_cfa_sf instruction takes two operands: an unsigned + /// LEB128 value representing a register number and a signed LEB128 factored + /// offset. This instruction is identical to DW_CFA_def_cfa except that the second + /// operand is signed and factored. The resulting offset is factored_offset * + /// data_alignment_factor. + DefCfaSf { + register_number: ULeb128, + offset: ULeb128, + }, + /// The DW_CFA_def_cfa_register instruction takes a single unsigned LEB128 + /// operand representing a register number. The required action is to define the + /// current CFA rule to use the provided register (but to keep the old offset). This + /// operation is valid only if the current CFA rule is defined to use a register and + /// offset. + DefCfaRegister(ULeb128), + /// The DW_CFA_def_cfa_offset instruction takes a single unsigned LEB128 + /// operand representing a (non-factored) offset. The required action is to define + /// the current CFA rule to use the provided offset (but to keep the old register). + /// This operation is valid only if the current CFA rule is defined to use a register + /// and offset. + DefCfaOffset(ULeb128), + /// The DW_CFA_def_cfa_offset_sf instruction takes a signed LEB128 operand + /// representing a factored offset. This instruction is identical to + /// DW_CFA_def_cfa_offset except that the operand is signed and factored. The + /// resulting offset is factored_offset * data_alignment_factor. This operation is + /// valid only if the current CFA rule is defined to use a register and offset. + DefCfaOffsetSf(ULeb128), + /// The DW_CFA_def_cfa_expression instruction takes a single operand encoded + /// as a DW_FORM_exprloc value representing a DWARF expression. The + /// required action is to establish that expression as the means by which the + /// current CFA is computed. + DefCfaExpression(Expr), + // + //-------- 6.4.2.3 Register Rule Instructions + // + /// The DW_CFA_undefined instruction takes a single unsigned LEB128 operand + /// that represents a register number. The required action is to set the rule for the + /// specified register to “undefined.” + Undefined(ULeb128), + /// The DW_CFA_same_value instruction takes a single unsigned LEB128 + /// operand that represents a register number. The required action is to set the + /// rule for the specified register to “same value.” + SameValue(ULeb128), + /// The DW_CFA_offset instruction takes two operands: a register number + /// (encoded with the opcode) and an unsigned LEB128 constant representing a + /// factored offset. The required action is to change the rule for the register + /// indicated by the register number to be an offset(N) rule where the value of N + /// is factored offset * data_alignment_factor. + Offset { + register_number: usize, + factored_offset: ULeb128, + }, + /// The DW_CFA_offset_extended instruction takes two unsigned LEB128 + /// operands representing a register number and a factored offset. This + /// instruction is identical to DW_CFA_offset except for the encoding and size of + /// the register operand. + OffsetExtended { + register_number: ULeb128, + factored_offset: ULeb128, + }, + /// The DW_CFA_offset_extended_sf instruction takes two operands: an + /// unsigned LEB128 value representing a register number and a signed LEB128 + /// factored offset. This instruction is identical to DW_CFA_offset_extended + /// except that the second operand is signed and factored. The resulting offset is + /// factored_offset * data_alignment_factor. + OffsetExtendedSf { + register_number: ULeb128, + factored_offste: ULeb128, + }, + /// The DW_CFA_val_offset instruction takes two unsigned LEB128 operands + /// representing a register number and a factored offset. The required action is to + /// change the rule for the register indicated by the register number to be a + /// val_offset(N) rule where the value of N is factored_offset * + /// data_alignment_factor. + ValOffset { + register_number: ULeb128, + factored_offste: ULeb128, + }, + /// The DW_CFA_val_offset_sf instruction takes two operands: an unsigned + /// LEB128 value representing a register number and a signed LEB128 factored + /// offset. This instruction is identical to DW_CFA_val_offset except that the + /// second operand is signed and factored. The resulting offset is factored_offset * + /// data_alignment_factor. + ValOffsetSf { + register_number: ULeb128, + factored_offste: ULeb128, + }, + /// The DW_CFA_register instruction takes two unsigned LEB128 operands + /// representing register numbers. The required action is to set the rule for the + /// first register to be register(R) where R is the second register. + Register { + target_register: ULeb128, + from_register: ULeb128, + }, + /// The DW_CFA_expression instruction takes two operands: an unsigned + /// LEB128 value representing a register number, and a DW_FORM_block value + /// representing a DWARF expression. The required action is to change the rule + /// for the register indicated by the register number to be an expression(E) rule + /// where E is the DWARF expression. That is, the DWARF expression computes + /// the address. The value of the CFA is pushed on the DWARF evaluation stack + /// prior to execution of the DWARF expression. + /// See Section 6.4.2 on page 176 regarding restrictions on the DWARF expression + /// operators that can be used. + Expression { register: ULeb128, expr: Expr }, + /// The DW_CFA_val_expression instruction takes two operands: an unsigned + /// LEB128 value representing a register number, and a DW_FORM_block value + /// representing a DWARF expression. The required action is to change the rule + /// for the register indicated by the register number to be a val_expression(E) + /// rule where E is the DWARF expression. That is, the DWARF expression + /// computes the value of the given register. The value of the CFA is pushed on + /// the DWARF evaluation stack prior to execution of the DWARF expression. + /// See Section 6.4.2 on page 176 regarding restrictions on the DWARF expression + /// operators that can be used. + ValExpression { register: ULeb128, expr: Expr }, + /// The DW_CFA_restore instruction takes a single operand (encoded with the + /// opcode) that represents a register number. The required action is to change + /// the rule for the indicated register to the rule assigned it by the + /// initial_instructions in the CIE. + Restore(usize), + /// The DW_CFA_restore_extended instruction takes a single unsigned LEB128 + /// operand that represents a register number. This instruction is identical to + /// DW_CFA_restore except for the encoding and size of the register operand. + RestoreExtended(ULeb128), + // + //-------- 6.4.2.4 Row State Instructions + // + /// The DW_CFA_remember_state instruction takes no operands. The required + /// action is to push the set of rules for every register onto an implicit stack. + RememberState, + /// The DW_CFA_restore_state instruction takes no operands. The required + /// action is to pop the set of rules off the implicit stack and place them in the + /// current row. + RestoreState, + // + //-------- 6.4.2.5 Padding Instruction + // + /// The DW_CFA_nop instruction has no operands and no required actions. It is + /// used as padding to make a CIE or FDE an appropriate size. + Nop, +} + +struct InstrIter<'a> { + data: &'a [u8], +} + +impl<'a> InstrIter<'a> { + fn advance(&mut self) -> Option { + let (&first, rest) = self.data.split_first()?; + self.data = rest; + Some(first) + } + + fn uleb128(&mut self) -> ULeb128 { + ULeb128::parse() + } +} + +const DW_CFA_advance_loc_hi: u8 = 0x01; +const DW_CFA_offset_hi: u8 = 0x02; +const DW_CFA_restore_hi: u8 = 0x03; + +const DW_CFA_nop: u8 = 0; +const DW_CFA_set_loc: u8 = 0x01; +const DW_CFA_advance_loc1: u8 = 0x02; +const DW_CFA_advance_loc2: u8 = 0x03; +const DW_CFA_advance_loc4: u8 = 0x04; +const DW_CFA_offset_extended: u8 = 0x05; +const DW_CFA_restore_extended: u8 = 0x06; +const DW_CFA_undefined: u8 = 0x07; +const DW_CFA_same_value: u8 = 0x08; +const DW_CFA_register: u8 = 0x09; +const DW_CFA_remember_state: u8 = 0x0a; +const DW_CFA_restore_state: u8 = 0x0b; +const DW_CFA_def_cfa: u8 = 0x0c; +const DW_CFA_def_cfa_register: u8 = 0x0d; +const DW_CFA_def_cfa_offset: u8 = 0x0e; +const DW_CFA_def_cfa_expression: u8 = 0x0f; +const DW_CFA_expression: u8 = 0x10; +const DW_CFA_offset_extended_sf: u8 = 0x11; +const DW_CFA_def_cfa_sf: u8 = 0x12; +const DW_CFA_def_cfa_offset_sf: u8 = 0x13; +const DW_CFA_val_offset: u8 = 0x14; +const DW_CFA_val_offset_sf: u8 = 0x15; +const DW_CFA_val_expression: u8 = 0x16; +const DW_CFA_lo_user: u8 = 0x1c; +const DW_CFA_hi_user: u8 = 0x3f; + +impl<'a> Iterator for InstrIter<'a> { + type Item = Instruction; + + fn next(&mut self) -> Option { + let b = self.advance()?; + let high_2 = b & !(u8::MAX >> 2); + Some(match high_2 { + DW_CFA_advance_loc_hi => { + let delta = b & (u8::MAX >> 2); + Instruction::AdvanceLoc(delta) + } + DW_CFA_offset_hi => { + let register = b & (u8::MAX >> 2); + Instruction::Offset { + register_number: register as _, + factored_offset: self.uleb128(), + } + } + DW_CFA_restore_hi => { + let register = b & (u8::MAX >> 2); + Instruction::Restore(register as _) + } + _ => match b { + DW_CFA_nop => Instruction::Nop, + DW_CFA_set_loc => Instruction::SetLoc(todo!()), + DW_CFA_advance_loc1 => Instruction::AdvanceLoc1(todo!()), + DW_CFA_advance_loc2 => Instruction::AdvanceLoc2(todo!()), + DW_CFA_advance_loc4 => Instruction::AdvanceLoc4(todo!()), + DW_CFA_offset_extended => Instruction::OffsetExtended { + register_number: self.uleb128(), + factored_offset: self.uleb128(), + }, + DW_CFA_restore_extended => Instruction::RestoreExtended(self.uleb128()), + DW_CFA_undefined => Instruction::Undefined(self.uleb128()), + DW_CFA_same_value => Instruction::SameValue(self.uleb128()), + DW_CFA_register => Instruction::Register { + target_register: self.uleb128(), + from_register: self.uleb128(), + }, + DW_CFA_remember_state => Instruction::RememberState, + DW_CFA_restore_state => Instruction::RestoreState, + DW_CFA_def_cfa => Instruction::DefCfa { + register_number: self.uleb128(), + offset: self.uleb128(), + }, + DW_CFA_def_cfa_register => Instruction::DefCfaRegister(self.uleb128()), + DW_CFA_def_cfa_offset => Instruction::DefCfaOffset(self.uleb128()), + DW_CFA_def_cfa_expression => Instruction::DefCfaExpression(todo!()), + DW_CFA_expression => Instruction::Expression { + register: self.uleb128(), + expr: todo!(), + }, + DW_CFA_offset_extended_sf => Instruction::OffsetExtendedSf { + register_number: self.uleb128(), + factored_offste: self.uleb128(), + }, + DW_CFA_def_cfa_sf => Instruction::DefCfaSf { + register_number: self.uleb128(), + offset: self.uleb128(), + }, + DW_CFA_def_cfa_offset_sf => Instruction::DefCfaOffsetSf(self.uleb128()), + DW_CFA_val_offset => Instruction::ValOffset { + register_number: self.uleb128(), + factored_offste: self.uleb128(), + }, + DW_CFA_val_offset_sf => Instruction::ValOffsetSf { + register_number: self.uleb128(), + factored_offste: self.uleb128(), + }, + DW_CFA_val_expression => Instruction::ValExpression { + register: self.uleb128(), + expr: todo!(), + }, + _ => todo!(), + }, + }) + } +} diff --git a/test b/test deleted file mode 100755 index a478771..0000000 Binary files a/test and /dev/null differ