diff --git a/README.md b/README.md index fc84f95..e952de5 100644 --- a/README.md +++ b/README.md @@ -10,3 +10,4 @@ a PE loader for educational purposes. - https://learn.microsoft.com/en-us/windows/win32/dlls/dynamic-link-library-search-order - https://www.geoffchappell.com/studies/windows/win32/apisetschema/index.htm?tx=1 - http://www.nynaeve.net/?p=180 +- https://github.com/mingw-w64/mingw-w64 diff --git a/shell.nix b/shell.nix index fa4398f..8f77f41 100644 --- a/shell.nix +++ b/shell.nix @@ -1,3 +1,4 @@ { pkgs ? import { } }: pkgs.mkShell { nativeBuildInputs = with pkgs; [ lld_18 rustup ]; + packages = with pkgs; [ gef ]; } diff --git a/src/emulated.rs b/src/emulated.rs index 62f5d30..60cff5a 100644 --- a/src/emulated.rs +++ b/src/emulated.rs @@ -238,11 +238,15 @@ emulate!( emulate!( "api-ms-win-crt-runtime-l1-1-0.dll", mod api_ms_win_crt_runtime_l1_1_0 { - fn __p___argc() { - todo!("__p___argc") + fn __p___argc() -> *const u32 { + static ARGC: i32 = 1; + (&raw const ARGC).cast() } - fn __p___argv() { - todo!("__p___argv") + /// returns the address of argv + fn __p___argv() -> *const *const *const u8 { + static EMPTY_ARGS: [usize; 1] = [0]; + static ARGV: &[usize; 1] = &EMPTY_ARGS; + (&raw const ARGV).cast() } fn _c_exit() { todo!("_c_exit") @@ -259,8 +263,8 @@ emulate!( fn _exit() { todo!("_exit") } - fn _get_initial_narrow_environment() { - todo!("_get_initial_narrow_environment") + fn _get_initial_narrow_environment() -> *const () { + std::ptr::null() } fn _initialize_narrow_environment() { todo!("_initialize_narrow_environment") @@ -268,11 +272,11 @@ emulate!( fn _initialize_onexit_table() { todo!("_initialize_onexit_table") } - fn _initterm() { - todo!("_initterm") - } - fn _initterm_e() { - todo!("_initterm_e") + /// + fn _initterm(_start: *const (), _end: *const ()) {} + /// + fn _initterm_e(_start: *const (), _end: *const ()) -> u32 { + 0 } fn _register_onexit_function() { todo!("_register_onexit_function") @@ -286,8 +290,11 @@ emulate!( fn _set_app_type() { todo!("_set_app_type") } - fn exit() { - todo!("exit") + /// + fn exit(code: i32) -> ! { + tracing::info!("application requested exit with code {code}"); + // TODO: we need to do all kinds of cleanup + std::process::exit(code); } fn terminate() { todo!("terminate") diff --git a/src/lib.rs b/src/lib.rs index 2b654e1..4001afc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,11 +2,13 @@ mod emulated; mod sys; use std::{ + cell::RefCell, collections::HashMap, ffi::{CStr, CString}, fmt::Debug, ops::{Deref, DerefMut}, path::{Path, PathBuf}, + ptr, sync::{ atomic::{AtomicU64, Ordering}, LazyLock, Mutex, @@ -251,31 +253,89 @@ struct TlsDirectory { /// Note that this is a VA that should have been relocated earlier. raw_data_start_va: u64, /// The last byte of the TLS. - raw_data_env_va: u64, + raw_data_end_va: u64, address_of_index: u64, address_of_callbacks: u64, size_of_zero_fill: u32, characteristics: u32, } +#[expect(dead_code)] +const DLL_PROCESS_DETACH: u32 = 0; +const DLL_PROCESS_ATTACH: u32 = 1; +#[expect(dead_code)] +const DLL_THREAD_ATTACH: u32 = 2; +#[expect(dead_code)] +const DLL_THREAD_DETACH: u32 = 3; + +/// +type TlsCallback = + unsafe extern "win64" fn(dll_handle: *const (), reason: u32, reserved: *const ()); + const IMAGE_FILE_MACHINE_AMD64: u16 = 0x8664; const IMAGE_FILE_MACHINE_ARM64: u16 = 0xaa64; pub fn execute(pe: &[u8], executable_path: &Path) { + let mut main_tls_slots = [ptr::null_mut(); 64]; + + let mut main_teb = ThreadEnvironmentBlock { + tib: ThreadInformationBlock { + exception_list: ptr::null(), + stack_base: ptr::null(), + stack_limit: ptr::null(), + sub_system_tib: ptr::null(), + fiber_data: ptr::null(), + arbitrary_user_pointer: ptr::null(), + this: ptr::null(), + }, + environment_pointer: ptr::null(), + client_id_unique_process: 0, + client_id_unique_thread: 0, + active_rpc_handle: ptr::null(), + thread_local_storage_pointer: &raw mut main_tls_slots, + }; + main_teb.tib.this = &raw const main_teb; + + THREAD_STATE.with(|state| state.state.borrow_mut().teb = &raw mut main_teb); + GLOBAL_STATE.state.lock().unwrap().executable_path = Some(executable_path.to_owned()); let image = load(pe, executable_path, false); let entrypoint = image.base + image.opt_header.address_of_entry_point as usize; tracing::debug!("YOLO to {:#x}", entrypoint); + setup_thread(&raw mut main_teb); + post_load(&image); + unsafe { let entrypoint = std::mem::transmute:: u32>(entrypoint); + setup_thread(&raw mut main_teb); let result = entrypoint(); tracing::info!("result: {result}"); }; } +fn post_load(image: &Image<'_>) { + tracing::debug!("call TLS callbacks"); + let Some(tls_directory) = bytemuck::cast_slice::( + &image[image.opt_header.tls_table.rva as usize..] + [..image.opt_header.tls_table.size as usize], + ) + .get(0) else { + return; + }; + + let mut ptr = tls_directory.address_of_callbacks as *const Option; + while let Some(cb) = unsafe { *ptr } { + tracing::debug!("calling TLS callback at {ptr:p}"); + unsafe { cb(image.base as _, DLL_PROCESS_ATTACH, ptr::null()) } + unsafe { + ptr = ptr.add(1); + } + } +} + #[derive(Clone)] struct Image<'pe> { base: usize, @@ -325,6 +385,14 @@ struct TheGlobalState { executable_path: Option, hmodule_to_dll: HashMap, next_emulated_hmodule_idx: AtomicU64, + tls_slots: Vec, +} + +enum TlsSlot { + Static { + #[expect(dead_code)] + init: &'static [u8], + }, } struct GlobalStateWrapper { @@ -344,6 +412,22 @@ impl GlobalStateWrapper { } } +struct ThreadState { + teb: *mut ThreadEnvironmentBlock, +} + +struct ThreadStateWrapper { + state: RefCell, +} + +std::thread_local! { + static THREAD_STATE: ThreadStateWrapper = ThreadStateWrapper { + state: RefCell::new(ThreadState { + teb: ptr::null_mut() + }), + }; +} + static GLOBAL_STATE: GlobalStateWrapper = GlobalStateWrapper { state: LazyLock::new(|| { Mutex::new(TheGlobalState { @@ -351,18 +435,34 @@ static GLOBAL_STATE: GlobalStateWrapper = GlobalStateWrapper { executable_path: None, hmodule_to_dll: HashMap::new(), next_emulated_hmodule_idx: AtomicU64::new(1), + tls_slots: Vec::new(), }) }), }; #[repr(C)] -struct ThreadEnvironmentBlock { - host_thread_ptr: *const (), - _pad: [u8; 80], - thing: *const (), - +struct ThreadInformationBlock { + exception_list: *const (), + stack_base: *const (), + stack_limit: *const (), + sub_system_tib: *const (), + fiber_data: *const (), + arbitrary_user_pointer: *const (), + this: *const ThreadEnvironmentBlock, } -const _: () = assert!(std::mem::offset_of!(ThreadEnvironmentBlock, thing) == 88); + +// https://github.com/wine-mirror/wine/blob/1aff1e6a370ee8c0213a0fd4b220d121da8527aa/include/winternl.h#L347 +#[repr(C)] +struct ThreadEnvironmentBlock { + tib: ThreadInformationBlock, + environment_pointer: *const (), + client_id_unique_process: u64, // handle + client_id_unique_thread: u64, // handle, + active_rpc_handle: *const (), + thread_local_storage_pointer: *mut [*mut (); 64], +} +const _: [(); 88] = + [(); std::mem::offset_of!(ThreadEnvironmentBlock, thread_local_storage_pointer)]; #[tracing::instrument(skip(pe, is_dll))] fn load<'pe>(pe: &'pe [u8], executable_path: &Path, is_dll: bool) -> Image<'pe> { @@ -582,14 +682,44 @@ fn load_inner<'pe>(pe: &'pe [u8], executable_path: &Path, is_dll: bool) -> Image } } - /* - not what's happening? - tracing::debug!("load TLS"); - let tls_directory = bytemuck::cast_slice::( - &image[opt_header.tls_table.rva as usize..][..opt_header.tls_table.size as usize], - ); - tracing::debug!(?tls_directory, "TLS directory"); - */ + // https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#the-tls-section + + if opt_header.tls_table.size > 0 { + let tls_directory = bytemuck::cast_slice::( + &image[opt_header.tls_table.rva as usize..][..opt_header.tls_table.size as usize], + )[0]; + // Each module's data gets a slot in the TLS. + tracing::debug!("load TLS"); + let mut state = GLOBAL_STATE.state.lock().unwrap(); + let module_tls_slot_idx = state.tls_slots.len(); + // TODO: hi i think i need to do something here to assign the correct index so that the DLL understands what it's supposed to do. + tracing::debug!(?tls_directory, "TLS directory"); + + let size = tls_directory.raw_data_end_va - tls_directory.raw_data_start_va; + assert!(size > 0); + + let init = unsafe { + std::slice::from_raw_parts(tls_directory.raw_data_start_va as *const u8, size as usize) + }; + + state.tls_slots.push(TlsSlot::Static { init }); + drop(state); + + // TODO: alignment.. + let tls_data = unsafe { + std::alloc::alloc(std::alloc::Layout::from_size_align(size as usize, 8).unwrap()) + }; + assert!(!tls_data.is_null()); + + unsafe { ptr::copy_nonoverlapping(init.as_ptr(), tls_data, size as usize) }; + + THREAD_STATE.with(|state| unsafe { + (*(*state.state.borrow().teb).thread_local_storage_pointer)[module_tls_slot_idx] = + tls_data.cast::<()>() + }) + } else { + tracing::debug!("no TLS"); + } tracing::debug!("applying section protections"); for section in section_table { @@ -707,6 +837,8 @@ fn load_dll(dll_name: &str, executable_path: &Path) -> Option { let mmap = unsafe { &*(&**mmap as *const [u8]) }; let img: Image<'static> = load(&mmap, &path, true); + // TODO: we need to call DllMain!!! + // TODO: we need to call TLS callbacks!!! // Read the single export directory table from the front // https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#export-directory-table @@ -833,3 +965,12 @@ fn find_dll(name: &str, executable_path: &Path) -> Option { None } + +#[cfg(target_arch = "x86_64")] +fn setup_thread(ptr: *mut ThreadEnvironmentBlock) { + // https://www.kernel.org/doc/html/next/x86/x86_64/fsgs.html + // requires fsgsbase which is_x86_feature_detected can't seem to detect? whatever. + unsafe { + std::arch::asm!("wrgsbase {}", in(reg) ptr); + } +} diff --git a/test/example_exe_tls_crt.exe b/test/example_exe_tls_crt.exe new file mode 100644 index 0000000..62f9298 Binary files /dev/null and b/test/example_exe_tls_crt.exe differ diff --git a/test/example_exe_tls_crt.rs b/test/example_exe_tls_crt.rs new file mode 100644 index 0000000..74e49f7 --- /dev/null +++ b/test/example_exe_tls_crt.rs @@ -0,0 +1,26 @@ +#![feature(thread_local)] +#![no_std] +#![no_main] + +#[panic_handler] +fn handle_panic(_: &core::panic::PanicInfo<'_>) -> ! { + loop {} +} + +#[thread_local] +static mut A_THREAD_LOCAL: u32 = 50; +#[thread_local] +static mut ANOTHER_THREAD_LOCAL: u32 = 55; + +#[inline(never)] +fn set_tls(value: u32) { + unsafe { A_THREAD_LOCAL = value; } + unsafe { ANOTHER_THREAD_LOCAL = value; } +} + +#[no_mangle] +pub extern "stdcall" fn main() -> u32 { + // Use some indirection to actually force TLS to happen + set_tls(14); + unsafe { A_THREAD_LOCAL + ANOTHER_THREAD_LOCAL } +} diff --git a/test2/tls_exe.rs b/test2/tls_exe.rs index 39b9bce..7f96cb0 100644 --- a/test2/tls_exe.rs +++ b/test2/tls_exe.rs @@ -26,5 +26,13 @@ pub extern "stdcall" fn mainCRTStartup() -> u32 { unsafe { A_THREAD_LOCAL + ANOTHER_THREAD_LOCAL } } -#[no_mangle] -pub extern "stdcall" fn _tls_index() {} +/* +!!!!!!!!!!!!!!! +THIS IS WRONG. WE ARE NOT CREATING THE TLS DIRECTORY. THAT WOULD BE OUR JOB. +!!!!!!!!!!!!!! +*/ + + +extern "stdcall" { + static _tls_index: usize; +}