static TLS works for the main exe!!

This commit is contained in:
nora 2025-02-09 20:47:51 +01:00
parent 5c284548bd
commit a6de0298f2
7 changed files with 214 additions and 30 deletions

View file

@ -10,3 +10,4 @@ a PE loader for educational purposes.
- https://learn.microsoft.com/en-us/windows/win32/dlls/dynamic-link-library-search-order - https://learn.microsoft.com/en-us/windows/win32/dlls/dynamic-link-library-search-order
- https://www.geoffchappell.com/studies/windows/win32/apisetschema/index.htm?tx=1 - https://www.geoffchappell.com/studies/windows/win32/apisetschema/index.htm?tx=1
- http://www.nynaeve.net/?p=180 - http://www.nynaeve.net/?p=180
- https://github.com/mingw-w64/mingw-w64

View file

@ -1,3 +1,4 @@
{ pkgs ? import <nixpkgs> { } }: pkgs.mkShell { { pkgs ? import <nixpkgs> { } }: pkgs.mkShell {
nativeBuildInputs = with pkgs; [ lld_18 rustup ]; nativeBuildInputs = with pkgs; [ lld_18 rustup ];
packages = with pkgs; [ gef ];
} }

View file

@ -238,11 +238,15 @@ emulate!(
emulate!( emulate!(
"api-ms-win-crt-runtime-l1-1-0.dll", "api-ms-win-crt-runtime-l1-1-0.dll",
mod api_ms_win_crt_runtime_l1_1_0 { mod api_ms_win_crt_runtime_l1_1_0 {
fn __p___argc() { fn __p___argc() -> *const u32 {
todo!("__p___argc") static ARGC: i32 = 1;
(&raw const ARGC).cast()
} }
fn __p___argv() { /// returns the address of argv
todo!("__p___argv") fn __p___argv() -> *const *const *const u8 {
static EMPTY_ARGS: [usize; 1] = [0];
static ARGV: &[usize; 1] = &EMPTY_ARGS;
(&raw const ARGV).cast()
} }
fn _c_exit() { fn _c_exit() {
todo!("_c_exit") todo!("_c_exit")
@ -259,8 +263,8 @@ emulate!(
fn _exit() { fn _exit() {
todo!("_exit") todo!("_exit")
} }
fn _get_initial_narrow_environment() { fn _get_initial_narrow_environment() -> *const () {
todo!("_get_initial_narrow_environment") std::ptr::null()
} }
fn _initialize_narrow_environment() { fn _initialize_narrow_environment() {
todo!("_initialize_narrow_environment") todo!("_initialize_narrow_environment")
@ -268,11 +272,11 @@ emulate!(
fn _initialize_onexit_table() { fn _initialize_onexit_table() {
todo!("_initialize_onexit_table") todo!("_initialize_onexit_table")
} }
fn _initterm() { /// <https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/initterm-initterm-e?view=msvc-170>
todo!("_initterm") fn _initterm(_start: *const (), _end: *const ()) {}
} /// <https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/initterm-initterm-e?view=msvc-170>
fn _initterm_e() { fn _initterm_e(_start: *const (), _end: *const ()) -> u32 {
todo!("_initterm_e") 0
} }
fn _register_onexit_function() { fn _register_onexit_function() {
todo!("_register_onexit_function") todo!("_register_onexit_function")
@ -286,8 +290,11 @@ emulate!(
fn _set_app_type() { fn _set_app_type() {
todo!("_set_app_type") todo!("_set_app_type")
} }
fn exit() { /// <https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/exit-exit-exit?view=msvc-170>
todo!("exit") fn exit(code: i32) -> ! {
tracing::info!("application requested exit with code {code}");
// TODO: we need to do all kinds of cleanup
std::process::exit(code);
} }
fn terminate() { fn terminate() {
todo!("terminate") todo!("terminate")

View file

@ -2,11 +2,13 @@ mod emulated;
mod sys; mod sys;
use std::{ use std::{
cell::RefCell,
collections::HashMap, collections::HashMap,
ffi::{CStr, CString}, ffi::{CStr, CString},
fmt::Debug, fmt::Debug,
ops::{Deref, DerefMut}, ops::{Deref, DerefMut},
path::{Path, PathBuf}, path::{Path, PathBuf},
ptr,
sync::{ sync::{
atomic::{AtomicU64, Ordering}, atomic::{AtomicU64, Ordering},
LazyLock, Mutex, LazyLock, Mutex,
@ -251,31 +253,89 @@ struct TlsDirectory {
/// Note that this is a VA that should have been relocated earlier. /// Note that this is a VA that should have been relocated earlier.
raw_data_start_va: u64, raw_data_start_va: u64,
/// The last byte of the TLS. /// The last byte of the TLS.
raw_data_env_va: u64, raw_data_end_va: u64,
address_of_index: u64, address_of_index: u64,
address_of_callbacks: u64, address_of_callbacks: u64,
size_of_zero_fill: u32, size_of_zero_fill: u32,
characteristics: u32, characteristics: u32,
} }
// Reason codes passed as the `reason` argument of a `TlsCallback`.
// Only `DLL_PROCESS_ATTACH` is used so far (by `post_load`); the others are
// declared but unused, hence the `#[expect(dead_code)]` markers.
/// Reason code 0 (currently unused).
#[expect(dead_code)]
const DLL_PROCESS_DETACH: u32 = 0;
/// Reason code 1; the value `post_load` hands to every TLS callback.
const DLL_PROCESS_ATTACH: u32 = 1;
/// Reason code 2 (currently unused).
#[expect(dead_code)]
const DLL_THREAD_ATTACH: u32 = 2;
/// Reason code 3 (currently unused).
#[expect(dead_code)]
const DLL_THREAD_DETACH: u32 = 3;
/// Signature of a TLS callback stored in an image's TLS callback array.
///
/// `post_load` calls these with the image base as `dll_handle`, one of the
/// `DLL_*` reason codes as `reason`, and a null `reserved` pointer.
///
/// <https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#tls-callback-functions>
type TlsCallback =
unsafe extern "win64" fn(dll_handle: *const (), reason: u32, reserved: *const ());
const IMAGE_FILE_MACHINE_AMD64: u16 = 0x8664; const IMAGE_FILE_MACHINE_AMD64: u16 = 0x8664;
const IMAGE_FILE_MACHINE_ARM64: u16 = 0xaa64; const IMAGE_FILE_MACHINE_ARM64: u16 = 0xaa64;
pub fn execute(pe: &[u8], executable_path: &Path) { pub fn execute(pe: &[u8], executable_path: &Path) {
let mut main_tls_slots = [ptr::null_mut(); 64];
let mut main_teb = ThreadEnvironmentBlock {
tib: ThreadInformationBlock {
exception_list: ptr::null(),
stack_base: ptr::null(),
stack_limit: ptr::null(),
sub_system_tib: ptr::null(),
fiber_data: ptr::null(),
arbitrary_user_pointer: ptr::null(),
this: ptr::null(),
},
environment_pointer: ptr::null(),
client_id_unique_process: 0,
client_id_unique_thread: 0,
active_rpc_handle: ptr::null(),
thread_local_storage_pointer: &raw mut main_tls_slots,
};
main_teb.tib.this = &raw const main_teb;
THREAD_STATE.with(|state| state.state.borrow_mut().teb = &raw mut main_teb);
GLOBAL_STATE.state.lock().unwrap().executable_path = Some(executable_path.to_owned()); GLOBAL_STATE.state.lock().unwrap().executable_path = Some(executable_path.to_owned());
let image = load(pe, executable_path, false); let image = load(pe, executable_path, false);
let entrypoint = image.base + image.opt_header.address_of_entry_point as usize; let entrypoint = image.base + image.opt_header.address_of_entry_point as usize;
tracing::debug!("YOLO to {:#x}", entrypoint); tracing::debug!("YOLO to {:#x}", entrypoint);
setup_thread(&raw mut main_teb);
post_load(&image);
unsafe { unsafe {
let entrypoint = let entrypoint =
std::mem::transmute::<usize, unsafe extern "win64" fn() -> u32>(entrypoint); std::mem::transmute::<usize, unsafe extern "win64" fn() -> u32>(entrypoint);
setup_thread(&raw mut main_teb);
let result = entrypoint(); let result = entrypoint();
tracing::info!("result: {result}"); tracing::info!("result: {result}");
}; };
} }
/// Runs the image's TLS callbacks once loading has finished.
///
/// Looks up the first `TlsDirectory` in the image's TLS data directory and
/// invokes every entry of the null-terminated callback array found at
/// `address_of_callbacks`, passing `DLL_PROCESS_ATTACH` as the reason.
///
/// Returns without calling anything when the image has no TLS directory or
/// when the directory does not reference a callback array.
fn post_load(image: &Image<'_>) {
    tracing::debug!("call TLS callbacks");
    let Some(tls_directory) = bytemuck::cast_slice::<u8, TlsDirectory>(
        &image[image.opt_header.tls_table.rva as usize..]
            [..image.opt_header.tls_table.size as usize],
    )
    .first() else {
        return;
    };

    let mut callback_slot = tls_directory.address_of_callbacks as *const Option<TlsCallback>;
    // A zero `address_of_callbacks` means there is no callback array at all;
    // walking it would dereference a null pointer.
    if callback_slot.is_null() {
        return;
    }

    // SAFETY: `callback_slot` is non-null and is taken from the image's TLS
    // directory, which is expected to point at a null-terminated array of
    // callback pointers inside the mapped image. `Option<fn>` uses the null
    // niche, so the terminating zero entry reads as `None` and ends the loop.
    while let Some(callback) = unsafe { *callback_slot } {
        tracing::debug!("calling TLS callback at {callback_slot:p}");
        // SAFETY: the entry is a non-null function pointer supplied by the
        // image; it is called with the signature documented for TLS callbacks.
        unsafe { callback(image.base as _, DLL_PROCESS_ATTACH, ptr::null()) };
        // SAFETY: the array is null-terminated and the terminator has not been
        // reached yet, so the next entry is still part of the array.
        callback_slot = unsafe { callback_slot.add(1) };
    }
}
#[derive(Clone)] #[derive(Clone)]
struct Image<'pe> { struct Image<'pe> {
base: usize, base: usize,
@ -325,6 +385,14 @@ struct TheGlobalState {
executable_path: Option<PathBuf>, executable_path: Option<PathBuf>,
hmodule_to_dll: HashMap<u64, LoadedDll>, hmodule_to_dll: HashMap<u64, LoadedDll>,
next_emulated_hmodule_idx: AtomicU64, next_emulated_hmodule_idx: AtomicU64,
tls_slots: Vec<TlsSlot>,
}
/// One entry of the global `tls_slots` list; the loader pushes one per module
/// that has a TLS directory.
enum TlsSlot {
/// TLS data described by a module's TLS directory.
Static {
/// The module's initial TLS bytes (the range between the directory's
/// raw-data start and end addresses). Stored but not read yet.
#[expect(dead_code)]
init: &'static [u8],
},
}
struct GlobalStateWrapper { struct GlobalStateWrapper {
@ -344,6 +412,22 @@ impl GlobalStateWrapper {
} }
} }
/// Loader state that exists once per host thread.
struct ThreadState {
/// Pointer to this thread's emulated thread environment block.
/// Starts out null and is set by `execute` for the main thread.
teb: *mut ThreadEnvironmentBlock,
}
/// Wrapper around `ThreadState` that provides interior mutability, so the
/// thread-local `THREAD_STATE` can be updated through a shared reference.
struct ThreadStateWrapper {
/// The mutable per-thread state; borrow with `borrow()` / `borrow_mut()`.
state: RefCell<ThreadState>,
}
std::thread_local! {
/// Per-thread loader state. The TEB pointer is null until it is assigned
/// for the current thread (`execute` does this for the main thread).
static THREAD_STATE: ThreadStateWrapper = ThreadStateWrapper {
state: RefCell::new(ThreadState {
teb: ptr::null_mut()
}),
};
}
static GLOBAL_STATE: GlobalStateWrapper = GlobalStateWrapper { static GLOBAL_STATE: GlobalStateWrapper = GlobalStateWrapper {
state: LazyLock::new(|| { state: LazyLock::new(|| {
Mutex::new(TheGlobalState { Mutex::new(TheGlobalState {
@ -351,18 +435,34 @@ static GLOBAL_STATE: GlobalStateWrapper = GlobalStateWrapper {
executable_path: None, executable_path: None,
hmodule_to_dll: HashMap::new(), hmodule_to_dll: HashMap::new(),
next_emulated_hmodule_idx: AtomicU64::new(1), next_emulated_hmodule_idx: AtomicU64::new(1),
tls_slots: Vec::new(),
}) })
}), }),
}; };
#[repr(C)] #[repr(C)]
struct ThreadEnvironmentBlock { struct ThreadInformationBlock {
host_thread_ptr: *const (), exception_list: *const (),
_pad: [u8; 80], stack_base: *const (),
thing: *const (), stack_limit: *const (),
sub_system_tib: *const (),
fiber_data: *const (),
arbitrary_user_pointer: *const (),
this: *const ThreadEnvironmentBlock,
} }
const _: () = assert!(std::mem::offset_of!(ThreadEnvironmentBlock, thing) == 88);
// https://github.com/wine-mirror/wine/blob/1aff1e6a370ee8c0213a0fd4b220d121da8527aa/include/winternl.h#L347
/// The leading part of the emulated thread environment block (TEB).
///
/// `#[repr(C)]` keeps the fields in declaration order; the assertion that
/// follows this struct pins `thread_local_storage_pointer` to offset 88.
/// `setup_thread` installs a pointer to this struct as the GS base.
#[repr(C)]
struct ThreadEnvironmentBlock {
/// The thread information block; it is the first member of the TEB.
tib: ThreadInformationBlock,
environment_pointer: *const (),
client_id_unique_process: u64, // handle
client_id_unique_thread: u64, // handle
active_rpc_handle: *const (),
/// Pointer to the array of 64 TLS slots; the loader stores each module's
/// TLS data pointer at that module's slot index.
thread_local_storage_pointer: *mut [*mut (); 64],
}
// Compile-time layout check: the two array types only agree when
// `thread_local_storage_pointer` sits at byte offset 88 of
// `ThreadEnvironmentBlock`; any other offset is a type error at build time.
const _: [(); 88] =
[(); std::mem::offset_of!(ThreadEnvironmentBlock, thread_local_storage_pointer)];
#[tracing::instrument(skip(pe, is_dll))] #[tracing::instrument(skip(pe, is_dll))]
fn load<'pe>(pe: &'pe [u8], executable_path: &Path, is_dll: bool) -> Image<'pe> { fn load<'pe>(pe: &'pe [u8], executable_path: &Path, is_dll: bool) -> Image<'pe> {
@ -582,14 +682,44 @@ fn load_inner<'pe>(pe: &'pe [u8], executable_path: &Path, is_dll: bool) -> Image
} }
} }
/* // https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#the-tls-section
not what's happening?
tracing::debug!("load TLS"); if opt_header.tls_table.size > 0 {
let tls_directory = bytemuck::cast_slice::<u8, TlsDirectory>( let tls_directory = bytemuck::cast_slice::<u8, TlsDirectory>(
&image[opt_header.tls_table.rva as usize..][..opt_header.tls_table.size as usize], &image[opt_header.tls_table.rva as usize..][..opt_header.tls_table.size as usize],
); )[0];
tracing::debug!(?tls_directory, "TLS directory"); // Each module's data gets a slot in the TLS.
*/ tracing::debug!("load TLS");
let mut state = GLOBAL_STATE.state.lock().unwrap();
let module_tls_slot_idx = state.tls_slots.len();
// TODO: hi i think i need to do something here to assign the correct index so that the DLL understands what it's supposed to do.
tracing::debug!(?tls_directory, "TLS directory");
let size = tls_directory.raw_data_end_va - tls_directory.raw_data_start_va;
assert!(size > 0);
let init = unsafe {
std::slice::from_raw_parts(tls_directory.raw_data_start_va as *const u8, size as usize)
};
state.tls_slots.push(TlsSlot::Static { init });
drop(state);
// TODO: alignment..
let tls_data = unsafe {
std::alloc::alloc(std::alloc::Layout::from_size_align(size as usize, 8).unwrap())
};
assert!(!tls_data.is_null());
unsafe { ptr::copy_nonoverlapping(init.as_ptr(), tls_data, size as usize) };
THREAD_STATE.with(|state| unsafe {
(*(*state.state.borrow().teb).thread_local_storage_pointer)[module_tls_slot_idx] =
tls_data.cast::<()>()
})
} else {
tracing::debug!("no TLS");
}
tracing::debug!("applying section protections"); tracing::debug!("applying section protections");
for section in section_table { for section in section_table {
@ -707,6 +837,8 @@ fn load_dll(dll_name: &str, executable_path: &Path) -> Option<LoadedDll> {
let mmap = unsafe { &*(&**mmap as *const [u8]) }; let mmap = unsafe { &*(&**mmap as *const [u8]) };
let img: Image<'static> = load(&mmap, &path, true); let img: Image<'static> = load(&mmap, &path, true);
// TODO: we need to call DllMain!!!
// TODO: we need to call TLS callbacks!!!
// Read the single export directory table from the front // Read the single export directory table from the front
// https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#export-directory-table // https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#export-directory-table
@ -833,3 +965,12 @@ fn find_dll(name: &str, executable_path: &Path) -> Option<DllLocation> {
None None
} }
/// Installs `ptr` as the GS segment base of the calling thread.
///
/// Uses the `wrgsbase` instruction, which needs the FSGSBASE CPU feature
/// (see <https://www.kernel.org/doc/html/next/x86/x86_64/fsgs.html>).
/// `is_x86_feature_detected!` does not seem to be able to detect that feature,
/// so no runtime check is performed before executing the instruction.
#[cfg(target_arch = "x86_64")]
fn setup_thread(ptr: *mut ThreadEnvironmentBlock) {
    // SAFETY: the instruction only replaces the GS base register with `ptr`;
    // NOTE(review): presumably nothing on the host side relies on the previous
    // GS base - confirm for the targeted host OS.
    unsafe {
        std::arch::asm!("wrgsbase {teb}", teb = in(reg) ptr);
    }
}

Binary file not shown.

View file

@ -0,0 +1,26 @@
#![feature(thread_local)]
#![no_std]
#![no_main]
/// Panic handler required because the crate is `#![no_std]`.
/// It ignores the panic information and spins forever.
#[panic_handler]
fn handle_panic(_: &core::panic::PanicInfo<'_>) -> ! {
loop {}
}
// Two thread-local statics with non-zero initial values. `set_tls` overwrites
// both and `main` returns their sum.
#[thread_local]
static mut A_THREAD_LOCAL: u32 = 50;
#[thread_local]
static mut ANOTHER_THREAD_LOCAL: u32 = 55;
/// Stores `value` into both thread-local statics.
///
/// Marked `#[inline(never)]` so the writes stay behind a real call.
#[inline(never)]
fn set_tls(value: u32) {
    // SAFETY: both statics are `#[thread_local]`, so only the current thread
    // can touch them, and no references to them are held across these writes.
    unsafe {
        A_THREAD_LOCAL = value;
        ANOTHER_THREAD_LOCAL = value;
    }
}
/// Entry point of the test executable: writes 14 to both thread-locals via
/// `set_tls` and returns their sum.
#[no_mangle]
pub extern "stdcall" fn main() -> u32 {
    // Go through the non-inlined helper so TLS accesses are actually emitted.
    set_tls(14);
    // SAFETY: the statics are `#[thread_local]` and are only read by value here.
    let (first, second) = unsafe { (A_THREAD_LOCAL, ANOTHER_THREAD_LOCAL) };
    first + second
}

View file

@ -26,5 +26,13 @@ pub extern "stdcall" fn mainCRTStartup() -> u32 {
unsafe { A_THREAD_LOCAL + ANOTHER_THREAD_LOCAL } unsafe { A_THREAD_LOCAL + ANOTHER_THREAD_LOCAL }
} }
#[no_mangle] /*
pub extern "stdcall" fn _tls_index() {} !!!!!!!!!!!!!!!
THIS IS WRONG. WE ARE NOT CREATING THE TLS DIRECTORY. THAT WOULD BE OUR JOB.
!!!!!!!!!!!!!!
*/
extern "stdcall" {
static _tls_index: usize;
}