Introduction
Let's see what we can do with this tool first. Consider the following program:
#include <Windows.h>
#include <stdio.h>
#pragma comment(lib, "TestDll.lib")
__declspec(dllimport) void export_function1();
__declspec(dllimport) void export_function2();
/*
 * Demo target process: calls the two functions imported from TestDll one
 * after the other, sleeping briefly and waiting for a key press after each.
 */
int main(int argc, char* argv[])
{
    /* first monitored call */
    export_function1();
    Sleep(1);
    getchar();

    /* second monitored call */
    export_function2();
    Sleep(1);
    getchar();

    return 0;
}
Let's see how we can step through the calls in sync
mode:
And in async
mode:
Log file:
So the workflow is:
- Specify libraries used by target process, so their exports will be patched (we replace first two bytes of function with infinite loop, so the process will block, this ensures that we won't miss any API call)
- Launch target process (or perform some action that will trigger the execution of target process, it doesn't matter how it will start its execution)
- Now we can enter commands (to add more hooks, perform sync and async waits, etc.)
Note that we can filter
API calls by loading filters. The job of the filter is to inspect the thread context, process memory, etc. and to return true
(if we are interested in this call) or false
(otherwise). Also in sync mode, we can stop on some call, attach debugger, do some stuff, detach debugger and continue like nothing happened. Another possibility is to add custom loggers that will dump register values and memory content to the file.
Using the Code
Now let's turn our attention to the code. I will go over the key points here; you can always download the sample to see the full code. Let's start with the structure definitions:
/*
 * Register snapshot captured when a hooked function is entered.
 * The Log routine of the injected DLL builds this structure directly on the
 * stack (Rip at offset 0, then Rsp, Rcx, Rdx, R8, R9 at 8-byte steps, 48 bytes
 * in total), so the field order and sizes must stay in sync with that code.
 */
struct FUNCTION_CONTEXT
{
DWORD64 Rip; /* address of the hooked function (where the hook was hit) */
DWORD64 Rsp; /* stack pointer of the calling thread at the hook */
DWORD64 Rcx; /* rcx at the hook */
DWORD64 Rdx; /* rdx at the hook */
DWORD64 R8;  /* r8 at the hook */
DWORD64 R9;  /* r9 at the hook */
};
struct LIBRARY_ITEM;
struct FUNCTION_ITEM;
/*
 * Callback signature shared by filters and loggers: receives the hooked
 * function's descriptor, the target process handle and the captured context.
 * For a filter, a non-zero result means the call should be reported.
 */
typedef BOOL(*TAux)(FUNCTION_ITEM *func, HANDLE hProcess, FUNCTION_CONTEXT *context);
/*
 * Describes one hooked function. A slot whose Name is NULL is unused and is
 * skipped by the library-wide Init and SetHook loops.
 */
struct FUNCTION_ITEM
{
LIBRARY_ITEM *lib;           /* library entry this function belongs to */
char *Name;                  /* heap-allocated display name; NULL marks an unused slot */
UCHAR Bytes[BYTES_SIZE];     /* original first BYTES_SIZE bytes of the function */
UCHAR *SyncTrampoline;       /* trampoline address inside the target process; NULL until InitSync runs */
UCHAR SyncHook[BYTES_SIZE];  /* bytes written over the function start for a sync hook */
UCHAR *AsyncTrampoline;      /* trampoline address inside the target process; NULL until InitAsync runs */
UCHAR AsyncHook[BYTES_SIZE]; /* bytes written over the function start for an async hook */
DWORD Offset;                /* file offset of the function in the library file (0 for functions added at run time) */
DWORD RVA;                   /* RVA of the function inside its module (0 for functions added at run time) */
DWORD64 Rip;                 /* address of the function in the target process; 0 until the module base is known */
HMODULE LibFilter;           /* presumably the module that provides ProcFilter; only ever set to NULL in the code shown */
TAux ProcFilter;             /* optional filter; when NULL every call is reported */
HMODULE LibLogger;           /* presumably the module that provides ProcLogger; only ever set to NULL in the code shown */
TAux ProcLogger;             /* optional custom logger; when NULL the async wait thread calls LogItem instead */
DWORD Mode;                  /* current hook state: MODE_NO_HOOK, MODE_SYNC_HOOK or MODE_ASYNC_HOOK */
};
/*
 * Describes one module whose functions are hooked, together with the array
 * of per-function descriptors.
 */
struct LIBRARY_ITEM
{
char *LibName;       /* module name used to look the module up in the target process */
char *FileName;      /* path of the patched library file on disk; NULL for entries created at run time */
UCHAR *Base;         /* module base address in the target process; NULL while the module is not loaded */
DWORD Count;         /* number of entries in item */
FUNCTION_ITEM *item; /* array of Count function descriptors (unused slots have Name == NULL) */
};
The FUNCTION_CONTEXT structure holds the register values at the moment the hook is hit. The LIBRARY_ITEM structure holds information about the module that contains the functions to be hooked. The FUNCTION_ITEM structure holds information about a hooked function. As you can see, we use trampolines to implement API hooking; this technique is thread-safe and also allows us to hook "free" functions (not only "APIs" that are typically called through a pointer in the IAT).
Now let's see the code to patch library, it is executed before we enter target process id:
/*
 * Patches the library file FileName on disk so that every named export starts
 * with the sync hook (an infinite loop), and returns a LIBRARY_ITEM describing
 * the patched functions.
 *
 * FileName - path of the library file to patch in place.
 * LibName  - module name stored in the result (used later to find the module
 *            in the target process).
 *
 * Returns a newly allocated LIBRARY_ITEM (Base == NULL, one FUNCTION_ITEM slot
 * per export name), or NULL when the file cannot be read, is not a PE image
 * with an export directory, memory cannot be allocated, or the patched image
 * cannot be written back. Slots that were not patched (forwarded exports,
 * empty export slots, functions that already start with the sync hook) are
 * zeroed, i.e. their Name is NULL.
 */
LIBRARY_ITEM* AddLibrary(char *FileName, char *LibName)
{
    FILE *file;
    long FileSize;
    DWORD size;
    UCHAR *Image;
    IMAGE_DOS_HEADER *DosHeader;
    IMAGE_NT_HEADERS *Headers;
    IMAGE_SECTION_HEADER *Sections;
    DWORD ExportsRVA;
    DWORD ExportsSize;
    DWORD ExportsOffset;
    IMAGE_EXPORT_DIRECTORY *Exports;
    DWORD *AddressOfFunctions;
    DWORD *AddressOfNames;
    WORD *AddressOfNameOrdinals;
    DWORD FunctionRVA;
    DWORD FunctionOffset;
    UCHAR *Function;
    FUNCTION_ITEM *item;
    LIBRARY_ITEM *lib;
    DWORD Count;
    DWORD RVA;
    BOOL Written;
    UCHAR SyncHook[SYNC_HOOK_SIZE];

    GenerateSyncHook(NULL, SyncHook);

    /* Load the whole file into memory. */
    file = fopen(FileName, "rb");
    if (!file) return NULL;
    if (fseek(file, 0, SEEK_END) != 0 ||
        (FileSize = ftell(file)) <= 0 ||
        fseek(file, 0, SEEK_SET) != 0)
    {
        fclose(file);
        return NULL;
    }
    size = (DWORD)FileSize;
    Image = (UCHAR*)malloc(size);
    if (!Image)
    {
        fclose(file);
        return NULL;
    }
    if (fread(Image, size, 1, file) != 1)
    {
        fclose(file);
        free(Image);
        return NULL;
    }
    fclose(file);

    /* Basic sanity checks so that a non-PE file is rejected instead of patched. */
    if (size < sizeof(IMAGE_DOS_HEADER))
    {
        free(Image);
        return NULL;
    }
    DosHeader = (IMAGE_DOS_HEADER*)Image;
    if ((DosHeader->e_magic != IMAGE_DOS_SIGNATURE) ||
        (DosHeader->e_lfanew < 0) ||
        ((DWORD64)DosHeader->e_lfanew + sizeof(IMAGE_NT_HEADERS) > size))
    {
        free(Image);
        return NULL;
    }
    Headers = (IMAGE_NT_HEADERS64*)(Image + DosHeader->e_lfanew);
    if (Headers->Signature != IMAGE_NT_SIGNATURE)
    {
        free(Image);
        return NULL;
    }
    Sections = (IMAGE_SECTION_HEADER*)((UCHAR*)Headers +
        (offsetof(IMAGE_NT_HEADERS, OptionalHeader) + Headers->FileHeader.SizeOfOptionalHeader));

    ExportsRVA = Headers->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT].VirtualAddress;
    ExportsSize = Headers->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT].Size;
    if (!ExportsRVA)
    {
        /* No export directory: nothing to hook. */
        free(Image);
        return NULL;
    }
    ExportsOffset = TranslateRVAToOffset(Sections, ExportsRVA);
    Exports = (IMAGE_EXPORT_DIRECTORY*)(Image + ExportsOffset);
    if (Exports->NumberOfNames != Exports->NumberOfFunctions) DbgRaiseAssertionFailure();

    AddressOfFunctions = (DWORD*)(Image + TranslateRVAToOffset(Sections, Exports->AddressOfFunctions));
    AddressOfNames = (DWORD*)(Image + TranslateRVAToOffset(Sections, Exports->AddressOfNames));
    AddressOfNameOrdinals = (WORD*)(Image + TranslateRVAToOffset(Sections, Exports->AddressOfNameOrdinals));

    Count = Exports->NumberOfNames;
    item = (FUNCTION_ITEM*)malloc(sizeof(FUNCTION_ITEM) * Count);
    lib = (LIBRARY_ITEM*)malloc(sizeof(LIBRARY_ITEM));
    if (!lib || (Count && !item))
    {
        free(lib);
        free(item);
        free(Image);
        return NULL;
    }

    for (DWORD i = 0; i < Count; ++i)
    {
        /* Start from an unused slot; it is filled in only if the export is patched. */
        memset(&item[i], 0, sizeof(item[i]));

        /*
         * The i-th name belongs to the function selected by the i-th entry of
         * the name-ordinal table, not to the i-th function.
         */
        if (AddressOfNameOrdinals[i] >= Exports->NumberOfFunctions) continue;
        FunctionRVA = AddressOfFunctions[AddressOfNameOrdinals[i]];
        if (!FunctionRVA) continue;

        /*
         * An RVA inside the export directory is a forwarder string, not code;
         * overwriting it would corrupt the export table.
         */
        if ((FunctionRVA >= ExportsRVA) && (FunctionRVA - ExportsRVA < ExportsSize)) continue;

        FunctionOffset = TranslateRVAToOffset(Sections, FunctionRVA);
        Function = Image + FunctionOffset;

        /* Follow a jump stub to the real function body, if there is one. */
        RVA = GetTargetRVAFromStub(Function, FunctionRVA);
        if (RVA != FunctionRVA)
        {
            FunctionOffset = TranslateRVAToOffset(Sections, RVA);
            Function = Image + FunctionOffset;
        }

        /* Already patched (e.g. a second name for the same function): leave the slot unused. */
        if (!memcmp(Function, SyncHook, SYNC_HOOK_SIZE)) continue;

        /* Name temporarily points into Image; it is duplicated once the file is written. */
        item[i].Name = (char*)(Image + TranslateRVAToOffset(Sections, AddressOfNames[i]));
        item[i].RVA = RVA;
        item[i].Rip = 0;
        item[i].Offset = FunctionOffset;
        item[i].LibFilter = NULL;
        item[i].ProcFilter = NULL;
        item[i].LibLogger = NULL;
        item[i].ProcLogger = NULL;
        item[i].SyncTrampoline = NULL;
        item[i].AsyncTrampoline = NULL;
        item[i].Mode = MODE_SYNC_HOOK;
        item[i].lib = lib;
        memcpy(item[i].Bytes, Function, BYTES_SIZE);
        memcpy(Function, SyncHook, SYNC_HOOK_SIZE);
    }

    /* Write the patched image back over the original file. */
    Written = FALSE;
    file = fopen(FileName, "wb");
    if (file)
    {
        Written = (fwrite(Image, size, 1, file) == 1);
        if (fclose(file) != 0) Written = FALSE;
    }
    if (!Written)
    {
        free(lib);
        free(item);
        free(Image);
        return NULL;
    }

    /* Only now take private copies of the strings, so failure paths above own nothing else. */
    for (DWORD i = 0; i < Count; ++i)
    {
        if (item[i].Name) item[i].Name = StringCopy(item[i].Name);
    }
    lib->Count = Count;
    lib->Base = NULL;
    lib->LibName = StringCopy(LibName);
    lib->item = item;
    lib->FileName = StringCopy(FileName);

    free(Image);
    return lib;
}
As you can see, we patch library file, build LIBRARY_ITEM
structure that holds FUNCTION_ITEM
structure for each patched function.
Now let's consider the process of hooking.
Sync
hook:
- the process blocks (first few bytes of function are replaced by infinite loop)
- we enumerate all threads of target process, find the corresponding
FUNCTION_ITEM
structure - now we can debug the called function with debugger or just skip it (change thread instruction pointer, so it will point to sync trampoline that we previously generated)
Async
hook:
- the first few bytes of the function are replaced by instructions that fetch their own instruction pointer value and jump to a function exported by a special DLL injected into the target process
- this exported function collects the register values and passes this information to us (using sockets)
- we find corresponding
FUNCTION_ITEM
structure, log the function call and send the address of async trampoline back to target process (using sockets) - exported function receives the address of async trampoline and jumps to this address
Let's see the function to generate trampoline:
/*
 * Builds the trampoline for a hooked function: a relocated copy of the
 * instructions that the hook overwrites, followed by a jump back into the
 * original function just after them.
 *
 * hProcess   - target process; read to resolve indirect jmp/call targets.
 * Rip        - address of the hooked function in the target process.
 * Mode       - MODE_SYNC_HOOK or MODE_ASYNC_HOOK; selects how many bytes the
 *              hook overwrites.
 * Bytes      - saved original bytes of the function (BYTES_SIZE bytes).
 * TrampBytes - output buffer that receives the trampoline code.
 * TrampLen   - receives the number of bytes written to TrampBytes.
 *
 * Position-dependent instructions are rewritten: relative jmp/call become
 * direct ones to the absolute target, REX-prefixed `FF 25` / `FF 15` are
 * resolved through the target's memory, and other rip-relative operands are
 * rewritten to go through a scratch register loaded with the absolute address.
 */
void GenerateTrampoline(HANDLE hProcess, DWORD64 Rip,
                        DWORD Mode, UCHAR *Bytes, UCHAR *TrampBytes, DWORD *TrampLen)
{
    DWORD i = 0;          /* read position in Bytes */
    DWORD j = 0;          /* write position in TrampBytes */
    DWORD64 Address;
    Buffer code_buf;
    Instruction inst;
    INT32 Offset;
    UCHAR Rex;
    UCHAR Opcode;
    UCHAR Modrm;
    DWORD HookLen = 0;    /* defined even if the assertion below is continued */
    DWORD AddrReg;
    DWORD Reg;

    if (Mode == MODE_SYNC_HOOK) HookLen = SYNC_HOOK_SIZE;
    else if (Mode == MODE_ASYNC_HOOK) HookLen = ASYNC_HOOK_SIZE;
    else DbgRaiseAssertionFailure();

    while (TRUE)
    {
        /* ret: the function ends here, nothing to jump back to. */
        if (Bytes[i] == 0xC3) {
            TrampBytes[j] = Bytes[i];
            ++j;
            break;
        }
        /* Everything the hook overwrites has been relocated: continue in the original code. */
        if (i >= HookLen)
        {
            Address = Rip + i;
            GenerateDirectJump(Address, &TrampBytes[j]);
            j += DIRECT_JUMP_SIZE;
            break;
        }
        /* jmp rel8: the displacement is a signed byte, so sign-extend it. */
        if (Bytes[i] == 0xEB) {
            ++i;
            Offset = (signed char)Bytes[i];
            ++i;
            Address = Rip + i + Offset;
            GenerateDirectJump(Address, &TrampBytes[j]);
            j += DIRECT_JUMP_SIZE;
            break;
        }
        /* jmp rel32 */
        if (Bytes[i] == 0xE9) {
            ++i;
            Offset = *((INT32*)&Bytes[i]);
            i += sizeof(Offset);
            Address = Rip + i + Offset;
            GenerateDirectJump(Address, &TrampBytes[j]);
            j += DIRECT_JUMP_SIZE;
            break;
        }
        /* call rel32 */
        if (Bytes[i] == 0xE8) {
            ++i;
            Offset = *((INT32*)&Bytes[i]);
            i += sizeof(Offset);
            Address = Rip + i + Offset;
            GenerateDirectCall(Address, &TrampBytes[j]);
            j += DIRECT_CALL_SIZE;
            continue;
        }
        /* REX prefix followed by an indirect rip-relative jmp/call. */
        if ((Bytes[i] >= 0x40) && (Bytes[i] <= 0x4F))
        {
            ++i;
            if ((Bytes[i] == 0xFF) && (Bytes[i + 1] == 0x25)) {
                i += 2;
                Offset = *((INT32*)&Bytes[i]);
                i += sizeof(Offset);
                /* The operand is a pointer slot; fetch the real target from the process. */
                ReadProcessMemoryEx(hProcess, Rip + i + Offset, (UCHAR*)&Address, sizeof(Address));
                GenerateDirectJump(Address, &TrampBytes[j]);
                j += DIRECT_JUMP_SIZE;
                break;
            }
            if ((Bytes[i] == 0xFF) && (Bytes[i + 1] == 0x15)) {
                i += 2;
                Offset = *((INT32*)&Bytes[i]);
                i += sizeof(Offset);
                ReadProcessMemoryEx(hProcess, Rip + i + Offset, (UCHAR*)&Address, sizeof(Address));
                GenerateDirectCall(Address, &TrampBytes[j]);
                j += DIRECT_CALL_SIZE;
                continue;
            }
            /* Not one of the special forms: step back onto the prefix. */
            --i;
        }

        /*
         * Decode the instruction at the current position. The buffer must
         * start at Bytes[i]: its length is the number of bytes remaining from
         * i, and code_buf.i is used below as the length of the instruction at
         * Bytes[i].
         * NOTE(review): assumes c_MakeBuffer(ptr, len, ...) wraps len bytes
         * starting at ptr - confirm against the disassembler's API.
         */
        c_MakeBuffer(&Bytes[i], BYTES_SIZE - i, (Encoding)0, &code_buf);
        inst_set_params(&inst, MODE_64, C_TRUE, &code_buf, NULL,
                        SHOW_ADDRESS | SHOW_LOWER | SHOW_PSEUDO);
        if (!decode(&inst)) DbgRaiseAssertionFailure();

        if (inst.rip)
        {
            /*
             * rip-relative operand. The instruction is taken apart as
             * [REX] opcode modrm disp32 and re-emitted with the operand
             * addressed through a scratch register.
             */
            if ((Bytes[i] >= 0x40) && (Bytes[i] <= 0x4F))
            {
                Rex = Bytes[i];
                ++i;
            }
            else Rex = 0;
            Opcode = Bytes[i];
            ++i;
            Modrm = Bytes[i];
            ++i;
            Offset = *((INT32*)&Bytes[i]);
            i += sizeof(Offset);
            Address = Rip + i + Offset;
            Reg = (Modrm & 0x38) >> 3;
            /* Pick a scratch register number (0 or 1) different from the reg field. */
            AddrReg = (Reg) ? (0) : (1);
            GenerateRegisterOverride(AddrReg, Address, &TrampBytes[j]);
            j += REGISTER_OVERRIDE_SIZE;
            if (Rex)
            {
                TrampBytes[j] = Rex;
                ++j;
            }
            TrampBytes[j] = Opcode;
            ++j;
            /* mod = 00, rm = scratch register: memory operand addressed by that register */
            TrampBytes[j] = AddrReg | (Reg << 3);
            ++j;
            GenerateRegisterRestore(AddrReg, &TrampBytes[j]);
            j += REGISTER_RESTORE_SIZE;
        }
        else
        {
            /* Position-independent instruction: copy it unchanged. */
            memcpy(&TrampBytes[j], &Bytes[i], code_buf.i);
            i += code_buf.i;
            j += code_buf.i;
        }
    }
    *TrampLen = j;
}
I use my own disassembler, it is included in the sample. However, you can replace it with another one, I just needed a fast way to find out whether instruction uses rip relative addressing.
Now functions to generate hooks:
/*
 * Produces the bytes of a sync hook in HookBytes: a short relative jump whose
 * displacement is minus its own size. When Bytes is given, the remainder of
 * HookBytes (up to BYTES_SIZE) is filled with the function's original bytes
 * that follow the hook.
 */
void GenerateSyncHook(UCHAR *Bytes, UCHAR *HookBytes)
{
    GenerateRelative8Jump(-(SYNC_HOOK_SIZE), HookBytes);

    if (!Bytes)
    {
        /* Caller only wants the hook instruction itself. */
        return;
    }

    memcpy(&HookBytes[SYNC_HOOK_SIZE], &Bytes[SYNC_HOOK_SIZE], BYTES_SIZE - SYNC_HOOK_SIZE);
}
/*
 * Produces the bytes of an async hook in HookBytes: a relative call with a
 * zero displacement, followed by an indirect jump through the pointer slot
 * located right after the module's DOS header (Base + sizeof(IMAGE_DOS_HEADER)).
 * When Bytes is given, the rest of HookBytes (up to BYTES_SIZE) is filled with
 * the function's original bytes that follow the hook.
 */
void GenerateAsyncHook(UCHAR *Base, DWORD64 Rip, UCHAR *Bytes, UCHAR *HookBytes)
{
    INT32 Offset;
    UCHAR *JumpBytes = HookBytes + RELATIVE_CALL_SIZE;

    GenerateRelativeCall(0, HookBytes);

    /* Displacement from the end of the hook to the pointer slot. */
    Offset = (DWORD64)(Base + sizeof(IMAGE_DOS_HEADER)) - (Rip + ASYNC_HOOK_SIZE);
    GenerateIndirectJump(Offset, JumpBytes);

    if (!Bytes)
    {
        return;
    }
    memcpy(&HookBytes[ASYNC_HOOK_SIZE], &Bytes[ASYNC_HOOK_SIZE], BYTES_SIZE - ASYNC_HOOK_SIZE);
}
Init
functions:
/*
 * Prepares a function for sync hooking: on first use, generates its sync
 * trampoline, copies it into freshly allocated executable memory in the
 * target process and builds the sync hook bytes. Does nothing when the
 * trampoline already exists.
 */
void InitSync(FUNCTION_ITEM *func, HANDLE hProcess)
{
    UCHAR Code[MAX_TRAMPOLINE_SIZE];
    DWORD CodeLen;

    if (func->SyncTrampoline)
    {
        return;
    }

    GenerateTrampoline(hProcess, func->Rip, MODE_SYNC_HOOK, func->Bytes, Code, &CodeLen);

    func->SyncTrampoline =
        (UCHAR*)VirtualAllocEx(hProcess, NULL, CodeLen, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
    WriteProcessMemoryEx(hProcess, (DWORD64)func->SyncTrampoline, Code, CodeLen);

    GenerateSyncHook(func->Bytes, func->SyncHook);
}
/*
 * Runs InitSync on every used slot of the library whose current Mode equals
 * FilterMode.
 */
void InitSync(LIBRARY_ITEM *lib, DWORD FilterMode, HANDLE hProcess)
{
    for (DWORD Index = 0; Index < lib->Count; ++Index)
    {
        FUNCTION_ITEM *Entry = &lib->item[Index];

        /* Unused slots have no name; the mode is only examined for used ones. */
        if (!Entry->Name || Entry->Mode != FilterMode)
        {
            continue;
        }
        InitSync(Entry, hProcess);
    }
}
/*
 * Prepares a function for async hooking: on first use, generates its async
 * trampoline, copies it into freshly allocated executable memory in the
 * target process and builds the async hook bytes. Does nothing when the
 * trampoline already exists.
 */
void InitAsync(FUNCTION_ITEM *func, HANDLE hProcess)
{
    UCHAR Code[MAX_TRAMPOLINE_SIZE];
    DWORD CodeLen;

    if (func->AsyncTrampoline)
    {
        return;
    }

    GenerateTrampoline(hProcess, func->Rip, MODE_ASYNC_HOOK, func->Bytes, Code, &CodeLen);

    func->AsyncTrampoline =
        (UCHAR*)VirtualAllocEx(hProcess, NULL, CodeLen, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
    WriteProcessMemoryEx(hProcess, (DWORD64)func->AsyncTrampoline, Code, CodeLen);

    GenerateAsyncHook(func->lib->Base, func->Rip, func->Bytes, func->AsyncHook);
}
/*
 * Stores the pLog pointer value in the target process right after the
 * module's DOS header (the slot the async hook jumps through), then runs
 * InitAsync on every used slot of the library whose current Mode equals
 * FilterMode.
 */
void InitAsync(LIBRARY_ITEM *lib, DWORD FilterMode, HANDLE hProcess, UCHAR *pLog)
{
    DWORD64 SlotAddress = (DWORD64)(lib->Base + sizeof(IMAGE_DOS_HEADER));

    WriteProcessMemoryEx(hProcess, SlotAddress, (UCHAR*)&pLog, sizeof(pLog));

    for (DWORD Index = 0; Index < lib->Count; ++Index)
    {
        FUNCTION_ITEM *Entry = &lib->item[Index];

        if (!Entry->Name || Entry->Mode != FilterMode)
        {
            continue;
        }
        InitAsync(Entry, hProcess);
    }
}
Functions to add export, "free" function, and object:
/*
 * Hooks a single exported function of a module that is already loaded in the
 * target process. Returns a new one-entry LIBRARY_ITEM with the sync hook
 * installed, or NULL when the module or the export cannot be found.
 */
LIBRARY_ITEM* AddExport(char *LibName, char *ProcName, HANDLE hProcess)
{
    HMODULE RemoteModule = GetModuleHandleRemote(hProcess, LibName);
    if (!RemoteModule)
    {
        return NULL;
    }

    UCHAR *Target = GetProcAddressRemote(hProcess, RemoteModule, ProcName);
    if (!Target)
    {
        return NULL;
    }

    LIBRARY_ITEM *Result = (LIBRARY_ITEM*)malloc(sizeof(LIBRARY_ITEM));
    Result->Base = (UCHAR*)RemoteModule;
    Result->Count = 1;
    Result->FileName = NULL;
    Result->LibName = StringCopy(LibName);
    Result->item = (FUNCTION_ITEM*)malloc(sizeof(FUNCTION_ITEM));

    FUNCTION_ITEM *Entry = Result->item;
    Entry->Name = StringCopy(ProcName);
    Entry->lib = Result;
    Entry->Offset = 0;
    Entry->Rip = (DWORD64)Target;
    Entry->RVA = 0;
    /* Remember the original bytes before they are overwritten by the hook. */
    ReadProcessMemoryEx(hProcess, (DWORD64)Target, Entry->Bytes, BYTES_SIZE);
    Entry->LibFilter = NULL;
    Entry->ProcFilter = NULL;
    Entry->LibLogger = NULL;
    Entry->ProcLogger = NULL;
    Entry->SyncTrampoline = NULL;
    Entry->AsyncTrampoline = NULL;

    InitSync(Entry, hProcess);
    SetHook(Entry, hProcess, MODE_SYNC_HOOK);
    return Result;
}
/*
 * Hooks a non-exported ("free") function located at the given RVA inside a
 * module that is already loaded in the target process. Returns a new
 * one-entry LIBRARY_ITEM with the sync hook installed, or NULL when the
 * module cannot be found.
 */
LIBRARY_ITEM* AddFunction(char *LibName, char *FunctionName, DWORD RVA, HANDLE hProcess)
{
    HMODULE RemoteModule = GetModuleHandleRemote(hProcess, LibName);
    if (!RemoteModule)
    {
        return NULL;
    }

    LIBRARY_ITEM *Result = (LIBRARY_ITEM*)malloc(sizeof(LIBRARY_ITEM));
    Result->Base = (UCHAR*)RemoteModule;
    Result->Count = 1;
    Result->FileName = NULL;
    Result->LibName = StringCopy(LibName);
    Result->item = (FUNCTION_ITEM*)malloc(sizeof(FUNCTION_ITEM));

    FUNCTION_ITEM *Entry = Result->item;
    Entry->Rip = (DWORD64)RemoteModule + RVA;
    /* Remember the original bytes before they are overwritten by the hook. */
    ReadProcessMemoryEx(hProcess, Entry->Rip, Entry->Bytes, BYTES_SIZE);
    Entry->Name = StringCopy(FunctionName);
    Entry->LibFilter = NULL;
    Entry->ProcFilter = NULL;
    Entry->LibLogger = NULL;
    Entry->ProcLogger = NULL;
    Entry->SyncTrampoline = NULL;
    Entry->AsyncTrampoline = NULL;
    Entry->Offset = 0;
    Entry->RVA = 0;
    Entry->lib = Result;

    InitSync(Entry, hProcess);
    SetHook(Entry, hProcess, MODE_SYNC_HOOK);
    return Result;
}
/*
 * Hooks the Count functions referenced by a table of function pointers
 * (e.g. a vtable) located at the given RVA inside a module that is already
 * loaded in the target process. Each entry is named "<ObjectName>::<index>".
 *
 * Returns a new LIBRARY_ITEM with Count entries, each with the sync hook
 * installed, or NULL when the module cannot be found.
 */
LIBRARY_ITEM* AddObject(char *LibName, char *ObjectName, DWORD RVA, DWORD Count, HANDLE hProcess)
{
    LIBRARY_ITEM *lib;
    HMODULE Module;
    UCHAR **Vtbl;
    char Number[12]; /* sign + up to 10 decimal digits + terminator */

    lib = NULL;
    Module = GetModuleHandleRemote(hProcess, LibName);
    if (Module)
    {
        lib = (LIBRARY_ITEM*)malloc(sizeof(LIBRARY_ITEM));
        lib->Base = (UCHAR*)Module;
        lib->LibName = StringCopy(LibName);
        lib->FileName = NULL;
        lib->Count = Count;
        lib->item = (FUNCTION_ITEM*)malloc(sizeof(FUNCTION_ITEM) * Count);

        /* Local copy of the pointer table read from the target process. */
        Vtbl = (UCHAR**)malloc(sizeof(UCHAR*) * Count);
        ReadProcessMemoryEx(hProcess, (DWORD64)((UCHAR*)Module + RVA),
                            (UCHAR*)Vtbl, sizeof(UCHAR*) * Count);

        for (DWORD i = 0; i < Count; ++i)
        {
            lib->item[i].Rip = (DWORD64)Vtbl[i];
            /* Remember the original bytes before they are overwritten by the hook. */
            ReadProcessMemoryEx(hProcess, (DWORD64)Vtbl[i], lib->item[i].Bytes, BYTES_SIZE);

            /*
             * Size the name from the actual index text: a fixed "+ 5" only
             * leaves room for a two-digit index.
             */
            itoa(i, Number, 10);
            lib->item[i].Name = (char*)malloc(strlen(ObjectName) + 2 + strlen(Number) + 1);
            strcpy(lib->item[i].Name, ObjectName);
            strcat(lib->item[i].Name, "::");
            strcat(lib->item[i].Name, Number);

            lib->item[i].LibFilter = NULL;
            lib->item[i].ProcFilter = NULL;
            lib->item[i].LibLogger = NULL;
            lib->item[i].ProcLogger = NULL;
            lib->item[i].SyncTrampoline = NULL;
            lib->item[i].AsyncTrampoline = NULL;
            lib->item[i].Offset = 0;
            lib->item[i].RVA = 0; /* same as AddExport/AddFunction; was left uninitialized */
            lib->item[i].lib = lib;

            InitSync(&lib->item[i], hProcess);
            SetHook(&lib->item[i], hProcess, MODE_SYNC_HOOK);
        }
        free(Vtbl);
    }
    return lib;
}
Functions to set hook:
/*
 * Switches one function to the requested hook state by writing the matching
 * BYTES_SIZE-byte pattern over its start in the target process: the original
 * bytes (MODE_NO_HOOK), the sync hook or the async hook. An unknown mode
 * raises an assertion failure and writes nothing. The new mode is recorded in
 * func->Mode in every case.
 */
void SetHook(FUNCTION_ITEM *func, HANDLE hProcess, DWORD Mode)
{
    UCHAR *Patch = NULL;

    if (Mode == MODE_NO_HOOK)
    {
        Patch = func->Bytes;
    }
    else if (Mode == MODE_SYNC_HOOK)
    {
        Patch = func->SyncHook;
    }
    else if (Mode == MODE_ASYNC_HOOK)
    {
        Patch = func->AsyncHook;
    }
    else
    {
        DbgRaiseAssertionFailure();
    }

    if (Patch)
    {
        WriteProcessMemoryEx(hProcess, func->Rip, Patch, BYTES_SIZE);
    }
    func->Mode = Mode;
}
/*
 * Switches every used slot of the library that is currently in FilterMode to
 * the hook state Mode.
 */
void SetHook(LIBRARY_ITEM *lib, DWORD FilterMode, HANDLE hProcess, DWORD Mode)
{
    for (DWORD Index = 0; Index < lib->Count; ++Index)
    {
        FUNCTION_ITEM *Entry = &lib->item[Index];

        if (!Entry->Name || Entry->Mode != FilterMode)
        {
            continue;
        }
        SetHook(Entry, hProcess, Mode);
    }
}
Function to check whether library is loaded by target process:
/*
 * Reports whether the library is currently loaded in the target process.
 * The first time the module is seen, its base address is recorded and the
 * absolute address (Rip) of every used slot is computed from its RVA. If the
 * module is seen again at a different base, an assertion failure is raised.
 * When the module is not loaded, the recorded base is cleared.
 */
BOOL IsLibraryLoaded(LIBRARY_ITEM *lib, HANDLE hProcess)
{
    HMODULE Loaded = GetModuleHandleRemote(hProcess, lib->LibName);

    if (!Loaded)
    {
        lib->Base = NULL;
        return FALSE;
    }

    if (lib->Base)
    {
        /* Already resolved earlier: the base is expected to be unchanged. */
        if (lib->Base != (UCHAR*)Loaded) DbgRaiseAssertionFailure();
        return TRUE;
    }

    lib->Base = (UCHAR*)Loaded;
    for (DWORD Index = 0; Index < lib->Count; ++Index)
    {
        FUNCTION_ITEM *Entry = &lib->item[Index];
        if (Entry->Name)
        {
            Entry->Rip = (DWORD64)(lib->Base + Entry->RVA);
        }
    }
    return TRUE;
}
Function to enumerate threads of target process and find the block:
/*
 * Looks for a thread of the target process that is currently blocked on a
 * sync hook, i.e. whose instruction pointer addresses bytes equal to the
 * sync hook pattern.
 *
 * hProcess - target process.
 * context  - receives Rip, Rsp, Rcx, Rdx, R8 and R9 of the blocked thread.
 * phThread - receives an open handle to the blocked thread; the caller is
 *            responsible for closing it.
 *
 * Returns TRUE when such a thread was found (context and *phThread are
 * valid), FALSE otherwise (neither output is touched).
 */
BOOL GetSyncRip(HANDLE hProcess, FUNCTION_CONTEXT *context, HANDLE *phThread)
{
    DWORD TID;
    DWORD PID;
    HANDLE h;
    BOOL b;
    HANDLE hThread;
    THREADENTRY32 te;
    CONTEXT lcContext;
    UCHAR Buffer[SYNC_HOOK_SIZE];
    UCHAR SyncHook[SYNC_HOOK_SIZE];

    GenerateSyncHook(NULL, SyncHook);
    b = FALSE;
    TID = 0;
    PID = GetProcessId(hProcess);

    /* The snapshot covers the threads of all processes; filter by owner below. */
    h = CreateToolhelp32Snapshot(TH32CS_SNAPTHREAD, 0);
    if (h != INVALID_HANDLE_VALUE)
    {
        te.dwSize = sizeof(te);
        if (Thread32First(h, &te))
        {
            do
            {
                /* Only trust th32OwnerProcessID if the entry is large enough to contain it. */
                if (te.dwSize >= (FIELD_OFFSET(THREADENTRY32, th32OwnerProcessID) +
                                  sizeof(te.th32OwnerProcessID)))
                {
                    if (PID == te.th32OwnerProcessID)
                    {
                        TID = te.th32ThreadID;
                        /* Thread handles take THREAD_* access rights, not PROCESS_* ones. */
                        hThread = OpenThread(THREAD_ALL_ACCESS, FALSE, TID);
                        if (!hThread) DbgRaiseAssertionFailure();
                        lcContext.ContextFlags = CONTEXT_ALL;
                        if (!GetThreadContext(hThread, &lcContext)) DbgRaiseAssertionFailure();
                        ReadProcessMemoryEx(hProcess, lcContext.Rip, Buffer, sizeof(Buffer));
                        if (!memcmp(Buffer, SyncHook, SYNC_HOOK_SIZE))
                        {
                            context->Rip = lcContext.Rip;
                            context->Rsp = lcContext.Rsp;
                            context->Rcx = lcContext.Rcx;
                            context->Rdx = lcContext.Rdx;
                            context->R8 = lcContext.R8;
                            context->R9 = lcContext.R9;
                            /* Ownership of the handle passes to the caller. */
                            *phThread = hThread;
                            b = TRUE;
                            break;
                        }
                        else CloseHandle(hThread);
                    }
                }
                /* Reset the size before asking for the next entry. */
                te.dwSize = sizeof(te);
            }
            while (Thread32Next(h, &te));
        }
        CloseHandle(h);
    }
    return b;
}
Code that implements sync
and async
waits runs in the separate thread.
Thread function for sync
wait
:
/*
 * Thread routine of a sync wait. Polls the target process every 100 ms for a
 * thread blocked on a sync hook. When one is found, the matching
 * FUNCTION_ITEM is looked up (first in g_slib, for libraries that are
 * currently loaded, then in g_lib):
 *   - if the function has no filter, or its filter accepts the call, the
 *     context, thread handle and function are stored in args and the routine
 *     returns (the thread handle stays open for the consumer of args);
 *   - otherwise the call is skipped and polling continues.
 * The routine also returns, without touching args->func, once args->Exit is
 * set. Always returns 0.
 */
DWORD WaitSyncThreadRoutine(SYNC_WAIT_THREAD_ARGS *args)
{
    BOOL b;
    HANDLE hThread;
    FUNCTION_ITEM *func;
    FUNCTION_CONTEXT context;
    std::list<LIBRARY_ITEM*>::iterator lib_i;

    while (TRUE)
    {
        /* Poll until a blocked thread shows up or a stop is requested. */
        b = FALSE;
        while (!args->Exit)
        {
            Sleep(100);
            b = GetSyncRip(args->hProcess, &context, &hThread);
            if (b) break;
        }

        /*
         * Nothing found means the inner loop ended because of args->Exit.
         * Leave, instead of spinning forever in the outer loop.
         */
        if (!b) break;

        func = NULL;
        for (lib_i = g_slib.begin(); lib_i != g_slib.end(); ++lib_i)
        {
            if (IsLibraryLoaded((*lib_i), args->hProcess))
            {
                func = LookupItemByRip((*lib_i), context.Rip);
                if (func) break;
            }
        }
        if (!func)
        {
            for (lib_i = g_lib.begin(); lib_i != g_lib.end(); ++lib_i)
            {
                func = LookupItemByRip((*lib_i), context.Rip);
                if (func) break;
            }
        }

        if (func)
        {
            if ((!func->ProcFilter) || (func->ProcFilter(func, args->hProcess, &context)))
            {
                /* Report the call; hThread is handed over through args. */
                memcpy(&args->context, &context, sizeof(context));
                args->hThread = hThread;
                args->func = func;
                printf("Found\n");
                break;
            }
            else
            {
                /* Filtered out: let the thread continue and keep waiting. */
                InitSync(func, args->hProcess);
                SkipItem(func, hThread);
                CloseHandle(hThread);
            }
        }
        else
        {
            /* A thread is blocked on a hook that none of the known libraries owns. */
            CloseHandle(hThread);
            DbgRaiseAssertionFailure();
        }
    }
    return 0;
}
Thread function for async
wait
:
/*
 * Thread routine of an async wait. Switches all sync-hooked functions to
 * async hooks, then serves requests arriving on args->Socket: each request is
 * a FUNCTION_CONTEXT sent by the target process; the matching FUNCTION_ITEM is
 * looked up, the call is logged (through the function's logger, or LogItem
 * when it has none) unless its filter rejects it, and the address of the
 * async trampoline is sent back.
 *
 * When args->Exit is set, the hooks are switched back to sync hooks once, and
 * the routine returns after the first 5-second period in which no request
 * arrives. It also returns when the socket fails or is closed by the peer.
 * Always returns 0.
 */
DWORD WaitAsyncThreadRoutine(ASYNC_WAIT_THREAD_ARGS *args)
{
    fd_set set;
    BOOL restored;
    timeval timeout;
    FUNCTION_ITEM *func;
    FUNCTION_CONTEXT context;
    int Received;
    int Result;
    std::list<LIBRARY_ITEM*>::iterator lib_i;

    restored = FALSE;

    /* Enter async mode. */
    for (lib_i = g_lib.begin(); lib_i != g_lib.end(); ++lib_i)
    {
        SetHook((*lib_i), MODE_SYNC_HOOK, args->hProcess, MODE_ASYNC_HOOK);
    }
    for (lib_i = g_slib.begin(); lib_i != g_slib.end(); ++lib_i)
    {
        if (IsLibraryLoaded((*lib_i), args->hProcess))
        {
            SetHook((*lib_i), MODE_SYNC_HOOK, args->hProcess, MODE_ASYNC_HOOK);
        }
    }

    while (TRUE)
    {
        /* Wait for the next request, checking the exit flag every 5 seconds. */
        while (TRUE)
        {
            if (args->Exit)
            {
                if (!restored)
                {
                    /* Leave async mode once; requests already in flight are still served below. */
                    for (lib_i = g_lib.begin(); lib_i != g_lib.end(); ++lib_i)
                    {
                        SetHook((*lib_i), MODE_ASYNC_HOOK, args->hProcess, MODE_SYNC_HOOK);
                    }
                    for (lib_i = g_slib.begin(); lib_i != g_slib.end(); ++lib_i)
                    {
                        if (IsLibraryLoaded((*lib_i), args->hProcess))
                        {
                            SetHook((*lib_i), MODE_ASYNC_HOOK, args->hProcess, MODE_SYNC_HOOK);
                        }
                    }
                    restored = TRUE;
                }
            }
            set.fd_count = 1;
            set.fd_array[0] = args->Socket;
            timeout.tv_sec = 5;
            timeout.tv_usec = 0;
            if (select(0, &set, NULL, NULL, &timeout)) break;
            if (args->Exit) goto end;
        }

        /*
         * Read one complete context. recv may deliver fewer bytes than
         * requested, and returns 0 when the peer has closed the connection;
         * in both failure cases stop instead of acting on a stale or partial
         * context.
         */
        Received = 0;
        while (Received < (int)sizeof(context))
        {
            Result = recv(args->Socket, (char*)&context + Received,
                          (int)sizeof(context) - Received, 0);
            if ((Result == SOCKET_ERROR) || (Result == 0)) goto end;
            Received += Result;
        }

        func = NULL;
        for (lib_i = g_slib.begin(); lib_i != g_slib.end(); ++lib_i)
        {
            if (IsLibraryLoaded((*lib_i), args->hProcess))
            {
                func = LookupItemByRip((*lib_i), context.Rip);
                if (func) break;
            }
        }
        if (!func)
        {
            for (lib_i = g_lib.begin(); lib_i != g_lib.end(); ++lib_i)
            {
                func = LookupItemByRip((*lib_i), context.Rip);
                if (func) break;
            }
        }

        if (func)
        {
            if ((!func->ProcFilter) || (func->ProcFilter(func, args->hProcess, &context)))
            {
                if (!func->ProcLogger) LogItem(func, args->hProcess, &context);
                else func->ProcLogger(func, args->hProcess, &context);
            }
            /* Reply with the trampoline address so the blocked call can continue. */
            send(args->Socket, (char*)&func->AsyncTrampoline, sizeof(func->AsyncTrampoline), 0);
        }
        else DbgRaiseAssertionFailure();
    }
end:
    return 0;
}
Functions to inject code in the target process:
/*
 * Loads a library into the target process: copies LoadLibraryThreadRoutine
 * and its argument block into the target, runs the routine on a remote
 * thread, waits for it to finish and reads the argument block back.
 * Returns the Module value found in the argument block afterwards.
 */
HMODULE LoadLibraryRemote(HANDLE hProcess, char *pLibName)
{
    LOAD_LIBRARY_THREAD_ARGS Args;
    void *RemoteCode;
    void *RemoteArgs;
    HANDLE Worker;

    /* NOTE(review): passes this process's LoadLibraryA address to the target - presumably relies on it being valid there too. */
    Args.pLoadLibraryA = LoadLibraryA;
    Args.Module = NULL;
    strcpy(Args.LibName, pLibName);

    RemoteCode = VirtualAllocEx(hProcess, NULL, LOAD_LIBRARY_THREAD_ROUTINE_SIZE,
                                MEM_COMMIT, PAGE_EXECUTE_READWRITE);
    RemoteArgs = VirtualAllocEx(hProcess, NULL, sizeof(Args), MEM_COMMIT, PAGE_READWRITE);

    WriteProcessMemoryEx(hProcess, (DWORD64)RemoteCode,
                         (UCHAR*)LoadLibraryThreadRoutine, LOAD_LIBRARY_THREAD_ROUTINE_SIZE);
    WriteProcessMemoryEx(hProcess, (DWORD64)RemoteArgs, (UCHAR*)&Args, sizeof(Args));

    Worker = CreateRemoteThread(hProcess, NULL, 0, (LPTHREAD_START_ROUTINE)RemoteCode,
                                RemoteArgs, 0, NULL);
    WaitForSingleObject(Worker, INFINITE);
    CloseHandle(Worker);

    /* The remote routine reports its result through the argument block. */
    ReadProcessMemoryEx(hProcess, (DWORD64)RemoteArgs, (UCHAR*)&Args, sizeof(Args));

    VirtualFreeEx(hProcess, RemoteCode, 0, MEM_RELEASE);
    VirtualFreeEx(hProcess, RemoteArgs, 0, MEM_RELEASE);
    return Args.Module;
}
/*
 * Queries a module handle inside the target process: copies
 * GetModuleHandleThreadRoutine and its argument block into the target, runs
 * the routine on a remote thread, waits for it to finish and reads the
 * argument block back. Returns the Module value found in the argument block
 * afterwards (it is initialised to NULL before the call).
 */
HMODULE GetModuleHandleRemote(HANDLE hProcess, char *pLibName)
{
    GET_MODULE_HANDLE_THREAD_ARGS Args;
    void *RemoteCode;
    void *RemoteArgs;
    HANDLE Worker;

    Args.pGetModuleHandleA = GetModuleHandleA;
    Args.Module = NULL;
    strcpy(Args.LibName, pLibName);

    RemoteCode = VirtualAllocEx(hProcess, NULL, GET_MODULE_HANDLE_THREAD_ROUTINE_SIZE,
                                MEM_COMMIT, PAGE_EXECUTE_READWRITE);
    RemoteArgs = VirtualAllocEx(hProcess, NULL, sizeof(Args), MEM_COMMIT, PAGE_READWRITE);

    WriteProcessMemoryEx(hProcess, (DWORD64)RemoteCode,
                         (UCHAR*)GetModuleHandleThreadRoutine, GET_MODULE_HANDLE_THREAD_ROUTINE_SIZE);
    WriteProcessMemoryEx(hProcess, (DWORD64)RemoteArgs, (UCHAR*)&Args, sizeof(Args));

    Worker = CreateRemoteThread(hProcess, NULL, 0, (LPTHREAD_START_ROUTINE)RemoteCode,
                                RemoteArgs, 0, NULL);
    WaitForSingleObject(Worker, INFINITE);
    CloseHandle(Worker);

    /* The remote routine reports its result through the argument block. */
    ReadProcessMemoryEx(hProcess, (DWORD64)RemoteArgs, (UCHAR*)&Args, sizeof(Args));

    VirtualFreeEx(hProcess, RemoteCode, 0, MEM_RELEASE);
    VirtualFreeEx(hProcess, RemoteArgs, 0, MEM_RELEASE);
    return Args.Module;
}
/*
 * Resolves an exported function inside the target process: copies
 * GetProcAddressThreadRoutine and its argument block into the target, runs
 * the routine on a remote thread, waits for it to finish and reads the
 * argument block back. The returned address is then passed through
 * GetTargetAddressFromStub together with the first BYTES_SIZE bytes found
 * there.
 *
 * Returns the resolved address in the target process, or NULL when the
 * remote lookup did not produce an address.
 */
UCHAR* GetProcAddressRemote(HANDLE hProcess, HMODULE hModule, char *pProcName)
{
    DWORD64 Rip;
    UCHAR Bytes[BYTES_SIZE];
    void *Proc, *Data;
    HANDLE hThread;
    GET_PROC_ADDRESS_THREAD_ARGS thread_args;

    thread_args.pGetProcAddress = GetProcAddress;
    thread_args.Module = hModule;
    thread_args.Proc = NULL;
    strcpy(thread_args.ProcName, pProcName);

    Proc = VirtualAllocEx(hProcess, NULL,
                          GET_PROCESS_ADDRESS_THREAD_ROUTINE_SIZE, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
    Data = VirtualAllocEx(hProcess, NULL, sizeof(thread_args), MEM_COMMIT, PAGE_READWRITE);
    WriteProcessMemoryEx(hProcess, (DWORD64)Proc,
                         (UCHAR*)GetProcAddressThreadRoutine, GET_PROCESS_ADDRESS_THREAD_ROUTINE_SIZE);
    WriteProcessMemoryEx(hProcess, (DWORD64)Data, (UCHAR*)&thread_args, sizeof(thread_args));

    hThread = CreateRemoteThread(hProcess, NULL, 0, (LPTHREAD_START_ROUTINE)Proc, Data, 0, NULL);
    WaitForSingleObject(hThread, INFINITE);
    CloseHandle(hThread);

    /* The remote routine reports its result through the argument block. */
    ReadProcessMemoryEx(hProcess, (DWORD64)Data, (UCHAR*)&thread_args, sizeof(thread_args));
    VirtualFreeEx(hProcess, Proc, 0, MEM_RELEASE);
    VirtualFreeEx(hProcess, Data, 0, MEM_RELEASE);

    /*
     * Export not found: report that to the caller instead of reading memory
     * at address 0 and resolving a "stub" from an undefined buffer.
     */
    if (!thread_args.Proc) return NULL;

    Rip = (DWORD64)thread_args.Proc;
    ReadProcessMemoryEx(hProcess, Rip, Bytes, BYTES_SIZE);
    Rip = GetTargetAddressFromStub(hProcess, Bytes, Rip);
    return (UCHAR*)Rip;
}
/*
 * Unloads a module from the target process: copies FreeLibraryThreadRoutine
 * and its argument block into the target, runs the routine on a remote
 * thread and waits for it to finish. The argument block is read back but its
 * contents are not used.
 */
void FreeLibraryRemote(HANDLE hProcess, HMODULE hModule)
{
    FREE_LIBRARY_THREAD_ARGS Args;
    void *RemoteCode;
    void *RemoteArgs;
    HANDLE Worker;

    Args.pFreeLibrary = FreeLibrary;
    Args.Module = hModule;

    RemoteCode = VirtualAllocEx(hProcess, NULL, FREE_LIBRARY_THREAD_ROUTINE_SIZE,
                                MEM_COMMIT, PAGE_EXECUTE_READWRITE);
    RemoteArgs = VirtualAllocEx(hProcess, NULL, sizeof(Args), MEM_COMMIT, PAGE_READWRITE);

    WriteProcessMemoryEx(hProcess, (DWORD64)RemoteCode, (UCHAR*)FreeLibraryThreadRoutine,
                         FREE_LIBRARY_THREAD_ROUTINE_SIZE);
    WriteProcessMemoryEx(hProcess, (DWORD64)RemoteArgs, (UCHAR*)&Args, sizeof(Args));

    Worker = CreateRemoteThread(hProcess, NULL, 0, (LPTHREAD_START_ROUTINE)RemoteCode,
                                RemoteArgs, 0, NULL);
    WaitForSingleObject(Worker, INFINITE);
    CloseHandle(Worker);

    ReadProcessMemoryEx(hProcess, (DWORD64)RemoteArgs, (UCHAR*)&Args, sizeof(Args));

    VirtualFreeEx(hProcess, RemoteCode, 0, MEM_RELEASE);
    VirtualFreeEx(hProcess, RemoteArgs, 0, MEM_RELEASE);
}
Now let's turn our attention to a special DLL injected in the target process to implement async
hooks. The exported function:
; Entry point reached from an async hook. On entry the top of the stack holds
; the address pushed by the hook's call instruction (hooked function + 5).
; Builds a FUNCTION_CONTEXT on the stack, passes it to LogImp and finally
; jumps to the trampoline address that LogImp returns, with rcx, rdx, r8, r9
; and rsp restored to their values at function entry.
; NOTE(review): only the four integer argument registers are preserved across
; the call to LogImp; other volatile registers (e.g. xmm0-xmm3) are not saved.
PUBLIC Log
EXTERN LogImp : PROC
.code
Log PROC
pop rax ; get function rip + 5
sub rax, 5 ; get function rip
sub rsp, 48 ; make space for context structure
mov qword ptr [rsp], rax ; store rip
lea rax, [rsp + 48] ; get function rsp
mov qword ptr [rsp + 8], rax ; store rsp
mov qword ptr [rsp + 16], rcx ; store rcx
mov qword ptr [rsp + 24], rdx ; store rdx
mov qword ptr [rsp + 32], r8 ; store r8
mov qword ptr [rsp + 40], r9 ; store r9
mov rcx, rsp ; one parameter
; rsp here equals the function-entry rsp minus 48, i.e. 8 modulo 16.
; Reserve 32 bytes of shadow space plus 8 bytes of padding so that rsp is
; 16-byte aligned at the call, as the x64 calling convention requires.
sub rsp, 40 ; shadow space + alignment padding
call LogImp ; returns address of trampoline
add rsp, 40 ; shadow space + alignment padding
mov r9, qword ptr [rsp + 40] ; restore r9
mov r8, qword ptr [rsp + 32] ; restore r8
mov rdx, qword ptr [rsp + 24] ; restore rdx
mov rcx, qword ptr [rsp + 16] ; restore rcx
add rsp, 48 ; restore rsp
jmp rax ; jump to trampoline
Log ENDP
END
The async
hook looks like this:
; Bytes placed at the start of a hooked function in async mode (schematic).
call $+5 ; push the address of the next instruction (hooked function + 5)
sub rsp, 8 ; reserve a stack slot for the 64-bit address of Log
mov dword ptr [rsp], Log ; low dword of Log address
mov dword ptr [rsp + 4], Log >> 32 ; high dword of Log address
ret ; pop that slot into rip, i.e. jump to Log
So the first two instructions of the Log function put the address of the hooked function in the rax register.
Now LogImp
function:
/*
 * Called by the Log routine inside the target process. Sends the captured
 * context to the monitoring tool over g_Socket and waits for the address of
 * the async trampoline to continue with. The exchange is serialised with
 * g_Section so that request/reply pairs of different threads do not
 * interleave.
 *
 * Returns the trampoline address received from the tool, or 0 when the
 * exchange failed (send error, receive error or connection closed). In that
 * case the caller jumps to address 0, which faults deterministically instead
 * of transferring control to an uninitialised value.
 */
extern "C" DWORD64 LogImp(FUNCTION_CONTEXT *context)
{
    DWORD64 Trampoline = 0;
    char *Cursor;
    int Remaining;
    int Count;

    EnterCriticalSection(&g_Section);
    if (send(g_Socket, (const char*)context, sizeof(*context), 0) == (int)sizeof(*context))
    {
        /* recv may deliver the reply in pieces; collect all of it. */
        Cursor = (char*)&Trampoline;
        Remaining = (int)sizeof(Trampoline);
        while (Remaining > 0)
        {
            Count = recv(g_Socket, Cursor, Remaining, 0);
            if (Count <= 0)
            {
                /* error or connection closed: do not return a partial address */
                Trampoline = 0;
                break;
            }
            Cursor += Count;
            Remaining -= Count;
        }
    }
    LeaveCriticalSection(&g_Section);
    return Trampoline;
}
Basically, that's it! Thank you for reading.