Introduction
A few years ago I was considering creating my own byte-code interpreted language. One of the goals for the language was to be able to call native code as directly as possible from the byte-code. I think the idea was that the byte-code compiler would set up generic call instructions, and the interpreter would determine whether the destination address was inside the byte-code memory block. If it wasn't, it would use the CNativeInvoke class, which I am now sharing with all of you - perhaps someone can take this and build the byte-code
interpreted language that I didn't write.
CNativeInvoke
CNativeInvoke exposes methods for defining whether a generated call is __cdecl or __stdcall, setting a "this" pointer for C++ class/interface calls, adding parameters, and executing the calls (with or without return values). CNativeInvoke makes use of the Windows API VirtualAlloc() to allocate memory for the dynamic code block and VirtualProtect() to mark that memory executable.
The comments in the code show you which assembly instructions are being written into memory. I don't claim to be an assembly expert at all, so it took me some time to figure out which opcodes and such to use. Visual Studio's mapping from C++ to ASM was helpful during this research.
NativeInvoke.h
#pragma once
// Size in bytes of the buffer requested from VirtualAlloc() for the generated code;
// also the limit used by the emitters' bounds checks.
#define SIZE_NATIVE_INVOKE_PAGE 1024
// Upper bound on the number of parameters that AddParam8/16/32() will accept.
#define MAX_NATIVE_PARAMS 63
// Builds a small x86 thunk in a VirtualAlloc()'d buffer that pushes the queued
// parameters, calls an arbitrary native address and optionally stores the
// value returned in EAX (or EDX:EAX) to caller-supplied memory.
//
// Typical use: Initialize(), AddParamNN() for each argument (the last one
// added becomes the callee's first argument), then Call(). The queued
// parameters stay in place after Call() until Reset() is invoked.
class CNativeInvoke
{
private:
    LPBYTE m_pbPage;            // Buffer holding the generated code (NULL until Initialize())
    INT m_nWritePtr;            // Offset in m_pbPage of the next byte to emit
    INT m_cParams;              // Number of parameters queued since the last Reset()
    BOOL m_fStackCleanup;       // TRUE: the thunk pops the parameters after the call (__cdecl)
                                // FALSE: the callee pops them (__stdcall / thiscall)
    DWORD_PTR m_dwThisPtr;      // When non-zero, loaded into ECX before the call

    // Not copyable: the object owns m_pbPage and releases it in the destructor,
    // so a member-wise copy would release the same page twice. Declared but
    // intentionally left undefined.
    CNativeInvoke (const CNativeInvoke&);
    CNativeInvoke& operator= (const CNativeInvoke&);

public:
    CNativeInvoke (BOOL fStackCleanup = TRUE , DWORD_PTR dwThisPtr = 0);
    ~CNativeInvoke ();

    // Allocates the code buffer; must succeed before any other method is used.
    HRESULT Initialize (VOID);

    VOID SetStackCleanup (BOOL fStackCleanup);
    VOID SetThisPtr (DWORD_PTR dwThisPtr);

    // Discards all queued parameters and rewrites the thunk prologue.
    VOID Reset (VOID);

    // Queue one 32-bit stack slot; 8- and 16-bit values are zero-extended.
    // Each returns E_FAIL when the buffer or the parameter limit is exhausted.
    HRESULT AddParam8 (BYTE bParam);
    HRESULT AddParam16 (WORD wParam);
    HRESULT AddParam32 (DWORD dwParam);

    // Invoke the function at dwPtr with the queued parameters. The overloads
    // store EAX to *pdwReturn, or EAX/EDX to the low/high halves of *pdwlReturn.
    HRESULT Call (DWORD_PTR dwPtr);
    HRESULT Call (DWORD_PTR dwPtr, DWORD* pdwReturn);
    HRESULT Call (DWORD_PTR dwPtr, DWORDLONG* pdwlReturn);

protected:
    VOID EmitCall (DWORD_PTR dwPtr);
    VOID EmitOpCode (BYTE bOpCode, DWORD dwValue);
    VOID EmitOpCode (BYTE bOpCode, BYTE bOperand, DWORD dwValue);
    HRESULT Execute (VOID);
};
NativeInvoke.cpp
#include <windows.h>
#include "Assert.h" // Change this to include your own Assert(x) macro
#include "NativeInvoke.h"
// Converts a pointer to a DWORD (via DWORD_PTR) so that it can be passed to
// EmitOpCode() as a 32-bit value.
#define DWORDPTR(p) (DWORD)(DWORD_PTR)(p)
// Records the calling-convention settings; the code buffer itself is not
// allocated until Initialize() is called.
CNativeInvoke::CNativeInvoke (BOOL fStackCleanup, DWORD_PTR dwThisPtr) :
    m_pbPage(NULL),
    m_nWritePtr(0),
    m_cParams(0),
    m_fStackCleanup(fStackCleanup),
    m_dwThisPtr(dwThisPtr)
{
}
// Releases the code buffer allocated by Initialize(), if any.
CNativeInvoke::~CNativeInvoke ()
{
    if(m_pbPage)
    {
        // With MEM_RELEASE the size argument must be 0; passing the
        // allocation size makes VirtualFree() fail and leaks the region.
        VirtualFree(m_pbPage,0,MEM_RELEASE);
        m_pbPage = NULL;
    }
}
// Allocates the read/write/execute buffer for the generated code and writes
// the thunk prologue into it.
// Returns S_OK on success, otherwise the VirtualAlloc() failure as an HRESULT.
HRESULT CNativeInvoke::Initialize (VOID)
{
    Assert(NULL == m_pbPage);

    m_pbPage = (LPBYTE)VirtualAlloc(NULL,
                                    SIZE_NATIVE_INVOKE_PAGE,
                                    MEM_COMMIT | MEM_RESERVE,
                                    PAGE_EXECUTE_READWRITE);
    if(NULL == m_pbPage)
        return HRESULT_FROM_WIN32(GetLastError());

    Reset();
    return S_OK;
}
// Selects whether the generated thunk removes the pushed parameters from the
// stack after the call (TRUE) or leaves that to the callee (FALSE).
VOID CNativeInvoke::SetStackCleanup (BOOL fStackCleanup)
{
    this->m_fStackCleanup = fStackCleanup;
}
// Sets the value loaded into ECX before the call; 0 means no "this" pointer
// is loaded.
VOID CNativeInvoke::SetThisPtr (DWORD_PTR dwThisPtr)
{
    this->m_dwThisPtr = dwThisPtr;
}
// Rewrites the thunk prologue at the start of the buffer and forgets every
// parameter queued so far.
VOID CNativeInvoke::Reset (VOID)
{
    // push ebp
    // mov ebp, esp
    static const unsigned char c_rgbPrologue[] = { 0x55, 0x8B, 0xEC };
    const int cbPrologue = (int)sizeof(c_rgbPrologue);

    for(int i = 0; i < cbPrologue; i++)
        m_pbPage[i] = c_rgbPrologue[i];

    m_cParams = 0;
    m_nWritePtr = cbPrologue;
}
// Queues an 8-bit parameter. It occupies a full 32-bit stack slot with the
// upper 24 bits zero, which is exactly what pushing the zero-extended value
// as a DWORD produces.
// Returns S_OK, or E_FAIL when the buffer or parameter limit is exhausted.
HRESULT CNativeInvoke::AddParam8 (BYTE bParam)
{
    return AddParam32((DWORD)bParam);
}
// Queues a 16-bit parameter. It occupies a full 32-bit stack slot with the
// upper 16 bits zero, which is exactly what pushing the zero-extended value
// as a DWORD produces.
// Returns S_OK, or E_FAIL when the buffer or parameter limit is exhausted.
HRESULT CNativeInvoke::AddParam16 (WORD wParam)
{
    return AddParam32((DWORD)wParam);
}
// Queues a 32-bit parameter by emitting "push imm32" into the thunk.
// Returns S_OK, or E_FAIL when the buffer or parameter limit is exhausted.
HRESULT CNativeInvoke::AddParam32 (DWORD dwParam)
{
    const bool fHaveRoom = (m_nWritePtr + 5 < SIZE_NATIVE_INVOKE_PAGE);
    if(!fHaveRoom || m_cParams >= MAX_NATIVE_PARAMS)
        return E_FAIL;

    EmitOpCode(0x68, dwParam);      // push dwParam
    m_cParams++;
    return S_OK;
}
// Calls the function at dwPtr with the queued parameters and ignores whatever
// it returns.
// Returns E_FAIL when the thunk would not fit in the buffer, otherwise the
// result of Execute().
HRESULT CNativeInvoke::Call (DWORD_PTR dwPtr)
{
    // Make sure the call sequence and the epilogue fit in the buffer.
    if(m_nWritePtr + 17 >= SIZE_NATIVE_INVOKE_PAGE)
        return E_FAIL;

    // Remember where the parameter pushes end so they can be reused.
    const INT nParamsEnd = m_nWritePtr;

    EmitCall(dwPtr);
    m_pbPage[m_nWritePtr++] = 0x5D;     // pop ebp
    m_pbPage[m_nWritePtr++] = 0xC3;     // ret

    const HRESULT hr = Execute();
    m_nWritePtr = nParamsEnd;
    return hr;
}
// Calls the function at dwPtr with the queued parameters and has the thunk
// store EAX into *pdwReturn.
// Returns E_FAIL when the thunk would not fit in the buffer, otherwise the
// result of Execute().
HRESULT CNativeInvoke::Call (DWORD_PTR dwPtr, DWORD* pdwReturn)
{
    // Make sure the call sequence, the store and the epilogue fit.
    if(m_nWritePtr + 22 >= SIZE_NATIVE_INVOKE_PAGE)
        return E_FAIL;

    // Remember where the parameter pushes end so they can be reused.
    const INT nParamsEnd = m_nWritePtr;

    EmitCall(dwPtr);
    EmitOpCode(0xA3, DWORDPTR(pdwReturn));      // mov [pdwReturn], eax
    m_pbPage[m_nWritePtr++] = 0x5D;             // pop ebp
    m_pbPage[m_nWritePtr++] = 0xC3;             // ret

    const HRESULT hr = Execute();
    m_nWritePtr = nParamsEnd;
    return hr;
}
// Calls the function at dwPtr with the queued parameters and has the thunk
// store EAX into the low DWORD and EDX into the high DWORD of *pdwlReturn.
// Returns E_FAIL when the thunk would not fit in the buffer, otherwise the
// result of Execute().
HRESULT CNativeInvoke::Call (DWORD_PTR dwPtr, DWORDLONG* pdwlReturn)
{
    // Make sure the call sequence, both stores and the epilogue fit.
    if(m_nWritePtr + 28 >= SIZE_NATIVE_INVOKE_PAGE)
        return E_FAIL;

    DWORD* pdwLow = (DWORD*)pdwlReturn;
    DWORD* pdwHigh = pdwLow + 1;

    // Remember where the parameter pushes end so they can be reused.
    const INT nParamsEnd = m_nWritePtr;

    EmitCall(dwPtr);
    EmitOpCode(0xA3, DWORDPTR(pdwLow));             // mov [pdwLow], eax
    EmitOpCode(0x89, 0x15, DWORDPTR(pdwHigh));      // mov [pdwHigh], edx
    m_pbPage[m_nWritePtr++] = 0x5D;                 // pop ebp
    m_pbPage[m_nWritePtr++] = 0xC3;                 // ret

    const HRESULT hr = Execute();
    m_nWritePtr = nParamsEnd;
    return hr;
}
// Emits the call sequence at the current write position:
//   [mov ecx, this]      only when a "this" pointer has been set
//   mov eax, dwPtr
//   call eax
//   [add esp, cbParams]  only when parameters were pushed and the thunk, not
//                        the callee, is responsible for removing them
VOID CNativeInvoke::EmitCall (DWORD_PTR dwPtr)
{
    // Worst case is 5 + 5 + 2 + 6 = 18 bytes. The parameter cap keeps
    // m_nWritePtr at or below 3 + MAX_NATIVE_PARAMS * 5, so this always fits.
    Assert(m_nWritePtr + 18 < SIZE_NATIVE_INVOKE_PAGE);

    if(0 != m_dwThisPtr)
        EmitOpCode(0xB9,(DWORD)m_dwThisPtr);    // mov ecx, this

    EmitOpCode(0xB8,(DWORD)dwPtr);              // mov eax, dwPtr
    m_pbPage[m_nWritePtr] = 0xFF;               // call eax
    m_pbPage[m_nWritePtr + 1] = 0xD0;
    m_nWritePtr += 2;

    if(0 < m_cParams && m_fStackCleanup)
    {
        const DWORD cbParams = (DWORD)(m_cParams * sizeof(DWORD));
        if(cbParams <= 0x7F)
        {
            // add esp, imm8
            m_pbPage[m_nWritePtr] = 0x83;
            m_pbPage[m_nWritePtr + 1] = 0xC4;
            m_pbPage[m_nWritePtr + 2] = (BYTE)cbParams;
            m_nWritePtr += 3;
        }
        else
        {
            // The imm8 form sign-extends its operand, so 128..252 bytes
            // (32..63 parameters) would be subtracted from ESP instead of
            // added. Use the imm32 form for those: add esp, imm32
            EmitOpCode(0x81, 0xC4, cbParams);
        }
    }
}
// Emits a one-byte opcode followed by a 32-bit value in little-endian byte
// order and advances the write position by five bytes.
VOID CNativeInvoke::EmitOpCode (BYTE bOpCode, DWORD dwValue)
{
    Assert(m_nWritePtr + 5 < SIZE_NATIVE_INVOKE_PAGE);

    m_pbPage[m_nWritePtr++] = bOpCode;

    // Least significant byte first.
    for(int nShift = 0; nShift < 32; nShift += 8)
        m_pbPage[m_nWritePtr++] = (BYTE)(dwValue >> nShift);
}
// Emits a one-byte opcode, a one-byte operand and a 32-bit little-endian
// value, advancing the write position by six bytes.
VOID CNativeInvoke::EmitOpCode (BYTE bOpCode, BYTE bOperand, DWORD dwValue)
{
    Assert(m_nWritePtr + 6 < SIZE_NATIVE_INVOKE_PAGE);

    m_pbPage[m_nWritePtr++] = bOpCode;

    // The remaining five bytes have the same layout the two-argument overload
    // writes: one byte followed by the 32-bit value.
    EmitOpCode(bOperand, dwValue);
}
// Runs the thunk that has been emitted into the buffer.
// The emitted bytes are switched to PAGE_EXECUTE for the duration of the call
// and the previous protection is restored afterwards.
// Returns S_OK once the thunk has run, otherwise the failure of
// VirtualProtect() or FlushInstructionCache() as an HRESULT.
HRESULT CNativeInvoke::Execute (VOID)
{
    DWORD dwOldProtect;
    if(!VirtualProtect(m_pbPage, m_nWritePtr, PAGE_EXECUTE, &dwOldProtect))
        return HRESULT_FROM_WIN32(GetLastError());

    typedef VOID (WINAPI* PFNTHUNK)(VOID);
    PFNTHUNK pfnThunk = (PFNTHUNK)m_pbPage;

    HRESULT hr = S_OK;
    if(FlushInstructionCache(GetCurrentProcess(), m_pbPage, m_nWritePtr))
        pfnThunk();
    else
        hr = HRESULT_FROM_WIN32(GetLastError());

    // Make the buffer writable again so further code can be emitted.
    VirtualProtect(m_pbPage, m_nWritePtr, dwOldProtect, &dwOldProtect);
    return hr;
}
Using the code
Before trying to compile this code, make sure to provide a definition for the Assert(x) macro used in NativeInvoke.cpp.
Be sure to build this code as x86. It will run on 64-bit Windows using WoW64, but it has to be built x86 (32-bit) itself.
Once you've got this code compiling, using it is easy. Here are some examples:
// Sample target with a 64-bit return value: prints both arguments and returns
// their product.
DWORDLONG MyTestFunction (int a, int b)
{
    printf("MyTestFunction(): a = %d, b = %d\n",a,b);

    const DWORDLONG dwlLeft = (DWORDLONG)a;
    const DWORDLONG dwlRight = (DWORDLONG)b;
    return dwlLeft * dwlRight;
}
// Sample target with a 32-bit return value: prints both arguments and returns
// a - b.
DWORD MyOtherTestFunction (int a, int b)
{
    printf("MyOtherTestFunction(): a = %d, b = %d\n",a,b);

    const int nDifference = a - b;
    return (DWORD)nDifference;
}
// Minimal interface used by the example to demonstrate calling a virtual
// member function through CNativeInvoke.
interface ITest
{
// Implemented by CTest below; invoked in main() with the object set via SetThisPtr().
virtual VOID Whatever (int a, int b) = 0;
};
class CTest : public ITest
{
protected:
INT m_n;
public:
CTest () { m_n = 10; }
~CTest () {}
VOID Whatever (int a, int b)
{
printf("CTest::Whatever(): m_n = %d\n",m_n);
printf("a = %d, b = %d\n",a,b);
}
};
// Example driver: pushes two parameters once, then reuses them for a call
// returning 64 bits, a call returning 32 bits, and a virtual member call.
// Returns 0 when every step succeeded and 1 otherwise.
INT main (INT cArgs, __in_ecount(cArgs) PSTR* ppszArgs)
{
    CNativeInvoke Native;
    CTest Test;
    ITest* lpTest = &Test;

    // The union reinterprets the pointer-to-member as a raw 32-bit address
    // that can be handed to Call().
    union
    {
        VOID (ITest::*pWhatever)(int,int);
        DWORD dwWhatever;
    };
    pWhatever = &ITest::Whatever;

    bool fOk = false;
    if(SUCCEEDED(Native.Initialize()))
    {
        DWORDLONG dwResult64 = 0;
        DWORD dwResult32 = 0;

        // The last parameter added becomes the callee's first argument.
        fOk = SUCCEEDED(Native.AddParam32(150000)) &&
              SUCCEEDED(Native.AddParam32(200000));

        if(fOk)
        {
            fOk = SUCCEEDED(Native.Call((DWORD)MyTestFunction, &dwResult64));
            if(fOk)
                printf("Result: %I64u\r\n", dwResult64);
        }

        if(fOk)
        {
            fOk = SUCCEEDED(Native.Call((DWORD)MyOtherTestFunction, &dwResult32));
            if(fOk)
                printf("Result: %u\r\n", dwResult32);
        }

        if(fOk)
        {
            // The member function removes its own arguments from the stack,
            // so the thunk must not do it again.
            Native.SetThisPtr((DWORD_PTR)lpTest);
            Native.SetStackCleanup(FALSE);
            fOk = SUCCEEDED(Native.Call(dwWhatever));
        }
    }

    // Report failures instead of silently printing results that were never written.
    if(!fOk)
        printf("CNativeInvoke example failed\n");

    return fOk ? 0 : 1;
}
If everything compiles and runs correctly, you'll see this output:
MyTestFunction(): a = 200000, b = 150000
Result: 30000000000
MyOtherTestFunction(): a = 200000, b = 150000
Result: 50000
CTest::Whatever(): m_n = 10
a = 200000, b = 150000
Next Steps?
I'd love to see someone incorporate this into a byte-code interpreter or something similar. It'd also be great
if someone could re-implement CNativeInvoke's methods to generate x64 or ARM instructions. Using pre-processor macros internally
to select the implementation based on the platform would be great!