From 11cb62c1ed07e0c07c236739f816107f3f78bdf3 Mon Sep 17 00:00:00 2001 From: flobernd Date: Sat, 1 Nov 2014 03:42:37 +0100 Subject: [PATCH] Improved formatter performance Added symbol resolver example --- Examples/PerformanceTest/Main.cpp | 42 +++-- Examples/SymbolResolver/Main.cpp | 153 +++++++++++++++++- .../VXInstructionFormatter.cpp | 83 +++++----- .../VXInstructionFormatter.h | 1 + .../VXSymbolResolver.cpp | 9 +- 5 files changed, 231 insertions(+), 57 deletions(-) diff --git a/Examples/PerformanceTest/Main.cpp b/Examples/PerformanceTest/Main.cpp index 648b182..36f1383 100644 --- a/Examples/PerformanceTest/Main.cpp +++ b/Examples/PerformanceTest/Main.cpp @@ -40,30 +40,38 @@ using namespace Disassembler; void testDecodingAndFormatting(uintptr_t baseAddress, PIMAGE_NT_HEADERS ntHeaders) { + uint32_t sizeTotal = 0; VXInstructionInfo info; VXInstructionDecoder decoder; VXIntelInstructionFormatter formatter; +#ifdef _M_X64 + decoder.setDisassemblerMode(VXDisassemblerMode::M32BIT); +#else decoder.setDisassemblerMode(VXDisassemblerMode::M64BIT); - PIMAGE_SECTION_HEADER sectionHeader = - reinterpret_cast( - reinterpret_cast(ntHeaders) + sizeof(IMAGE_NT_HEADERS) - + ntHeaders->FileHeader.SizeOfOptionalHeader - sizeof(IMAGE_OPTIONAL_HEADER)); - // Decode and format all code sections - for (unsigned int i = 0; i < ntHeaders->FileHeader.NumberOfSections; ++i) +#endif + while (sizeTotal < 1024 * 1024 * 50) { - if (sectionHeader->Characteristics & IMAGE_SCN_CNT_CODE) + PIMAGE_SECTION_HEADER sectionHeader = + reinterpret_cast( + reinterpret_cast(ntHeaders) + sizeof(IMAGE_NT_HEADERS) + + ntHeaders->FileHeader.SizeOfOptionalHeader - sizeof(IMAGE_OPTIONAL_HEADER)); + // Decode and format all code sections + for (unsigned int i = 0; i < ntHeaders->FileHeader.NumberOfSections; ++i) { - std::cout << sectionHeader->SizeOfRawData / 1024 << " KiB" << std::endl; - VXMemoryDataSource input(reinterpret_cast( - baseAddress + sectionHeader->VirtualAddress), sectionHeader->SizeOfRawData); - decoder.setDataSource(&input); - decoder.setInstructionPointer(baseAddress + sectionHeader->VirtualAddress); - while (decoder.decodeInstruction(info)) + if (sectionHeader->Characteristics & IMAGE_SCN_CNT_CODE) { - formatter.formatInstruction(info); + VXMemoryDataSource input(reinterpret_cast( + baseAddress + sectionHeader->VirtualAddress), sectionHeader->SizeOfRawData); + decoder.setDataSource(&input); + decoder.setInstructionPointer(baseAddress + sectionHeader->VirtualAddress); + while (decoder.decodeInstruction(info)) + { + formatter.formatInstruction(info); + } + sizeTotal += sectionHeader->SizeOfRawData; } + sectionHeader++; } - sectionHeader++; } } @@ -87,8 +95,8 @@ int _tmain(int argc, _TCHAR* argv[]) return 1; } - double pcFrequency = 0.0; - uint64_t pcStart = 0; + double pcFrequency; + uint64_t pcStart; LARGE_INTEGER li; // Start the performance counter diff --git a/Examples/SymbolResolver/Main.cpp b/Examples/SymbolResolver/Main.cpp index afe5a5f..ec71f95 100644 --- a/Examples/SymbolResolver/Main.cpp +++ b/Examples/SymbolResolver/Main.cpp @@ -30,9 +30,160 @@ **************************************************************************************************/ #include +#include +#include +#include +#include "VXDisassembler.h" +#include + +using namespace Verteron; +using namespace Disassembler; int _tmain(int argc, _TCHAR* argv[]) { - // TODO: + // Find module base in memory + void *moduleBase = GetModuleHandle(L"kernel32.dll"); + uintptr_t baseAddress = reinterpret_cast(moduleBase); + // Parse PE headers + PIMAGE_DOS_HEADER dosHeader = static_cast(moduleBase); + if (dosHeader->e_magic != IMAGE_DOS_SIGNATURE) + { + return 1; + } + PIMAGE_NT_HEADERS ntHeaders = + reinterpret_cast(baseAddress + dosHeader->e_lfanew); + if (ntHeaders->Signature != IMAGE_NT_SIGNATURE) + { + return 1; + } + // Initialize disassembler + VXInstructionInfo info; + VXInstructionDecoder decoder; + VXExactSymbolResolver resolver; + VXIntelInstructionFormatter formatter; + decoder.setDisassemblerMode(VXDisassemblerMode::M64BIT); + formatter.setSymbolResolver(&resolver); + // Initialize output stream + std::ofstream out; + out.open(".\\output.txt"); + // Find all call and jump targets + uint64_t subCount = 0; + uint64_t locCount = 0; + PIMAGE_SECTION_HEADER sectionHeader = + reinterpret_cast( + reinterpret_cast(ntHeaders) + sizeof(IMAGE_NT_HEADERS) + + ntHeaders->FileHeader.SizeOfOptionalHeader - sizeof(IMAGE_OPTIONAL_HEADER)); + for (unsigned int i = 0; i < ntHeaders->FileHeader.NumberOfSections; ++i) + { + if (sectionHeader->Characteristics & IMAGE_SCN_CNT_CODE) + { + VXMemoryDataSource input(reinterpret_cast( + baseAddress + sectionHeader->VirtualAddress), sectionHeader->SizeOfRawData); + decoder.setDataSource(&input); + decoder.setInstructionPointer(baseAddress + sectionHeader->VirtualAddress); + while (decoder.decodeInstruction(info)) + { + // Skip invalid instructions and non-relative instructions + if ((info.flags & IF_ERROR_MASK) || !(info.flags & IF_RELATIVE)) + { + continue; + } + switch (info.mnemonic) + { + case VXInstructionMnemonic::CALL: + resolver.setSymbol(VDECalcAbsoluteTarget(info, info.operand[0]), + std::string("sub_" + std::to_string(subCount)).c_str()); + subCount++; + break; + case VXInstructionMnemonic::JMP: + case VXInstructionMnemonic::JO: + case VXInstructionMnemonic::JNO: + case VXInstructionMnemonic::JB: + case VXInstructionMnemonic::JNB: + case VXInstructionMnemonic::JE: + case VXInstructionMnemonic::JNE: + case VXInstructionMnemonic::JBE: + case VXInstructionMnemonic::JA: + case VXInstructionMnemonic::JS: + case VXInstructionMnemonic::JNS: + case VXInstructionMnemonic::JP: + case VXInstructionMnemonic::JNP: + case VXInstructionMnemonic::JL: + case VXInstructionMnemonic::JGE: + case VXInstructionMnemonic::JLE: + case VXInstructionMnemonic::JG: + case VXInstructionMnemonic::JCXZ: + case VXInstructionMnemonic::JECXZ: + case VXInstructionMnemonic::JRCXZ: + resolver.setSymbol(VDECalcAbsoluteTarget(info, info.operand[0]), + std::string("loc_" + std::to_string(locCount)).c_str()); + locCount++; + break; + default: + break; + } + } + } + sectionHeader++; + } + // Add entry point symbol + resolver.setSymbol(baseAddress + ntHeaders->OptionalHeader.AddressOfEntryPoint, "EntryPoint"); + // Add exported symbols + if (ntHeaders->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT].VirtualAddress > 0) + { + PIMAGE_EXPORT_DIRECTORY exports = + reinterpret_cast(reinterpret_cast(baseAddress) + + ntHeaders->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT].VirtualAddress); + PDWORD address = + reinterpret_cast(reinterpret_cast(baseAddress) + + exports->AddressOfFunctions); + PDWORD name = + reinterpret_cast(reinterpret_cast(baseAddress) + + exports->AddressOfNames); + PWORD ordinal = + reinterpret_cast(reinterpret_cast(baseAddress) + + exports->AddressOfNameOrdinals); + for(unsigned int i = 0; i < exports->NumberOfNames; ++i) + { + resolver.setSymbol(baseAddress + address[ordinal[i]], + reinterpret_cast(baseAddress) + name[i]); + } + } + // Disassemble + sectionHeader = + reinterpret_cast( + reinterpret_cast(ntHeaders) + sizeof(IMAGE_NT_HEADERS) + + ntHeaders->FileHeader.SizeOfOptionalHeader - sizeof(IMAGE_OPTIONAL_HEADER)); + for (unsigned int i = 0; i < ntHeaders->FileHeader.NumberOfSections; ++i) + { + if (sectionHeader->Characteristics & IMAGE_SCN_CNT_CODE) + { + VXMemoryDataSource input(reinterpret_cast( + baseAddress + sectionHeader->VirtualAddress), sectionHeader->SizeOfRawData); + decoder.setDataSource(&input); + decoder.setInstructionPointer(baseAddress + sectionHeader->VirtualAddress); + while (decoder.decodeInstruction(info)) + { + uint64_t offset; + const char *symbol = resolver.resolveSymbol(info, info.instrAddress, offset); + if (symbol) + { + out << symbol << ": " << std::endl; + } + out << " " << std::hex << std::setw(16) << std::setfill('0') + << info.instrAddress << " "; + if (info.flags & IF_ERROR_MASK) + { + out << "db " << std::hex << std::setw(2) << std::setfill('0') + << static_cast(info.data[0]) << std::endl; + } else + { + out << formatter.formatInstruction(info) << std::endl; + } + } + } + sectionHeader++; + } + out.close(); return 0; } diff --git a/VerteronDisassemblerEngine/VXInstructionFormatter.cpp b/VerteronDisassemblerEngine/VXInstructionFormatter.cpp index 1fdf80c..2225bc9 100644 --- a/VerteronDisassemblerEngine/VXInstructionFormatter.cpp +++ b/VerteronDisassemblerEngine/VXInstructionFormatter.cpp @@ -96,13 +96,14 @@ const char* VXBaseInstructionFormatter::m_registerStrings[] = "rip" }; -void VXBaseInstructionFormatter::internalFormatInstruction(VXInstructionInfo const& info) +void VXBaseInstructionFormatter::internalFormatInstruction(const VXInstructionInfo &info) { // Nothing to do here } VXBaseInstructionFormatter::VXBaseInstructionFormatter() : m_symbolResolver(nullptr) + , m_outputStringLen(0) , m_uppercase(false) { @@ -110,6 +111,7 @@ VXBaseInstructionFormatter::VXBaseInstructionFormatter() VXBaseInstructionFormatter::VXBaseInstructionFormatter(VXBaseSymbolResolver *symbolResolver) : m_symbolResolver(symbolResolver) + , m_outputStringLen(0) , m_uppercase(false) { @@ -137,7 +139,7 @@ VXBaseInstructionFormatter::~VXBaseInstructionFormatter() void VXBaseInstructionFormatter::outputClear() { - m_outputBuffer.clear(); + m_outputStringLen = 0; } char const* VXBaseInstructionFormatter::outputString() @@ -145,65 +147,72 @@ char const* VXBaseInstructionFormatter::outputString() return &m_outputBuffer[0]; } -void VXBaseInstructionFormatter::outputAppend(char const *text) -{ + void VXBaseInstructionFormatter::outputAppend(char const *text) + { // Get the string length including the null-terminator char size_t strLen = strlen(text) + 1; - // Get the buffer capacity and size - size_t bufCap = m_outputBuffer.capacity(); + // Get the buffer size size_t bufLen = m_outputBuffer.size(); - // Decrease the offset by one, to exclude already existing null-terminator chars in the + // Decrease the offset by one, to exclude already existing null-terminator chars in the // output buffer - size_t offset = (bufLen) ? bufLen - 1 : 0; + size_t offset = (m_outputStringLen) ? m_outputStringLen - 1 : 0; // Resize capacity of the output buffer on demand and add some extra space to improve the - // performance - if (bufCap <= (bufLen + strLen)) + // performance + if (bufLen <= (m_outputStringLen + strLen)) { - m_outputBuffer.reserve(bufCap + strLen + 256); + m_outputBuffer.resize(bufLen + strLen + 512); } - // Append the text - m_outputBuffer.resize(offset + strLen); + // Write the text to the output buffer memcpy(&m_outputBuffer[offset], text, strLen); + // Increase the string length + m_outputStringLen = offset + strLen; // Convert to uppercase if (m_uppercase) { - for (size_t i = offset; i < m_outputBuffer.size() - 1; ++i) + for (size_t i = offset; i < m_outputStringLen - 1; ++i) { m_outputBuffer[i] = toupper(m_outputBuffer[i]); } } -} + } -void VXBaseInstructionFormatter::outputAppendFormatted(char const *format, ...) -{ + void VXBaseInstructionFormatter::outputAppendFormatted(char const *format, ...) + { va_list arguments; va_start(arguments, format); - // Get the string length including the null-terminator char - size_t strLen = _vscprintf(format, arguments) + 1; - // Get the buffer capacity and size - size_t bufCap = m_outputBuffer.capacity(); + // Get the buffer size size_t bufLen = m_outputBuffer.size(); - // Decrease the offset by one, to exclude already existing null-terminator chars in the + // Decrease the offset by one, to exclude already existing null-terminator chars in the // output buffer - size_t offset = (bufLen) ? bufLen - 1 : 0; - if (strLen > 1) + size_t offset = (m_outputStringLen) ? m_outputStringLen - 1 : 0; + // Resize the output buffer on demand and add some extra space to improve the performance + if ((bufLen - m_outputStringLen) < 256) { - // Resize capacity of the output buffer on demand and add some extra space to improve the - // performance - if (bufCap < (bufLen + strLen)) + bufLen = bufLen + 512; + m_outputBuffer.resize(bufLen); + } + int strLen = 0; + do + { + // If the formatted text did not fit in the output buffer, resize it, and try again + if (strLen < 0) { - m_outputBuffer.reserve(bufCap + strLen + 256); + m_outputBuffer.resize(bufLen + 512); + return outputAppendFormatted(format, arguments); } - // Append the formatted text - m_outputBuffer.resize(offset + strLen); - vsnprintf_s(&m_outputBuffer[offset], strLen, strLen, format, arguments); - // Convert to uppercase - if (m_uppercase) + // Write the formatted text to the output buffer + assert((bufLen - offset) > 0); + strLen = + vsnprintf_s(&m_outputBuffer[offset], bufLen - offset, _TRUNCATE, format, arguments); + } while (strLen < 0); + // Increase the string length + m_outputStringLen = offset + strLen + 1; + // Convert to uppercase + if (m_uppercase) + { + for (size_t i = offset; i < m_outputStringLen - 1; ++i) { - for (size_t i = offset; i < m_outputBuffer.size() - 1; ++i) - { - m_outputBuffer[i] = toupper(m_outputBuffer[i]); - } + m_outputBuffer[i] = toupper(m_outputBuffer[i]); } } va_end(arguments); diff --git a/VerteronDisassemblerEngine/VXInstructionFormatter.h b/VerteronDisassemblerEngine/VXInstructionFormatter.h index 149d419..50cd69e 100644 --- a/VerteronDisassemblerEngine/VXInstructionFormatter.h +++ b/VerteronDisassemblerEngine/VXInstructionFormatter.h @@ -50,6 +50,7 @@ private: static const char *m_registerStrings[]; VXBaseSymbolResolver *m_symbolResolver; std::vector m_outputBuffer; + size_t m_outputStringLen; bool m_uppercase; protected: /** diff --git a/VerteronDisassemblerEngine/VXSymbolResolver.cpp b/VerteronDisassemblerEngine/VXSymbolResolver.cpp index d729a5c..51fac85 100644 --- a/VerteronDisassemblerEngine/VXSymbolResolver.cpp +++ b/VerteronDisassemblerEngine/VXSymbolResolver.cpp @@ -59,13 +59,18 @@ const char* VXExactSymbolResolver::resolveSymbol(const VXInstructionInfo &info, uint64_t &offset) { std::unordered_map::const_iterator iterator = m_symbolMap.find(address); - return (iterator == m_symbolMap.end()) ? nullptr : iterator->second.c_str(); + if (iterator != m_symbolMap.end()) + { + offset = 0; + return iterator->second.c_str(); + } + return nullptr; } bool VXExactSymbolResolver::containsSymbol(uint64_t address) const { std::unordered_map::const_iterator iterator = m_symbolMap.find(address); - return (iterator == m_symbolMap.end()) ? false : true; + return (iterator != m_symbolMap.end()); } void VXExactSymbolResolver::setSymbol(uint64_t address, const char* name)