Added CS vs Zydis diff code & various fixes
- Fixed various porting bugs in the Zydis `CapstoneTokenizer` - Added Capstone vs Zydis tokenizing diff and various exceptions for known issues
This commit is contained in:
parent
4c841d85c6
commit
0711ac09df
|
@ -6,6 +6,7 @@
|
|||
#include "MainWindow.h"
|
||||
#include "CachedFontMetrics.h"
|
||||
#include "QBeaEngine.h"
|
||||
#include "CsQBeaEngine.h"
|
||||
#include "MemoryPage.h"
|
||||
|
||||
Disassembly::Disassembly(QWidget* parent) : AbstractTableView(parent), mDisassemblyPopup(this)
|
||||
|
@ -32,6 +33,8 @@ Disassembly::Disassembly(QWidget* parent) : AbstractTableView(parent), mDisassem
|
|||
|
||||
mDisasm = new QBeaEngine(maxModuleSize);
|
||||
mDisasm->UpdateConfig();
|
||||
mCsDisasm = new CsQBeaEngine(maxModuleSize);
|
||||
mCsDisasm->UpdateConfig();
|
||||
|
||||
mCodeFoldingManager = nullptr;
|
||||
duint setting;
|
||||
|
@ -1493,7 +1496,76 @@ Instruction_t Disassembly::DisassembleAt(dsint rva)
|
|||
if(!mMemPage->read(wBuffer.data(), rva, wBuffer.size()))
|
||||
return Instruction_t();
|
||||
|
||||
return mDisasm->DisassembleAt((byte_t*)wBuffer.data(), wBuffer.size(), base, rva);
|
||||
auto zy_instr = mDisasm->DisassembleAt((byte_t*)wBuffer.data(), wBuffer.size(), base, rva);
|
||||
auto cs_instr = mCsDisasm->DisassembleAt((byte_t*)wBuffer.data(), wBuffer.size(), base, rva);
|
||||
|
||||
if (zy_instr.tokens.tokens != cs_instr.tokens.tokens)
|
||||
{
|
||||
if (zy_instr.instStr.startsWith("lea")) // cs scales lea mem op incorrectly
|
||||
goto _exit;
|
||||
if (cs_instr.instStr.startsWith("movabs")) // cs uses non-standard movabs mnem
|
||||
goto _exit;
|
||||
if (cs_instr.instStr.startsWith("lock") || cs_instr.instStr.startsWith("rep")) // cs includes prefix in mnem
|
||||
goto _exit;
|
||||
if (cs_instr.instStr.startsWith('j') && cs_instr.length == 4) // cs has AMD style handling of 66 branches
|
||||
goto _exit;
|
||||
if (cs_instr.instStr.startsWith("prefetchw")) // cs uses m8 (AMD/intel doc), zy m512
|
||||
goto _exit; // (doesn't matter, prefetch doesn't really have a size)
|
||||
if (cs_instr.instStr.startsWith("xchg")) // cs/zy print operands in different order (doesn't make any diff)
|
||||
goto _exit;
|
||||
if (cs_instr.instStr.startsWith("rdpmc") ||
|
||||
cs_instr.instStr.startsWith("in") ||
|
||||
cs_instr.instStr.startsWith("out") ||
|
||||
cs_instr.instStr.startsWith("sti") ||
|
||||
cs_instr.instStr.startsWith("cli") ||
|
||||
cs_instr.instStr.startsWith("iret")) // cs assumes privileged, zydis doesn't (CPL is configurable for those)
|
||||
goto _exit;
|
||||
if (cs_instr.instStr.startsWith("sal")) // cs says sal, zydis say shl (both correct)
|
||||
goto _exit;
|
||||
if (cs_instr.instStr.startsWith("xlat")) // cs uses xlatb form, zydis xlat m8 form (both correct)
|
||||
goto _exit;
|
||||
if (cs_instr.instStr.startsWith("lcall") ||
|
||||
cs_instr.instStr.startsWith("ljmp") ||
|
||||
cs_instr.instStr.startsWith("retf")) // cs uses "f" mnem-suffix, zydis has separate "far" token
|
||||
goto _exit;
|
||||
if (cs_instr.instStr.startsWith("movsxd")) // cs has wrong operand size (32) for 0x63 variant (e.g. "63646566")
|
||||
goto _exit;
|
||||
if (cs_instr.instStr.startsWith('j') && (cs_instr.dump[0] & 0x40) == 0x40) // cs honors rex.w on jumps, truncating the
|
||||
goto _exit; // target address to 32 bit (must be ignored)
|
||||
if (cs_instr.instStr.startsWith("enter")) // cs has wrong operand size (32)
|
||||
goto _exit;
|
||||
if (cs_instr.instStr.startsWith("wait")) // cs says wait, zy says fwait (both ok)
|
||||
goto _exit;
|
||||
if (cs_instr.dump.length() > 2 && // cs ignores segment prefixes if followed by branch hints
|
||||
cs_instr.dump[1] == '\x2e' &&
|
||||
cs_instr.dump[2] == '\x3e')
|
||||
goto _exit;
|
||||
|
||||
auto insn_hex = zy_instr.dump.toHex().toStdString();
|
||||
auto cs = cs_instr.instStr.toStdString();
|
||||
auto zy = zy_instr.instStr.toStdString();
|
||||
|
||||
for (auto zy_it = zy_instr.tokens.tokens.begin(), cs_it = cs_instr.tokens.tokens.begin()
|
||||
; zy_it != zy_instr.tokens.tokens.end() && cs_it != cs_instr.tokens.tokens.end()
|
||||
; ++zy_it, ++cs_it)
|
||||
{
|
||||
auto zy_tok_text = zy_it->text.toStdString();
|
||||
auto cs_tok_text = cs_it->text.toStdString();
|
||||
|
||||
if (zy_tok_text == "bnd") // cs doesn't support BND prefix
|
||||
goto _exit;
|
||||
if (zy_it->value.size != cs_it->value.size) // imm sizes in CS are completely broken
|
||||
goto _exit;
|
||||
|
||||
if (!(*zy_it == *cs_it))
|
||||
__debugbreak();
|
||||
}
|
||||
|
||||
//__debugbreak();
|
||||
}
|
||||
|
||||
_exit:
|
||||
return zy_instr;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
class CodeFoldingHelper;
|
||||
class QBeaEngine;
|
||||
class CsQBeaEngine;
|
||||
class MemoryPage;
|
||||
|
||||
class Disassembly : public AbstractTableView
|
||||
|
@ -223,6 +224,7 @@ protected:
|
|||
bool mPopupEnabled;
|
||||
MemoryPage* mMemPage;
|
||||
QBeaEngine* mDisasm;
|
||||
CsQBeaEngine* mCsDisasm;
|
||||
bool mShowMnemonicBrief;
|
||||
XREF_INFO mXrefInfo;
|
||||
CodeFoldingHelper* mCodeFoldingManager;
|
||||
|
|
|
@ -0,0 +1,329 @@
|
|||
#include "CsQBeaEngine.h"
|
||||
#include "StringUtil.h"
|
||||
#include "EncodeMap.h"
|
||||
#include "CodeFolding.h"
|
||||
|
||||
/**
 * @brief Build the engine: set up the tokenizer, the data-instruction table and the encode map.
 *
 * @param[in] maxModuleSize Value handed to the tokenizer constructor
 */
CsQBeaEngine::CsQBeaEngine(int maxModuleSize)
    : _tokenizer(maxModuleSize),
      _bLongDataInst(false),
      mCodeFoldingManager(nullptr)
{
    // Shared tokenizer colors first, then the per-engine lookup table
    CsCapstoneTokenizer::UpdateColors();
    UpdateDataInstructionMap();

    // The encode map is owned by this engine and released in the destructor
    mEncodeMap = new EncodeMap();
}
|
||||
|
||||
/**
 * @brief Release the encode map allocated by the constructor.
 */
CsQBeaEngine::~CsQBeaEngine()
{
    delete mEncodeMap;
}
|
||||
|
||||
/**
|
||||
* @brief Return the address of the nth instruction before the instruction pointed by ip. @n
|
||||
* This function has been grabbed from OllyDbg ("Disassembleback" in asmserv.c)
|
||||
*
|
||||
* @param[in] data Address of the data to disassemble
|
||||
* @param[in] base Original base address of the memory page (Required to disassemble destination addresses)
|
||||
* @param[in] size Size of the data block pointed by data
|
||||
* @param[in] ip RVA of the current instruction (Relative to data pointer)
|
||||
* @param[in] n Number of instruction back
|
||||
*
|
||||
* @return Return the RVA (Relative to the data pointer) of the nth instruction before the instruction pointed by ip
|
||||
*/
|
||||
ulong CsQBeaEngine::DisassembleBack(byte_t* data, duint base, duint size, duint ip, int n)
|
||||
{
|
||||
int i;
|
||||
uint abuf[128], addr, back, cmdsize;
|
||||
unsigned char* pdata;
|
||||
|
||||
// Reset Disasm Structure
|
||||
Capstone cp;
|
||||
|
||||
// Check if the pointer is not null
|
||||
if(data == NULL)
|
||||
return 0;
|
||||
|
||||
// Round the number of back instructions to 127
|
||||
if(n < 0)
|
||||
n = 0;
|
||||
else if(n > 127)
|
||||
n = 127;
|
||||
|
||||
// Check if the instruction pointer ip is not outside the memory range
|
||||
if(ip >= size)
|
||||
ip = size - 1;
|
||||
|
||||
// Obvious answer
|
||||
if(n == 0)
|
||||
return ip;
|
||||
|
||||
if(ip < (uint)n)
|
||||
return ip;
|
||||
|
||||
//TODO: buffer overflow due to unchecked "back" value
|
||||
back = MAX_DISASM_BUFFER * (n + 3); // Instruction length limited to 16
|
||||
|
||||
if(ip < back)
|
||||
back = ip;
|
||||
|
||||
addr = ip - back;
|
||||
if(mCodeFoldingManager && mCodeFoldingManager->isFolded(addr + base))
|
||||
{
|
||||
duint newback = mCodeFoldingManager->getFoldBegin(addr + base);
|
||||
if(newback >= base && newback < size + base)
|
||||
addr = newback - base;
|
||||
}
|
||||
|
||||
pdata = data + addr;
|
||||
|
||||
for(i = 0; addr < ip; i++)
|
||||
{
|
||||
abuf[i % 128] = addr;
|
||||
|
||||
if(mCodeFoldingManager && mCodeFoldingManager->isFolded(addr + base))
|
||||
{
|
||||
duint newaddr = mCodeFoldingManager->getFoldBegin(addr + base);
|
||||
if(newaddr >= base)
|
||||
{
|
||||
addr = newaddr - base;
|
||||
}
|
||||
cmdsize = mCodeFoldingManager->getFoldEnd(addr + base) - (addr + base) + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(!cp.DisassembleSafe(addr + base, pdata, (int)size))
|
||||
cmdsize = 2; //heuristic for better output (FF FE or FE FF are usually part of an instruction)
|
||||
else
|
||||
cmdsize = cp.Size();
|
||||
|
||||
cmdsize = mEncodeMap->getDataSize(base + addr, cmdsize);
|
||||
|
||||
}
|
||||
|
||||
|
||||
pdata += cmdsize;
|
||||
addr += cmdsize;
|
||||
back -= cmdsize;
|
||||
size -= cmdsize;
|
||||
}
|
||||
|
||||
if(i < n)
|
||||
return abuf[0];
|
||||
else
|
||||
return abuf[(i - n + 128) % 128];
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Return the address of the nth instruction after the instruction pointed by ip. @n
|
||||
* This function has been grabbed from OllyDbg ("Disassembleforward" in asmserv.c)
|
||||
*
|
||||
* @param[in] data Address of the data to disassemble
|
||||
* @param[in] base Original base address of the memory page (Required to disassemble destination addresses)
|
||||
* @param[in] size Size of the data block pointed by data
|
||||
* @param[in] ip RVA of the current instruction (Relative to data pointer)
|
||||
* @param[in] n Number of instruction next
|
||||
*
|
||||
* @return Return the RVA (Relative to the data pointer) of the nth instruction after the instruction pointed by ip
|
||||
*/
|
||||
ulong CsQBeaEngine::DisassembleNext(byte_t* data, duint base, duint size, duint ip, int n)
|
||||
{
|
||||
int i;
|
||||
uint cmdsize;
|
||||
unsigned char* pdata;
|
||||
|
||||
// Reset Disasm Structure
|
||||
Capstone cp;
|
||||
|
||||
if(data == NULL)
|
||||
return 0;
|
||||
|
||||
if(ip >= size)
|
||||
ip = size - 1;
|
||||
|
||||
if(n <= 0)
|
||||
return ip;
|
||||
|
||||
|
||||
pdata = data + ip;
|
||||
size -= ip;
|
||||
|
||||
for(i = 0; i < n && size > 0; i++)
|
||||
{
|
||||
if(mCodeFoldingManager && mCodeFoldingManager->isFolded(ip + base))
|
||||
{
|
||||
cmdsize = mCodeFoldingManager->getFoldEnd(ip + base) - (ip + base) + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(!cp.DisassembleSafe(ip + base, pdata, (int)size))
|
||||
cmdsize = 1;
|
||||
else
|
||||
cmdsize = cp.Size();
|
||||
|
||||
cmdsize = mEncodeMap->getDataSize(base + ip, cmdsize);
|
||||
|
||||
}
|
||||
|
||||
pdata += cmdsize;
|
||||
ip += cmdsize;
|
||||
size -= cmdsize;
|
||||
}
|
||||
|
||||
return ip;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Disassemble the instruction at the given ip RVA.
|
||||
*
|
||||
* @param[in] data Pointer to memory data (Can be either a buffer or the original data memory)
|
||||
* @param[in] size Size of the memory pointed by data (Can be the memory page size if data points to the original memory page base address)
|
||||
* @param[in] origBase Original base address of the memory page (Required to disassemble destination addresses)
|
||||
* @param[in] origInstRVA Original Instruction RVA of the instruction to disassemble
|
||||
*
|
||||
* @return Return the disassembled instruction
|
||||
*/
|
||||
Instruction_t CsQBeaEngine::DisassembleAt(byte_t* data, duint size, duint origBase, duint origInstRVA, bool datainstr)
|
||||
{
|
||||
if(datainstr)
|
||||
{
|
||||
ENCODETYPE type = mEncodeMap->getDataType(origBase + origInstRVA);
|
||||
if(!mEncodeMap->isCode(type))
|
||||
return DecodeDataAt(data, size, origBase, origInstRVA, type);
|
||||
}
|
||||
//tokenize
|
||||
CsCapstoneTokenizer::InstructionToken cap;
|
||||
_tokenizer.Tokenize(origBase + origInstRVA, data, size, cap);
|
||||
int len = _tokenizer.Size();
|
||||
|
||||
const auto & cp = _tokenizer.GetCapstone();
|
||||
bool success = cp.Success();
|
||||
|
||||
|
||||
auto branchType = Instruction_t::None;
|
||||
Instruction_t wInst;
|
||||
if(success && (cp.InGroup(CS_GRP_JUMP) || cp.IsLoop() || cp.InGroup(CS_GRP_CALL) || cp.InGroup(CS_GRP_RET)))
|
||||
{
|
||||
wInst.branchDestination = DbgGetBranchDestination(origBase + origInstRVA);
|
||||
switch(cp.GetId())
|
||||
{
|
||||
case X86_INS_JMP:
|
||||
case X86_INS_LJMP:
|
||||
branchType = Instruction_t::Unconditional;
|
||||
break;
|
||||
case X86_INS_CALL:
|
||||
case X86_INS_LCALL:
|
||||
branchType = Instruction_t::Call;
|
||||
break;
|
||||
default:
|
||||
branchType = cp.InGroup(CS_GRP_RET) ? Instruction_t::None : Instruction_t::Conditional;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
wInst.branchDestination = 0;
|
||||
|
||||
wInst.instStr = QString(cp.InstructionText().c_str());
|
||||
wInst.dump = QByteArray((const char*)data, len);
|
||||
wInst.rva = origInstRVA;
|
||||
if(mCodeFoldingManager && mCodeFoldingManager->isFolded(origInstRVA))
|
||||
wInst.length = mCodeFoldingManager->getFoldEnd(origInstRVA + origBase) - (origInstRVA + origBase) + 1;
|
||||
else
|
||||
wInst.length = len;
|
||||
wInst.branchType = branchType;
|
||||
wInst.tokens = cap;
|
||||
|
||||
if(success)
|
||||
{
|
||||
cp.RegInfo(reginfo);
|
||||
cp.FlagInfo(flaginfo);
|
||||
|
||||
auto flaginfo2reginfo = [](uint8_t info)
|
||||
{
|
||||
auto result = 0;
|
||||
#define checkFlag(test, reg) result |= (info & test) == test ? reg : 0
|
||||
checkFlag(Capstone::Modify, Capstone::Write);
|
||||
checkFlag(Capstone::Prior, Capstone::None);
|
||||
checkFlag(Capstone::Reset, Capstone::Write);
|
||||
checkFlag(Capstone::Set, Capstone::Write);
|
||||
checkFlag(Capstone::Test, Capstone::Read);
|
||||
checkFlag(Capstone::Undefined, Capstone::None);
|
||||
#undef checkFlag
|
||||
return result;
|
||||
};
|
||||
|
||||
for(uint8_t i = Capstone::FLAG_INVALID; i < Capstone::FLAG_ENDING; i++)
|
||||
if(flaginfo[i])
|
||||
{
|
||||
reginfo[X86_REG_EFLAGS] = Capstone::None;
|
||||
wInst.regsReferenced.push_back({cp.FlagName(Capstone::Flag(i)), flaginfo2reginfo(flaginfo[i])});
|
||||
}
|
||||
|
||||
reginfo[ArchValue(X86_REG_EIP, X86_REG_RIP)] = Capstone::None;
|
||||
for(uint8_t i = X86_REG_INVALID; i < X86_REG_ENDING; i++)
|
||||
if(reginfo[i])
|
||||
wInst.regsReferenced.push_back({cp.RegName(x86_reg(i)), reginfo[i]});
|
||||
}
|
||||
|
||||
return wInst;
|
||||
}
|
||||
|
||||
/**
 * @brief       Build a pseudo instruction ("db ...", "dd ...", ...) for a location that holds data.
 *
 * @param[in]   data            Pointer to the bytes of the data item
 * @param[in]   size            Number of bytes available at data
 * @param[in]   origBase        Original base address of the memory page
 * @param[in]   origInstRVA     RVA of the data item
 * @param[in]   type            Encode type of the data item; unknown types are shown as bytes
 *
 * @return      Return the data item formatted and tokenized like an instruction
 */
Instruction_t CsQBeaEngine::DecodeDataAt(byte_t* data, duint size, duint origBase, duint origInstRVA, ENCODETYPE type)
{
    //tokenize
    CsCapstoneTokenizer::InstructionToken cap;

    // Hold the iterator by value: binding the temporary returned by find() to a
    // non-const reference is only accepted as a compiler extension.
    auto infoIter = dataInstMap.find(type);
    if(infoIter == dataInstMap.end())
        infoIter = dataInstMap.find(enc_byte);

    int len = mEncodeMap->getDataSize(origBase + origInstRVA, 1);

    // Never dereference the end iterator, even if the table lacks the byte fallback
    QString mnemonic;
    if(infoIter != dataInstMap.end())
        mnemonic = _bLongDataInst ? infoIter.value().longName : infoIter.value().shortName;

    // The item cannot be longer than the bytes that are available
    len = std::min(len, (int)size);

    QString datastr = GetDataTypeString(data, len, type);

    _tokenizer.TokenizeData(mnemonic, datastr, cap);

    Instruction_t wInst;
    wInst.instStr = mnemonic + " " + datastr;
    wInst.dump = QByteArray((const char*)data, len);
    wInst.rva = origInstRVA;
    wInst.length = len;
    wInst.branchType = Instruction_t::None;
    wInst.branchDestination = 0;
    wInst.tokens = cap;

    return wInst;
}
|
||||
|
||||
void CsQBeaEngine::UpdateDataInstructionMap()
|
||||
{
|
||||
dataInstMap.clear();
|
||||
dataInstMap.insert(enc_byte, {"db", "byte", "int8"});
|
||||
dataInstMap.insert(enc_word, {"dw", "word", "short"});
|
||||
dataInstMap.insert(enc_dword, {"dd", "dword", "int"});
|
||||
dataInstMap.insert(enc_fword, {"df", "fword", "fword"});
|
||||
dataInstMap.insert(enc_qword, {"dq", "qword", "long"});
|
||||
dataInstMap.insert(enc_tbyte, {"tbyte", "tbyte", "tbyte"});
|
||||
dataInstMap.insert(enc_oword, {"oword", "oword", "oword"});
|
||||
dataInstMap.insert(enc_mmword, {"mmword", "mmword", "long long"});
|
||||
dataInstMap.insert(enc_xmmword, {"xmmword", "xmmword", "_m128"});
|
||||
dataInstMap.insert(enc_ymmword, {"ymmword", "ymmword", "_m256"});
|
||||
dataInstMap.insert(enc_real4, {"real4", "real4", "float"});
|
||||
dataInstMap.insert(enc_real8, {"real8", "real8", "double"});
|
||||
dataInstMap.insert(enc_real10, {"real10", "real10", "long double"});
|
||||
dataInstMap.insert(enc_ascii, {"ascii", "ascii", "string"});
|
||||
dataInstMap.insert(enc_unicode, {"unicode", "unicode", "wstring"});
|
||||
}
|
||||
|
||||
/**
 * @brief Set the folding helper consulted while disassembling; the pointer is stored, not owned.
 *
 * @param[in] CodeFoldingManager Helper to use (the engine checks it for null before every use)
 */
void CsQBeaEngine::setCodeFoldingManager(CodeFoldingHelper* CodeFoldingManager)
{
    this->mCodeFoldingManager = CodeFoldingManager;
}
|
||||
|
||||
void CsQBeaEngine::UpdateConfig()
|
||||
{
|
||||
_bLongDataInst = ConfigBool("Disassembler", "LongDataInstruction");
|
||||
_tokenizer.UpdateConfig();
|
||||
}
|
|
@ -0,0 +1,46 @@
|
|||
#ifndef CSQBEAENGINE_H
|
||||
#define CSQBEAENGINE_H
|
||||
|
||||
#include <QString>
|
||||
#include <vector>
|
||||
#include "cs_capstone_gui.h"
|
||||
|
||||
class EncodeMap;
|
||||
class CodeFoldingHelper;
|
||||
|
||||
class CsQBeaEngine
|
||||
{
|
||||
public:
|
||||
explicit CsQBeaEngine(int maxModuleSize);
|
||||
~CsQBeaEngine();
|
||||
ulong DisassembleBack(byte_t* data, duint base, duint size, duint ip, int n);
|
||||
ulong DisassembleNext(byte_t* data, duint base, duint size, duint ip, int n);
|
||||
Instruction_t DisassembleAt(byte_t* data, duint size, duint origBase, duint origInstRVA, bool datainstr = true);
|
||||
Instruction_t DecodeDataAt(byte_t* data, duint size, duint origBase, duint origInstRVA, ENCODETYPE type);
|
||||
void setCodeFoldingManager(CodeFoldingHelper* CodeFoldingManager);
|
||||
void UpdateConfig();
|
||||
|
||||
EncodeMap* getEncodeMap()
|
||||
{
|
||||
return mEncodeMap;
|
||||
}
|
||||
|
||||
private:
|
||||
struct DataInstructionInfo
|
||||
{
|
||||
QString shortName;
|
||||
QString longName;
|
||||
QString cName;
|
||||
};
|
||||
|
||||
void UpdateDataInstructionMap();
|
||||
CsCapstoneTokenizer _tokenizer;
|
||||
QHash<ENCODETYPE, DataInstructionInfo> dataInstMap;
|
||||
bool _bLongDataInst;
|
||||
EncodeMap* mEncodeMap;
|
||||
CodeFoldingHelper* mCodeFoldingManager;
|
||||
uint8_t reginfo[X86_REG_ENDING];
|
||||
uint8_t flaginfo[Capstone::FLAG_ENDING];
|
||||
};
|
||||
|
||||
#endif // CSQBEAENGINE_H
|
|
@ -67,8 +67,8 @@ private:
|
|||
bool _bLongDataInst;
|
||||
EncodeMap* mEncodeMap;
|
||||
CodeFoldingHelper* mCodeFoldingManager;
|
||||
uint8_t reginfo[ZYDIS_REGISTER_ENUM_COUNT];
|
||||
uint8_t flaginfo[ZYDIS_CPUFLAG_ENUM_COUNT];
|
||||
uint8_t reginfo[ZYDIS_REGISTER_MAX_VALUE + 1];
|
||||
uint8_t flaginfo[ZYDIS_CPUFLAG_MAX_VALUE + 1];
|
||||
};
|
||||
|
||||
#endif // QBEAENGINE_H
|
||||
|
|
|
@ -116,6 +116,9 @@ bool CapstoneTokenizer::Tokenize(duint addr, const unsigned char* data, int data
|
|||
_success = _cp.DisassembleSafe(addr, data, datasize);
|
||||
if(_success)
|
||||
{
|
||||
if (!tokenizePrefix())
|
||||
return false;
|
||||
|
||||
isNop = _cp.IsNop();
|
||||
if(!tokenizeMnemonic())
|
||||
return false;
|
||||
|
@ -361,31 +364,30 @@ QString CapstoneTokenizer::printValue(const TokenValue & value, bool expandModul
|
|||
bool CapstoneTokenizer::tokenizePrefix()
|
||||
{
|
||||
bool hasPrefix = true;
|
||||
QString prefixText;
|
||||
QStringList prefixText;
|
||||
|
||||
//TODO: look at multiple prefixes on one instruction
|
||||
auto attr = _cp.GetInstr()->attributes;
|
||||
|
||||
if(attr & ZYDIS_ATTRIB_HAS_LOCK)
|
||||
prefixText = "lock";
|
||||
prefixText += "lock";
|
||||
else if(attr & ZYDIS_ATTRIB_HAS_REP)
|
||||
prefixText = "rep";
|
||||
prefixText += "rep";
|
||||
else if(attr & ZYDIS_ATTRIB_HAS_REPNE)
|
||||
prefixText = "repe";
|
||||
prefixText += "repe";
|
||||
else if(attr & ZYDIS_ATTRIB_HAS_REPNE)
|
||||
prefixText = "repne";
|
||||
prefixText += "repne";
|
||||
else if(attr & ZYDIS_ATTRIB_HAS_BOUND)
|
||||
prefixText = "bnd";
|
||||
prefixText += "bnd";
|
||||
else if(attr & ZYDIS_ATTRIB_HAS_XACQUIRE)
|
||||
prefixText = "xacquire";
|
||||
prefixText += "xacquire";
|
||||
else if(attr & ZYDIS_ATTRIB_HAS_XRELEASE)
|
||||
prefixText = "xrelease";
|
||||
prefixText += "xrelease";
|
||||
else
|
||||
hasPrefix = false;
|
||||
|
||||
if(hasPrefix)
|
||||
{
|
||||
addToken(TokenType::Prefix, prefixText);
|
||||
addToken(TokenType::Prefix, prefixText.join(' '));
|
||||
addToken(TokenType::Space, " ");
|
||||
}
|
||||
|
||||
|
@ -458,7 +460,7 @@ bool CapstoneTokenizer::tokenizeRegOperand(const ZydisDecodedOperand & op)
|
|||
{
|
||||
auto registerType = TokenType::GeneralRegister;
|
||||
auto reg = op.reg;
|
||||
auto regClass = ZydisRegisterGetClass(reg);
|
||||
auto regClass = ZydisRegisterGetClass(reg.value);
|
||||
|
||||
switch(regClass)
|
||||
{
|
||||
|
@ -469,10 +471,10 @@ bool CapstoneTokenizer::tokenizeRegOperand(const ZydisDecodedOperand & op)
|
|||
case ZYDIS_REGCLASS_ZMM: registerType = TokenType::ZmmRegister; break;
|
||||
}
|
||||
|
||||
if(reg == ArchValue(ZYDIS_REGISTER_FS, ZYDIS_REGISTER_GS))
|
||||
if(reg.value == ArchValue(ZYDIS_REGISTER_FS, ZYDIS_REGISTER_GS))
|
||||
registerType = TokenType::MnemonicUnusual;
|
||||
|
||||
addToken(registerType, _cp.RegName(reg));
|
||||
addToken(registerType, _cp.RegName(reg.value));
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -482,15 +484,17 @@ bool CapstoneTokenizer::tokenizeImmOperand(const ZydisDecodedOperand & op)
|
|||
TokenType valueType;
|
||||
if(_cp.IsBranchType(Zydis::BT_Jmp | Zydis::BT_Call | Zydis::BT_Loop))
|
||||
{
|
||||
value = _cp.BranchDestination();
|
||||
valueType = TokenType::Address;
|
||||
value = op.imm.value.u;
|
||||
}
|
||||
else
|
||||
{
|
||||
value = duint(op.imm.value.u) & (duint(-1) >> (op.size ? (8 * sizeof(duint) - op.size) : 0));
|
||||
auto opsize = _cp.GetInstr()->operandWidth;
|
||||
valueType = TokenType::Value;
|
||||
value = duint(op.imm.value.u) & (duint(-1) >> (sizeof(duint) * 8 - opsize));
|
||||
|
||||
}
|
||||
auto tokenValue = TokenValue(op.size, value);
|
||||
auto tokenValue = TokenValue(op.size / 8, value);
|
||||
addToken(valueType, printValue(tokenValue, true, _maxModuleLength), tokenValue);
|
||||
return true;
|
||||
}
|
||||
|
@ -498,7 +502,8 @@ bool CapstoneTokenizer::tokenizeImmOperand(const ZydisDecodedOperand & op)
|
|||
bool CapstoneTokenizer::tokenizeMemOperand(const ZydisDecodedOperand & op)
|
||||
{
|
||||
//memory size
|
||||
const char* sizeText = _cp.MemSizeName(op.size);
|
||||
auto opsize = op.size / 8;
|
||||
const char* sizeText = _cp.MemSizeName(opsize);
|
||||
if(!sizeText)
|
||||
return false;
|
||||
addToken(TokenType::MemorySize, QString(sizeText) + " ptr");
|
||||
|
@ -506,7 +511,7 @@ bool CapstoneTokenizer::tokenizeMemOperand(const ZydisDecodedOperand & op)
|
|||
|
||||
//memory segment
|
||||
const auto & mem = op.mem;
|
||||
auto segmentType = op.reg == ArchValue(ZYDIS_REGISTER_FS, ZYDIS_REGISTER_GS)
|
||||
auto segmentType = mem.segment == ArchValue(ZYDIS_REGISTER_FS, ZYDIS_REGISTER_GS)
|
||||
? TokenType::MnemonicUnusual : TokenType::MemorySegment;
|
||||
addToken(segmentType, _cp.RegName(mem.segment));
|
||||
addToken(TokenType::Uncategorized, ":");
|
||||
|
@ -529,7 +534,7 @@ bool CapstoneTokenizer::tokenizeMemOperand(const ZydisDecodedOperand & op)
|
|||
if(mem.base == ZYDIS_REGISTER_RIP) //rip-relative (#replacement)
|
||||
{
|
||||
duint addr = _cp.Address() + duint(mem.disp.value) + _cp.Size();
|
||||
TokenValue value = TokenValue(op.size, addr);
|
||||
TokenValue value = TokenValue(opsize, addr);
|
||||
auto displacementType = DbgMemIsValidReadPtr(addr) ? TokenType::Address : TokenType::Value;
|
||||
addToken(displacementType, printValue(value, false, _maxModuleLength), value);
|
||||
}
|
||||
|
@ -556,13 +561,13 @@ bool CapstoneTokenizer::tokenizeMemOperand(const ZydisDecodedOperand & op)
|
|||
if(mem.disp.value)
|
||||
{
|
||||
char operatorText = '+';
|
||||
TokenValue value(op.size, duint(mem.disp.value));
|
||||
TokenValue value(opsize, duint(mem.disp.value));
|
||||
auto displacementType = DbgMemIsValidReadPtr(duint(mem.disp.value)) ? TokenType::Address : TokenType::Value;
|
||||
QString valueText;
|
||||
if(mem.disp.value < 0)
|
||||
{
|
||||
operatorText = '-';
|
||||
valueText = printValue(TokenValue(op.size, duint(mem.disp.value * -1)), false, _maxModuleLength);
|
||||
valueText = printValue(TokenValue(opsize, duint(mem.disp.value * -1)), false, _maxModuleLength);
|
||||
}
|
||||
else
|
||||
valueText = printValue(value, false, _maxModuleLength);
|
||||
|
|
|
@ -71,6 +71,11 @@ public:
|
|||
value(0)
|
||||
{
|
||||
}
|
||||
|
||||
bool operator == (const TokenValue & rhs) const
|
||||
{
|
||||
return /*size == rhs.size &&*/ value == rhs.value;
|
||||
}
|
||||
};
|
||||
|
||||
struct SingleToken
|
||||
|
@ -95,6 +100,11 @@ public:
|
|||
SingleToken(type, text, TokenValue())
|
||||
{
|
||||
}
|
||||
|
||||
bool operator == (const SingleToken & rhs) const
|
||||
{
|
||||
return type == rhs.type && text == rhs.text && value == rhs.value;
|
||||
}
|
||||
};
|
||||
|
||||
struct InstructionToken
|
||||
|
|
|
@ -0,0 +1,618 @@
|
|||
#include "cs_capstone_gui.h"
|
||||
#include "Configuration.h"
|
||||
#include "StringUtil.h"
|
||||
#include "CachedFontMetrics.h"
|
||||
|
||||
/**
 * @brief Create a tokenizer with every formatting option switched off.
 *
 * @param[in] maxModuleLength Initial value of the maximum module length setting
 */
CsCapstoneTokenizer::CsCapstoneTokenizer(int maxModuleLength)
    : _maxModuleLength(maxModuleLength),
      _success(false),
      isNop(false),
      _mnemonicType(TokenType::Uncategorized)
{
    // All seven switches start disabled; UpdateConfig() loads the real settings later
    const bool disabled = false;
    SetConfig(disabled, disabled, disabled, disabled, disabled, disabled, disabled);
}
|
||||
|
||||
// One TokenColor per token type, indexed by int(TokenType); entries are written by addColorName()
CsCapstoneTokenizer::TokenColor colorNamesMap[CsCapstoneTokenizer::TokenType::Last];
|
||||
// Maps a string to the id of the group it was added with by addStringsToPool()
QHash<QString, int> CsCapstoneTokenizer::stringPoolMap;
|
||||
// Id given to the next group passed to addStringsToPool(); reset to 0 by UpdateStringPool()
int CsCapstoneTokenizer::poolId = 0;
|
||||
|
||||
/**
 * @brief Store the color pair for one token type in the color table.
 *
 * @param[in] type            Token type whose table entry is replaced
 * @param[in] color           Color name for the token text
 * @param[in] backgroundColor Color name for the token background
 */
void CsCapstoneTokenizer::addColorName(TokenType type, QString color, QString backgroundColor)
{
    const int slot = int(type);
    colorNamesMap[slot] = TokenColor(color, backgroundColor);
}
|
||||
|
||||
void CsCapstoneTokenizer::addStringsToPool(const QString & strings)
|
||||
{
|
||||
QStringList stringList = strings.split(' ', QString::SkipEmptyParts);
|
||||
for(const QString & string : stringList)
|
||||
stringPoolMap.insert(string, poolId);
|
||||
poolId++;
|
||||
}
|
||||
|
||||
void CsCapstoneTokenizer::UpdateColors()
|
||||
{
|
||||
//filling
|
||||
addColorName(TokenType::Comma, "InstructionCommaColor", "InstructionCommaBackgroundColor");
|
||||
addColorName(TokenType::Space, "", "");
|
||||
addColorName(TokenType::ArgumentSpace, "", "");
|
||||
addColorName(TokenType::MemoryOperatorSpace, "", "");
|
||||
//general instruction parts
|
||||
addColorName(TokenType::Prefix, "InstructionPrefixColor", "InstructionPrefixBackgroundColor");
|
||||
addColorName(TokenType::Uncategorized, "InstructionUncategorizedColor", "InstructionUncategorizedBackgroundColor");
|
||||
addColorName(TokenType::Address, "InstructionAddressColor", "InstructionAddressBackgroundColor"); //jump/call destinations
|
||||
addColorName(TokenType::Value, "InstructionValueColor", "InstructionValueBackgroundColor");
|
||||
//mnemonics
|
||||
addColorName(TokenType::MnemonicNormal, "InstructionMnemonicColor", "InstructionMnemonicBackgroundColor");
|
||||
addColorName(TokenType::MnemonicPushPop, "InstructionPushPopColor", "InstructionPushPopBackgroundColor");
|
||||
addColorName(TokenType::MnemonicCall, "InstructionCallColor", "InstructionCallBackgroundColor");
|
||||
addColorName(TokenType::MnemonicRet, "InstructionRetColor", "InstructionRetBackgroundColor");
|
||||
addColorName(TokenType::MnemonicCondJump, "InstructionConditionalJumpColor", "InstructionConditionalJumpBackgroundColor");
|
||||
addColorName(TokenType::MnemonicUncondJump, "InstructionUnconditionalJumpColor", "InstructionUnconditionalJumpBackgroundColor");
|
||||
addColorName(TokenType::MnemonicNop, "InstructionNopColor", "InstructionNopBackgroundColor");
|
||||
addColorName(TokenType::MnemonicFar, "InstructionFarColor", "InstructionFarBackgroundColor");
|
||||
addColorName(TokenType::MnemonicInt3, "InstructionInt3Color", "InstructionInt3BackgroundColor");
|
||||
addColorName(TokenType::MnemonicUnusual, "InstructionUnusualColor", "InstructionUnusualBackgroundColor");
|
||||
//memory
|
||||
addColorName(TokenType::MemorySize, "InstructionMemorySizeColor", "InstructionMemorySizeBackgroundColor");
|
||||
addColorName(TokenType::MemorySegment, "InstructionMemorySegmentColor", "InstructionMemorySegmentBackgroundColor");
|
||||
addColorName(TokenType::MemoryBrackets, "InstructionMemoryBracketsColor", "InstructionMemoryBracketsBackgroundColor");
|
||||
addColorName(TokenType::MemoryStackBrackets, "InstructionMemoryStackBracketsColor", "InstructionMemoryStackBracketsBackgroundColor");
|
||||
addColorName(TokenType::MemoryBaseRegister, "InstructionMemoryBaseRegisterColor", "InstructionMemoryBaseRegisterBackgroundColor");
|
||||
addColorName(TokenType::MemoryIndexRegister, "InstructionMemoryIndexRegisterColor", "InstructionMemoryIndexRegisterBackgroundColor");
|
||||
addColorName(TokenType::MemoryScale, "InstructionMemoryScaleColor", "InstructionMemoryScaleBackgroundColor");
|
||||
addColorName(TokenType::MemoryOperator, "InstructionMemoryOperatorColor", "InstructionMemoryOperatorBackgroundColor");
|
||||
//registers
|
||||
addColorName(TokenType::GeneralRegister, "InstructionGeneralRegisterColor", "InstructionGeneralRegisterBackgroundColor");
|
||||
addColorName(TokenType::FpuRegister, "InstructionFpuRegisterColor", "InstructionFpuRegisterBackgroundColor");
|
||||
addColorName(TokenType::MmxRegister, "InstructionMmxRegisterColor", "InstructionMmxRegisterBackgroundColor");
|
||||
addColorName(TokenType::XmmRegister, "InstructionXmmRegisterColor", "InstructionXmmRegisterBackgroundColor");
|
||||
addColorName(TokenType::YmmRegister, "InstructionYmmRegisterColor", "InstructionYmmRegisterBackgroundColor");
|
||||
addColorName(TokenType::ZmmRegister, "InstructionZmmRegisterColor", "InstructionZmmRegisterBackgroundColor");
|
||||
}
|
||||
|
||||
void CsCapstoneTokenizer::UpdateStringPool()
|
||||
{
|
||||
poolId = 0;
|
||||
stringPoolMap.clear();
|
||||
// These registers must be in lower case.
|
||||
addStringsToPool("rax eax ax al ah");
|
||||
addStringsToPool("rbx ebx bx bl bh");
|
||||
addStringsToPool("rcx ecx cx cl ch");
|
||||
addStringsToPool("rdx edx dx dl dh");
|
||||
addStringsToPool("rsi esi si sil");
|
||||
addStringsToPool("rdi edi di dil");
|
||||
addStringsToPool("rbp ebp bp bpl");
|
||||
addStringsToPool("rsp esp sp spl");
|
||||
addStringsToPool("r8 r8d r8w r8b");
|
||||
addStringsToPool("r9 r9d r9w r9b");
|
||||
addStringsToPool("r10 r10d r10w r10b");
|
||||
addStringsToPool("r11 r11d r11w r11b");
|
||||
addStringsToPool("r12 r12d r12w r12b");
|
||||
addStringsToPool("r13 r13d r13w r13b");
|
||||
addStringsToPool("r14 r14d r14w r14b");
|
||||
addStringsToPool("r15 r15d r15w r15b");
|
||||
addStringsToPool("xmm0 ymm0");
|
||||
addStringsToPool("xmm1 ymm1");
|
||||
addStringsToPool("xmm2 ymm2");
|
||||
addStringsToPool("xmm3 ymm3");
|
||||
addStringsToPool("xmm4 ymm4");
|
||||
addStringsToPool("xmm5 ymm5");
|
||||
addStringsToPool("xmm6 ymm6");
|
||||
addStringsToPool("xmm7 ymm7");
|
||||
addStringsToPool("xmm8 ymm8");
|
||||
addStringsToPool("xmm9 ymm9");
|
||||
addStringsToPool("xmm10 ymm10");
|
||||
addStringsToPool("xmm11 ymm11");
|
||||
addStringsToPool("xmm12 ymm12");
|
||||
addStringsToPool("xmm13 ymm13");
|
||||
addStringsToPool("xmm14 ymm14");
|
||||
addStringsToPool("xmm15 ymm15");
|
||||
}
|
||||
|
||||
/**
 * @brief Disassemble one instruction and split it into tokens.
 *
 * @param[in]  addr        Address of the instruction
 * @param[in]  data        Bytes to disassemble
 * @param[in]  datasize    Number of bytes available at data
 * @param[out] instruction Receives the tokens; left untouched when false is returned
 *
 * @return false when the mnemonic or an operand could not be tokenized, true otherwise
 *         (bytes that fail to disassemble still yield true with a single "???" token)
 */
bool CsCapstoneTokenizer::Tokenize(duint addr, const unsigned char* data, int datasize, InstructionToken & instruction)
{
    _inst = InstructionToken();
    _success = _cp.DisassembleSafe(addr, data, datasize);

    if(!_success)
    {
        // Undecodable bytes are shown as a placeholder mnemonic
        isNop = false;
        addToken(TokenType::MnemonicUnusual, "???");
    }
    else
    {
        isNop = _cp.IsNop();
        if(!tokenizeMnemonic())
            return false;

        for(int opIndex = 0; opIndex < _cp.OpCount(); opIndex++)
        {
            // Separator in front of every operand except the first
            if(opIndex != 0)
            {
                addToken(TokenType::Comma, ",");
                if(_bArgumentSpaces)
                    addToken(TokenType::ArgumentSpace, " ");
            }

            if(!tokenizeOperand(_cp[opIndex]))
                return false;
        }
    }

    if(_bNoHighlightOperands)
    {
        // Drop trailing spaces, then give every token the type of the mnemonic
        while(_inst.tokens.size() && _inst.tokens.back().type == TokenType::Space)
            _inst.tokens.pop_back();

        for(SingleToken & current : _inst.tokens)
            current.type = _mnemonicType;
    }

    instruction = _inst;
    return true;
}
|
||||
|
||||
bool CsCapstoneTokenizer::TokenizeData(const QString & datatype, const QString & data, InstructionToken & instruction)
|
||||
{
|
||||
_inst = InstructionToken();
|
||||
isNop = false;
|
||||
|
||||
if(!tokenizeMnemonic(TokenType::MnemonicNormal, datatype))
|
||||
return false;
|
||||
|
||||
addToken(TokenType::Value, data);
|
||||
|
||||
instruction = _inst;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void CsCapstoneTokenizer::UpdateConfig()
|
||||
{
|
||||
SetConfig(ConfigBool("Disassembler", "Uppercase"),
|
||||
ConfigBool("Disassembler", "TabbedMnemonic"),
|
||||
ConfigBool("Disassembler", "ArgumentSpaces"),
|
||||
ConfigBool("Disassembler", "MemorySpaces"),
|
||||
ConfigBool("Disassembler", "NoHighlightOperands"),
|
||||
ConfigBool("Disassembler", "NoCurrentModuleText"),
|
||||
ConfigBool("Disassembler", "0xPrefixValues"));
|
||||
_maxModuleLength = (int)ConfigUint("Disassembler", "MaxModuleSize");
|
||||
UpdateStringPool();
|
||||
}
|
||||
|
||||
void CsCapstoneTokenizer::SetConfig(bool bUppercase, bool bTabbedMnemonic, bool bArgumentSpaces, bool bMemorySpaces, bool bNoHighlightOperands, bool bNoCurrentModuleText, bool b0xPrefixValues)
|
||||
{
|
||||
_bUppercase = bUppercase;
|
||||
_bTabbedMnemonic = bTabbedMnemonic;
|
||||
_bArgumentSpaces = bArgumentSpaces;
|
||||
_bMemorySpaces = bMemorySpaces;
|
||||
_bNoHighlightOperands = bNoHighlightOperands;
|
||||
_bNoCurrentModuleText = bNoCurrentModuleText;
|
||||
_b0xPrefixValues = b0xPrefixValues;
|
||||
}
|
||||
|
||||
int CsCapstoneTokenizer::Size() const
|
||||
{
|
||||
return _success ? _cp.Size() : 1;
|
||||
}
|
||||
|
||||
// Read-only access to the Capstone wrapper owned by this tokenizer.
const Capstone & CsCapstoneTokenizer::GetCapstone() const
{
    const Capstone & engine = _cp;
    return engine;
}
|
||||
|
||||
void CsCapstoneTokenizer::TokenToRichText(const InstructionToken & instr, RichTextPainter::List & richTextList, const SingleToken* highlightToken)
|
||||
{
|
||||
QColor highlightColor = ConfigColor("InstructionHighlightColor");
|
||||
for(const auto & token : instr.tokens)
|
||||
{
|
||||
RichTextPainter::CustomRichText_t richText;
|
||||
richText.highlight = TokenEquals(&token, highlightToken);
|
||||
richText.highlightColor = highlightColor;
|
||||
richText.flags = RichTextPainter::FlagNone;
|
||||
richText.text = token.text;
|
||||
if(token.type < TokenType::Last)
|
||||
{
|
||||
const auto & tokenColor = colorNamesMap[int(token.type)];
|
||||
richText.flags = tokenColor.flags;
|
||||
richText.textColor = tokenColor.color;
|
||||
richText.textBackground = tokenColor.backgroundColor;
|
||||
}
|
||||
richTextList.push_back(richText);
|
||||
}
|
||||
}
|
||||
|
||||
bool CsCapstoneTokenizer::TokenFromX(const InstructionToken & instr, SingleToken & token, int x, CachedFontMetrics* fontMetrics)
|
||||
{
|
||||
if(x < instr.x) //before the first token
|
||||
return false;
|
||||
int len = int(instr.tokens.size());
|
||||
for(int i = 0, xStart = instr.x; i < len; i++)
|
||||
{
|
||||
const auto & curToken = instr.tokens.at(i);
|
||||
int curWidth = fontMetrics->width(curToken.text);
|
||||
int xEnd = xStart + curWidth;
|
||||
if(x >= xStart && x < xEnd)
|
||||
{
|
||||
token = curToken;
|
||||
return true;
|
||||
}
|
||||
xStart = xEnd;
|
||||
}
|
||||
return false; //not found
|
||||
}
|
||||
|
||||
bool CsCapstoneTokenizer::IsHighlightableToken(const SingleToken & token)
|
||||
{
|
||||
switch(token.type)
|
||||
{
|
||||
case TokenType::Comma:
|
||||
case TokenType::Space:
|
||||
case TokenType::ArgumentSpace:
|
||||
case TokenType::Uncategorized:
|
||||
case TokenType::MemoryOperatorSpace:
|
||||
case TokenType::MemoryBrackets:
|
||||
case TokenType::MemoryStackBrackets:
|
||||
case TokenType::MemoryOperator:
|
||||
return false;
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CsCapstoneTokenizer::tokenTextPoolEquals(const QString & a, const QString & b)
|
||||
{
|
||||
if(a.compare(b, Qt::CaseInsensitive) == 0)
|
||||
return true;
|
||||
auto found1 = stringPoolMap.find(a.toLower());
|
||||
auto found2 = stringPoolMap.find(b.toLower());
|
||||
if(found1 == stringPoolMap.end() || found2 == stringPoolMap.end())
|
||||
return false;
|
||||
return found1.value() == found2.value();
|
||||
}
|
||||
|
||||
bool CsCapstoneTokenizer::TokenEquals(const SingleToken* a, const SingleToken* b, bool ignoreSize)
|
||||
{
|
||||
if(!a || !b)
|
||||
return false;
|
||||
if(a->value.size != 0 && b->value.size != 0) //we have a value
|
||||
{
|
||||
if(!ignoreSize && a->value.size != b->value.size)
|
||||
return false;
|
||||
else if(a->value.value != b->value.value)
|
||||
return false;
|
||||
}
|
||||
return tokenTextPoolEquals(a->text, b->text);
|
||||
}
|
||||
|
||||
// Appends a token to the instruction being built.
// - Text of non-spacing tokens is trimmed.
// - Text is uppercased when the uppercase option is on and the token has no value.
// - While the current instruction is a NOP every token is stored as MnemonicNop.
void CsCapstoneTokenizer::addToken(TokenType type, QString text, const TokenValue & value)
{
    const bool isSpacing = type == TokenType::Space
                           || type == TokenType::ArgumentSpace
                           || type == TokenType::MemoryOperatorSpace;
    if(!isSpacing)
        text = text.trimmed();

    if(_bUppercase && value.size == 0)
        text = text.toUpper();

    const TokenType storedType = isNop ? TokenType::MnemonicNop : type;
    _inst.tokens.push_back(SingleToken(storedType, text, value));
}
|
||||
|
||||
// Convenience overload: appends a token that carries no value.
void CsCapstoneTokenizer::addToken(TokenType type, const QString & text)
{
    const TokenValue noValue;
    addToken(type, text, noValue);
}
|
||||
|
||||
void CsCapstoneTokenizer::addMemoryOperator(char operatorText)
|
||||
{
|
||||
if(_bMemorySpaces)
|
||||
addToken(TokenType::MemoryOperatorSpace, " ");
|
||||
QString text;
|
||||
text += operatorText;
|
||||
addToken(TokenType::MemoryOperator, text);
|
||||
if(_bMemorySpaces)
|
||||
addToken(TokenType::MemoryOperatorSpace, " ");
|
||||
}
|
||||
|
||||
// Formats a value for display. Depending on what the debugger knows about the
// address the result is one of:
//   <module.label>   label and module found
//   module.addr      module only
//   <label>          label only
//   0xADDR / ADDR    neither (prefix controlled by the 0x-prefix option)
//
// expandModule    - when false no module lookup is performed
// maxModuleLength - module text is truncated to this length, -1 disables truncation
QString CsCapstoneTokenizer::printValue(const TokenValue & value, bool expandModule, int maxModuleLength) const
{
    const duint addr = value.value;

    //label lookup
    char label_[MAX_LABEL_SIZE] = "";
    const bool bHasLabel = DbgGetLabelAt(addr, SEG_DEFAULT, label_);
    const QString labelText(label_);

    //with the no-current-module option, addresses inside the memory region of
    //the current instruction never get a module name
    bool insideCurrentRegion = false;
    if(_bNoCurrentModuleText)
    {
        duint size, base;
        base = DbgMemFindBaseAddr(this->GetCapstone().Address(), &size);
        insideCurrentRegion = addr >= base && addr < base + size;
    }

    //module lookup (skipped entirely when suppressed above or not requested)
    char module_[MAX_MODULE_SIZE] = "";
    const bool bHasModule = !insideCurrentRegion
                            && expandModule
                            && DbgGetModuleAt(addr, module_)
                            && !labelText.startsWith("JMP.&");

    QString moduleText(module_);
    if(maxModuleLength != -1)
        moduleText.truncate(maxModuleLength);
    if(moduleText.length())
        moduleText += ".";

    const QString addrText = ToHexString(addr);

    if(bHasModule)
    {
        if(bHasLabel) //<module.label>
            return QString("<%1%2>").arg(moduleText).arg(labelText);
        return QString("%1%2").arg(moduleText).arg(addrText); //module.addr
    }
    if(bHasLabel) //<label>
        return QString("<%1>").arg(labelText);

    QString finalText;
    if(_b0xPrefixValues)
        finalText = QString("0x") + addrText;
    else
        finalText = addrText;
    return finalText;
}
|
||||
|
||||
bool CsCapstoneTokenizer::tokenizePrefix()
|
||||
{
|
||||
bool hasPrefix = true;
|
||||
QString prefixText;
|
||||
//TODO: look at multiple prefixes on one instruction (https://github.com/aquynh/capstone/blob/921904888d7c1547c558db3a24fa64bcf97dede4/arch/X86/X86DisassemblerDecoder.c#L540)
|
||||
switch(_cp.x86().prefix[0])
|
||||
{
|
||||
case X86_PREFIX_LOCK:
|
||||
prefixText = "lock";
|
||||
break;
|
||||
case X86_PREFIX_REP:
|
||||
prefixText = "rep";
|
||||
break;
|
||||
case X86_PREFIX_REPNE:
|
||||
prefixText = "repne";
|
||||
break;
|
||||
default:
|
||||
hasPrefix = false;
|
||||
}
|
||||
|
||||
if(hasPrefix)
|
||||
{
|
||||
addToken(TokenType::Prefix, prefixText);
|
||||
addToken(TokenType::Space, " ");
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CsCapstoneTokenizer::tokenizeMnemonic()
|
||||
{
|
||||
QString mnemonic = QString(_cp.Mnemonic().c_str());
|
||||
_mnemonicType = TokenType::MnemonicNormal;
|
||||
auto id = _cp.GetId();
|
||||
if(isNop)
|
||||
_mnemonicType = TokenType::MnemonicNop;
|
||||
else if(_cp.InGroup(CS_GRP_CALL))
|
||||
_mnemonicType = TokenType::MnemonicCall;
|
||||
else if(_cp.InGroup(CS_GRP_JUMP) || _cp.IsLoop())
|
||||
{
|
||||
switch(id)
|
||||
{
|
||||
case X86_INS_JMP:
|
||||
case X86_INS_LJMP:
|
||||
_mnemonicType = TokenType::MnemonicUncondJump;
|
||||
break;
|
||||
default:
|
||||
_mnemonicType = TokenType::MnemonicCondJump;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if(_cp.IsInt3())
|
||||
_mnemonicType = TokenType::MnemonicInt3;
|
||||
else if(_cp.IsUnusual())
|
||||
_mnemonicType = TokenType::MnemonicUnusual;
|
||||
else if(_cp.InGroup(CS_GRP_RET))
|
||||
_mnemonicType = TokenType::MnemonicRet;
|
||||
else
|
||||
{
|
||||
switch(id)
|
||||
{
|
||||
case X86_INS_PUSH:
|
||||
case X86_INS_PUSHF:
|
||||
case X86_INS_PUSHFD:
|
||||
case X86_INS_PUSHFQ:
|
||||
case X86_INS_PUSHAL:
|
||||
case X86_INS_PUSHAW:
|
||||
case X86_INS_POP:
|
||||
case X86_INS_POPF:
|
||||
case X86_INS_POPFD:
|
||||
case X86_INS_POPFQ:
|
||||
case X86_INS_POPAL:
|
||||
case X86_INS_POPAW:
|
||||
_mnemonicType = TokenType::MnemonicPushPop;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
tokenizeMnemonic(_mnemonicType, mnemonic);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Emits the mnemonic token followed by a single space token. With the
// tabbed-mnemonic option, mnemonics shorter than 7 characters are first padded
// with one space token per missing character. Always returns true.
bool CsCapstoneTokenizer::tokenizeMnemonic(TokenType type, const QString & mnemonic)
{
    addToken(type, mnemonic);

    if(_bTabbedMnemonic)
    {
        for(int missing = 7 - mnemonic.length(); missing > 0; missing--)
            addToken(TokenType::Space, " ");
    }

    addToken(TokenType::Space, " ");
    return true;
}
|
||||
|
||||
bool CsCapstoneTokenizer::tokenizeOperand(const cs_x86_op & op)
|
||||
{
|
||||
switch(op.type)
|
||||
{
|
||||
case X86_OP_REG:
|
||||
return tokenizeRegOperand(op);
|
||||
case X86_OP_IMM:
|
||||
return tokenizeImmOperand(op);
|
||||
case X86_OP_MEM:
|
||||
return tokenizeMemOperand(op);
|
||||
case X86_OP_INVALID:
|
||||
return tokenizeInvalidOperand(op);
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool CsCapstoneTokenizer::tokenizeRegOperand(const cs_x86_op & op)
|
||||
{
|
||||
auto registerType = TokenType::GeneralRegister;
|
||||
auto reg = op.reg;
|
||||
if(reg >= X86_REG_FP0 && reg <= X86_REG_FP7)
|
||||
registerType = TokenType::FpuRegister;
|
||||
else if(reg >= X86_REG_ST0 && reg <= X86_REG_ST7)
|
||||
registerType = TokenType::FpuRegister;
|
||||
else if(reg >= X86_REG_MM0 && reg <= X86_REG_MM7)
|
||||
registerType = TokenType::MmxRegister;
|
||||
else if(reg >= X86_REG_XMM0 && reg <= X86_REG_XMM31)
|
||||
registerType = TokenType::XmmRegister;
|
||||
else if(reg >= X86_REG_YMM0 && reg <= X86_REG_YMM31)
|
||||
registerType = TokenType::YmmRegister;
|
||||
else if(reg >= X86_REG_ZMM0 && reg <= X86_REG_ZMM31)
|
||||
registerType = TokenType::ZmmRegister;
|
||||
else if(reg == ArchValue(X86_REG_FS, X86_REG_GS))
|
||||
registerType = TokenType::MnemonicUnusual;
|
||||
addToken(registerType, _cp.RegName(reg));
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CsCapstoneTokenizer::tokenizeImmOperand(const cs_x86_op & op)
|
||||
{
|
||||
auto value = duint(op.imm) & (duint(-1) >> (op.size ? 8 * (sizeof(duint) - op.size) : 0));
|
||||
auto valueType = TokenType::Value;
|
||||
if(_cp.InGroup(CS_GRP_JUMP) || _cp.InGroup(CS_GRP_CALL) || _cp.IsLoop())
|
||||
valueType = TokenType::Address;
|
||||
auto tokenValue = TokenValue(op.size, value);
|
||||
addToken(valueType, printValue(tokenValue, true, _maxModuleLength), tokenValue);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CsCapstoneTokenizer::tokenizeMemOperand(const cs_x86_op & op)
|
||||
{
|
||||
//memory size
|
||||
const char* sizeText = _cp.MemSizeName(op.size);
|
||||
if(!sizeText)
|
||||
return false;
|
||||
addToken(TokenType::MemorySize, QString(sizeText) + " ptr");
|
||||
addToken(TokenType::Space, " ");
|
||||
|
||||
//memory segment
|
||||
const auto & mem = op.mem;
|
||||
const char* segmentText = _cp.RegName(mem.segment);
|
||||
if(mem.segment == X86_REG_INVALID) //segment not set
|
||||
{
|
||||
switch(mem.base)
|
||||
{
|
||||
#ifdef _WIN64
|
||||
case X86_REG_RSP:
|
||||
case X86_REG_RBP:
|
||||
#else //x86
|
||||
case X86_REG_ESP:
|
||||
case X86_REG_EBP:
|
||||
#endif //_WIN64
|
||||
segmentText = "ss";
|
||||
break;
|
||||
default:
|
||||
segmentText = "ds";
|
||||
break;
|
||||
}
|
||||
}
|
||||
auto segmentType = op.reg == ArchValue(X86_REG_FS, X86_REG_GS) ? TokenType::MnemonicUnusual : TokenType::MemorySegment;
|
||||
addToken(segmentType, segmentText);
|
||||
addToken(TokenType::Uncategorized, ":");
|
||||
|
||||
//memory opening bracket
|
||||
auto bracketsType = TokenType::MemoryBrackets;
|
||||
switch(mem.base)
|
||||
{
|
||||
case X86_REG_ESP:
|
||||
case X86_REG_RSP:
|
||||
case X86_REG_EBP:
|
||||
case X86_REG_RBP:
|
||||
bracketsType = TokenType::MemoryStackBrackets;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
addToken(bracketsType, "[");
|
||||
|
||||
//stuff inside the brackets
|
||||
if(mem.base == X86_REG_RIP) //rip-relative (#replacement)
|
||||
{
|
||||
duint addr = _cp.Address() + duint(mem.disp) + _cp.Size();
|
||||
TokenValue value = TokenValue(op.size, addr);
|
||||
auto displacementType = DbgMemIsValidReadPtr(addr) ? TokenType::Address : TokenType::Value;
|
||||
addToken(displacementType, printValue(value, false, _maxModuleLength), value);
|
||||
}
|
||||
else //#base + #index * #scale + #displacement
|
||||
{
|
||||
bool prependPlus = false;
|
||||
if(mem.base != X86_REG_INVALID) //base register
|
||||
{
|
||||
addToken(TokenType::MemoryBaseRegister, _cp.RegName(mem.base));
|
||||
prependPlus = true;
|
||||
}
|
||||
if(mem.index != X86_REG_INVALID) //index register
|
||||
{
|
||||
if(prependPlus)
|
||||
addMemoryOperator('+');
|
||||
addToken(TokenType::MemoryIndexRegister, _cp.RegName(mem.index));
|
||||
if(mem.scale > 1)
|
||||
{
|
||||
addMemoryOperator('*');
|
||||
addToken(TokenType::MemoryScale, QString().sprintf("%d", mem.scale));
|
||||
}
|
||||
prependPlus = true;
|
||||
}
|
||||
if(mem.disp)
|
||||
{
|
||||
char operatorText = '+';
|
||||
TokenValue value(op.size, duint(mem.disp));
|
||||
auto displacementType = DbgMemIsValidReadPtr(duint(mem.disp)) ? TokenType::Address : TokenType::Value;
|
||||
QString valueText;
|
||||
if(mem.disp < 0)
|
||||
{
|
||||
operatorText = '-';
|
||||
valueText = printValue(TokenValue(op.size, duint(mem.disp * -1)), false, _maxModuleLength);
|
||||
}
|
||||
else
|
||||
valueText = printValue(value, false, _maxModuleLength);
|
||||
if(prependPlus)
|
||||
addMemoryOperator(operatorText);
|
||||
addToken(displacementType, valueText, value);
|
||||
}
|
||||
else if(!prependPlus)
|
||||
addToken(TokenType::Value, "0");
|
||||
}
|
||||
|
||||
//closing bracket
|
||||
addToken(bracketsType, "]");
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CsCapstoneTokenizer::tokenizeInvalidOperand(const cs_x86_op & op)
|
||||
{
|
||||
addToken(TokenType::MnemonicUnusual, "???");
|
||||
return true;
|
||||
}
|
|
@ -0,0 +1,197 @@
|
|||
#ifndef CS_CAPSTONE_GUI_H
|
||||
#define CS_CAPSTONE_GUI_H
|
||||
|
||||
#include <capstone_wrapper.h>
|
||||
#include "RichTextPainter.h"
|
||||
#include "Configuration.h"
|
||||
#include <map>
|
||||
#include <QHash>
|
||||
#include <QtCore>
|
||||
|
||||
class CsCapstoneTokenizer
|
||||
{
|
||||
public:
|
||||
enum class TokenType
|
||||
{
|
||||
//filling
|
||||
Comma = 0,
|
||||
Space,
|
||||
ArgumentSpace,
|
||||
MemoryOperatorSpace,
|
||||
//general instruction parts
|
||||
Prefix,
|
||||
Uncategorized,
|
||||
//mnemonics
|
||||
MnemonicNormal,
|
||||
MnemonicPushPop,
|
||||
MnemonicCall,
|
||||
MnemonicRet,
|
||||
MnemonicCondJump,
|
||||
MnemonicUncondJump,
|
||||
MnemonicNop,
|
||||
MnemonicFar,
|
||||
MnemonicInt3,
|
||||
MnemonicUnusual,
|
||||
//values
|
||||
Address, //jump/call destinations or displacements inside memory
|
||||
Value,
|
||||
//memory
|
||||
MemorySize,
|
||||
MemorySegment,
|
||||
MemoryBrackets,
|
||||
MemoryStackBrackets,
|
||||
MemoryBaseRegister,
|
||||
MemoryIndexRegister,
|
||||
MemoryScale,
|
||||
MemoryOperator, //'+', '-' and '*'
|
||||
//registers
|
||||
GeneralRegister,
|
||||
FpuRegister,
|
||||
MmxRegister,
|
||||
XmmRegister,
|
||||
YmmRegister,
|
||||
ZmmRegister,
|
||||
//last
|
||||
Last
|
||||
};
|
||||
|
||||
struct TokenValue
|
||||
{
|
||||
int size; //value size (in bytes), zero means no value
|
||||
duint value; //actual value
|
||||
|
||||
TokenValue(int size, duint value) :
|
||||
size(size),
|
||||
value(value)
|
||||
{
|
||||
}
|
||||
|
||||
TokenValue() :
|
||||
size(0),
|
||||
value(0)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
struct SingleToken
|
||||
{
|
||||
TokenType type; //token type
|
||||
QString text; //token text
|
||||
TokenValue value; //token value (if applicable)
|
||||
|
||||
SingleToken() :
|
||||
type(TokenType::Uncategorized)
|
||||
{
|
||||
}
|
||||
|
||||
SingleToken(TokenType type, const QString & text, const TokenValue & value) :
|
||||
type(type),
|
||||
text(text),
|
||||
value(value)
|
||||
{
|
||||
}
|
||||
|
||||
SingleToken(TokenType type, const QString & text) :
|
||||
SingleToken(type, text, TokenValue())
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
struct InstructionToken
|
||||
{
|
||||
std::vector<SingleToken> tokens; //list of tokens that form the instruction
|
||||
int x; //x of the first character
|
||||
|
||||
InstructionToken()
|
||||
{
|
||||
tokens.clear();
|
||||
x = 0;
|
||||
}
|
||||
};
|
||||
|
||||
struct TokenColor
|
||||
{
|
||||
RichTextPainter::CustomRichTextFlags flags;
|
||||
QColor color;
|
||||
QColor backgroundColor;
|
||||
|
||||
TokenColor(QString color, QString backgroundColor)
|
||||
{
|
||||
if(color.length() && backgroundColor.length())
|
||||
{
|
||||
this->flags = RichTextPainter::FlagAll;
|
||||
this->color = ConfigColor(color);
|
||||
this->backgroundColor = ConfigColor(backgroundColor);
|
||||
}
|
||||
else if(color.length())
|
||||
{
|
||||
this->flags = RichTextPainter::FlagColor;
|
||||
this->color = ConfigColor(color);
|
||||
}
|
||||
else if(backgroundColor.length())
|
||||
{
|
||||
this->flags = RichTextPainter::FlagBackground;
|
||||
this->backgroundColor = ConfigColor(backgroundColor);
|
||||
}
|
||||
else
|
||||
this->flags = RichTextPainter::FlagNone;
|
||||
}
|
||||
|
||||
TokenColor()
|
||||
: TokenColor("", "")
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
CsCapstoneTokenizer(int maxModuleLength);
|
||||
bool Tokenize(duint addr, const unsigned char* data, int datasize, InstructionToken & instruction);
|
||||
bool TokenizeData(const QString & datatype, const QString & data, InstructionToken & instruction);
|
||||
void UpdateConfig();
|
||||
void SetConfig(bool bUppercase, bool bTabbedMnemonic, bool bArgumentSpaces, bool bMemorySpaces, bool bNoHighlightOperands, bool bNoCurrentModuleText, bool b0xPrefixValues);
|
||||
int Size() const;
|
||||
const Capstone & GetCapstone() const;
|
||||
|
||||
static void UpdateColors();
|
||||
static void UpdateStringPool();
|
||||
static void TokenToRichText(const InstructionToken & instr, RichTextPainter::List & richTextList, const SingleToken* highlightToken);
|
||||
static bool TokenFromX(const InstructionToken & instr, SingleToken & token, int x, CachedFontMetrics* fontMetrics);
|
||||
static bool IsHighlightableToken(const SingleToken & token);
|
||||
static bool TokenEquals(const SingleToken* a, const SingleToken* b, bool ignoreSize = true);
|
||||
static void addColorName(TokenType type, QString color, QString backgroundColor);
|
||||
static void addStringsToPool(const QString & regs);
|
||||
static bool tokenTextPoolEquals(const QString & a, const QString & b);
|
||||
|
||||
private:
|
||||
Capstone _cp;
|
||||
bool isNop;
|
||||
InstructionToken _inst;
|
||||
bool _success;
|
||||
int _maxModuleLength;
|
||||
bool _bUppercase;
|
||||
bool _bTabbedMnemonic;
|
||||
bool _bArgumentSpaces;
|
||||
bool _bMemorySpaces;
|
||||
bool _bNoHighlightOperands;
|
||||
bool _bNoCurrentModuleText;
|
||||
bool _b0xPrefixValues;
|
||||
TokenType _mnemonicType;
|
||||
|
||||
void addToken(TokenType type, QString text, const TokenValue & value);
|
||||
void addToken(TokenType type, const QString & text);
|
||||
void addMemoryOperator(char operatorText);
|
||||
QString printValue(const TokenValue & value, bool expandModule, int maxModuleLength) const;
|
||||
|
||||
static QHash<QString, int> stringPoolMap;
|
||||
static int poolId;
|
||||
|
||||
bool tokenizePrefix();
|
||||
bool tokenizeMnemonic();
|
||||
bool tokenizeMnemonic(TokenType type, const QString & mnemonic);
|
||||
bool tokenizeOperand(const cs_x86_op & op);
|
||||
bool tokenizeRegOperand(const cs_x86_op & op);
|
||||
bool tokenizeImmOperand(const cs_x86_op & op);
|
||||
bool tokenizeMemOperand(const cs_x86_op & op);
|
||||
bool tokenizeInvalidOperand(const cs_x86_op & op);
|
||||
};
|
||||
|
||||
#endif //CS_CAPSTONE_GUI_H
|
|
@ -1,5 +1,6 @@
|
|||
#include "main.h"
|
||||
#include "zydis_wrapper.h"
|
||||
#include "capstone_wrapper.h"
|
||||
#include "MainWindow.h"
|
||||
#include "Configuration.h"
|
||||
#include <QTextCodec>
|
||||
|
@ -108,6 +109,7 @@ int main(int argc, char* argv[])
|
|||
|
||||
// initialize the disassembler backends (Zydis and Capstone)
|
||||
Zydis::GlobalInitialize();
|
||||
Capstone::GlobalInitialize();
|
||||
|
||||
// load config file + set config font
|
||||
mConfiguration = new Configuration;
|
||||
|
|
|
@ -83,7 +83,9 @@ SOURCES += \
|
|||
Src/BasicView/HexDump.cpp \
|
||||
Src/BasicView/AbstractTableView.cpp \
|
||||
Src/Disassembler/QBeaEngine.cpp \
|
||||
Src/Disassembler/CsQBeaEngine.cpp \
|
||||
Src/Disassembler/capstone_gui.cpp \
|
||||
Src/Disassembler/cs_capstone_gui.cpp \
|
||||
Src/Memory/MemoryPage.cpp \
|
||||
Src/Bridge/Bridge.cpp \
|
||||
Src/BasicView/StdTable.cpp \
|
||||
|
@ -193,7 +195,9 @@ HEADERS += \
|
|||
Src/BasicView/HexDump.h \
|
||||
Src/BasicView/AbstractTableView.h \
|
||||
Src/Disassembler/QBeaEngine.h \
|
||||
Src/Disassembler/CsQBeaEngine.h \
|
||||
Src/Disassembler/capstone_gui.h \
|
||||
Src/Disassembler/cs_capstone_gui.h \
|
||||
Src/Memory/MemoryPage.h \
|
||||
Src/Bridge/Bridge.h \
|
||||
Src/Exports.h \
|
||||
|
|
|
@ -1 +1 @@
|
|||
Subproject commit 27a0fb119e5be7c532d071edfd23cb67c4628b88
|
||||
Subproject commit a9298c2f45d0bf1f482142aa15cde9e498de84f6
|
Loading…
Reference in New Issue