Fixed instruction pointer pointing to current instead of next instruction

Added instruction address field to the VXInstructionInfo struct
Added operand access mode information to the VXOperandInfo struct
Added abstract data provider interface for the VXInstructionDecoder
Added concrete VXBufferDataProvider and VXStreamDataProvider classes
Published effectively used REX/VEX.w/r/x/b and VEX.l values in the VXInstructionInfo struct
Published extended modrm.reg/rm and sib.base/index values in the VXInstructionInfo struct
Internal changes and improvements
This commit is contained in:
flobernd 2014-10-27 14:10:22 +01:00
parent 2fbefa36ba
commit d2e3dbabe8
9 changed files with 5861 additions and 5881 deletions

View File

@ -68,19 +68,22 @@ int _tmain(int argc, _TCHAR* argv[])
};
VXInstructionInfo info;
VXInstructionDecoder decoder;
VXIntelInstructionFormatter formatter;
VXInstructionDecoder decoder32(&data32[0], sizeof(data32), VXDisassemblerMode::M32BIT);
VXInstructionDecoder decoder64(&data64[0], sizeof(data64), VXDisassemblerMode::M64BIT);
VXBufferDataSource input32(&data32[0], sizeof(data32));
VXBufferDataSource input64(&data64[0], sizeof(data64));
decoder32.setInstructionPointer(0x77091852);
decoder.setDisassemblerMode(VXDisassemblerMode::M32BIT);
decoder.setDataSource(&input32);
decoder.setInstructionPointer(0x77091852);
std::cout << "32 bit test ..." << std::endl << std::endl;
while (decoder32.decodeNextInstruction(info))
while (decoder.decodeInstruction(info))
{
std::cout << std::hex << std::setw(8) << std::setfill('0') << std::uppercase
<< info.instructionPointer << " ";
<< info.instrAddress << " ";
if (info.flags & IF_ERROR_MASK)
{
std::cout << "db " << std::setw(2) << info.instructionBytes[0];
std::cout << "db " << std::setw(2) << info.data[0];
} else
{
std::cout << formatter.formatInstruction(info) << std::endl;
@ -89,15 +92,17 @@ int _tmain(int argc, _TCHAR* argv[])
std::cout << std::endl;
decoder64.setInstructionPointer(0x00007FFA39A81930ull);
decoder.setDisassemblerMode(VXDisassemblerMode::M64BIT);
decoder.setDataSource(&input64);
decoder.setInstructionPointer(0x00007FFA39A81930ull);
std::cout << "64 bit test ..." << std::endl << std::endl;
while (decoder64.decodeNextInstruction(info))
while (decoder.decodeInstruction(info))
{
std::cout << std::hex << std::setw(16) << std::setfill('0') << std::uppercase
<< info.instructionPointer << " ";
<< info.instrAddress << " ";
if (info.flags & IF_ERROR_MASK)
{
std::cout << "db " << std::setw(2) << info.instructionBytes[0];
std::cout << "db " << std::setw(2) << info.data[0];
} else
{
std::cout << formatter.formatInstruction(info) << std::endl;

View File

@ -15,6 +15,7 @@ Fast and lightweight x86/x86-64 disassembler library.
- Only 44.00 KiB (64 bit: 47.00 KiB) for the decoder and 62.00 KiB (64 bit: 69.50 KiB) with the optional formatting functionality
- Abstract formatter and symbol-resolver classes for custom syntax implementations.
- Intel syntax is implemented by default
- Complete doxygen documentation
## Compilation ##

View File

@ -47,19 +47,19 @@ enum InstructionFlags : uint32_t
{
IF_NONE = 0x00000000,
/**
* @brief The instruction got decoded in 16 bit disassembler mode.
* @brief The instruction was decoded in 16 bit disassembler mode.
*/
IF_DISASSEMBLER_MODE_16 = 0x00000001,
/**
* @brief The instruction got decoded in 32 bit disassembler mode.
* @brief The instruction was decoded in 32 bit disassembler mode.
*/
IF_DISASSEMBLER_MODE_32 = 0x00000002,
/**
* @brief The instruction got decoded in 64 bit disassembler mode.
* @brief The instruction was decoded in 64 bit disassembler mode.
*/
IF_DISASSEMBLER_MODE_64 = 0x00000004,
/**
* @brief The instruction has a segment override prefix (0x26, 0x2E, 0x36, 0x3E, 0x64, 0x65).
* @brief The instruction has a segment prefix (0x26, 0x2E, 0x36, 0x3E, 0x64, 0x65).
*/
IF_PREFIX_SEGMENT = 0x00000008,
/**
@ -67,21 +67,21 @@ enum InstructionFlags : uint32_t
*/
IF_PREFIX_LOCK = 0x00000010,
/**
* @brief The instruction has a repnz prefix (0xF2).
* @brief The instruction has a repne prefix (0xF2).
*/
IF_PREFIX_REPNZ = 0x00000020,
IF_PREFIX_REPNE = 0x00000020,
/**
* @brief The instruction has a repz prefix (0xF3).
* @brief The instruction has a rep prefix (0xF3).
*/
IF_PREFIX_REPZ = 0x00000040,
IF_PREFIX_REP = 0x00000040,
/**
* @brief The instruction has an operand size override prefix (0x66).
* @brief The instruction has an operand size prefix (0x66).
*/
IF_PREFIX_OPERAND_SIZE_OVERRIDE = 0x00000080,
IF_PREFIX_OPERAND_SIZE = 0x00000080,
/**
* @brief The instruction has an address size override prefix (0x67).
* @brief The instruction has an address size prefix (0x67).
*/
IF_PREFIX_ADDRESS_SIZE_OVERRIDE = 0x00000100,
IF_PREFIX_ADDRESS_SIZE = 0x00000100,
/**
* @brief The instruction has a rex prefix (0x40 - 0x4F).
*/
@ -128,41 +128,6 @@ enum InstructionFlags : uint32_t
IF_ERROR_OPERAND = 0x01000000
};
/**
* @brief Values that represent the type of a decoded operand.
*
* The decoder stores one of these values in the @c type field of every @c VXOperandInfo
* entry of a decoded instruction.
*/
enum class VXOperandType
{
/**
* @brief The operand is not used. The decoder treats operands of this type as absent.
*/
NONE,
/**
* @brief The operand is a register operand.
*/
REGISTER,
/**
* @brief The operand is a memory operand. It is addressed through the @c base, @c index
* and @c scale fields of the operand info.
*/
MEMORY,
/**
* @brief The operand is a pointer operand. The value is stored in the @c lval.ptr member
* of the operand info.
*/
POINTER,
/**
* @brief The operand is an immediate operand.
*/
IMMEDIATE,
/**
* @brief The operand is a relative immediate operand.
*/
REL_IMMEDIATE,
/**
* @brief The operand is a constant value.
*/
CONSTANT
};
/**
* @brief Values that represent a cpu register.
*/
@ -223,6 +188,61 @@ enum class VXRegister : uint16_t
RIP
};
/**
* @brief Values that represent the type of a decoded operand.
*
* The decoder stores one of these values in the @c type field of every @c VXOperandInfo
* entry of a decoded instruction.
*/
enum class VXOperandType
{
/**
* @brief The operand is not used. The decoder treats operands of this type as absent.
*/
NONE,
/**
* @brief The operand is a register operand.
*/
REGISTER,
/**
* @brief The operand is a memory operand. It is addressed through the @c base, @c index
* and @c scale fields of the operand info.
*/
MEMORY,
/**
* @brief The operand is a pointer operand. The value is stored in the @c lval.ptr member
* of the operand info.
*/
POINTER,
/**
* @brief The operand is an immediate operand.
*/
IMMEDIATE,
/**
* @brief The operand is a relative immediate operand.
*/
REL_IMMEDIATE,
/**
* @brief The operand is a constant value. This is the only operand type that may carry an
* lvalue while the @c offset field of the operand info is zero.
*/
CONSTANT
};
/**
* @brief Values that represent the operand access mode.
*
* The decoder stores one of these values in the @c access_mode field of a decoded operand.
*/
enum class VXOperandAccessMode
{
/**
* @brief No access mode is assigned. Presumably the state of operands that are not used
* by the instruction (TODO: confirm against the decoder's initialization code).
*/
NA,
/**
* @brief The operand is accessed in read-only mode.
*/
READ,
/**
* @brief The operand is accessed in write mode.
*/
WRITE,
/**
* @brief The operand is accessed in read-write mode.
*/
READWRITE
};
/**
* @brief This struct holds information about a decoded operand.
*/
@ -236,6 +256,10 @@ struct VXOperandInfo
* @brief The size of the operand.
*/
uint16_t size;
/**
* @brief The operand access mode.
*/
VXOperandAccessMode access_mode;
/**
* @brief The base register.
*/
@ -249,11 +273,16 @@ struct VXOperandInfo
*/
uint8_t scale;
/**
* @brief The offset. TODO: improve documentation
* @brief The lvalue offset. If the @c offset is zero and the operand @c type is not
* @c CONSTANT, no lvalue is present.
*/
uint8_t offset;
/**
* @brief The lvalue. TODO: improve documentation
* @brief Signals whether the lvalue is signed.
*/
bool signed_lval;
/**
* @brief The lvalue.
*/
union {
int8_t sbyte;
@ -289,13 +318,13 @@ struct VXInstructionInfo
*/
uint8_t length;
/**
* @brief The instruction bytes.
* @brief Contains all bytes of the instruction.
*/
uint8_t instructionBytes[15];
uint8_t data[15];
/**
* @brief The length of the instruction opcodes.
*/
uint8_t opcodeLength;
uint8_t opcode_length;
/**
* @brief The instruction opcodes.
*/
@ -303,20 +332,20 @@ struct VXInstructionInfo
/**
* @brief The operand mode.
*/
uint8_t operandMode;
uint8_t operand_mode;
/**
* @brief The address mode.
*/
uint8_t addressMode;
uint8_t address_mode;
/**
* @brief The decoded operands.
*/
VXOperandInfo operand[4];
/**
* @brief The segment register. This value will default to @c NONE, if no segment register
* override prefix is present.
* prefix is present.
*/
VXRegister segmentRegister;
VXRegister segment;
/**
* @brief The rex prefix byte.
*/
@ -352,12 +381,24 @@ struct VXInstructionInfo
* with 1 most-significant bit to 4 bits total.
*/
uint8_t modrm_reg;
/**
* @brief The extended modrm register bits. If the instruction definition does not have the
* @c IDF_ACCEPTS_REXR flag set, this value defaults to the normal @c modrm_reg
* field.
*/
uint8_t modrm_reg_ext;
/**
* @brief The modrm register/memory bits. Specifies a direct or indirect register operand,
* optionally with a displacement. The REX.B, VEX.~B or XOP.~B field can extend this
* field with 1 most-significant bit to 4 bits total.
*/
uint8_t modrm_rm;
/**
* @brief The extended modrm register/memory bits. If the instruction definition does not
* have the @c IDF_ACCEPTS_REXB flag set, this value defaults to the normal
* @c modrm_rm field.
*/
uint8_t modrm_rm_ext;
/**
* @brief The sib byte.
*/
@ -371,11 +412,23 @@ struct VXInstructionInfo
* with 1 most-significant bit to 4 bits total.
*/
uint8_t sib_index;
/**
* @brief The extended index register. If the instruction definition does not have the
* @c IDF_ACCEPTS_REXX flag set, this value defaults to the normal @c sib_index
* field.
*/
uint8_t sib_index_ext;
/**
* @brief The base register to use. The REX.B, VEX.~B or XOP.~B field can extend this field
* with 1 most-significant bit to 4 bits total.
*/
uint8_t sib_base;
/**
* @brief The extended base register. If the instruction definition does not have the
* @c IDF_ACCEPTS_REXB flag set, this value defaults to the normal @c sib_base
* field.
*/
uint8_t sib_base_ext;
/**
* @brief The primary vex prefix byte.
*/
@ -434,15 +487,46 @@ struct VXInstructionInfo
* 11 = 0xF2
*/
uint8_t vex_pp;
/**
* @brief The effectively used REX/VEX.w value. If the instruction definition does not have
* the @c IDF_ACCEPTS_REXW flag set, this value defaults to zero.
*/
uint8_t eff_rexvex_w;
/**
* @brief The effectively used REX/VEX.r value. If the instruction definition does not have
* the @c IDF_ACCEPTS_REXR flag set, this value defaults to zero.
*/
uint8_t eff_rexvex_r;
/**
* @brief The effectively used REX/VEX.x value. If the instruction definition does not have
* the @c IDF_ACCEPTS_REXX flag set, this value defaults to zero.
*/
uint8_t eff_rexvex_x;
/**
* @brief The effectively used REX/VEX.b value. If the instruction definition does not have
* the @c IDF_ACCEPTS_REXB flag set, this value defaults to zero.
*/
uint8_t eff_rexvex_b;
/**
* @brief The effectively used VEX.l value. If the instruction definition does not have
* the @c IDF_ACCEPTS_VEXL flag set, this value defaults to zero.
*/
uint8_t eff_vex_l;
/**
* @brief The instruction definition.
*/
const VXInstructionDefinition *instrDefinition;
/**
* @brief The instruction pointer. This field is used to properly format relative
* instructions.
* @brief The instruction address points to the current instruction (relative to the
* initial instruction pointer).
*/
uint64_t instructionPointer;
uint64_t instrAddress;
/**
* @brief The instruction pointer points to the address of the next instruction (relative
* to the initial instruction pointer).
* This field is used to properly format relative instructions.
*/
uint64_t instrPointer;
};
}

View File

@ -102,7 +102,6 @@ bool VXInstructionDecoder::decodeRegisterOperand(VXInstructionInfo &info, VXOper
reg = static_cast<VXRegister>(static_cast<uint16_t>(VXRegister::ES) + (registerId & 0x07));
break;
case RegisterClass::XMM:
// TODO: Needs to be tested
reg = static_cast<VXRegister>(registerId + static_cast<uint16_t>(
((size == 256) ? VXRegister::YMM0 : VXRegister::XMM0)));
break;
@ -118,18 +117,22 @@ bool VXInstructionDecoder::decodeRegisterOperand(VXInstructionInfo &info, VXOper
bool VXInstructionDecoder::decodeRegisterMemoryOperand(VXInstructionInfo &info,
VXOperandInfo &operand, RegisterClass registerClass, VXDefinedOperandSize operandSize)
{
if (!decodeModrm(info))
{
return false;
}
assert(info.flags & IF_MODRM);
// Decode register operand
if (info.modrm_mod == 3)
{
return decodeRegisterOperand(info, operand, registerClass, m_effectiveModrmRm,
return decodeRegisterOperand(info, operand, registerClass, info.modrm_rm_ext,
operandSize);
}
// Decode memory operand
uint8_t offset = 0;
operand.type = VXOperandType::MEMORY;
operand.size = getEffectiveOperandSize(info, operandSize);
switch (info.addressMode)
switch (info.address_mode)
{
case 16:
{
@ -139,10 +142,10 @@ bool VXInstructionDecoder::decodeRegisterMemoryOperand(VXInstructionInfo &info,
static const VXRegister indices[] = {
VXRegister::SI, VXRegister::DI, VXRegister::SI, VXRegister::DI,
VXRegister::NONE, VXRegister::NONE, VXRegister::NONE, VXRegister::NONE };
operand.base = static_cast<VXRegister>(bases[m_effectiveModrmRm & 0x07]);
operand.index = static_cast<VXRegister>(indices[m_effectiveModrmRm & 0x07]);
operand.base = static_cast<VXRegister>(bases[info.modrm_rm_ext & 0x07]);
operand.index = static_cast<VXRegister>(indices[info.modrm_rm_ext & 0x07]);
operand.scale = 0;
if (info.modrm_mod == 0 && m_effectiveModrmRm == 6) {
if (info.modrm_mod == 0 && info.modrm_rm_ext == 6) {
offset = 16;
operand.base = VXRegister::NONE;
} else if (info.modrm_mod == 1) {
@ -154,11 +157,11 @@ bool VXInstructionDecoder::decodeRegisterMemoryOperand(VXInstructionInfo &info,
break;
case 32:
operand.base =
static_cast<VXRegister>(static_cast<uint16_t>(VXRegister::EAX) + m_effectiveModrmRm);
static_cast<VXRegister>(static_cast<uint16_t>(VXRegister::EAX) + info.modrm_rm_ext);
switch (info.modrm_mod)
{
case 0:
if (m_effectiveModrmRm == 5)
if (info.modrm_rm_ext == 5)
{
operand.base = VXRegister::NONE;
offset = 32;
@ -173,7 +176,7 @@ bool VXInstructionDecoder::decodeRegisterMemoryOperand(VXInstructionInfo &info,
default:
assert(0);
}
if ((m_effectiveModrmRm & 0x07) == 4)
if ((info.modrm_rm_ext & 0x07) == 4)
{
if (!decodeSIB(info))
{
@ -181,10 +184,10 @@ bool VXInstructionDecoder::decodeRegisterMemoryOperand(VXInstructionInfo &info,
}
operand.base =
static_cast<VXRegister>(static_cast<uint16_t>(VXRegister::EAX) +
(info.sib_base | (m_effectiveRexB << 3)));
info.sib_base_ext);
operand.index =
static_cast<VXRegister>(static_cast<uint16_t>(VXRegister::EAX) +
(info.sib_index | (m_effectiveRexX << 3)));
info.sib_index_ext);
operand.scale = (1 << info.sib_scale) & ~1;
if (operand.index == VXRegister::ESP)
{
@ -213,11 +216,11 @@ bool VXInstructionDecoder::decodeRegisterMemoryOperand(VXInstructionInfo &info,
break;
case 64:
operand.base =
static_cast<VXRegister>(static_cast<uint16_t>(VXRegister::RAX) + m_effectiveModrmRm);
static_cast<VXRegister>(static_cast<uint16_t>(VXRegister::RAX) + info.modrm_rm_ext);
switch (info.modrm_mod)
{
case 0:
if ((m_effectiveModrmRm & 0x07) == 5)
if ((info.modrm_rm_ext & 0x07) == 5)
{
info.flags |= IF_RELATIVE;
operand.base = VXRegister::RIP;
@ -233,7 +236,7 @@ bool VXInstructionDecoder::decodeRegisterMemoryOperand(VXInstructionInfo &info,
default:
assert(0);
}
if ((m_effectiveModrmRm & 0x07) == 4)
if ((info.modrm_rm_ext & 0x07) == 4)
{
if (!decodeSIB(info))
{
@ -241,10 +244,10 @@ bool VXInstructionDecoder::decodeRegisterMemoryOperand(VXInstructionInfo &info,
}
operand.base =
static_cast<VXRegister>(static_cast<uint16_t>(VXRegister::RAX) +
(info.sib_base | (m_effectiveRexB << 3)));
info.sib_base_ext);
operand.index =
static_cast<VXRegister>(static_cast<uint16_t>(VXRegister::RAX) +
(info.sib_index | (m_effectiveRexX << 3)));
info.sib_index_ext);
if (operand.index == VXRegister::RSP)
{
operand.index = VXRegister::NONE;
@ -353,16 +356,22 @@ bool VXInstructionDecoder::decodeModrm(VXInstructionInfo &info)
{
if (!(info.flags & IF_MODRM))
{
if (!inputNext(info) && (info.flags & IF_ERROR_MASK))
info.modrm = inputNext(info);
if (!info.modrm && (info.flags & IF_ERROR_MASK))
{
return false;
}
info.flags |= IF_MODRM;
info.modrm = inputCurrent();
info.modrm_mod = (info.modrm >> 6) & 0x03;
info.modrm_reg = (info.modrm >> 3) & 0x07;
info.modrm_rm = (info.modrm >> 0) & 0x07;
}
// The @c decodeModrm method might get called multiple times during the opcode- and the
// operand decoding, but the effective REX/VEX fields are not initialized before the end of
// the opcode decoding process. As the extended values are only used for the operand decoding,
// we should have no problems.
info.modrm_reg_ext = (info.eff_rexvex_r << 3) | info.modrm_reg;
info.modrm_rm_ext = (info.eff_rexvex_b << 3) | info.modrm_rm;
return true;
}
@ -372,15 +381,19 @@ bool VXInstructionDecoder::decodeSIB(VXInstructionInfo &info)
assert((info.modrm_rm & 0x7) == 4);
if (!(info.flags & IF_SIB))
{
if (!inputNext(info) && (info.flags & IF_ERROR_MASK))
info.sib = inputNext(info);
if (!info.sib && (info.flags & IF_ERROR_MASK))
{
return false;
}
info.flags |= IF_SIB;
info.sib = inputCurrent();
info.sib_scale = (info.sib >> 6) & 0x03;
info.sib_index = (info.sib >> 3) & 0x07;
info.sib_base = (info.sib >> 0) & 0x07;
// The @c decodeSib method is only called during the operand decoding, so updating the
// extended values at this point should be safe.
info.sib_index_ext = (info.eff_rexvex_x << 3) | info.sib_index;
info.sib_base_ext = (info.eff_rexvex_b << 3) | info.sib_base;
}
return true;
}
@ -393,12 +406,12 @@ bool VXInstructionDecoder::decodeVex(VXInstructionInfo &info)
switch (info.vex_op)
{
case 0xC4:
info.vex_b1 = inputNext(info);
info.vex_b1 = inputNext(info);
if (!info.vex_b1 || (info.flags & IF_ERROR_MASK))
{
return false;
}
info.vex_b2 = inputNext(info);
info.vex_b2 = inputNext(info);
if (!info.vex_b2 || (info.flags & IF_ERROR_MASK))
{
return false;
@ -413,7 +426,7 @@ bool VXInstructionDecoder::decodeVex(VXInstructionInfo &info)
info.vex_pp = (info.vex_b2 >> 0) & 0x03;
break;
case 0xC5:
info.vex_b1 = inputNext(info);
info.vex_b1 = inputNext(info);
if (!info.vex_b1 || (info.flags & IF_ERROR_MASK))
{
return false;
@ -449,38 +462,21 @@ uint16_t VXInstructionDecoder::getEffectiveOperandSize(const VXInstructionInfo &
case VXDefinedOperandSize::NA:
return 0;
case VXDefinedOperandSize::Z:
return (info.operandMode == 16) ? 16 : 32;
return (info.operand_mode == 16) ? 16 : 32;
case VXDefinedOperandSize::V:
return info.operandMode;
return info.operand_mode;
case VXDefinedOperandSize::Y:
return (info.operandMode == 16) ? 32 : info.operandMode;
return (info.operand_mode == 16) ? 32 : info.operand_mode;
case VXDefinedOperandSize::X:
assert(info.vex_op != 0);
return m_effectiveVexL ?
return (info.eff_vex_l) ?
getEffectiveOperandSize(info, VXDefinedOperandSize::QQ) :
getEffectiveOperandSize(info, VXDefinedOperandSize::DQ);
case VXDefinedOperandSize::RDQ:
return (m_disassemblerMode == VXDisassemblerMode::M64BIT) ? 64 : 32;
case VXDefinedOperandSize::B:
return 8;
case VXDefinedOperandSize::W:
return 16;
case VXDefinedOperandSize::D:
return 32;
case VXDefinedOperandSize::Q:
return 64;
case VXDefinedOperandSize::T:
return 80;
case VXDefinedOperandSize::O:
return 12;
case VXDefinedOperandSize::DQ:
return 128;
case VXDefinedOperandSize::QQ:
return 256;
default:
assert(0);
return Internal::GetSimpleOperandSize(operandSize);
}
return 0;
}
bool VXInstructionDecoder::decodeOperands(VXInstructionInfo& info)
@ -497,6 +493,7 @@ bool VXInstructionDecoder::decodeOperands(VXInstructionInfo& info)
{
if (info.operand[i - 1].type != VXOperandType::NONE)
{
info.operand[i - 1].access_mode = VXOperandAccessMode::READ;
if (!decodeOperand(info, info.operand[i], info.instrDefinition->operand[i].type,
info.instrDefinition->operand[i].size))
{
@ -504,6 +501,27 @@ bool VXInstructionDecoder::decodeOperands(VXInstructionInfo& info)
}
}
}
// Update operand access modes
if (info.operand[0].type != VXOperandType::NONE)
{
if (info.instrDefinition->flags & IDF_OPERAND1_WRITE)
{
info.operand[0].access_mode = VXOperandAccessMode::WRITE;
} else if (info.instrDefinition->flags & IDF_OPERAND1_READWRITE)
{
info.operand[0].access_mode = VXOperandAccessMode::READWRITE;
}
}
if (info.operand[1].type != VXOperandType::NONE)
{
if (info.instrDefinition->flags & IDF_OPERAND2_WRITE)
{
info.operand[1].access_mode = VXOperandAccessMode::WRITE;
} else if (info.instrDefinition->flags & IDF_OPERAND2_READWRITE)
{
info.operand[1].access_mode = VXOperandAccessMode::READWRITE;
}
}
return true;
}
@ -518,7 +536,7 @@ bool VXInstructionDecoder::decodeOperand(VXInstructionInfo &info, VXOperandInfo
break;
case VXDefinedOperandType::A:
operand.type = VXOperandType::POINTER;
if (info.operandMode == 16)
if (info.operand_mode == 16)
{
operand.size = 32;
operand.lval.ptr.off = inputNext<uint16_t>(info);
@ -534,10 +552,18 @@ bool VXInstructionDecoder::decodeOperand(VXInstructionInfo &info, VXOperandInfo
}
break;
case VXDefinedOperandType::C:
return decodeRegisterOperand(info, operand, RegisterClass::CONTROL, m_effectiveModrmReg,
if (!decodeModrm(info))
{
return false;
}
return decodeRegisterOperand(info, operand, RegisterClass::CONTROL, info.modrm_reg_ext,
operandSize);
case VXDefinedOperandType::D:
return decodeRegisterOperand(info, operand, RegisterClass::DEBUG, m_effectiveModrmReg,
if (!decodeModrm(info))
{
return false;
}
return decodeRegisterOperand(info, operand, RegisterClass::DEBUG, info.modrm_reg_ext,
operandSize);
case VXDefinedOperandType::F:
// TODO: FAR flag
@ -552,12 +578,18 @@ bool VXInstructionDecoder::decodeOperand(VXInstructionInfo &info, VXOperandInfo
return decodeRegisterMemoryOperand(info, operand, RegisterClass::GENERAL_PURPOSE,
operandSize);
case VXDefinedOperandType::G:
if (!decodeModrm(info))
{
return false;
}
return decodeRegisterOperand(info, operand, RegisterClass::GENERAL_PURPOSE,
m_effectiveModrmReg, operandSize);
info.modrm_reg_ext, operandSize);
case VXDefinedOperandType::H:
assert(info.vex_op != 0);
return decodeRegisterOperand(info, operand, RegisterClass::XMM, (0xF & ~info.vex_vvvv),
operandSize);
case VXDefinedOperandType::sI:
operand.signed_lval = true;
case VXDefinedOperandType::I:
return decodeImmediate(info, operand, operandSize);
case VXDefinedOperandType::I1:
@ -607,9 +639,13 @@ bool VXInstructionDecoder::decodeOperand(VXInstructionInfo &info, VXOperandInfo
operand.index = VXRegister::NONE;
operand.scale = 0;
operand.size = getEffectiveOperandSize(info, operandSize);
return decodeDisplacement(info, operand, info.addressMode);
return decodeDisplacement(info, operand, info.address_mode);
case VXDefinedOperandType::P:
return decodeRegisterOperand(info, operand, RegisterClass::MMX, m_effectiveModrmReg,
if (!decodeModrm(info))
{
return false;
}
return decodeRegisterOperand(info, operand, RegisterClass::MMX, info.modrm_reg_ext,
operandSize);
case VXDefinedOperandType::R:
// ModR/M byte may refer only to memory
@ -621,7 +657,11 @@ bool VXInstructionDecoder::decodeOperand(VXInstructionInfo &info, VXOperandInfo
return decodeRegisterMemoryOperand(info, operand, RegisterClass::GENERAL_PURPOSE,
operandSize);
case VXDefinedOperandType::S:
return decodeRegisterOperand(info, operand, RegisterClass::SEGMENT, m_effectiveModrmReg,
if (!decodeModrm(info))
{
return false;
}
return decodeRegisterOperand(info, operand, RegisterClass::SEGMENT, info.modrm_reg_ext,
operandSize);
case VXDefinedOperandType::U:
// ModR/M byte may refer only to memory
@ -633,7 +673,11 @@ bool VXInstructionDecoder::decodeOperand(VXInstructionInfo &info, VXOperandInfo
case VXDefinedOperandType::W:
return decodeRegisterMemoryOperand(info, operand, RegisterClass::XMM, operandSize);
case VXDefinedOperandType::V:
return decodeRegisterOperand(info, operand, RegisterClass::XMM, m_effectiveModrmReg,
if (!decodeModrm(info))
{
return false;
}
return decodeRegisterOperand(info, operand, RegisterClass::XMM, info.modrm_reg_ext,
operandSize);
case VXDefinedOperandType::R0:
case VXDefinedOperandType::R1:
@ -644,7 +688,7 @@ bool VXInstructionDecoder::decodeOperand(VXInstructionInfo &info, VXOperandInfo
case VXDefinedOperandType::R6:
case VXDefinedOperandType::R7:
return decodeRegisterOperand(info, operand, RegisterClass::GENERAL_PURPOSE,
((m_effectiveRexB << 3) | (static_cast<uint16_t>(operandType) -
((info.eff_rexvex_b << 3) | (static_cast<uint16_t>(operandType) -
static_cast<uint16_t>(VXDefinedOperandType::R0))), operandSize);
case VXDefinedOperandType::AL:
case VXDefinedOperandType::AX:
@ -711,84 +755,106 @@ void VXInstructionDecoder::resolveOperandAndAddressMode(VXInstructionInfo &info)
switch (m_disassemblerMode)
{
case VXDisassemblerMode::M16BIT:
info.operandMode = (info.flags & IF_PREFIX_OPERAND_SIZE_OVERRIDE) ? 32 : 16;
info.addressMode = (info.flags & IF_PREFIX_ADDRESS_SIZE_OVERRIDE) ? 32 : 16;
info.operand_mode = (info.flags & IF_PREFIX_OPERAND_SIZE) ? 32 : 16;
info.address_mode = (info.flags & IF_PREFIX_ADDRESS_SIZE) ? 32 : 16;
break;
case VXDisassemblerMode::M32BIT:
info.operandMode = (info.flags & IF_PREFIX_OPERAND_SIZE_OVERRIDE) ? 16 : 32;
info.addressMode = (info.flags & IF_PREFIX_ADDRESS_SIZE_OVERRIDE) ? 16 : 32;
info.operand_mode = (info.flags & IF_PREFIX_OPERAND_SIZE) ? 16 : 32;
info.address_mode = (info.flags & IF_PREFIX_ADDRESS_SIZE) ? 16 : 32;
break;
case VXDisassemblerMode::M64BIT:
if (m_effectiveRexW)
if (info.eff_rexvex_w)
{
info.operandMode = 64;
} else if ((info.flags & IF_PREFIX_OPERAND_SIZE_OVERRIDE))
info.operand_mode = 64;
} else if ((info.flags & IF_PREFIX_OPERAND_SIZE))
{
info.operandMode = 16;
info.operand_mode = 16;
} else
{
info.operandMode = (info.instrDefinition->flags & IDF_DEFAULT_64) ? 64 : 32;
info.operand_mode = (info.instrDefinition->flags & IDF_DEFAULT_64) ? 64 : 32;
}
info.addressMode = (info.flags & IF_PREFIX_ADDRESS_SIZE_OVERRIDE) ? 32 : 64;
info.address_mode = (info.flags & IF_PREFIX_ADDRESS_SIZE) ? 32 : 64;
break;
default:
assert(0);
}
}
/**
 * @brief   Derives the effectively used REX/VEX.w/r/x/b and VEX.l values of an instruction.
 *
 * The raw REX byte (or an equivalent nibble rebuilt from the VEX payload bytes, if the
 * instruction carries a VEX prefix) is masked with the low four bits of the instruction
 * definition flags. The surviving bits are published in the @c eff_rexvex_w,
 * @c eff_rexvex_r, @c eff_rexvex_x and @c eff_rexvex_b fields. @c eff_vex_l is set only if
 * @c vex_l is non-zero and the definition has the @c IDF_ACCEPTS_VEXL flag.
 *
 * @param   info    The instruction info. Its @c instrDefinition must already be assigned.
 */
void VXInstructionDecoder::calculateEffectiveRexVexValues(VXInstructionInfo &info) const
{
    assert(info.instrDefinition);
    // Without a VEX prefix the raw REX byte supplies the W/R/X/B bits.
    uint8_t rexBits = info.rex;
    if (info.flags & IF_PREFIX_VEX)
    {
        if (info.vex_op == 0xC4)
        {
            // Bits 7..5 of the first payload byte are stored inverted and land in bits 2..0;
            // bit 7 of the second payload byte lands in bit 3.
            const uint8_t invertedRxb = static_cast<uint8_t>(~(info.vex_b1 >> 5) & 0x07);
            const uint8_t widthBit    = static_cast<uint8_t>((info.vex_b2 >> 4) & 0x08);
            rexBits = static_cast<uint8_t>(invertedRxb | widthBit);
        } else if (info.vex_op == 0xC5)
        {
            // Only the inverted bit 7 of the single payload byte is taken over (as bit 2).
            rexBits = static_cast<uint8_t>((~(info.vex_b1 >> 5)) & 4);
        } else
        {
            assert(0);
        }
    }
    // Keep only the bits enabled in the low nibble of the definition flags (presumably the
    // IDF_ACCEPTS_REXW/R/X/B flags - verify against the flag declarations).
    rexBits &= (info.instrDefinition->flags & 0x000F);
    info.eff_rexvex_b = (rexBits >> 0) & 0x01;
    info.eff_rexvex_x = (rexBits >> 1) & 0x01;
    info.eff_rexvex_r = (rexBits >> 2) & 0x01;
    info.eff_rexvex_w = (rexBits >> 3) & 0x01;
    info.eff_vex_l    =
        (info.vex_l && (info.instrDefinition->flags & IDF_ACCEPTS_VEXL)) ? 1 : 0;
}
bool VXInstructionDecoder::decodePrefixes(VXInstructionInfo &info)
{
bool done = false;
do
{
if (!inputPeek(info) && (info.flags & IF_ERROR_MASK))
{
return false;
}
switch (inputCurrent())
switch (inputPeek(info))
{
case 0xF0:
info.flags |= IF_PREFIX_LOCK;
break;
case 0xF2:
// REPNZ and REPZ are mutually exclusive. The one that comes later has precedence.
info.flags |= IF_PREFIX_REPNZ;
info.flags &= ~IF_PREFIX_REPZ;
info.flags |= IF_PREFIX_REP;
info.flags &= ~IF_PREFIX_REPNE;
break;
case 0xF3:
// REPNZ and REPZ are mutually exclusive. The one that comes later has precedence.
info.flags |= IF_PREFIX_REPZ;
info.flags &= ~IF_PREFIX_REPNZ;
info.flags |= IF_PREFIX_REP;
info.flags &= ~IF_PREFIX_REPNE;
break;
case 0x2E:
info.flags |= IF_PREFIX_SEGMENT;
info.segmentRegister = VXRegister::CS;
info.segment = VXRegister::CS;
break;
case 0x36:
info.flags |= IF_PREFIX_SEGMENT;
info.segmentRegister = VXRegister::SS;
info.segment = VXRegister::SS;
break;
case 0x3E:
info.flags |= IF_PREFIX_SEGMENT;
info.segmentRegister = VXRegister::DS;
info.segment = VXRegister::DS;
break;
case 0x26:
info.flags |= IF_PREFIX_SEGMENT;
info.segmentRegister = VXRegister::ES;
info.segment = VXRegister::ES;
break;
case 0x64:
info.flags |= IF_PREFIX_SEGMENT;
info.segmentRegister = VXRegister::FS;
info.segment = VXRegister::FS;
break;
case 0x65:
info.flags |= IF_PREFIX_SEGMENT;
info.segmentRegister = VXRegister::GS;
info.segment = VXRegister::GS;
break;
case 0x66:
info.flags |= IF_PREFIX_OPERAND_SIZE_OVERRIDE;
info.flags |= IF_PREFIX_OPERAND_SIZE;
break;
case 0x67:
info.flags |= IF_PREFIX_ADDRESS_SIZE_OVERRIDE;
info.flags |= IF_PREFIX_ADDRESS_SIZE;
break;
default:
if ((m_disassemblerMode == VXDisassemblerMode::M64BIT) &&
@ -811,7 +877,7 @@ bool VXInstructionDecoder::decodePrefixes(VXInstructionInfo &info)
}
}
} while (!done);
// TODO: Add flags for multiple prefixes of the same group
// TODO: Check for multiple prefixes of the same group
// Parse REX Prefix
if (info.flags & IF_PREFIX_REX)
{
@ -833,7 +899,7 @@ bool VXInstructionDecoder::decodeOpcode(VXInstructionInfo &info)
}
// Update instruction info
info.opcode[0] = inputCurrent();
info.opcodeLength = 1;
info.opcode_length = 1;
// Iterate through opcode tree
VXOpcodeTreeNode node = GetOpcodeTreeChild(GetOpcodeTreeRoot(), inputCurrent());
VXOpcodeTreeNodeType nodeType;
@ -845,11 +911,28 @@ bool VXInstructionDecoder::decodeOpcode(VXInstructionInfo &info)
{
case VXOpcodeTreeNodeType::INSTRUCTION_DEFINITION:
{
// Decode opcode
if (!decodeInstructionNode(info, node))
// Check for invalid instruction
if (GetOpcodeNodeValue(node) == 0)
{
info.flags |= IF_ERROR_INVALID;
return false;
}
// Get instruction definition
const VXInstructionDefinition *instrDefinition = GetInstructionDefinition(node);
// Check for invalid 64 bit instruction
if ((m_disassemblerMode == VXDisassemblerMode::M64BIT) &&
(instrDefinition->flags & IDF_INVALID_64))
{
info.flags |= IF_ERROR_INVALID_64;
return false;
}
// Update instruction info
info.instrDefinition = instrDefinition;
info.mnemonic = instrDefinition->mnemonic;
// Update effective REX/VEX values
calculateEffectiveRexVexValues(info);
// Resolve operand and address mode
resolveOperandAndAddressMode(info);
// Decode operands
if (!decodeOperands(info))
{
@ -864,9 +947,9 @@ bool VXInstructionDecoder::decodeOpcode(VXInstructionInfo &info)
return false;
}
// Update instruction info
assert((info.opcodeLength > 0) && (info.opcodeLength < 3));
info.opcode[info.opcodeLength] = inputCurrent();
info.opcodeLength++;
assert((info.opcode_length > 0) && (info.opcode_length < 3));
info.opcode[info.opcode_length] = inputCurrent();
info.opcode_length++;
// Set child node index for next iteration
index = inputCurrent();
break;
@ -896,13 +979,13 @@ bool VXInstructionDecoder::decodeOpcode(VXInstructionInfo &info)
break;
case VXOpcodeTreeNodeType::MANDATORY:
// Check if there are any prefixes present
if (info.flags & IF_PREFIX_REPNZ)
if (info.flags & IF_PREFIX_REP)
{
index = 1; // F2
} else if (info.flags & IF_PREFIX_REPZ)
} else if (info.flags & IF_PREFIX_REPNE)
{
index = 2; // F3
} else if (info.flags & IF_PREFIX_OPERAND_SIZE_OVERRIDE)
} else if (info.flags & IF_PREFIX_OPERAND_SIZE)
{
index = 3; // 66
}
@ -912,14 +995,14 @@ bool VXInstructionDecoder::decodeOpcode(VXInstructionInfo &info)
}
if (index && (GetOpcodeTreeChild(node, index) != 0))
{
// Remove REPNZ and REPZ prefix
info.flags &= ~IF_PREFIX_REPNZ;
info.flags &= ~IF_PREFIX_REPZ;
// Remove OPERAND_SIZE_OVERRIDE prefix, if it was used as mandatory prefix for
// the instruction
// Remove REP and REPNE prefix
info.flags &= ~IF_PREFIX_REP;
info.flags &= ~IF_PREFIX_REPNE;
// Remove OPERAND_SIZE prefix, if it was used as mandatory prefix for the
// instruction
if (index == 3)
{
info.flags &= ~IF_PREFIX_OPERAND_SIZE_OVERRIDE;
info.flags &= ~IF_PREFIX_OPERAND_SIZE;
}
}
break;
@ -935,13 +1018,13 @@ bool VXInstructionDecoder::decodeOpcode(VXInstructionInfo &info)
switch (m_disassemblerMode)
{
case VXDisassemblerMode::M16BIT:
index = (info.flags & IF_PREFIX_ADDRESS_SIZE_OVERRIDE) ? 1 : 0;
index = (info.flags & IF_PREFIX_ADDRESS_SIZE) ? 1 : 0;
break;
case VXDisassemblerMode::M32BIT:
index = (info.flags & IF_PREFIX_ADDRESS_SIZE_OVERRIDE) ? 0 : 1;
index = (info.flags & IF_PREFIX_ADDRESS_SIZE) ? 0 : 1;
break;
case VXDisassemblerMode::M64BIT:
index = (info.flags & IF_PREFIX_ADDRESS_SIZE_OVERRIDE) ? 1 : 2;
index = (info.flags & IF_PREFIX_ADDRESS_SIZE) ? 1 : 2;
break;
default:
assert(0);
@ -951,14 +1034,13 @@ bool VXInstructionDecoder::decodeOpcode(VXInstructionInfo &info)
switch (m_disassemblerMode)
{
case VXDisassemblerMode::M16BIT:
index = (info.flags & IF_PREFIX_OPERAND_SIZE_OVERRIDE) ? 1 : 0;
index = (info.flags & IF_PREFIX_OPERAND_SIZE) ? 1 : 0;
break;
case VXDisassemblerMode::M32BIT:
index = (info.flags & IF_PREFIX_OPERAND_SIZE_OVERRIDE) ? 0 : 1;
index = (info.flags & IF_PREFIX_OPERAND_SIZE) ? 0 : 1;
break;
case VXDisassemblerMode::M64BIT:
index =
(info.rex_w) ? 2 : ((info.flags & IF_PREFIX_OPERAND_SIZE_OVERRIDE) ? 0 : 1);
index = (info.rex_w) ? 2 : ((info.flags & IF_PREFIX_OPERAND_SIZE) ? 0 : 1);
break;
default:
assert(0);
@ -988,10 +1070,15 @@ bool VXInstructionDecoder::decodeOpcode(VXInstructionInfo &info)
// As all 3dnow instructions got the same operands and flag definitions, we just
// decode a random instruction and determine the specific opcode later.
assert(GetOpcodeTreeChild(node, 0x0C) != 0);
if (!decodeInstructionNode(info, GetOpcodeTreeChild(node, 0x0C)))
{
return false;
}
const VXInstructionDefinition *instrDefinition =
GetInstructionDefinition(GetOpcodeTreeChild(node, 0x0C));
// Update instruction info
info.instrDefinition = instrDefinition;
info.mnemonic = instrDefinition->mnemonic;
// Update effective REX/VEX values
calculateEffectiveRexVexValues(info);
// Resolve operand and address mode
resolveOperandAndAddressMode(info);
// Decode operands
if (!decodeOperands(info))
{
@ -1003,16 +1090,45 @@ bool VXInstructionDecoder::decodeOpcode(VXInstructionInfo &info)
{
return false;
}
// Update instruction mnemonic
const VXInstructionDefinition *instrDefinition =
// Update instruction info
instrDefinition =
GetInstructionDefinition(GetOpcodeTreeChild(node, info.opcode[2]));
if (!instrDefinition)
if (!instrDefinition ||
(instrDefinition->mnemonic == VXInstructionMnemonic::INVALID))
{
info.flags |= IF_ERROR_INVALID;
return false;
}
info.instrDefinition = instrDefinition;
info.mnemonic = instrDefinition->mnemonic;
info.mnemonic = instrDefinition->mnemonic;
// Update operand access modes: every decoded operand starts out as READ. The checks
// that follow may upgrade the first two operands to WRITE or READWRITE, depending on
// the flags of the instruction definition.
for (unsigned int i = 0; i < 4; ++i)
{
    if (info.operand[i].type != VXOperandType::NONE)
    {
        // Index the same operand that was tested; "i - 1" would touch operand[-1] for
        // the first operand and never initialize the last one.
        info.operand[i].access_mode = VXOperandAccessMode::READ;
    }
}
if (info.operand[0].type != VXOperandType::NONE)
{
if (info.instrDefinition->flags & IDF_OPERAND1_WRITE)
{
info.operand[0].access_mode = VXOperandAccessMode::WRITE;
} else if (info.instrDefinition->flags & IDF_OPERAND1_READWRITE)
{
info.operand[0].access_mode = VXOperandAccessMode::READWRITE;
}
}
if (info.operand[1].type != VXOperandType::NONE)
{
if (info.instrDefinition->flags & IDF_OPERAND2_WRITE)
{
info.operand[1].access_mode = VXOperandAccessMode::WRITE;
} else if (info.instrDefinition->flags & IDF_OPERAND2_READWRITE)
{
info.operand[1].access_mode = VXOperandAccessMode::READWRITE;
}
}
// Terminate loop
return true;
}
@ -1025,26 +1141,23 @@ bool VXInstructionDecoder::decodeOpcode(VXInstructionInfo &info)
{
return false;
}
// Update instruction info
// Update instruction info (error cases are checked by the @c decodeVex method)
switch (info.vex_m_mmmm)
{
case 1:
info.opcodeLength = 1;
info.opcode_length = 1;
info.opcode[0] = 0x0F;
break;
case 2:
info.opcodeLength = 2;
info.opcode_length = 2;
info.opcode[0] = 0x0F;
info.opcode[1] = 0x38;
break;
case 3:
info.opcodeLength = 2;
info.opcode_length = 2;
info.opcode[0] = 0x0F;
info.opcode[1] = 0x3A;
break;
default:
// TODO: ERROR
break;
}
// Set child node index for next iteration
index = info.vex_m_mmmm + (info.vex_pp << 2);
@ -1069,76 +1182,27 @@ bool VXInstructionDecoder::decodeOpcode(VXInstructionInfo &info)
return false;
}
bool VXInstructionDecoder::decodeInstructionNode(VXInstructionInfo &info, VXOpcodeTreeNode node)
VXInstructionDecoder::VXInstructionDecoder()
: m_dataSource(nullptr)
, m_disassemblerMode(VXDisassemblerMode::M32BIT)
, m_preferredVendor(VXInstructionSetVendor::ANY)
, m_instructionPointer(0)
{
// Check for invalid instruction
if (Internal::GetOpcodeNodeValue(node) == 0)
{
info.flags |= IF_ERROR_INVALID;
return false;
}
// Get instruction definition
bool hasModrm = false;
const VXInstructionDefinition *instrDefinition =
Internal::GetInstructionDefinition(node, hasModrm);
// Check for invalid 64 bit instruction
if ((m_disassemblerMode == VXDisassemblerMode::M64BIT) &&
(instrDefinition->flags & IDF_INVALID_64))
{
info.flags |= IF_ERROR_INVALID_64;
return false;
}
// Update instruction info
info.instrDefinition = instrDefinition;
info.mnemonic = instrDefinition->mnemonic;
// Decode modrm byte
if (hasModrm && !decodeModrm(info))
{
return false;
}
// Update values required for operand decoding
uint8_t rex = info.rex;
if (info.flags & IF_PREFIX_VEX)
{
switch (info.vex_op)
{
case 0xC4:
rex = ((~(info.vex_b1 >> 5) & 0x07) | ((info.vex_b2 >> 4) & 0x08));
break;
case 0xC5:
rex = (~(info.vex_b1 >> 5)) & 4;
break;
default:
assert(0);
}
}
// Calculate effective values by adding the corresponding part of the flags bitmask
rex &= (instrDefinition->flags & 0x000F);
// Store effective values in the current disassembler instance
m_effectiveRexW = (rex >> 3) & 0x01;
m_effectiveRexR = (rex >> 2) & 0x01;
m_effectiveRexX = (rex >> 1) & 0x01;
m_effectiveRexB = (rex >> 0) & 0x01;
m_effectiveModrmReg = (m_effectiveRexR << 3) | info.modrm_reg;
m_effectiveModrmRm = (m_effectiveRexB << 3) | info.modrm_rm;
m_effectiveVexL = info.vex_l && (instrDefinition->flags & IDF_ACCEPTS_VEXL);
// Resolve operand and address mode
resolveOperandAndAddressMode(info);
return true;
}
VXInstructionDecoder::VXInstructionDecoder(void const *buffer, size_t bufferLen,
VXDisassemblerMode disassemblerMode, VXInstructionSetVendor preferredVendor)
: m_inputBuffer(buffer)
, m_inputBufferLen(bufferLen)
, m_inputBufferOffset(0)
}
VXInstructionDecoder::VXInstructionDecoder(VXBaseDataSource *input,
VXDisassemblerMode disassemblerMode, VXInstructionSetVendor preferredVendor,
uint64_t instructionPointer)
: m_dataSource(input)
, m_disassemblerMode(disassemblerMode)
, m_preferredVendor(preferredVendor)
, m_preferredVendor(preferredVendor)
, m_instructionPointer(instructionPointer)
{
}
bool VXInstructionDecoder::decodeNextInstruction(VXInstructionInfo &info)
bool VXInstructionDecoder::decodeInstruction(VXInstructionInfo &info)
{
// Clear instruction info
memset(&info, 0, sizeof(info));
@ -1157,8 +1221,8 @@ bool VXInstructionDecoder::decodeNextInstruction(VXInstructionInfo &info)
default:
assert(0);
}
// Set instruction pointer
info.instructionPointer = m_instructionPointer;
// Set instruction address
info.instrAddress = m_instructionPointer;
// Decode
if (!decodePrefixes(info) || !decodeOpcode(info))
{
@ -1188,29 +1252,31 @@ bool VXInstructionDecoder::decodeNextInstruction(VXInstructionInfo &info)
info.operand[1].type = VXOperandType::NONE;
}
}
if ((info.mnemonic == VXInstructionMnemonic::NOP) && (info.flags & IF_PREFIX_REPZ))
if ((info.mnemonic == VXInstructionMnemonic::NOP) && (info.flags & IF_PREFIX_REP))
{
info.mnemonic = VXInstructionMnemonic::PAUSE;
info.flags &= ~IF_PREFIX_REPZ;
info.flags &= ~IF_PREFIX_REP;
}
// Increment instruction pointer
m_instructionPointer += info.length;
// Set instruction pointer
info.instrPointer = m_instructionPointer;
return true;
DecodeError:
// Increment instruction pointer.
m_instructionPointer += 1;
// Backup all error flags, the instruction length and the instruction pointer
// Backup all error flags, the instruction length and the instruction address
uint32_t flags = info.flags & (IF_ERROR_MASK | 0x00000007);
uint8_t length = info.length;
uint8_t firstByte = info.instructionBytes[0];
uint64_t instrPointer = info.instructionPointer;
uint8_t firstByte = info.data[0];
uint64_t instrAddress = info.instrAddress;
// Clear instruction info
memset(&info, 0, sizeof(info));
// Restore saved values
info.flags = flags;
info.length = length;
info.instructionBytes[0] = firstByte;
info.instructionPointer = instrPointer;
info.data[0] = firstByte;
info.instrAddress = instrAddress;
info.instrDefinition = Internal::GetInstructionDefinition(0);
// Return with error, if the end of the input source was reached while decoding the
// invalid instruction
@ -1223,7 +1289,7 @@ DecodeError:
// source while decoding the invalid instruction.
if (info.length != 1)
{
m_inputBufferOffset = m_inputBufferOffset - info.length + 1;
m_dataSource->setPosition(m_dataSource->getPosition() - info.length + 1);
info.length = 1;
}
return true;

View File

@ -32,6 +32,7 @@
#pragma once
#include <type_traits>
#include <istream>
#include "VXDisassemblerTypes.h"
namespace Verteron
@ -40,18 +41,347 @@ namespace Verteron
namespace Disassembler
{
///////////////////////////////////////////////////////////////////////////////////////////////////
/**
 * @brief   The base class for all data-source implementations.
 *          Derived classes provide the raw byte access (@c internalInputPeek and
 *          @c internalInputNext) and the position handling. The public @c inputPeek and
 *          @c inputNext methods perform the length and end-of-input checks before they
 *          delegate to the raw accessors.
 */
class VXBaseDataSource
{
private:
    // Last byte returned by inputPeek or inputNext (zero until the first read).
    uint8_t m_currentInput;
protected:
    /**
     * @brief   Override this method in your custom data source implementations.
     *          Reads the next byte from the data source. This method does NOT increase the
     *          current input position.
     * @return  The current input byte.
     */
    virtual uint8_t internalInputPeek() = 0;
    /**
     * @brief   Override this method in your custom data source implementations.
     *          Reads the next byte from the data source. This method increases the current
     *          input position by one.
     * @return  The current input byte.
     */
    virtual uint8_t internalInputNext() = 0;
protected:
    /**
     * @brief   Default constructor.
     */
    VXBaseDataSource() : m_currentInput(0) { }
public:
    /**
     * @brief   Destructor.
     */
    virtual ~VXBaseDataSource() { }
public:
    /**
     * @brief   Reads the next byte from the data source. This method does NOT increase the
     *          current input position or the @c length field of the @c info parameter.
     * @param   info    The instruction info.
     * @return  The current input byte. If the result is zero, you should always check the
     *          @c flags field of the @c info parameter for error flags.
     *          Possible error values are @c IF_ERROR_END_OF_INPUT or @c IF_ERROR_LENGTH.
     */
    uint8_t inputPeek(VXInstructionInfo &info);
    /**
     * @brief   Reads the next byte from the data source. This method increases the current
     *          input position and the @c length field of the @c info parameter.
     *          This method also appends the new byte to the @c data field of the @c info
     *          parameter.
     * @param   info    The instruction info.
     * @return  The current input byte. If the result is zero, you should always check the
     *          @c flags field of the @c info parameter for error flags.
     *          Possible error values are @c IF_ERROR_END_OF_INPUT or @c IF_ERROR_LENGTH.
     */
    uint8_t inputNext(VXInstructionInfo &info);
    /**
     * @brief   Reads the next byte(s) from the data source. This method increases the current
     *          input position and the @c length field of the @c info parameter.
     *          This method also appends the new byte(s) to the @c data field of the @c info
     *          parameter.
     * @tparam  T       Integral type of the value to read.
     * @param   info    The instruction info.
     * @return  The current input data. If the result is zero, you should always check the
     *          @c flags field of the @c info parameter for error flags.
     *          Possible error values are @c IF_ERROR_END_OF_INPUT or @c IF_ERROR_LENGTH.
     */
    template <typename T>
    T inputNext(VXInstructionInfo &info);
    /**
     * @brief   Returns the current input byte. The current input byte is set every time the
     *          @c inputPeek or @c inputNext method is called.
     * @return  The current input byte.
     */
    uint8_t inputCurrent() const;
public:
    /**
     * @brief   Override this method in your custom data source implementations.
     *          Signals, if the end of the data source is reached.
     * @return  True if end of input, false if not.
     */
    virtual bool isEndOfInput() const = 0;
    /**
     * @brief   Override this method in your custom data source implementations.
     *          Returns the current input position.
     * @return  The current input position.
     */
    virtual uint64_t getPosition() const = 0;
    /**
     * @brief   Override this method in your custom data source implementations.
     *          Sets a new input position.
     * @param   position    The new input position.
     * @return  Returns false, if the new position exceeds the maximum input length.
     */
    virtual bool setPosition(uint64_t position) = 0;
};
inline uint8_t VXBaseDataSource::inputPeek(VXInstructionInfo &info)
{
    // Refuse to look ahead once 15 bytes were consumed for this instruction or the data
    // source is exhausted. The length check comes first, so only one flag is ever set.
    const bool lengthExceeded = (info.length == 15);
    if (lengthExceeded || isEndOfInput())
    {
        info.flags |= lengthExceeded ? IF_ERROR_LENGTH : IF_ERROR_END_OF_INPUT;
        return 0;
    }
    // Remember the byte so that inputCurrent() can hand it out again.
    return (m_currentInput = internalInputPeek());
}
inline uint8_t VXBaseDataSource::inputNext(VXInstructionInfo &info)
{
    // Same guards as inputPeek: stop at 15 consumed bytes or at the end of the input.
    const bool lengthExceeded = (info.length == 15);
    if (lengthExceeded || isEndOfInput())
    {
        info.flags |= lengthExceeded ? IF_ERROR_LENGTH : IF_ERROR_END_OF_INPUT;
        return 0;
    }
    // Consume one byte, cache it and record it as part of the instruction.
    const uint8_t consumed = internalInputNext();
    m_currentInput = consumed;
    info.data[info.length] = consumed;
    ++info.length;
    return consumed;
}
/**
 * Reads sizeof(T) bytes through the single-byte inputNext overload and assembles them
 * into a value of type T, least significant byte first. Returns zero as soon as one of
 * the byte reads yields zero while an error flag is set in info.flags.
 */
template <typename T>
inline T VXBaseDataSource::inputNext(VXInstructionInfo &info)
{
    static_assert(std::is_integral<T>::value, "integral type required");
    static_assert(sizeof(T) <= sizeof(uint64_t), "integral type too wide");
    // Assemble the value in an unsigned 64 bit accumulator. Shifting in type T would
    // overflow for signed types whenever the most significant byte has its top bit set.
    uint64_t result = 0;
    for (unsigned i = 0; i < sizeof(T); ++i)
    {
        const uint8_t b = inputNext(info);
        if (!b && (info.flags & IF_ERROR_MASK))
        {
            return 0;
        }
        result |= (static_cast<uint64_t>(b) << (i * 8));
    }
    return static_cast<T>(result);
}
// Returns the byte cached by the most recent successful inputPeek or inputNext call.
inline uint8_t VXBaseDataSource::inputCurrent() const
{
return m_currentInput;
}
///////////////////////////////////////////////////////////////////////////////////////////////////
/**
 * @brief   Implements a memory buffer based data source.
 *          The buffer passed to the constructor is referenced, not copied.
 */
class VXBufferDataSource : public VXBaseDataSource
{
private:
// Start of the input buffer.
const void *m_inputBuffer;
// Length of the input buffer in bytes.
uint64_t m_inputBufferLen;
// Offset of the next byte to read.
uint64_t m_inputBufferPos;
protected:
/**
 * @brief   Reads the next byte from the data source. This method does NOT increase the
 *          current input position.
 * @return  The current input byte.
 */
uint8_t internalInputPeek() override;
/**
 * @brief   Reads the next byte from the data source. This method increases the current
 *          input position by one.
 * @return  The current input byte.
 */
uint8_t internalInputNext() override;
public:
/**
 * @brief   Constructor. The input position starts at the beginning of the buffer.
 * @param   buffer      The input buffer.
 * @param   bufferLen   The length of the input buffer.
 */
VXBufferDataSource(const void* buffer, size_t bufferLen)
: m_inputBuffer(buffer)
, m_inputBufferLen(bufferLen)
, m_inputBufferPos(0) { };
public:
/**
 * @brief   Signals, if the end of the data source is reached.
 * @return  True if end of input, false if not.
 */
bool isEndOfInput() const override;
/**
 * @brief   Returns the current input position.
 * @return  The current input position.
 */
uint64_t getPosition() const override;
/**
 * @brief   Sets a new input position.
 * @param   position    The new input position.
 * @return  Returns false, if the new position exceeds the maximum input length.
 */
bool setPosition(uint64_t position) override;
};
inline uint8_t VXBufferDataSource::internalInputPeek()
{
    // Look at the byte at the current position without moving the position.
    const uint8_t *bytes = static_cast<const uint8_t*>(m_inputBuffer);
    return bytes[m_inputBufferPos];
}
inline uint8_t VXBufferDataSource::internalInputNext()
{
    // Hand out the byte at the current position and advance the position by one.
    const uint8_t *bytes = static_cast<const uint8_t*>(m_inputBuffer);
    return bytes[m_inputBufferPos++];
}
inline bool VXBufferDataSource::isEndOfInput() const
{
    // The input is exhausted as soon as the position no longer lies inside the buffer.
    const bool insideBuffer = (m_inputBufferPos < m_inputBufferLen);
    return !insideBuffer;
}
// Returns the offset of the next byte to read, counted from the start of the buffer.
inline uint64_t VXBufferDataSource::getPosition() const
{
return m_inputBufferPos;
}
inline bool VXBufferDataSource::setPosition(uint64_t position)
{
    // The position is stored unconditionally. The return value reports whether it still
    // refers to a byte inside the buffer, i.e. false once the position reaches or exceeds
    // the buffer length.
    m_inputBufferPos = position;
    return !isEndOfInput();
}
///////////////////////////////////////////////////////////////////////////////////////////////////
/**
 * @brief   Implements a stream based data source.
 *          All methods check the stream pointer first and treat a null stream like an
 *          exhausted data source.
 */
class VXStreamDataSource : public VXBaseDataSource
{
private:
// The stream to read from; may be null.
std::istream *m_inputStream;
protected:
/**
 * @brief   Reads the next byte from the data source. This method does NOT increase the
 *          current input position.
 * @return  The current input byte.
 */
uint8_t internalInputPeek() override;
/**
 * @brief   Reads the next byte from the data source. This method increases the current
 *          input position by one.
 * @return  The current input byte.
 */
uint8_t internalInputNext() override;
public:
/**
 * @brief   Constructor.
 * @param   stream  The input stream.
 */
explicit VXStreamDataSource(std::istream *stream)
: m_inputStream(stream) { };
public:
/**
 * @brief   Signals, if the end of the data source is reached.
 * @return  True if end of input, false if not.
 */
bool isEndOfInput() const override;
/**
 * @brief   Returns the current input position.
 * @return  The current input position.
 */
uint64_t getPosition() const override;
/**
 * @brief   Sets a new input position.
 * @param   position    The new input position.
 * @return  Returns false, if the new position exceeds the maximum input length.
 */
bool setPosition(uint64_t position) override;
};
inline uint8_t VXStreamDataSource::internalInputPeek()
{
    // Without a stream there is nothing to read; otherwise inspect the next character
    // without extracting it.
    return m_inputStream ? static_cast<uint8_t>(m_inputStream->peek()) : 0;
}
inline uint8_t VXStreamDataSource::internalInputNext()
{
    // Extract one character from the stream, if there is a stream at all.
    if (m_inputStream)
    {
        return static_cast<uint8_t>(m_inputStream->get());
    }
    return 0;
}
inline bool VXStreamDataSource::isEndOfInput() const
{
    if (!m_inputStream)
    {
        return true;
    }
    // We use good() instead of eof() to make sure the decoding will fail, if a stream
    // internal error occurred. The input counts as exhausted when the stream is NOT good.
    // NOTE(review): a stream only leaves the good state after a read has hit the end, so
    // the end of the input is noticed one read late - confirm this is acceptable.
    return !m_inputStream->good();
}
inline uint64_t VXStreamDataSource::getPosition() const
{
    if (!m_inputStream)
    {
        return 0;
    }
    // tellg() signals failure with -1, which converts to the largest uint64_t value.
    return m_inputStream->tellg();
}
inline bool VXStreamDataSource::setPosition(uint64_t position)
{
    if (!m_inputStream)
    {
        return false;
    }
    m_inputStream->seekg(position);
    // Success means the stream is still usable after the seek.
    return !isEndOfInput();
}
///////////////////////////////////////////////////////////////////////////////////////////////////
/**
* @brief Values that represent a disassembler mode.
*/
enum class VXDisassemblerMode
{
M16BIT = 16,
M32BIT = 32,
M64BIT = 64
M16BIT,
M32BIT,
M64BIT
};
/**
* @brief Values that represent an instruction vendor.
* @brief Values that represent an instruction-set vendor.
*/
enum class VXInstructionSetVendor
{
@ -69,54 +399,48 @@ class VXInstructionDecoder
private:
enum class RegisterClass
{
GENERAL_PURPOSE = 0,
MMX = 1,
CONTROL = 2,
DEBUG = 3,
SEGMENT = 4,
XMM = 5
GENERAL_PURPOSE,
MMX,
CONTROL,
DEBUG,
SEGMENT,
XMM
};
private:
VXBaseDataSource *m_dataSource;
VXDisassemblerMode m_disassemblerMode;
VXInstructionSetVendor m_preferredVendor;
uint64_t m_instructionPointer;
const void *m_inputBuffer;
size_t m_inputBufferLen;
size_t m_inputBufferOffset;
uint8_t m_currentInput;
private:
uint8_t m_effectiveRexW;
uint8_t m_effectiveRexR;
uint8_t m_effectiveRexX;
uint8_t m_effectiveRexB;
uint8_t m_effectiveModrmReg;
uint8_t m_effectiveModrmRm;
bool m_effectiveVexL;
private:
/**
* @brief Reads the next byte from the input data source. This method does NOT increase the
* current input offset and the @c length or @c instructionBytes field of the @c info
* parameter.
* @brief Reads the next byte from the data source. This method does NOT increase the
* current input position or the @c length field of the @c info parameter.
* @param info The instruction info.
* @return Returns the current input byte. If the result is zero, you should always check
* the @flags field of the @c info parameter for the @c IF_ERROR_MASK.
* @return The current input byte. If the result is zero, you should always check the
* @c flags field of the @c info parameter for error flags.
* Possible error values are @c IF_ERROR_END_OF_INPUT or @c IF_ERROR_LENGTH.
*/
uint8_t inputPeek(VXInstructionInfo &info);
/**
* @brief Reads the next byte from the input data source. This method increases the current
* input offset and the @c length field of the @info parameter.
* @brief Reads the next byte from the data source. This method increases the current
* input position and the @c length field of the @info parameter.
* This method also appends the new byte to to @c data field of the @c info
* parameter.
* @param info The instruction info.
* @return Returns the current input byte. If the result is zero, you should always check
* the @flags field of the @c info parameter for the @c IF_ERROR_MASK.
* @return The current input byte. If the result is zero, you should always check the
* @c flags field of the @c info parameter for error flags.
* Possible error values are @c IF_ERROR_END_OF_INPUT or @c IF_ERROR_LENGTH.
*/
uint8_t inputNext(VXInstructionInfo &info);
/**
* @brief Reads the next byte(s) from the data source. This method increases the current
* input offset and the @c length field of the @info parameter.
* @tparam T Generic integral type parameter.
* input position and the @c length field of the @info parameter.
* This method also appends the new byte(s) to to @c data field of the @c info
* parameter.
* @param info The instruction info.
* @return Returns the current input byte(s). If the result is zero, you should always check
* the @flags field of the @c info parameter for the @c IF_ERROR_MASK.
* @return The current input data. If the result is zero, you should always check the
* @c flags field of the @c info parameter for error flags.
* Possible error values are @c IF_ERROR_END_OF_INPUT or @c IF_ERROR_LENGTH.
*/
template <typename T>
T inputNext(VXInstructionInfo &info);
@ -188,15 +512,6 @@ private:
* @return True if it succeeds, false if it fails.
*/
bool decodeVex(VXInstructionInfo &info);
private:
/**
* @brief Resolves the effective operand and address mode of the instruction.
* This method requires a non-null value in the @c instrDefinition field of the
* @c info struct.
* @param info The @c VXInstructionInfo struct that receives the effective operand and
* address mode.
*/
void resolveOperandAndAddressMode(VXInstructionInfo &info) const;
private:
/**
* @brief Returns the effective operand size.
@ -222,6 +537,23 @@ private:
*/
bool decodeOperand(VXInstructionInfo &info, VXOperandInfo &operand,
VXDefinedOperandType operandType, VXDefinedOperandSize operandSize);
private:
/**
* @brief Resolves the effective operand and address mode of the instruction.
* This method requires a non-null value in the @c instrDefinition field of the
* @c info struct.
* @param info The @c VXInstructionInfo struct that receives the effective operand and
* address mode.
*/
void resolveOperandAndAddressMode(VXInstructionInfo &info) const;
/**
* @brief Calculates the effective REX/VEX.w, r, x, b, l values.
* This method requires a non-null value in the @c instrDefinition field of the
* @c info struct.
* @param info The @c VXInstructionInfo struct that receives the effective operand and
* address mode.
*/
void calculateEffectiveRexVexValues(VXInstructionInfo &info) const;
private:
/**
* @brief Collects and decodes optional instruction prefixes.
@ -235,24 +567,20 @@ private:
* @return True if it succeeds, false if it fails.
*/
bool decodeOpcode(VXInstructionInfo &info);
/**
* @brief Decodes an instruction node.
* @param info The @c VXInstructionInfo struct that receives the decoded data.
* @param node The instruction node.
* @return True if it succeeds, false if it fails.
*/
bool decodeInstructionNode(VXInstructionInfo &info, VXOpcodeTreeNode node);
public:
/**
* @brief Constructor.
* @param buffer The input buffer.
* @param bufferLen The length of the input buffer.
* @param disassemblerMode The disassembler mode.
* @param preferredVendor The preferred instruction-set vendor.
* @brief Default constructor.
*/
VXInstructionDecoder(const void *buffer, size_t bufferLen,
VXInstructionDecoder();
/**
* @brief Constructor.
* @param input A reference to the input data source.
* @param instructionPointer The initial instruction pointer.
*/
explicit VXInstructionDecoder(VXBaseDataSource *input,
VXDisassemblerMode disassemblerMode = VXDisassemblerMode::M32BIT,
VXInstructionSetVendor preferredVendor = VXInstructionSetVendor::ANY);
VXInstructionSetVendor preferredVendor = VXInstructionSetVendor::ANY,
uint64_t instructionPointer = 0);
public:
/**
* @brief Decodes the next instruction from the input data source.
@ -262,43 +590,35 @@ public:
* length.
* In all other cases (valid and invalid instructions) the return value is true.
*/
bool decodeNextInstruction(VXInstructionInfo &info);
/**
* @brief Decodes a single instruction.
* @param info The @c VXInstructionInfo struct that receives the information
* about the decoded instruction.
* @param buffer The input buffer.
* @param bufferLen The length of the input buffer.
* @param disassemblerMode The disassembler mode.
* @param preferredVendor The preferred instruction-set vendor.
* @return This method returns false, if the current position has exceeded the maximum input
* length.
* In all other cases (valid and invalid instructions) the return value is true.
*/
static bool decodeInstruction(VXInstructionInfo &info, const void *buffer, size_t bufferLen,
VXDisassemblerMode disassemblerMode = VXDisassemblerMode::M32BIT,
VXInstructionSetVendor preferredVendor = VXInstructionSetVendor::ANY);
bool decodeInstruction(VXInstructionInfo &info);
public:
/**
* @brief Returns the current input position.
* @return The current input position.
* @brief Returns a pointer to the current data source.
* @return A pointer to the current data source.
*/
uintptr_t getPosition() const;
VXBaseDataSource* getDataSource() const;
/**
* @brief Changes the input position.
* @param position The new input position.
* @return True if it succeeds, false if the new position exceeds the maximum input length.
* @brief Sets a new data source.
* @param input A reference to the new input data source.
*/
bool setPosition(uintptr_t position);
void setDataSource(VXBaseDataSource *input);
/**
* @brief Returns the current instruction pointer. The instruction pointer is used to
* properly format relative instructions.
* @brief Returns the current disassembler mode.
* @return The current disassembler mode.
*/
VXDisassemblerMode getDisassemblerMode() const;
/**
* @brief Sets the current disassembler mode.
* @param disassemblerMode The new disassembler mode.
*/
void setDisassemblerMode(VXDisassemblerMode disassemblerMode);
/**
* @brief Returns the current instruction pointer.
* @return The current instruction pointer.
*/
uint64_t getInstructionPointer() const;
/**
* @brief Sets the current instruction pointer. The instruction pointer is used to
* properly format relative instructions.
* @brief Sets a new instruction pointer.
* @param instructionPointer The new instruction pointer.
*/
void setInstructionPointer(uint64_t instructionPointer);
@ -306,92 +626,75 @@ public:
inline uint8_t VXInstructionDecoder::inputPeek(VXInstructionInfo &info)
{
if (info.length == 15)
{
info.flags |= IF_ERROR_LENGTH;
return 0;
}
if (m_inputBufferOffset == m_inputBufferLen)
if (!m_dataSource)
{
info.flags |= IF_ERROR_END_OF_INPUT;
return 0;
}
m_currentInput = *(static_cast<const uint8_t*>(m_inputBuffer) + m_inputBufferOffset);
return m_currentInput;
return m_dataSource->inputPeek(info);
}
inline uint8_t VXInstructionDecoder::inputNext(VXInstructionInfo &info)
{
if (info.length == 15)
{
info.flags |= IF_ERROR_LENGTH;
return 0;
}
if (m_inputBufferOffset == m_inputBufferLen)
if (!m_dataSource)
{
info.flags |= IF_ERROR_END_OF_INPUT;
return 0;
}
m_currentInput = *(static_cast<const uint8_t*>(m_inputBuffer) + m_inputBufferOffset);
m_inputBufferOffset++;
info.instructionBytes[info.length] = m_currentInput;
info.length++;
return m_currentInput;
return m_dataSource->inputNext(info);
}
template <typename T>
inline T VXInstructionDecoder::inputNext(VXInstructionInfo &info)
{
static_assert(std::is_integral<T>::value, "integral type required");
T result = 0;
for (unsigned i = 0; i < (sizeof(T) / sizeof(uint8_t)); i++)
if (!m_dataSource)
{
T b = inputNext(info);
if (!b && (info.flags & IF_ERROR_MASK))
{
return 0;
}
result |= (b << (i * 8));
info.flags |= IF_ERROR_END_OF_INPUT;
return 0;
}
return result;
return m_dataSource->inputNext<T>(info);
}
inline uint8_t VXInstructionDecoder::inputCurrent() const
{
return m_currentInput;
}
inline uintptr_t VXInstructionDecoder::getPosition() const
{
return m_inputBufferOffset;
}
inline bool VXInstructionDecoder::setPosition(uintptr_t position)
{
if (position < m_inputBufferLen)
if (!m_dataSource)
{
m_inputBufferOffset = position;
return true;
}
return false;
return 0;
}
return m_dataSource->inputCurrent();
}
inline VXBaseDataSource* VXInstructionDecoder::getDataSource() const
{
return m_dataSource;
}
inline void VXInstructionDecoder::setDataSource(VXBaseDataSource *input)
{
m_dataSource = input;
}
inline VXDisassemblerMode VXInstructionDecoder::getDisassemblerMode() const
{
return m_disassemblerMode;
}
inline void VXInstructionDecoder::setDisassemblerMode(VXDisassemblerMode disassemblerMode)
{
m_disassemblerMode = disassemblerMode;
}
inline uint64_t VXInstructionDecoder::getInstructionPointer() const
{
return m_instructionPointer;
return m_instructionPointer;
}
inline void VXInstructionDecoder::setInstructionPointer(uint64_t instructionPointer)
{
m_instructionPointer = instructionPointer;
m_instructionPointer = instructionPointer;
}
inline bool VXInstructionDecoder::decodeInstruction(VXInstructionInfo &info, const void *buffer,
size_t bufferLen, VXDisassemblerMode disassemblerMode, VXInstructionSetVendor preferredVendor)
{
return VXInstructionDecoder(
buffer, bufferLen, disassemblerMode, preferredVendor).decodeNextInstruction(info);
}
///////////////////////////////////////////////////////////////////////////////////////////////////
}

View File

@ -195,12 +195,12 @@ uint64_t VXBaseInstructionFormatter::calcAbsoluteTarget(const VXInstructionInfo
switch (operand.size)
{
case 8:
return (info.instructionPointer + info.length + operand.lval.sbyte);
return (info.instrPointer + operand.lval.sbyte);
case 16:
return (info.instructionPointer + info.length + operand.lval.sword);
return (info.instrPointer + operand.lval.sword);
case 32:
case 64:
return (info.instructionPointer + info.length + operand.lval.sdword);
return (info.instrPointer + operand.lval.sdword);
default:
assert(0);
}
@ -253,7 +253,7 @@ void VXIntelInstructionFormatter::formatOperand(const VXInstructionInfo &info,
// TODO: resolve symbols for displacement only and RIP based memory operands
if (info.flags & IF_PREFIX_SEGMENT)
{
outputAppendFormatted("%s:", registerToString(info.segmentRegister));
outputAppendFormatted("%s:", registerToString(info.segment));
}
outputAppend("[");
if (operand.base == VXRegister::RIP)
@ -395,10 +395,10 @@ void VXIntelInstructionFormatter::internalFormatInstruction(const VXInstructionI
{
outputAppend("lock ");
}
if (info.flags & IF_PREFIX_REPZ)
if (info.flags & IF_PREFIX_REP)
{
outputAppend("rep ");
} else if (info.flags & IF_PREFIX_REPNZ)
} else if (info.flags & IF_PREFIX_REPNE)
{
outputAppend("repne ");
}

View File

@ -102,7 +102,6 @@ protected:
* @param info The instruction info.
*/
virtual void internalFormatInstruction(const VXInstructionInfo &info);
public:
/**
* @brief Default constructor.
*/
@ -113,6 +112,7 @@ public:
* resolver should be used.
*/
explicit VXBaseInstructionFormatter(VXBaseSymbolResolver *symbolResolver);
public:
/**
* @brief Destructor.
*/

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff