1//===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//===----------------------------------------------------------------------===//
9// This file is part of the X86 Disassembler.
10// It contains code to translate the data produced by the decoder into MCInsts.
14// The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
15// 64-bit X86 instruction sets. The main decode sequence for an assembly
16// instruction in this disassembler is:
18// 1. Read the prefix bytes and determine the attributes of the instruction.
19// These attributes, recorded in enum attributeBits
20// (X86DisassemblerDecoderCommon.h), form a bitmask. The table CONTEXTS_SYM
21// provides a mapping from bitmasks to contexts, which are represented by
22// enum InstructionContext (ibid.).
24// 2. Read the opcode, and determine what kind of opcode it is. The
25// disassembler distinguishes four kinds of opcodes, which are enumerated in
26// OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
27// (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
28// (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context.
30// 3. Depending on the opcode type, look in one of four ClassDecision structures
31// (X86DisassemblerDecoderCommon.h). Use the opcode class to determine which
32// OpcodeDecision (ibid.) to look the opcode in. Look up the opcode, to get
33// a ModRMDecision (ibid.).
35// 4. Some instructions, such as escape opcodes or extended opcodes, or even
36// instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the
37// ModR/M byte to complete decode. The ModRMDecision's type is an entry from
38// ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
39// ModR/M byte is required and how to interpret it.
41// 5. After resolving the ModRMDecision, the disassembler has a unique ID
42// of type InstrUID (X86DisassemblerDecoderCommon.h). Looking this ID up in
43// INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
44// meanings of its operands.
46// 6. For each operand, its encoding is an entry from OperandEncoding
47// (X86DisassemblerDecoderCommon.h) and its type is an entry from
48// OperandType (ibid.). The encoding indicates how to read it from the
49// instruction; the type indicates how to interpret the value once it has
50// been read. For example, a register operand could be stored in the R/M
51// field of the ModR/M byte, the REG field of the ModR/M byte, or added to
52// the main opcode. This is orthogonal to its meaning (a GPR or an XMM
53// register, for instance). Given this information, the operands can be
54// extracted and interpreted.
56// 7. As the last step, the disassembler translates the instruction information
57// and operands into a format understandable by the client - in this case, an
58// MCInst for use by the MC infrastructure.
60// The disassembler is broken broadly into two parts: the table emitter that
61// emits the instruction decode tables discussed above during compilation, and
62// the disassembler itself. The table emitter is documented in more detail in
63// utils/TableGen/X86DisassemblerEmitter.h.
65// X86Disassembler.cpp contains the code responsible for step 7, and for
66// invoking the decoder to execute steps 1-6.
67// X86DisassemblerDecoderCommon.h contains the definitions needed by both the
68// table emitter and the disassembler.
69// X86DisassemblerDecoder.h contains the public interface of the decoder,
70// factored out into C for possible use by other projects.
71// X86DisassemblerDecoder.c contains the source code of the decoder, which is
72// responsible for steps 1-6.
74//===----------------------------------------------------------------------===//
95 #define DEBUG_TYPE "x86-disassembler"
97 #define debug(s) LLVM_DEBUG(dbgs() << __LINE__ << ": " << s);
99// Specifies whether a ModR/M byte is needed and (if so) which
100// instruction each possible value of the ModR/M byte corresponds to. Once
101// this information is known, we have narrowed down to a single instruction.
107// Specifies which set of ModR/M->instruction tables to look at
108// given a particular opcode.
113// Specifies which opcode->instruction tables to look at given
114// a particular context (set of attributes). Since there are many possible
115// contexts, the decoder first uses CONTEXTS_SYM to determine which context
116// applies given a specific set of attributes. Hence there are only IC_max
117// entries in this table, rather than 2^(ATTR_max).
122#include "X86GenDisassemblerTables.inc"
130 dec = &
ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
133 dec = &
TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
136 dec = &
THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
139 dec = &
THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
142 dec = &
XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
145 dec = &
XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
148 dec = &
XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
155 dec = &
MAP4_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
158 dec = &
MAP5_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
161 dec = &
MAP6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
164 dec = &
MAP7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
180 return modRMTable[dec->
instructionIDs + ((modRM & 0x38) >> 3) + 8];
182 case MODRM_SPLITMISC:
195 byte = insn->
bytes[offset];
200 auto r = insn->
bytes;
202 if (offset +
sizeof(
T) > r.size())
210 return insn->
mode ==
MODE_64BIT && prefix >= 0x40 && prefix <= 0x4f;
217// Consumes all of an instruction's prefix bytes, and marks the
218// instruction as having them. Also sets the instruction's default operand,
219// address, and other relevant data sizes to report operands correctly.
221// insn must not be empty.
230 // If we fail reading prefixes, just stop here and let the opcode reader
235 // If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
236 // break and let it be disassembled as a normal "instruction".
240 if ((
byte == 0xf2 ||
byte == 0xf3) && !
peek(insn,
nextByte)) {
241 // If the byte is 0xf2 or 0xf3, and any of the following conditions are met:
243 // - it is followed by a LOCK (0xf0) prefix
244 // - it is followed by an xchg instruction
245 // then it should be disassembled as a xacquire/xrelease not repne/rep.
249 if (!(
byte == 0xf3 &&
nextByte == 0x90))
// PAUSE instruction support
252 // Also if the byte is 0xf3, and the following condition is met:
253 // - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or
254 // "mov mem, imm" (opcode 0xc6/0xc7) instructions.
255 // then it should be disassembled as an xrelease not rep.
263 // Go to REX prefix after the current one
266 // We should be able to read next byte after REX prefix
267 if (
peek(insn, nnextByte))
277 case 0xf2:
// REPNE/REPNZ
278 case 0xf3: {
// REP or REPE/REPZ
283 // 1. There could be several 0x66
284 // 2. if (nextByte == 0x66) and nextNextByte != 0x0f then
285 // it's not mandatory prefix
286 // 3. if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need
287 // 0x0f exactly after it to be mandatory prefix
288 // 4. if (nextByte == 0xd5) it's REX2 and we need
289 // 0x0f exactly after it to be mandatory prefix
292 // The last of 0xf2 /0xf3 is mandatory prefix
297 case 0x2e:
// CS segment override -OR- Branch not taken
300 case 0x36:
// SS segment override -OR- Branch taken
303 case 0x3e:
// DS segment override
306 case 0x26:
// ES segment override
309 case 0x64:
// FS segment override
312 case 0x65:
// GS segment override
315 case 0x66: {
// Operand-size override {
320 // 0x66 can't overwrite existing mandatory prefix and should be ignored
325 case 0x67:
// Address-size override
328 default:
// Not a prefix byte
333 if (
isREX(insn,
byte)) {
354 if (
peek(insn, byte2)) {
379 // We simulate the REX prefix for simplicity's sake
386 // We simulate the REX2 prefix for simplicity's sake
395 "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
399 }
else if (
byte == 0xc4) {
401 if (
peek(insn, byte1)) {
416 // We simulate the REX prefix for simplicity's sake
430 }
else if (
byte == 0xc5) {
432 if (
peek(insn, byte1)) {
462 }
else if (
byte == 0x8f) {
464 if (
peek(insn, byte1)) {
469 if ((byte1 & 0x38) != 0x0)
// 0 in these 3 bits is a POP instruction.
479 // We simulate the REX prefix for simplicity's sake
501 }
else if (
isREX2(insn,
byte)) {
503 if (
peek(insn, byte1)) {
510 // We simulate the REX prefix for simplicity's sake
548// Consumes the SIB byte to determine addressing information.
560 sibBaseBase = SIB_BASE_EAX;
564 sibBaseBase = SIB_BASE_RAX;
643// Consumes all addressing information (ModR/M byte, SIB byte, and displacement).
659 // This goes by insn->registerSize to pick the correct register, which messes
660 // up if we're using (say) XMM or 8-bit register operands. That gets fixed in
689 EABase eaBaseBase = EA_BASE_BX_SI;
731 // In determining whether RIP-relative mode is used (rm=5),
732 // or whether a SIB byte is present (rm=4),
733 // the extension bits (REX.b and EVEX.x) are ignored.
735 case 0x4:
// SIB byte is present
740 case 0x5:
// RIP-relative
757 case 0x4:
// SIB byte is present
758 insn->
eaBase = EA_BASE_sib;
776 }
// switch (insn->addressSize)
781 #define GENERIC_FIXUP_FUNC(name, base, prefix) \
782 static uint16_t name(struct InternalInstruction *insn, OperandType type, \
783 uint8_t index, uint8_t *valid) { \
787 debug("Unhandled register type"); \
791 return base + index; \
793 if (insn->rexPrefix && index >= 4 && index <= 7) \
794 return prefix##_SPL + (index - 4); \
796 return prefix##_AL + index; \
798 return prefix##_AX + index; \
800 return prefix##_EAX + index; \
802 return prefix##_RAX + index; \
804 return prefix##_ZMM0 + index; \
806 return prefix##_YMM0 + index; \
808 return prefix##_XMM0 + index; \
812 return prefix##_TMM0 + index; \
817 return prefix##_K0 + index; \
821 return prefix##_K0_K1 + (index / 2); \
823 return prefix##_MM0 + (index & 0x7); \
824 case TYPE_SEGMENTREG: \
825 if ((index & 7) > 5) \
827 return prefix##_ES + (index & 7); \
828 case TYPE_DEBUGREG: \
831 return prefix##_DR0 + index; \
832 case TYPE_CONTROLREG: \
835 return prefix##_CR0 + index; \
837 return prefix##_XMM0 + index; \
839 return prefix##_YMM0 + index; \
841 return prefix##_ZMM0 + index; \
845// Consult an operand type to determine the meaning of the reg or R/M field. If
846// the operand is an XMM operand, for example, an operand would be XMM0 instead
847// of AX, which readModRM() would otherwise misinterpret it as.
849// @param insn - The instruction containing the operand.
850// @param type - The operand type.
851// @param index - The existing value of the field as reported by readModRM().
852// @param valid - The address of a uint8_t. The target is set to 1 if the
853// field is valid for the register class; 0 if not.
854// @return - The proper value.
858// Consult an operand specifier to determine which of the fixup*Value functions
859// to use in correcting readModRM()'s interpretation.
861// @param insn - See fixup*Value().
862// @param op - The operand specifier.
863// @return - 0 if fixup was successful; -1 if the register returned was
864// invalid for its class.
872 debug(
"Expected a REG or R/M encoding in fixupReg");
882 insn->reg - insn->regBase, &valid);
889 // EVEX_X can extend the register id to 32 for a non-GPR register that is
891 // mode : MODE_64_BIT
892 // Only 8 vector registers are available in 32 bit mode
894 // RM encodes a register
911 if (insn->eaBase >= insn->eaRegBase) {
912 insn->eaBase = (
EABase)fixupRMValue(
913 insn, (
OperandType)
op->type, insn->eaBase - insn->eaRegBase, &valid);
923// Read the opcode (except the ModR/M byte in the case of extended or escape
934 dbgs() <<
format(
"Unhandled mmm field for instruction (0x%hhx)",
963 dbgs() <<
format(
"Unhandled m-mmmm field for instruction (0x%hhx)",
992 dbgs() <<
format(
"Unhandled m-mmmm field for instruction (0x%hhx)",
1006 // m bit indicates opcode map 1
1014 if (current == 0x0f) {
1016 dbgs() <<
format(
"Found a two-byte escape prefix (0x%hhx)", current));
1020 if (current == 0x38) {
1027 }
else if (current == 0x3a) {
1034 }
else if (current == 0x0f) {
1036 dbgs() <<
format(
"Found a 3dnow escape prefix (0x%hhx)", current));
1038 // Consume operands before the opcode to comply with the 3DNow encoding
1051 // The opcode with mandatory prefix must start with opcode escape.
1052 // If not it's legacy repeat prefix
1055 // At this point we have consumed the full opcode.
1056 // Anything we consume from here on must be unconsumed.
1062// Determine whether equiv is the 16-bit equivalent of orig (32-bit or 64-bit).
1064 for (
int i = 0;; i++) {
1065 if (orig[i] ==
'\0' && equiv[i] ==
'\0')
1067 if (orig[i] ==
'\0' || equiv[i] ==
'\0')
1069 if (orig[i] != equiv[i]) {
1070 if ((orig[i] ==
'Q' || orig[i] ==
'L') && equiv[i] ==
'W')
1072 if ((orig[i] ==
'6' || orig[i] ==
'3') && equiv[i] ==
'1')
1074 if ((orig[i] ==
'4' || orig[i] ==
'2') && equiv[i] ==
'6')
1081// Determine whether this instruction is a 64-bit instruction.
1083 for (
int i = 0;; ++i) {
1084 if (
name[i] ==
'\0')
1086 if (
name[i] ==
'6' &&
name[i + 1] ==
'4')
1091// Determine the ID of an instruction, consuming the ModR/M byte as appropriate
1092// for extended and escape opcodes, and using a supplied attribute mask.
1156 switch (insn->
opcode & 0xfe) {
1175 // Below NF instructions are not in map4.
1180 case 0xf3:
// BLSI, BLSR, BLSMSK
1191// Determine the ID of an instruction, consuming the ModR/M byte as appropriate
1192// for extended and escape opcodes. Determines the attributes and context for
1193// the instruction before doing so.
1227 // aaa is not used as an opmask in MAP4
1292 // If we don't have mandatory prefix we should use legacy prefixes here
1299 // Special support for PAUSE
1332 // Absolute jump and pushp/popp need special handling
1334 (insn->
opcode == 0xA1 || (insn->
opcode & 0xf0) == 0x50))
1338 // JCXZ/JECXZ need special handling for 16-bit mode because the meaning
1339 // of the AdSize prefix is inverted w.r.t. 32-bit mode.
1342 // If we're in 16-bit mode and this is one of the relative jumps and opsize
1343 // prefix isn't present, we need to force the opsize attribute since the
1344 // prefix is inverted relative to 32-bit mode.
1358 // The following clauses compensate for limitations of the tables.
1362 // The tables can't distinguish between cases where the W-bit is used to
1363 // select register size and cases where it's a required part of the opcode.
1379 auto SpecName = mii->
getName(instructionIDWithREXW);
1380 // If not a 64-bit instruction, switch the opcode.
1381 if (!
is64Bit(SpecName.data())) {
1389 // Absolute moves, umonitor, and movdir64b need special handling.
1390 // -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are
1392 // -For 32-bit mode we need to ensure the ADSIZE prefix is observed in
1398 // Make sure we observed the prefixes in any position.
1404 // In 16-bit, invert the attributes.
1408 // The OpSize attribute is only valid with the absolute moves.
1423 // The instruction tables make no distinction between instructions that
1424 // allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
1425 // particular spot (i.e., many MMX operations). In general we're
1426 // conservative, but in the specific case where OpSize is present but not in
1427 // the right place we check if there's a 16-bit operation.
1436 // ModRM required with OpSize but not present. Give up and return the
1437 // version without OpSize set.
1443 specName = mii->
getName(instructionID);
1444 specWithOpSizeName = mii->
getName(instructionIDWithOpsize);
1459 // NOOP shouldn't decode as NOOP if REX.b is set. Instead it should decode
1460 // as XCHG %r8, %eax.
1462 uint16_t instructionIDWithNewOpcode;
1467 // Borrow opcode from one of the other XCHGar opcodes
1485 insn->
spec = specWithNewOpcode;
1496// Read an operand from the opcode field of an instruction and interpret it
1497// appropriately given the operand width. Handles AddRegFrm instructions.
1499// @param insn - the instruction whose opcode field is to be read.
1500// @param size - The width (in bytes) of the register being specified.
1501// 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1503// @return - 0 on success; nonzero otherwise.
1510 auto setOpcodeRegister = [&](
unsigned base) {
1519 setOpcodeRegister(MODRM_REG_AL);
1528 setOpcodeRegister(MODRM_REG_AX);
1531 setOpcodeRegister(MODRM_REG_EAX);
1534 setOpcodeRegister(MODRM_REG_RAX);
1541// Consume an immediate operand from an instruction, given the desired operand
1544// @param insn - The instruction whose operand is to be read.
1545// @param size - The width (in bytes) of the operand.
1546// @return - 0 if the immediate was successfully consumed; nonzero
1591// Consume vvvv from an instruction if it has a VEX prefix.
1609 vvvv &= 0xf;
// Can only clear bit 4. Bit 3 must be cleared later.
1611 insn->
vvvv =
static_cast<Reg >(vvvv);
1615// Read a mask register from the opcode field of an instruction.
1617// @param insn - The instruction whose opcode field is to be read.
1618// @return - 0 on success; nonzero otherwise.
1630// Consults the specifier for an instruction and consumes all
1631// operands for that instruction, interpreting them as it goes.
1633 int hasVVVV, needVVVV;
1638 // If non-zero vvvv specified, make sure one of the operands uses it.
1640 needVVVV = hasVVVV && (insn->
vvvv != 0);
1643 switch (
Op.encoding) {
1649 // VSIB can use the V2 bit so check only the other bits.
1651 needVVVV = hasVVVV & ((insn->
vvvv & 0xf) != 0);
1655 // Reject if SIB wasn't used.
1656 if (insn->
eaBase != EA_BASE_sib && insn->
eaBase != EA_BASE_sib64)
1659 // If sibIndex was set to SIB_INDEX_NONE, index offset is 4.
1663 // If EVEX.v2 is set this is one of the 16-31 registers.
1668 // Adjust the index register to the correct size.
1671 debug(
"Unhandled VSIB index type");
1687 // Apply the AVX512 compressed displacement scaling factor.
1692 // Reject if SIB wasn't used.
1693 if (insn->
eaBase != EA_BASE_sib && insn->
eaBase != EA_BASE_sib64)
1706 // Apply the AVX512 compressed displacement scaling factor.
1712 // Saw a register immediate so don't read again and instead split the
1713 // previous immediate. FIXME: This is a hack.
1721 if (
Op.type == TYPE_XMM ||
Op.type == TYPE_YMM)
1770 needVVVV =
false;
// oszc shares the same bits with VVVV
1781 needVVVV = 0;
// Mark that we have found a VVVV operand.
1789 case ENCODING_WRITEMASK:
1796 LLVM_DEBUG(
dbgs() <<
"Encountered an operand with an unknown encoding.");
1801 // If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail
1810// Fill-ins to make the compiler happy. These constants are never actually
1811// assigned; they are just filler to make an automatically-generated switch
1832/// Generic disassembler for all X86 platforms. All that each platform class should
1833/// have to do is subclass the constructor, and provide a different
1834/// disassemblerMode value.
1836 std::unique_ptr<const MCInstrInfo> MII;
1838 X86GenericDisassembler(
const MCSubtargetInfo &STI, MCContext &Ctx,
1839 std::unique_ptr<const MCInstrInfo> MII);
1842 ArrayRef<uint8_t> Bytes, uint64_t
Address,
1843 raw_ostream &cStream)
const override;
1851X86GenericDisassembler::X86GenericDisassembler(
1854 std::unique_ptr<const MCInstrInfo> MII)
1857 if (FB[X86::Is16Bit]) {
1860 }
else if (FB[X86::Is32Bit]) {
1863 }
else if (FB[X86::Is64Bit]) {
1874 CommentStream = &CStream;
1876 InternalInstruction Insn;
1877 memset(&Insn, 0,
sizeof(InternalInstruction));
1907 // It should not be 'pause' f3 90
1913 Instr.setFlags(Flags);
1919// Private code that translates from struct InternalInstructions to MCInsts.
1922/// translateRegister - Translates an internal register to the appropriate LLVM
1923/// register, and appends it as an operand to an MCInst.
1925/// @param mcInst - The MCInst to append to.
1926/// @param reg - The Reg to append.
1928#define ENTRY(x) X86::x,
1932 MCPhysReg llvmRegnum = llvmRegnums[reg];
1937 0,
// SEG_OVERRIDE_NONE
1946/// translateSrcIndex - Appends a source index operand to an MCInst.
1948/// @param mcInst - The MCInst to append to.
1949/// @param insn - The internal instruction.
1954 baseRegNo = insn.
hasAdSize ? X86::ESI : X86::RSI;
1956 baseRegNo = insn.
hasAdSize ? X86::SI : X86::ESI;
1959 baseRegNo = insn.
hasAdSize ? X86::ESI : X86::SI;
1970/// translateDstIndex - Appends a destination index operand to an MCInst.
1972/// @param mcInst - The MCInst to append to.
1973/// @param insn - The internal instruction.
1979 baseRegNo = insn.
hasAdSize ? X86::EDI : X86::RDI;
1981 baseRegNo = insn.
hasAdSize ? X86::DI : X86::EDI;
1984 baseRegNo = insn.
hasAdSize ? X86::EDI : X86::DI;
1991/// translateImmediate - Appends an immediate operand to an MCInst.
1993/// @param mcInst - The MCInst to append to.
1994/// @param immediate - The immediate value to append.
1995/// @param operand - The operand, as stored in the descriptor table.
1996/// @param insn - The internal instruction.
2001 // Sign-extend the immediate if necessary.
2007 if (type == TYPE_REL) {
2018 if(immediate & 0x80)
2019 immediate |= ~(0xffull);
2022 if(immediate & 0x8000)
2023 immediate |= ~(0xffffull);
2026 if(immediate & 0x80000000)
2027 immediate |= ~(0xffffffffull);
2034 if(immediate & 0x80)
2035 immediate |= ~(0xffull);
2038 if(immediate & 0x8000)
2039 immediate |= ~(0xffffull);
2042 if(immediate & 0x80000000)
2043 immediate |= ~(0xffffffffull);
2047 // By default sign-extend all X86 immediates based on their encoding.
2048 else if (type == TYPE_IMM) {
2053 if(immediate & 0x80)
2054 immediate |= ~(0xffull);
2057 if(immediate & 0x8000)
2058 immediate |= ~(0xffffull);
2061 if(immediate & 0x80000000)
2062 immediate |= ~(0xffffffffull);
2080 // operand is 64 bits wide. Do nothing.
2089 if (type == TYPE_MOFFS) {
2096/// translateRMRegister - Translates a register stored in the R/M field of the
2097/// ModR/M byte to its LLVM equivalent and appends it to an MCInst.
2098/// @param mcInst - The MCInst to append to.
2099/// @param insn - The internal instruction to extract the R/M field
2101/// @return - 0 on success; -1 otherwise
2104 if (insn.
eaBase == EA_BASE_sib || insn.
eaBase == EA_BASE_sib64) {
2105 debug(
"A R/M register operand may not have a SIB byte");
2111 debug(
"Unexpected EA base register");
2114 debug(
"EA_BASE_NONE for ModR/M base");
2116#define ENTRY(x) case EA_BASE_##x:
2119 debug(
"A R/M register operand may not have a base; "
2120 "the operand must be a register.");
2124 mcInst.addOperand(MCOperand::createReg(X86::x)); break;
2132/// translateRMMemory - Translates a memory operand stored in the Mod and R/M
2133/// fields of an internal instruction (and possibly its SIB byte) to a memory
2134/// operand in LLVM's format, and appends it to an MCInst.
2136/// @param mcInst - The MCInst to append to.
2137/// @param insn - The instruction to extract Mod, R/M, and SIB fields
2139/// @param ForceSIB - The instruction must use SIB.
2140/// @return - 0 on success; nonzero otherwise
2143 bool ForceSIB =
false) {
2144 // Addresses in an MCInst are represented as five operands:
2145 // 1. basereg (register) The R/M base, or (if there is a SIB) the
2147 // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified
2149 // 3. indexreg (register) x86_registerNONE, or (if there is a SIB)
2150 // the index (which is multiplied by the
2152 // 4. displacement (immediate) 0, or the displacement if there is one
2153 // 5. segmentreg (register) x86_registerNONE for now, but could be set
2154 // if we have segment overrides
2163 if (insn.
eaBase == EA_BASE_sib || insn.
eaBase == EA_BASE_sib64) {
2167 debug(
"Unexpected sibBase");
2170 case SIB_BASE_##x: \
2171 baseReg = MCOperand::createReg(X86::x); break;
2182 debug(
"Unexpected sibIndex");
2185 case SIB_INDEX_##x: \
2186 indexReg = MCOperand::createReg(X86::x); break;
2195 // Use EIZ/RIZ for a few ambiguous cases where the SIB byte is present,
2196 // but no index is used and modrm alone should have been enough.
2197 // -No base register in 32-bit mode. In 64-bit mode this is used to
2198 // avoid rip-relative addressing.
2199 // -Any base register used other than ESP/RSP/R12D/R12. Using these as a
2200 // base always requires a SIB byte.
2201 // -A scale other than 1 is used.
2207 insn.
sibBase != SIB_BASE_R12D && insn.
sibBase != SIB_BASE_R12))) {
2219 debug(
"EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
2256 debug(
"Unexpected eaBase");
2258 // Here, we will use the fill-ins defined above. However,
2259 // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
2260 // sib and sib64 were handled in the top-level if, so they're only
2261 // placeholders to keep the compiler happy.
2264 baseReg = MCOperand::createReg(X86::x); break;
2267#define ENTRY(x) case EA_REG_##x:
2270 debug(
"A R/M memory operand may not be a register; "
2271 "the base field must be a base.");
2298/// translateRM - Translates an operand stored in the R/M (and possibly SIB)
2299/// byte of an instruction to LLVM form, and appends it to an MCInst.
2301/// @param mcInst - The MCInst to append to.
2302/// @param operand - The operand, as stored in the descriptor table.
2303/// @param insn - The instruction to extract Mod, R/M, and SIB fields
2305/// @return - 0 on success; nonzero otherwise
2308 switch (operand.
type) {
2310 debug(
"Unexpected type for a R/M operand");
2325 case TYPE_CONTROLREG:
2338/// translateFPRegister - Translates a stack position on the FPU stack to its
2339/// LLVM form, and appends it to an MCInst.
2341/// @param mcInst - The MCInst to append to.
2342/// @param stackPos - The stack position to translate.
2348/// translateMaskRegister - Translates a 3-bit mask register number to
2349/// LLVM form, and appends it to an MCInst.
2351/// @param mcInst - The MCInst to append to.
2352/// @param maskRegNum - Number of mask register from 0 to 7.
2353/// @return - false on success; true otherwise.
2356 if (maskRegNum >= 8) {
2357 debug(
"Invalid mask register number");
2365/// translateOperand - Translates an operand stored in an internal instruction
2366/// to LLVM's format and appends it to an MCInst.
2368/// @param mcInst - The MCInst to append to.
2369/// @param operand - The operand, as stored in the descriptor table.
2370/// @param insn - The internal instruction.
2371/// @return - false on success; true otherwise.
2377 debug(
"Unhandled operand encoding during translation");
2382 case ENCODING_WRITEMASK:
2435/// translateInstruction - Translates an internal instruction and all its
2436/// operands to an MCInst.
2438/// @param mcInst - The MCInst to populate with the instruction's data.
2439/// @param insn - The internal instruction.
2440/// @return - false on success; true otherwise.
2445 debug(
"Instruction has no specification");
2451 // If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3
2452 // prefix bytes should be disassembled as xrelease and xacquire then set the
2453 // opcode to those instead of the rep and repne opcodes.
2455 if(mcInst.
getOpcode() == X86::REP_PREFIX)
2457 else if(mcInst.
getOpcode() == X86::REPNE_PREFIX)
2464 if (
Op.encoding != ENCODING_NONE) {
2477 std::unique_ptr<const MCInstrInfo> MII(
T.createMCInstrInfo());
2478 return new X86GenericDisassembler(STI, Ctx, std::move(MII));
2482 // Register the disassembler.
MCDisassembler::DecodeStatus DecodeStatus
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isBranch(unsigned Opcode)
#define LLVM_C_ABI
LLVM_C_ABI is the export/visibility macro used to mark symbols declared in llvm-c as exported when bu...
static uint8_t readOpcode(WasmObjectFile::ReadContext &Ctx)
static int nextByte(ArrayRef< uint8_t > Bytes, uint64_t &Size)
static bool isPrefix(unsigned Opcode, const MCInstrInfo &MCII)
Check if the instruction is a prefix.
#define CASE_ENCODING_VSIB
#define THREEDNOW_MAP_SYM
#define rFromEVEX2of4(evex)
#define lFromEVEX4of4(evex)
#define l2FromEVEX4of4(evex)
#define rFromVEX2of3(vex)
#define zFromEVEX4of4(evex)
#define bFromXOP2of3(xop)
#define xFromVEX2of3(vex)
#define mmmmmFromVEX2of3(vex)
#define rmFromModRM(modRM)
#define bFromEVEX4of4(evex)
#define rFromVEX2of2(vex)
#define ppFromEVEX3of4(evex)
#define v2FromEVEX4of4(evex)
#define modFromModRM(modRM)
#define rFromXOP2of3(xop)
#define lFromXOP3of3(xop)
#define lFromVEX2of2(vex)
#define scFromEVEX4of4(evex)
#define scaleFromSIB(sib)
#define regFromModRM(modRM)
#define b2FromEVEX2of4(evex)
#define vvvvFromVEX2of2(vex)
#define nfFromEVEX4of4(evex)
#define ppFromXOP3of3(xop)
#define vvvvFromVEX3of3(vex)
#define r2FromEVEX2of4(evex)
#define uFromEVEX3of4(evex)
#define xFromXOP2of3(xop)
#define wFromEVEX3of4(evex)
#define bFromVEX2of3(vex)
#define wFromVEX3of3(vex)
#define mmmmmFromXOP2of3(xop)
#define aaaFromEVEX4of4(evex)
#define lFromVEX3of3(vex)
#define mmmFromEVEX2of4(evex)
#define ppFromVEX3of3(vex)
#define bFromEVEX2of4(evex)
#define xFromEVEX2of4(evex)
#define ppFromVEX2of2(vex)
#define indexFromSIB(sib)
#define vvvvFromXOP3of3(xop)
#define wFromXOP3of3(xop)
#define oszcFromEVEX3of4(evex)
#define vvvvFromEVEX3of4(evex)
static void translateRegister(MCInst &mcInst, Reg reg)
translateRegister - Translates an internal register to the appropriate LLVM register,...
static bool isREX2(struct InternalInstruction *insn, uint8_t prefix)
static int getInstructionID(struct InternalInstruction *insn, const MCInstrInfo *mii)
static bool readOpcode(struct InternalInstruction *insn)
static MCDisassembler * createX86Disassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx)
static bool translateMaskRegister(MCInst &mcInst, uint8_t maskRegNum)
translateMaskRegister - Translates a 3-bit mask register number to LLVM form, and appends it to an MC...
static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn)
translateDstIndex - Appends a destination index operand to an MCInst.
static void translateImmediate(MCInst &mcInst, uint64_t immediate, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)
translateImmediate - Appends an immediate operand to an MCInst.
static int readOperands(struct InternalInstruction *insn)
static void translateFPRegister(MCInst &mcInst, uint8_t stackPos)
translateFPRegister - Translates a stack position on the FPU stack to its LLVM form,...
static bool is64Bit(const char *name)
static const uint8_t segmentRegnums[SEG_OVERRIDE_max]
static int readImmediate(struct InternalInstruction *insn, uint8_t size)
static int getInstructionIDWithAttrMask(uint16_t *instructionID, struct InternalInstruction *insn, uint16_t attrMask)
static int readSIB(struct InternalInstruction *insn)
static bool isREX(struct InternalInstruction *insn, uint8_t prefix)
static int readVVVV(struct InternalInstruction *insn)
static bool isNF(InternalInstruction *insn)
static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn)
translateSrcIndex - Appends a source index operand to an MCInst.
#define GENERIC_FIXUP_FUNC(name, base, prefix)
static int readMaskRegister(struct InternalInstruction *insn)
static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)
translateRM - Translates an operand stored in the R/M (and possibly SIB) byte of an instruction to LL...
static InstrUID decode(OpcodeType type, InstructionContext insnContext, uint8_t opcode, uint8_t modRM)
static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size)
static int readDisplacement(struct InternalInstruction *insn)
static bool isCCMPOrCTEST(InternalInstruction *insn)
LLVM_C_ABI void LLVMInitializeX86Disassembler()
static int fixupReg(struct InternalInstruction *insn, const struct OperandSpecifier *op)
static int readModRM(struct InternalInstruction *insn)
static bool is16BitEquivalent(const char *orig, const char *equiv)
static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, const MCDisassembler *Dis, bool ForceSIB=false)
translateRMMemory - Translates a memory operand stored in the Mod and R/M fields of an internal instr...
static bool translateInstruction(MCInst &target, InternalInstruction &source, const MCDisassembler *Dis)
translateInstruction - Translates an internal instruction and all its operands to an MCInst.
static bool translateRMRegister(MCInst &mcInst, InternalInstruction &insn)
translateRMRegister - Translates a register stored in the R/M field of the ModR/M byte to its LLVM eq...
static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)
translateOperand - Translates an operand stored in an internal instruction to LLVM's format and appen...
static int readPrefixes(struct InternalInstruction *insn)
static bool peek(struct InternalInstruction *insn, uint8_t &byte)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
Context object for machine code objects.
Superclass for all disassemblers.
bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) const
void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const
DecodeStatus
Ternary decode status.
Instances of this class represent a single low-level machine instruction.
unsigned getOpcode() const
void addOperand(const MCOperand Op)
void setOpcode(unsigned Op)
Interface to description of machine instruction set.
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Instances of this class represent operands of the MCInst class.
static MCOperand createReg(MCRegister Reg)
static MCOperand createImm(int64_t Val)
Generic base class for all target subtargets.
const FeatureBitset & getFeatureBits() const
StringRef - Represent a constant reference to a string, i.e.
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Target - Wrapper for Target specific information.
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
EABase
All possible values of the base field for effective-address computations, a.k.a.
Reg
All possible values of the reg field in the ModR/M byte.
DisassemblerMode
Decoding mode for the Intel disassembler.
SIBBase
All possible values of the SIB base field.
SIBIndex
All possible values of the SIB index field.
Define some predicates that are used for node matching.
NodeAddr< InstrNode * > Instr
value_type read(const void *memory, endianness endian)
Read a value of a particular endianness from memory.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ATTRIBUTE_ALWAYS_INLINE DynamicAPInt mod(const DynamicAPInt &LHS, const DynamicAPInt &RHS)
is always non-negative.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Target & getTheX86_32Target()
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
@ Success
The lock was released successfully.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
DWARFExpression::Operation Op
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Target & getTheX86_64Target()
Implement std::hash so that hash_code can be used in STL containers.
OpcodeDecision opcodeDecisions[IC_max]
ModRMDecision modRMDecisions[256]
static void RegisterMCDisassembler(Target &T, Target::MCDisassemblerCtorTy Fn)
RegisterMCDisassembler - Register a MCDisassembler implementation for the given target.
The specification for how to extract and interpret a full instruction and its operands.
The x86 internal instruction, which is produced by the decoder.
ArrayRef< OperandSpecifier > operands
EADisplacement eaDisplacement
uint8_t rex2ExtensionPrefix[2]
uint8_t vectorExtensionPrefix[4]
SegmentOverride segmentOverride
uint8_t numImmediatesConsumed
llvm::ArrayRef< uint8_t > bytes
uint8_t numImmediatesTranslated
const InstructionSpecifier * spec
VectorExtensionType vectorExtensionType
uint8_t displacementOffset
The specification for how to extract and interpret one operand.