X86Disassembler.cpp

Go to the documentation of this file.

1//===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===//

2//

3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

4// See https://llvm.org/LICENSE.txt for license information.

5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

6//

7//===----------------------------------------------------------------------===//

8//

9// This file is part of the X86 Disassembler.

10// It contains code to translate the data produced by the decoder into

11// MCInsts.

12//

13//

14// The X86 disassembler is a table-driven disassembler for the 16-, 32-, and

15// 64-bit X86 instruction sets. The main decode sequence for an assembly

16// instruction in this disassembler is:

17//

18// 1. Read the prefix bytes and determine the attributes of the instruction.

19// These attributes, recorded in enum attributeBits

20// (X86DisassemblerDecoderCommon.h), form a bitmask. The table CONTEXTS_SYM

21// provides a mapping from bitmasks to contexts, which are represented by

22// enum InstructionContext (ibid.).

23//

24// 2. Read the opcode, and determine what kind of opcode it is. The

25// disassembler distinguishes four kinds of opcodes, which are enumerated in

26// OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte

27// (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a

28// (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context.

29//

30// 3. Depending on the opcode type, look in one of four ClassDecision structures

31// (X86DisassemblerDecoderCommon.h). Use the opcode class to determine which

32// OpcodeDecision (ibid.) to look the opcode up in. Look up the opcode to get

33// a ModRMDecision (ibid.).

34//

35// 4. Some instructions, such as escape opcodes or extended opcodes, or even

36// instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the

37// ModR/M byte to complete decode. The ModRMDecision's type is an entry from

38// ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the

39// ModR/M byte is required and how to interpret it.

40//

41// 5. After resolving the ModRMDecision, the disassembler has a unique ID

42// of type InstrUID (X86DisassemblerDecoderCommon.h). Looking this ID up in

43// INSTRUCTIONS_SYM yields the name of the instruction and the encodings and

44// meanings of its operands.

45//

46// 6. For each operand, its encoding is an entry from OperandEncoding

47// (X86DisassemblerDecoderCommon.h) and its type is an entry from

48// OperandType (ibid.). The encoding indicates how to read it from the

49// instruction; the type indicates how to interpret the value once it has

50// been read. For example, a register operand could be stored in the R/M

51// field of the ModR/M byte, the REG field of the ModR/M byte, or added to

52// the main opcode. This is orthogonal to its meaning (a GPR or an XMM

53// register, for instance). Given this information, the operands can be

54// extracted and interpreted.

55//

56// 7. As the last step, the disassembler translates the instruction information

57// and operands into a format understandable by the client - in this case, an

58// MCInst for use by the MC infrastructure.

59//

60// The disassembler is broken broadly into two parts: the table emitter that

61// emits the instruction decode tables discussed above during compilation, and

62// the disassembler itself. The table emitter is documented in more detail in

63// utils/TableGen/X86DisassemblerEmitter.h.

64//

65// X86Disassembler.cpp contains the code responsible for step 7, and for

66// invoking the decoder to execute steps 1-6.

67// X86DisassemblerDecoderCommon.h contains the definitions needed by both the

68// table emitter and the disassembler.

69// X86DisassemblerDecoder.h contains the public interface of the decoder,

70// factored out into C for possible use by other projects.

71// X86DisassemblerDecoder.c contains the source code of the decoder, which is

72// responsible for steps 1-6.

73//

74//===----------------------------------------------------------------------===//

75

76#include "MCTargetDesc/X86BaseInfo.h"

77#include "MCTargetDesc/X86MCTargetDesc.h"

78#include "TargetInfo/X86TargetInfo.h"

79#include "X86DisassemblerDecoder.h"

80#include "llvm-c/Visibility.h"

81#include "llvm/MC/MCContext.h"

82#include "llvm/MC/MCDisassembler/MCDisassembler.h"

83#include "llvm/MC/MCExpr.h"

84#include "llvm/MC/MCInst.h"

85#include "llvm/MC/MCInstrInfo.h"

86#include "llvm/MC/MCSubtargetInfo.h"

87#include "llvm/MC/TargetRegistry.h"

88#include "llvm/Support/Debug.h"

89#include "llvm/Support/Format.h"

90#include "llvm/Support/raw_ostream.h"

91

92using namespace llvm;

93using namespace llvm::X86Disassembler;

94

// Debug category name for this file; presumably what LLVM_DEBUG keys its
// output on (TODO confirm against llvm/Support/Debug.h).
95 #define DEBUG_TYPE "x86-disassembler"

96

// debug(s): streams the current source line number, ": ", and s to dbgs(),
// wrapped in LLVM_DEBUG.
// NOTE(review): the expansion already ends in ';', so writing `debug(x);`
// produces an extra empty statement. Avoid using it as the unbraced body of
// an if that has an else.
97 #define debug(s) LLVM_DEBUG(dbgs() << __LINE__ << ": " << s);

98

99// Specifies whether a ModR/M byte is needed and (if so) which

100// instruction each possible value of the ModR/M byte corresponds to. Once

101// this information is known, we have narrowed down to a single instruction.

102 struct ModRMDecision {

// Selects how the ModR/M byte picks an entry; decode() switches on this
// value against the MODRM_* constants.
103 uint8_t modrm_type;

// Base index into modRMTable; decode() adds a ModR/M-derived offset to it
// depending on modrm_type.
104 uint16_t instructionIDs;

105};

106

107// Specifies which set of ModR/M->instruction tables to look at

108// given a particular opcode.

109 struct OpcodeDecision {

// One entry per possible opcode byte value; decode() indexes this array
// directly with the uint8_t opcode.
110 ModRMDecision modRMDecisions[256];

111};

112

113// Specifies which opcode->instruction tables to look at given

114// a particular context (set of attributes). Since there are many possible

115// contexts, the decoder first uses CONTEXTS_SYM to determine which context

116// applies given a specific set of attributes. Hence there are only IC_max

117// entries in this table, rather than 2^(ATTR_max).

118 struct ContextDecision {

// One OpcodeDecision per instruction context; decode() indexes this array
// with its InstructionContext argument.
119 OpcodeDecision opcodeDecisions[IC_max];

120};

121

122#include "X86GenDisassemblerTables.inc"

123

124 static InstrUID decode(OpcodeType type, InstructionContext insnContext,

125 uint8_t opcode, uint8_t modRM) {

126 const struct ModRMDecision *dec;

127

128 switch (type) {

129 case ONEBYTE:

130 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];

131 break;

132 case TWOBYTE:

133 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];

134 break;

135 case THREEBYTE_38:

136 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];

137 break;

138 case THREEBYTE_3A:

139 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];

140 break;

141 case XOP8_MAP:

142 dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];

143 break;

144 case XOP9_MAP:

145 dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];

146 break;

147 case XOPA_MAP:

148 dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];

149 break;

150 case THREEDNOW_MAP:

151 dec =

152 &THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];

153 break;

154 case MAP4:

155 dec = &MAP4_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];

156 break;

157 case MAP5:

158 dec = &MAP5_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];

159 break;

160 case MAP6:

161 dec = &MAP6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];

162 break;

163 case MAP7:

164 dec = &MAP7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];

165 break;

166 }

167

168 switch (dec->modrm_type) {

169 default:

170 llvm_unreachable("Corrupt table! Unknown modrm_type");

171 return 0;

172 case MODRM_ONEENTRY:

173 return modRMTable[dec->instructionIDs];

174 case MODRM_SPLITRM:

175 if (modFromModRM(modRM) == 0x3)

176 return modRMTable[dec->instructionIDs + 1];

177 return modRMTable[dec->instructionIDs];

178 case MODRM_SPLITREG:

179 if (modFromModRM(modRM) == 0x3)

180 return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3) + 8];

181 return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)];

182 case MODRM_SPLITMISC:

183 if (modFromModRM(modRM) == 0x3)

184 return modRMTable[dec->instructionIDs + (modRM & 0x3f) + 8];

185 return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)];

186 case MODRM_FULL:

187 return modRMTable[dec->instructionIDs + modRM];

188 }

189}

190

191 static bool peek(struct InternalInstruction *insn, uint8_t &byte) {

192 uint64_t offset = insn->readerCursor - insn->startLocation;

193 if (offset >= insn->bytes.size())

194 return true;

195 byte = insn->bytes[offset];

196 return false;

197}

198

199 template <typename T> static bool consume(InternalInstruction *insn, T &ptr) {

200 auto r = insn->bytes;

201 uint64_t offset = insn->readerCursor - insn->startLocation;

202 if (offset + sizeof(T) > r.size())

203 return true;

204 ptr = support::endian::read<T>(&r[offset], llvm::endianness::little);

205 insn->readerCursor += sizeof(T);

206 return false;

207}

208

209 static bool isREX(struct InternalInstruction *insn, uint8_t prefix) {

210 return insn->mode == MODE_64BIT && prefix >= 0x40 && prefix <= 0x4f;

211}

212

213 static bool isREX2(struct InternalInstruction *insn, uint8_t prefix) {

214 return insn->mode == MODE_64BIT && prefix == 0xd5;

215}

216

217// Consumes all of an instruction's prefix bytes, and marks the

218// instruction as having them. Also sets the instruction's default operand,

219// address, and other relevant data sizes to report operands correctly.

220//

221// insn must not be empty.

222 static int readPrefixes(struct InternalInstruction *insn) {

223 bool isPrefix = true;

224 uint8_t byte = 0;

225 uint8_t nextByte;

226

227 LLVM_DEBUG(dbgs() << "readPrefixes()");

228

229 while (isPrefix) {

230 // If we fail reading prefixes, just stop here and let the opcode reader

231 // deal with it.

232 if (consume(insn, byte))

233 break;

234

235 // If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then

236 // break and let it be disassembled as a normal "instruction".

237 if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) // LOCK

238 break;

239

240 if ((byte == 0xf2 || byte == 0xf3) && !peek(insn, nextByte)) {

241 // If the byte is 0xf2 or 0xf3, and any of the following conditions are

242 // met:

243 // - it is followed by a LOCK (0xf0) prefix

244 // - it is followed by an xchg instruction

245 // then it should be disassembled as a xacquire/xrelease not repne/rep.

246 if (((nextByte == 0xf0) ||

247 ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) {

248 insn->xAcquireRelease = true;

249 if (!(byte == 0xf3 && nextByte == 0x90)) // PAUSE instruction support

250 break;

251 }

252 // Also if the byte is 0xf3, and the following condition is met:

253 // - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or

254 // "mov mem, imm" (opcode 0xc6/0xc7) instructions.

255 // then it should be disassembled as an xrelease not rep.

256 if (byte == 0xf3 && (nextByte == 0x88 || nextByte == 0x89 ||

257 nextByte == 0xc6 || nextByte == 0xc7)) {

258 insn->xAcquireRelease = true;

259 break;

260 }

261 if (isREX(insn, nextByte)) {

262 uint8_t nnextByte;

263 // Go to REX prefix after the current one

264 if (consume(insn, nnextByte))

265 return -1;

266 // We should be able to read next byte after REX prefix

267 if (peek(insn, nnextByte))

268 return -1;

269 --insn->readerCursor;

270 }

271 }

272

273 switch (byte) {

274 case 0xf0: // LOCK

275 insn->hasLockPrefix = true;

276 break;

277 case 0xf2: // REPNE/REPNZ

278 case 0xf3: { // REP or REPE/REPZ

279 uint8_t nextByte;

280 if (peek(insn, nextByte))

281 break;

282 // TODO:

283 // 1. There could be several 0x66

284 // 2. if (nextByte == 0x66) and nextNextByte != 0x0f then

285 // it's not mandatory prefix

286 // 3. if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need

287 // 0x0f exactly after it to be mandatory prefix

288 // 4. if (nextByte == 0xd5) it's REX2 and we need

289 // 0x0f exactly after it to be mandatory prefix

290 if (isREX(insn, nextByte) || isREX2(insn, nextByte) || nextByte == 0x0f ||

291 nextByte == 0x66)

292 // The last of 0xf2 /0xf3 is mandatory prefix

293 insn->mandatoryPrefix = byte;

294 insn->repeatPrefix = byte;

295 break;

296 }

297 case 0x2e: // CS segment override -OR- Branch not taken

298 insn->segmentOverride = SEG_OVERRIDE_CS;

299 break;

300 case 0x36: // SS segment override -OR- Branch taken

301 insn->segmentOverride = SEG_OVERRIDE_SS;

302 break;

303 case 0x3e: // DS segment override

304 insn->segmentOverride = SEG_OVERRIDE_DS;

305 break;

306 case 0x26: // ES segment override

307 insn->segmentOverride = SEG_OVERRIDE_ES;

308 break;

309 case 0x64: // FS segment override

310 insn->segmentOverride = SEG_OVERRIDE_FS;

311 break;

312 case 0x65: // GS segment override

313 insn->segmentOverride = SEG_OVERRIDE_GS;

314 break;

315 case 0x66: { // Operand-size override {

316 uint8_t nextByte;

317 insn->hasOpSize = true;

318 if (peek(insn, nextByte))

319 break;

320 // 0x66 can't overwrite existing mandatory prefix and should be ignored

321 if (!insn->mandatoryPrefix && (nextByte == 0x0f || isREX(insn, nextByte)))

322 insn->mandatoryPrefix = byte;

323 break;

324 }

325 case 0x67: // Address-size override

326 insn->hasAdSize = true;

327 break;

328 default: // Not a prefix byte

329 isPrefix = false;

330 break;

331 }

332

333 if (isREX(insn, byte)) {

334 insn->rexPrefix = byte;

335 isPrefix = true;

336 LLVM_DEBUG(dbgs() << format("Found REX prefix 0x%hhx", byte));

337 } else if (isPrefix) {

338 insn->rexPrefix = 0;

339 }

340

341 if (isPrefix)

342 LLVM_DEBUG(dbgs() << format("Found prefix 0x%hhx", byte));

343 }

344

345 insn->vectorExtensionType = TYPE_NO_VEX_XOP;

346

347 if (byte == 0x62) {

348 uint8_t byte1, byte2;

349 if (consume(insn, byte1)) {

350 LLVM_DEBUG(dbgs() << "Couldn't read second byte of EVEX prefix");

351 return -1;

352 }

353

354 if (peek(insn, byte2)) {

355 LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");

356 return -1;

357 }

358

359 if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)) {

360 insn->vectorExtensionType = TYPE_EVEX;

361 } else {

362 --insn->readerCursor; // unconsume byte1

363 --insn->readerCursor; // unconsume byte

364 }

365

366 if (insn->vectorExtensionType == TYPE_EVEX) {

367 insn->vectorExtensionPrefix[0] = byte;

368 insn->vectorExtensionPrefix[1] = byte1;

369 if (consume(insn, insn->vectorExtensionPrefix[2])) {

370 LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");

371 return -1;

372 }

373 if (consume(insn, insn->vectorExtensionPrefix[3])) {

374 LLVM_DEBUG(dbgs() << "Couldn't read fourth byte of EVEX prefix");

375 return -1;

376 }

377

378 if (insn->mode == MODE_64BIT) {

379 // We simulate the REX prefix for simplicity's sake

380 insn->rexPrefix = 0x40 |

381 (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3) |

382 (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2) |

383 (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1) |

384 (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0);

385

386 // We simulate the REX2 prefix for simplicity's sake

387 insn->rex2ExtensionPrefix[1] =

388 (r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 6) |

389 (uFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 5) |

390 (b2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4);

391 }

392

393 LLVM_DEBUG(

394 dbgs() << format(

395 "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",

396 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],

397 insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]));

398 }

399 } else if (byte == 0xc4) {

400 uint8_t byte1;

401 if (peek(insn, byte1)) {

402 LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");

403 return -1;

404 }

405

406 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)

407 insn->vectorExtensionType = TYPE_VEX_3B;

408 else

409 --insn->readerCursor;

410

411 if (insn->vectorExtensionType == TYPE_VEX_3B) {

412 insn->vectorExtensionPrefix[0] = byte;

413 consume(insn, insn->vectorExtensionPrefix[1]);

414 consume(insn, insn->vectorExtensionPrefix[2]);

415

416 // We simulate the REX prefix for simplicity's sake

417

418 if (insn->mode == MODE_64BIT)

419 insn->rexPrefix = 0x40 |

420 (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3) |

421 (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2) |

422 (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1) |

423 (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0);

424

425 LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",

426 insn->vectorExtensionPrefix[0],

427 insn->vectorExtensionPrefix[1],

428 insn->vectorExtensionPrefix[2]));

429 }

430 } else if (byte == 0xc5) {

431 uint8_t byte1;

432 if (peek(insn, byte1)) {

433 LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");

434 return -1;

435 }

436

437 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)

438 insn->vectorExtensionType = TYPE_VEX_2B;

439 else

440 --insn->readerCursor;

441

442 if (insn->vectorExtensionType == TYPE_VEX_2B) {

443 insn->vectorExtensionPrefix[0] = byte;

444 consume(insn, insn->vectorExtensionPrefix[1]);

445

446 if (insn->mode == MODE_64BIT)

447 insn->rexPrefix =

448 0x40 | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2);

449

450 switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {

451 default:

452 break;

453 case VEX_PREFIX_66:

454 insn->hasOpSize = true;

455 break;

456 }

457

458 LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx",

459 insn->vectorExtensionPrefix[0],

460 insn->vectorExtensionPrefix[1]));

461 }

462 } else if (byte == 0x8f) {

463 uint8_t byte1;

464 if (peek(insn, byte1)) {

465 LLVM_DEBUG(dbgs() << "Couldn't read second byte of XOP");

466 return -1;

467 }

468

469 if ((byte1 & 0x38) != 0x0) // 0 in these 3 bits is a POP instruction.

470 insn->vectorExtensionType = TYPE_XOP;

471 else

472 --insn->readerCursor;

473

474 if (insn->vectorExtensionType == TYPE_XOP) {

475 insn->vectorExtensionPrefix[0] = byte;

476 consume(insn, insn->vectorExtensionPrefix[1]);

477 consume(insn, insn->vectorExtensionPrefix[2]);

478

479 // We simulate the REX prefix for simplicity's sake

480

481 if (insn->mode == MODE_64BIT)

482 insn->rexPrefix = 0x40 |

483 (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3) |

484 (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2) |

485 (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1) |

486 (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0);

487

488 switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {

489 default:

490 break;

491 case VEX_PREFIX_66:

492 insn->hasOpSize = true;

493 break;

494 }

495

496 LLVM_DEBUG(dbgs() << format("Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",

497 insn->vectorExtensionPrefix[0],

498 insn->vectorExtensionPrefix[1],

499 insn->vectorExtensionPrefix[2]));

500 }

501 } else if (isREX2(insn, byte)) {

502 uint8_t byte1;

503 if (peek(insn, byte1)) {

504 LLVM_DEBUG(dbgs() << "Couldn't read second byte of REX2");

505 return -1;

506 }

507 insn->rex2ExtensionPrefix[0] = byte;

508 consume(insn, insn->rex2ExtensionPrefix[1]);

509

510 // We simulate the REX prefix for simplicity's sake

511 insn->rexPrefix = 0x40 | (wFromREX2(insn->rex2ExtensionPrefix[1]) << 3) |

512 (rFromREX2(insn->rex2ExtensionPrefix[1]) << 2) |

513 (xFromREX2(insn->rex2ExtensionPrefix[1]) << 1) |

514 (bFromREX2(insn->rex2ExtensionPrefix[1]) << 0);

515 LLVM_DEBUG(dbgs() << format("Found REX2 prefix 0x%hhx 0x%hhx",

516 insn->rex2ExtensionPrefix[0],

517 insn->rex2ExtensionPrefix[1]));

518 } else

519 --insn->readerCursor;

520

521 if (insn->mode == MODE_16BIT) {

522 insn->registerSize = (insn->hasOpSize ? 4 : 2);

523 insn->addressSize = (insn->hasAdSize ? 4 : 2);

524 insn->displacementSize = (insn->hasAdSize ? 4 : 2);

525 insn->immediateSize = (insn->hasOpSize ? 4 : 2);

526 } else if (insn->mode == MODE_32BIT) {

527 insn->registerSize = (insn->hasOpSize ? 2 : 4);

528 insn->addressSize = (insn->hasAdSize ? 2 : 4);

529 insn->displacementSize = (insn->hasAdSize ? 2 : 4);

530 insn->immediateSize = (insn->hasOpSize ? 2 : 4);

531 } else if (insn->mode == MODE_64BIT) {

532 insn->displacementSize = 4;

533 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {

534 insn->registerSize = 8;

535 insn->addressSize = (insn->hasAdSize ? 4 : 8);

536 insn->immediateSize = 4;

537 insn->hasOpSize = false;

538 } else {

539 insn->registerSize = (insn->hasOpSize ? 2 : 4);

540 insn->addressSize = (insn->hasAdSize ? 4 : 8);

541 insn->immediateSize = (insn->hasOpSize ? 2 : 4);

542 }

543 }

544

545 return 0;

546}

547

548// Consumes the SIB byte to determine addressing information.

549 static int readSIB(struct InternalInstruction *insn) {

550 SIBBase sibBaseBase = SIB_BASE_NONE;

551 uint8_t index, base;

552

553 LLVM_DEBUG(dbgs() << "readSIB()");

554 switch (insn->addressSize) {

555 case 2:

556 default:

557 llvm_unreachable("SIB-based addressing doesn't work in 16-bit mode");

558 case 4:

559 insn->sibIndexBase = SIB_INDEX_EAX;

560 sibBaseBase = SIB_BASE_EAX;

561 break;

562 case 8:

563 insn->sibIndexBase = SIB_INDEX_RAX;

564 sibBaseBase = SIB_BASE_RAX;

565 break;

566 }

567

568 if (consume(insn, insn->sib))

569 return -1;

570

571 index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3) |

572 (x2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);

573

574 if (index == 0x4) {

575 insn->sibIndex = SIB_INDEX_NONE;

576 } else {

577 insn->sibIndex = (SIBIndex)(insn->sibIndexBase + index);

578 }

579

580 insn->sibScale = 1 << scaleFromSIB(insn->sib);

581

582 base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3) |

583 (b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);

584

585 switch (base) {

586 case 0x5:

587 case 0xd:

588 switch (modFromModRM(insn->modRM)) {

589 case 0x0:

590 insn->eaDisplacement = EA_DISP_32;

591 insn->sibBase = SIB_BASE_NONE;

592 break;

593 case 0x1:

594 insn->eaDisplacement = EA_DISP_8;

595 insn->sibBase = (SIBBase)(sibBaseBase + base);

596 break;

597 case 0x2:

598 insn->eaDisplacement = EA_DISP_32;

599 insn->sibBase = (SIBBase)(sibBaseBase + base);

600 break;

601 default:

602 llvm_unreachable("Cannot have Mod = 0b11 and a SIB byte");

603 }

604 break;

605 default:

606 insn->sibBase = (SIBBase)(sibBaseBase + base);

607 break;

608 }

609

610 return 0;

611}

612

613 static int readDisplacement(struct InternalInstruction *insn) {

614 int8_t d8;

615 int16_t d16;

616 int32_t d32;

617 LLVM_DEBUG(dbgs() << "readDisplacement()");

618

619 insn->displacementOffset = insn->readerCursor - insn->startLocation;

620 switch (insn->eaDisplacement) {

621 case EA_DISP_NONE:

622 break;

623 case EA_DISP_8:

624 if (consume(insn, d8))

625 return -1;

626 insn->displacement = d8;

627 break;

628 case EA_DISP_16:

629 if (consume(insn, d16))

630 return -1;

631 insn->displacement = d16;

632 break;

633 case EA_DISP_32:

634 if (consume(insn, d32))

635 return -1;

636 insn->displacement = d32;

637 break;

638 }

639

640 return 0;

641}

642

643// Consumes all addressing information (ModR/M byte, SIB byte, and displacement.

644 static int readModRM(struct InternalInstruction *insn) {

645 uint8_t mod, rm, reg;

646 LLVM_DEBUG(dbgs() << "readModRM()");

647

648 if (insn->consumedModRM)

649 return 0;

650

651 if (consume(insn, insn->modRM))

652 return -1;

653 insn->consumedModRM = true;

654

655 mod = modFromModRM(insn->modRM);

656 rm = rmFromModRM(insn->modRM);

657 reg = regFromModRM(insn->modRM);

658

659 // This goes by insn->registerSize to pick the correct register, which messes

660 // up if we're using (say) XMM or 8-bit register operands. That gets fixed in

661 // fixupReg().

662 switch (insn->registerSize) {

663 case 2:

664 insn->regBase = MODRM_REG_AX;

665 insn->eaRegBase = EA_REG_AX;

666 break;

667 case 4:

668 insn->regBase = MODRM_REG_EAX;

669 insn->eaRegBase = EA_REG_EAX;

670 break;

671 case 8:

672 insn->regBase = MODRM_REG_RAX;

673 insn->eaRegBase = EA_REG_RAX;

674 break;

675 }

676

677 reg |= (rFromREX(insn->rexPrefix) << 3) |

678 (r2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);

679 rm |= (bFromREX(insn->rexPrefix) << 3) |

680 (b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);

681

682 if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT)

683 reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;

684

685 insn->reg = (Reg)(insn->regBase + reg);

686

687 switch (insn->addressSize) {

688 case 2: {

689 EABase eaBaseBase = EA_BASE_BX_SI;

690

691 switch (mod) {

692 case 0x0:

693 if (rm == 0x6) {

694 insn->eaBase = EA_BASE_NONE;

695 insn->eaDisplacement = EA_DISP_16;

696 if (readDisplacement(insn))

697 return -1;

698 } else {

699 insn->eaBase = (EABase)(eaBaseBase + rm);

700 insn->eaDisplacement = EA_DISP_NONE;

701 }

702 break;

703 case 0x1:

704 insn->eaBase = (EABase)(eaBaseBase + rm);

705 insn->eaDisplacement = EA_DISP_8;

706 insn->displacementSize = 1;

707 if (readDisplacement(insn))

708 return -1;

709 break;

710 case 0x2:

711 insn->eaBase = (EABase)(eaBaseBase + rm);

712 insn->eaDisplacement = EA_DISP_16;

713 if (readDisplacement(insn))

714 return -1;

715 break;

716 case 0x3:

717 insn->eaBase = (EABase)(insn->eaRegBase + rm);

718 if (readDisplacement(insn))

719 return -1;

720 break;

721 }

722 break;

723 }

724 case 4:

725 case 8: {

726 EABase eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);

727

728 switch (mod) {

729 case 0x0:

730 insn->eaDisplacement = EA_DISP_NONE; // readSIB may override this

731 // In determining whether RIP-relative mode is used (rm=5),

732 // or whether a SIB byte is present (rm=4),

733 // the extension bits (REX.b and EVEX.x) are ignored.

734 switch (rm & 7) {

735 case 0x4: // SIB byte is present

736 insn->eaBase = (insn->addressSize == 4 ? EA_BASE_sib : EA_BASE_sib64);

737 if (readSIB(insn) || readDisplacement(insn))

738 return -1;

739 break;

740 case 0x5: // RIP-relative

741 insn->eaBase = EA_BASE_NONE;

742 insn->eaDisplacement = EA_DISP_32;

743 if (readDisplacement(insn))

744 return -1;

745 break;

746 default:

747 insn->eaBase = (EABase)(eaBaseBase + rm);

748 break;

749 }

750 break;

751 case 0x1:

752 insn->displacementSize = 1;

753 [[fallthrough]];

754 case 0x2:

755 insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);

756 switch (rm & 7) {

757 case 0x4: // SIB byte is present

758 insn->eaBase = EA_BASE_sib;

759 if (readSIB(insn) || readDisplacement(insn))

760 return -1;

761 break;

762 default:

763 insn->eaBase = (EABase)(eaBaseBase + rm);

764 if (readDisplacement(insn))

765 return -1;

766 break;

767 }

768 break;

769 case 0x3:

770 insn->eaDisplacement = EA_DISP_NONE;

771 insn->eaBase = (EABase)(insn->eaRegBase + rm);

772 break;

773 }

774 break;

775 }

776 } // switch (insn->addressSize)

777

778 return 0;

779}

780

// Defines a static function `name` that turns a raw register number into the
// enumerator matching an operand type. TYPE_Rv numbers are counted from
// `base`; every other handled type is counted from the first `prefix`-named
// register of its class. *valid starts at 1 and is cleared for out-of-range
// TMM, mask, mask-pair, segment, debug and control register numbers, and for
// operand types that are not handled (in which case 0 is returned).
#define GENERIC_FIXUP_FUNC(name, base, prefix)                                 \
  static uint16_t name(struct InternalInstruction *insn, OperandType type,     \
                       uint8_t index, uint8_t *valid) {                        \
    *valid = 1;                                                                \
    switch (type) {                                                            \
    case TYPE_Rv:                                                              \
      return base + index;                                                     \
    case TYPE_R8:                                                              \
      /* With a REX prefix, numbers 4-7 are counted from prefix##_SPL. */      \
      return (insn->rexPrefix && index >= 4 && index <= 7)                     \
                 ? prefix##_SPL + (index - 4)                                  \
                 : prefix##_AL + index;                                        \
    case TYPE_R16:                                                             \
      return prefix##_AX + index;                                              \
    case TYPE_R32:                                                             \
      return prefix##_EAX + index;                                             \
    case TYPE_R64:                                                             \
      return prefix##_RAX + index;                                             \
    case TYPE_XMM:                                                             \
    case TYPE_MVSIBX:                                                          \
      return prefix##_XMM0 + index;                                            \
    case TYPE_YMM:                                                             \
    case TYPE_MVSIBY:                                                          \
      return prefix##_YMM0 + index;                                            \
    case TYPE_ZMM:                                                             \
    case TYPE_MVSIBZ:                                                          \
      return prefix##_ZMM0 + index;                                            \
    case TYPE_TMM:                                                             \
      *valid = (index <= 7);                                                   \
      return prefix##_TMM0 + index;                                            \
    case TYPE_VK: {                                                            \
      /* Only the low four bits of the number are considered. */               \
      const uint8_t maskNum = index & 0xf;                                     \
      *valid = (maskNum <= 7);                                                 \
      return prefix##_K0 + maskNum;                                            \
    }                                                                          \
    case TYPE_VK_PAIR:                                                         \
      *valid = (index <= 7);                                                   \
      return prefix##_K0_K1 + (index / 2);                                     \
    case TYPE_MM64:                                                            \
      return prefix##_MM0 + (index & 0x7);                                     \
    case TYPE_SEGMENTREG:                                                      \
      *valid = ((index & 7) <= 5);                                             \
      return prefix##_ES + (index & 7);                                        \
    case TYPE_DEBUGREG:                                                        \
      *valid = (index <= 15);                                                  \
      return prefix##_DR0 + index;                                             \
    case TYPE_CONTROLREG:                                                      \
      *valid = (index <= 15);                                                  \
      return prefix##_CR0 + index;                                             \
    default:                                                                   \
      debug("Unhandled register type");                                        \
      *valid = 0;                                                              \
      return 0;                                                                \
    }                                                                          \
  }

844

845// Consult an operand type to determine the meaning of the reg or R/M field. If

846// the operand is an XMM operand, for example, an operand would be XMM0 instead

847// of AX, which readModRM() would otherwise misinterpret it as.

848//

849// @param insn - The instruction containing the operand.

850// @param type - The operand type.

851// @param index - The existing value of the field as reported by readModRM().

852// @param valid - The address of a uint8_t. The target is set to 1 if the

853// field is valid for the register class; 0 if not.

854// @return - The proper value.

// fixupRegValue: MODRM_REG_* flavour, with TYPE_Rv counted from
// insn->regBase. fixupReg() uses it for ENCODING_VVVV and ENCODING_REG.
855GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)

// fixupRMValue: EA_REG_* flavour, with TYPE_Rv counted from insn->eaRegBase.
// fixupReg() uses it for the R/M and SIB encodings.
856GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)

857

858// Consult an operand specifier to determine which of the fixup*Value functions

859// to use in correcting readModRM()'ss interpretation.

860//

861// @param insn - See fixup*Value().

862// @param op - The operand specifier.

863// @return - 0 if fixup was successful; -1 if the register returned was

864// invalid for its class.

865 static int fixupReg(struct InternalInstruction *insn,

866 const struct OperandSpecifier *op) {

867 uint8_t valid;

868 LLVM_DEBUG(dbgs() << "fixupReg()");

869

870 switch ((OperandEncoding)op->encoding) {

871 default:

872 debug("Expected a REG or R/M encoding in fixupReg");

873 return -1;

874 case ENCODING_VVVV:

875 insn->vvvv =

876 (Reg)fixupRegValue(insn, (OperandType)op->type, insn->vvvv, &valid);

877 if (!valid)

878 return -1;

879 break;

880 case ENCODING_REG:

881 insn->reg = (Reg)fixupRegValue(insn, (OperandType)op->type,

882 insn->reg - insn->regBase, &valid);

883 if (!valid)

884 return -1;

885 break;

886 CASE_ENCODING_RM:

887 if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&

888 modFromModRM(insn->modRM) == 3) {

889 // EVEX_X can extend the register id to 32 for a non-GPR register that is

890 // encoded in RM.

891 // mode : MODE_64_BIT

892 // Only 8 vector registers are available in 32 bit mode

893 // mod : 3

894 // RM encodes a register

895 switch (op->type) {

896 case TYPE_Rv:

897 case TYPE_R8:

898 case TYPE_R16:

899 case TYPE_R32:

900 case TYPE_R64:

901 break;

902 default:

903 insn->eaBase =

904 (EABase)(insn->eaBase +

905 (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4));

906 break;

907 }

908 }

909 [[fallthrough]];

910 case ENCODING_SIB:

911 if (insn->eaBase >= insn->eaRegBase) {

912 insn->eaBase = (EABase)fixupRMValue(

913 insn, (OperandType)op->type, insn->eaBase - insn->eaRegBase, &valid);

914 if (!valid)

915 return -1;

916 }

917 break;

918 }

919

920 return 0;

921}

922

923// Read the opcode (except the ModR/M byte in the case of extended or escape

924// opcodes).

925 static bool readOpcode(struct InternalInstruction *insn) {

926 uint8_t current;

927 LLVM_DEBUG(dbgs() << "readOpcode()");

928

929 insn->opcodeType = ONEBYTE;

930 if (insn->vectorExtensionType == TYPE_EVEX) {

931 switch (mmmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {

932 default:

933 LLVM_DEBUG(

934 dbgs() << format("Unhandled mmm field for instruction (0x%hhx)",

935 mmmFromEVEX2of4(insn->vectorExtensionPrefix[1])));

936 return true;

937 case VEX_LOB_0F:

938 insn->opcodeType = TWOBYTE;

939 return consume(insn, insn->opcode);

940 case VEX_LOB_0F38:

941 insn->opcodeType = THREEBYTE_38;

942 return consume(insn, insn->opcode);

943 case VEX_LOB_0F3A:

944 insn->opcodeType = THREEBYTE_3A;

945 return consume(insn, insn->opcode);

946 case VEX_LOB_MAP4:

947 insn->opcodeType = MAP4;

948 return consume(insn, insn->opcode);

949 case VEX_LOB_MAP5:

950 insn->opcodeType = MAP5;

951 return consume(insn, insn->opcode);

952 case VEX_LOB_MAP6:

953 insn->opcodeType = MAP6;

954 return consume(insn, insn->opcode);

955 case VEX_LOB_MAP7:

956 insn->opcodeType = MAP7;

957 return consume(insn, insn->opcode);

958 }

959 } else if (insn->vectorExtensionType == TYPE_VEX_3B) {

960 switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {

961 default:

962 LLVM_DEBUG(

963 dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",

964 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])));

965 return true;

966 case VEX_LOB_0F:

967 insn->opcodeType = TWOBYTE;

968 return consume(insn, insn->opcode);

969 case VEX_LOB_0F38:

970 insn->opcodeType = THREEBYTE_38;

971 return consume(insn, insn->opcode);

972 case VEX_LOB_0F3A:

973 insn->opcodeType = THREEBYTE_3A;

974 return consume(insn, insn->opcode);

975 case VEX_LOB_MAP5:

976 insn->opcodeType = MAP5;

977 return consume(insn, insn->opcode);

978 case VEX_LOB_MAP6:

979 insn->opcodeType = MAP6;

980 return consume(insn, insn->opcode);

981 case VEX_LOB_MAP7:

982 insn->opcodeType = MAP7;

983 return consume(insn, insn->opcode);

984 }

985 } else if (insn->vectorExtensionType == TYPE_VEX_2B) {

986 insn->opcodeType = TWOBYTE;

987 return consume(insn, insn->opcode);

988 } else if (insn->vectorExtensionType == TYPE_XOP) {

989 switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) {

990 default:

991 LLVM_DEBUG(

992 dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",

993 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])));

994 return true;

995 case XOP_MAP_SELECT_8:

996 insn->opcodeType = XOP8_MAP;

997 return consume(insn, insn->opcode);

998 case XOP_MAP_SELECT_9:

999 insn->opcodeType = XOP9_MAP;

1000 return consume(insn, insn->opcode);

1001 case XOP_MAP_SELECT_A:

1002 insn->opcodeType = XOPA_MAP;

1003 return consume(insn, insn->opcode);

1004 }

1005 } else if (mFromREX2(insn->rex2ExtensionPrefix[1])) {

1006 // m bit indicates opcode map 1

1007 insn->opcodeType = TWOBYTE;

1008 return consume(insn, insn->opcode);

1009 }

1010

1011 if (consume(insn, current))

1012 return true;

1013

1014 if (current == 0x0f) {

1015 LLVM_DEBUG(

1016 dbgs() << format("Found a two-byte escape prefix (0x%hhx)", current));

1017 if (consume(insn, current))

1018 return true;

1019

1020 if (current == 0x38) {

1021 LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)",

1022 current));

1023 if (consume(insn, current))

1024 return true;

1025

1026 insn->opcodeType = THREEBYTE_38;

1027 } else if (current == 0x3a) {

1028 LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)",

1029 current));

1030 if (consume(insn, current))

1031 return true;

1032

1033 insn->opcodeType = THREEBYTE_3A;

1034 } else if (current == 0x0f) {

1035 LLVM_DEBUG(

1036 dbgs() << format("Found a 3dnow escape prefix (0x%hhx)", current));

1037

1038 // Consume operands before the opcode to comply with the 3DNow encoding

1039 if (readModRM(insn))

1040 return true;

1041

1042 if (consume(insn, current))

1043 return true;

1044

1045 insn->opcodeType = THREEDNOW_MAP;

1046 } else {

1047 LLVM_DEBUG(dbgs() << "Didn't find a three-byte escape prefix");

1048 insn->opcodeType = TWOBYTE;

1049 }

1050 } else if (insn->mandatoryPrefix)

1051 // The opcode with mandatory prefix must start with opcode escape.

1052 // If not it's legacy repeat prefix

1053 insn->mandatoryPrefix = 0;

1054

1055 // At this point we have consumed the full opcode.

1056 // Anything we consume from here on must be unconsumed.

1057 insn->opcode = current;

1058

1059 return false;

1060}

1061

// Determine whether equiv is the 16-bit equivalent of orig (32-bit or 64-bit).
//
// The two names must have the same length and may differ only where orig has
// a 32/64-bit marker and equiv has the 16-bit one at the same position:
//   'Q' or 'L' -> 'W',  '6' or '3' -> '1',  '4' or '2' -> '6'
// (so "64"/"32" line up with "16").
//
// @param orig  - NUL-terminated name of the 32-bit or 64-bit instruction.
// @param equiv - NUL-terminated name of the candidate 16-bit instruction.
// @return      - true if equiv matches orig under the substitutions above.
static bool is16BitEquivalent(const char *orig, const char *equiv) {
  for (int i = 0;; i++) {
    // Both strings ended together: every position matched.
    if (orig[i] == '\0' && equiv[i] == '\0')
      return true;
    // Only one string ended: lengths differ.
    if (orig[i] == '\0' || equiv[i] == '\0')
      return false;
    if (orig[i] != equiv[i]) {
      if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
        continue;
      if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
        continue;
      if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
        continue;
      return false;
    }
  }
}

1080

// Determine whether this instruction is a 64-bit instruction.
//
// The check is purely textual: the name counts as 64-bit if it contains the
// substring "64" anywhere.
//
// @param name - NUL-terminated instruction name.
// @return     - true if name contains "64"; false otherwise.
static bool is64Bit(const char *name) {
  for (int i = 0;; ++i) {
    if (name[i] == '\0')
      return false;
    // Reading name[i + 1] is safe: name[i] is not the terminator here.
    if (name[i] == '6' && name[i + 1] == '4')
      return true;
  }
}

1090

1091// Determine the ID of an instruction, consuming the ModR/M byte as appropriate

1092// for extended and escape opcodes, and using a supplied attribute mask.

1093 static int getInstructionIDWithAttrMask(uint16_t *instructionID,

1094 struct InternalInstruction *insn,

1095 uint16_t attrMask) {

1096 auto insnCtx = InstructionContext(x86DisassemblerContexts[attrMask]);

1097 const ContextDecision *decision;

1098 switch (insn->opcodeType) {

1099 case ONEBYTE:

1100 decision = &ONEBYTE_SYM;

1101 break;

1102 case TWOBYTE:

1103 decision = &TWOBYTE_SYM;

1104 break;

1105 case THREEBYTE_38:

1106 decision = &THREEBYTE38_SYM;

1107 break;

1108 case THREEBYTE_3A:

1109 decision = &THREEBYTE3A_SYM;

1110 break;

1111 case XOP8_MAP:

1112 decision = &XOP8_MAP_SYM;

1113 break;

1114 case XOP9_MAP:

1115 decision = &XOP9_MAP_SYM;

1116 break;

1117 case XOPA_MAP:

1118 decision = &XOPA_MAP_SYM;

1119 break;

1120 case THREEDNOW_MAP:

1121 decision = &THREEDNOW_MAP_SYM;

1122 break;

1123 case MAP4:

1124 decision = &MAP4_SYM;

1125 break;

1126 case MAP5:

1127 decision = &MAP5_SYM;

1128 break;

1129 case MAP6:

1130 decision = &MAP6_SYM;

1131 break;

1132 case MAP7:

1133 decision = &MAP7_SYM;

1134 break;

1135 }

1136

1137 if (decision->opcodeDecisions[insnCtx]

1138 .modRMDecisions[insn->opcode]

1139 .modrm_type != MODRM_ONEENTRY) {

1140 if (readModRM(insn))

1141 return -1;

1142 *instructionID =

1143 decode(insn->opcodeType, insnCtx, insn->opcode, insn->modRM);

1144 } else {

1145 *instructionID = decode(insn->opcodeType, insnCtx, insn->opcode, 0);

1146 }

1147

1148 return 0;

1149}

1150

1151 static bool isCCMPOrCTEST(InternalInstruction *insn) {

1152 if (insn->opcodeType != MAP4)

1153 return false;

1154 if (insn->opcode == 0x83 && regFromModRM(insn->modRM) == 7)

1155 return true;

1156 switch (insn->opcode & 0xfe) {

1157 default:

1158 return false;

1159 case 0x38:

1160 case 0x3a:

1161 case 0x84:

1162 return true;

1163 case 0x80:

1164 return regFromModRM(insn->modRM) == 7;

1165 case 0xf6:

1166 return regFromModRM(insn->modRM) == 0;

1167 }

1168}

1169

1170 static bool isNF(InternalInstruction *insn) {

1171 if (!nfFromEVEX4of4(insn->vectorExtensionPrefix[3]))

1172 return false;

1173 if (insn->opcodeType == MAP4)

1174 return true;

1175 // Below NF instructions are not in map4.

1176 if (insn->opcodeType == THREEBYTE_38 &&

1177 ppFromEVEX3of4(insn->vectorExtensionPrefix[2]) == VEX_PREFIX_NONE) {

1178 switch (insn->opcode) {

1179 case 0xf2: // ANDN

1180 case 0xf3: // BLSI, BLSR, BLSMSK

1181 case 0xf5: // BZHI

1182 case 0xf7: // BEXTR

1183 return true;

1184 default:

1185 break;

1186 }

1187 }

1188 return false;

1189}

1190

1191// Determine the ID of an instruction, consuming the ModR/M byte as appropriate

1192// for extended and escape opcodes. Determines the attributes and context for

1193// the instruction before doing so.

1194 static int getInstructionID(struct InternalInstruction *insn,

1195 const MCInstrInfo *mii) {

1196 uint16_t attrMask;

1197 uint16_t instructionID;

1198

1199 LLVM_DEBUG(dbgs() << "getID()");

1200

1201 attrMask = ATTR_NONE;

1202

1203 if (insn->mode == MODE_64BIT)

1204 attrMask |= ATTR_64BIT;

1205

1206 if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {

1207 attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX;

1208

1209 if (insn->vectorExtensionType == TYPE_EVEX) {

1210 switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {

1211 case VEX_PREFIX_66:

1212 attrMask |= ATTR_OPSIZE;

1213 break;

1214 case VEX_PREFIX_F3:

1215 attrMask |= ATTR_XS;

1216 break;

1217 case VEX_PREFIX_F2:

1218 attrMask |= ATTR_XD;

1219 break;

1220 }

1221

1222 if (zFromEVEX4of4(insn->vectorExtensionPrefix[3]))

1223 attrMask |= ATTR_EVEXKZ;

1224 if (isNF(insn) && !readModRM(insn) &&

1225 !isCCMPOrCTEST(insn)) // NF bit is the MSB of aaa.

1226 attrMask |= ATTR_EVEXNF;

1227 // aaa is not used a opmask in MAP4

1228 else if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]) &&

1229 (insn->opcodeType != MAP4))

1230 attrMask |= ATTR_EVEXK;

1231 if (bFromEVEX4of4(insn->vectorExtensionPrefix[3])) {

1232 attrMask |= ATTR_EVEXB;

1233 if (uFromEVEX3of4(insn->vectorExtensionPrefix[2]) && !readModRM(insn) &&

1234 modFromModRM(insn->modRM) == 3)

1235 attrMask |= ATTR_EVEXU;

1236 }

1237 if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))

1238 attrMask |= ATTR_VEXL;

1239 if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))

1240 attrMask |= ATTR_EVEXL2;

1241 } else if (insn->vectorExtensionType == TYPE_VEX_3B) {

1242 switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {

1243 case VEX_PREFIX_66:

1244 attrMask |= ATTR_OPSIZE;

1245 break;

1246 case VEX_PREFIX_F3:

1247 attrMask |= ATTR_XS;

1248 break;

1249 case VEX_PREFIX_F2:

1250 attrMask |= ATTR_XD;

1251 break;

1252 }

1253

1254 if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))

1255 attrMask |= ATTR_VEXL;

1256 } else if (insn->vectorExtensionType == TYPE_VEX_2B) {

1257 switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {

1258 case VEX_PREFIX_66:

1259 attrMask |= ATTR_OPSIZE;

1260 if (insn->hasAdSize)

1261 attrMask |= ATTR_ADSIZE;

1262 break;

1263 case VEX_PREFIX_F3:

1264 attrMask |= ATTR_XS;

1265 break;

1266 case VEX_PREFIX_F2:

1267 attrMask |= ATTR_XD;

1268 break;

1269 }

1270

1271 if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))

1272 attrMask |= ATTR_VEXL;

1273 } else if (insn->vectorExtensionType == TYPE_XOP) {

1274 switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {

1275 case VEX_PREFIX_66:

1276 attrMask |= ATTR_OPSIZE;

1277 break;

1278 case VEX_PREFIX_F3:

1279 attrMask |= ATTR_XS;

1280 break;

1281 case VEX_PREFIX_F2:

1282 attrMask |= ATTR_XD;

1283 break;

1284 }

1285

1286 if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))

1287 attrMask |= ATTR_VEXL;

1288 } else {

1289 return -1;

1290 }

1291 } else if (!insn->mandatoryPrefix) {

1292 // If we don't have mandatory prefix we should use legacy prefixes here

1293 if (insn->hasOpSize && (insn->mode != MODE_16BIT))

1294 attrMask |= ATTR_OPSIZE;

1295 if (insn->hasAdSize)

1296 attrMask |= ATTR_ADSIZE;

1297 if (insn->opcodeType == ONEBYTE) {

1298 if (insn->repeatPrefix == 0xf3 && (insn->opcode == 0x90))

1299 // Special support for PAUSE

1300 attrMask |= ATTR_XS;

1301 } else {

1302 if (insn->repeatPrefix == 0xf2)

1303 attrMask |= ATTR_XD;

1304 else if (insn->repeatPrefix == 0xf3)

1305 attrMask |= ATTR_XS;

1306 }

1307 } else {

1308 switch (insn->mandatoryPrefix) {

1309 case 0xf2:

1310 attrMask |= ATTR_XD;

1311 break;

1312 case 0xf3:

1313 attrMask |= ATTR_XS;

1314 break;

1315 case 0x66:

1316 if (insn->mode != MODE_16BIT)

1317 attrMask |= ATTR_OPSIZE;

1318 if (insn->hasAdSize)

1319 attrMask |= ATTR_ADSIZE;

1320 break;

1321 case 0x67:

1322 attrMask |= ATTR_ADSIZE;

1323 break;

1324 }

1325 }

1326

1327 if (insn->rexPrefix & 0x08) {

1328 attrMask |= ATTR_REXW;

1329 attrMask &= ~ATTR_ADSIZE;

1330 }

1331

1332 // Absolute jump and pushp/popp need special handling

1333 if (insn->rex2ExtensionPrefix[0] == 0xd5 && insn->opcodeType == ONEBYTE &&

1334 (insn->opcode == 0xA1 || (insn->opcode & 0xf0) == 0x50))

1335 attrMask |= ATTR_REX2;

1336

1337 if (insn->mode == MODE_16BIT) {

1338 // JCXZ/JECXZ need special handling for 16-bit mode because the meaning

1339 // of the AdSize prefix is inverted w.r.t. 32-bit mode.

1340 if (insn->opcodeType == ONEBYTE && insn->opcode == 0xE3)

1341 attrMask ^= ATTR_ADSIZE;

1342 // If we're in 16-bit mode and this is one of the relative jumps and opsize

1343 // prefix isn't present, we need to force the opsize attribute since the

1344 // prefix is inverted relative to 32-bit mode.

1345 if (!insn->hasOpSize && insn->opcodeType == ONEBYTE &&

1346 (insn->opcode == 0xE8 || insn->opcode == 0xE9))

1347 attrMask |= ATTR_OPSIZE;

1348

1349 if (!insn->hasOpSize && insn->opcodeType == TWOBYTE &&

1350 insn->opcode >= 0x80 && insn->opcode <= 0x8F)

1351 attrMask |= ATTR_OPSIZE;

1352 }

1353

1354

1355 if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask))

1356 return -1;

1357

1358 // The following clauses compensate for limitations of the tables.

1359

1360 if (insn->mode != MODE_64BIT &&

1361 insn->vectorExtensionType != TYPE_NO_VEX_XOP) {

1362 // The tables can't distinquish between cases where the W-bit is used to

1363 // select register size and cases where its a required part of the opcode.

1364 if ((insn->vectorExtensionType == TYPE_EVEX &&

1365 wFromEVEX3of4(insn->vectorExtensionPrefix[2])) ||

1366 (insn->vectorExtensionType == TYPE_VEX_3B &&

1367 wFromVEX3of3(insn->vectorExtensionPrefix[2])) ||

1368 (insn->vectorExtensionType == TYPE_XOP &&

1369 wFromXOP3of3(insn->vectorExtensionPrefix[2]))) {

1370

1371 uint16_t instructionIDWithREXW;

1372 if (getInstructionIDWithAttrMask(&instructionIDWithREXW, insn,

1373 attrMask | ATTR_REXW)) {

1374 insn->instructionID = instructionID;

1375 insn->spec = &INSTRUCTIONS_SYM[instructionID];

1376 return 0;

1377 }

1378

1379 auto SpecName = mii->getName(instructionIDWithREXW);

1380 // If not a 64-bit instruction. Switch the opcode.

1381 if (!is64Bit(SpecName.data())) {

1382 insn->instructionID = instructionIDWithREXW;

1383 insn->spec = &INSTRUCTIONS_SYM[instructionIDWithREXW];

1384 return 0;

1385 }

1386 }

1387 }

1388

1389 // Absolute moves, umonitor, and movdir64b need special handling.

1390 // -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are

1391 // inverted w.r.t.

1392 // -For 32-bit mode we need to ensure the ADSIZE prefix is observed in

1393 // any position.

1394 if ((insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) ||

1395 (insn->opcodeType == TWOBYTE && (insn->opcode == 0xAE)) ||

1396 (insn->opcodeType == THREEBYTE_38 && insn->opcode == 0xF8) ||

1397 (insn->opcodeType == MAP4 && insn->opcode == 0xF8)) {

1398 // Make sure we observed the prefixes in any position.

1399 if (insn->hasAdSize)

1400 attrMask |= ATTR_ADSIZE;

1401 if (insn->hasOpSize)

1402 attrMask |= ATTR_OPSIZE;

1403

1404 // In 16-bit, invert the attributes.

1405 if (insn->mode == MODE_16BIT) {

1406 attrMask ^= ATTR_ADSIZE;

1407

1408 // The OpSize attribute is only valid with the absolute moves.

1409 if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0))

1410 attrMask ^= ATTR_OPSIZE;

1411 }

1412

1413 if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask))

1414 return -1;

1415

1416 insn->instructionID = instructionID;

1417 insn->spec = &INSTRUCTIONS_SYM[instructionID];

1418 return 0;

1419 }

1420

1421 if ((insn->mode == MODE_16BIT || insn->hasOpSize) &&

1422 !(attrMask & ATTR_OPSIZE)) {

1423 // The instruction tables make no distinction between instructions that

1424 // allow OpSize anywhere (i.e., 16-bit operations) and that need it in a

1425 // particular spot (i.e., many MMX operations). In general we're

1426 // conservative, but in the specific case where OpSize is present but not in

1427 // the right place we check if there's a 16-bit operation.

1428 const struct InstructionSpecifier *spec;

1429 uint16_t instructionIDWithOpsize;

1430 llvm::StringRef specName, specWithOpSizeName;

1431

1432 spec = &INSTRUCTIONS_SYM[instructionID];

1433

1434 if (getInstructionIDWithAttrMask(&instructionIDWithOpsize, insn,

1435 attrMask | ATTR_OPSIZE)) {

1436 // ModRM required with OpSize but not present. Give up and return the

1437 // version without OpSize set.

1438 insn->instructionID = instructionID;

1439 insn->spec = spec;

1440 return 0;

1441 }

1442

1443 specName = mii->getName(instructionID);

1444 specWithOpSizeName = mii->getName(instructionIDWithOpsize);

1445

1446 if (is16BitEquivalent(specName.data(), specWithOpSizeName.data()) &&

1447 (insn->mode == MODE_16BIT) ^ insn->hasOpSize) {

1448 insn->instructionID = instructionIDWithOpsize;

1449 insn->spec = &INSTRUCTIONS_SYM[instructionIDWithOpsize];

1450 } else {

1451 insn->instructionID = instructionID;

1452 insn->spec = spec;

1453 }

1454 return 0;

1455 }

1456

1457 if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&

1458 insn->rexPrefix & 0x01) {

1459 // NOOP shouldn't decode as NOOP if REX.b is set. Instead it should decode

1460 // as XCHG %r8, %eax.

1461 const struct InstructionSpecifier *spec;

1462 uint16_t instructionIDWithNewOpcode;

1463 const struct InstructionSpecifier *specWithNewOpcode;

1464

1465 spec = &INSTRUCTIONS_SYM[instructionID];

1466

1467 // Borrow opcode from one of the other XCHGar opcodes

1468 insn->opcode = 0x91;

1469

1470 if (getInstructionIDWithAttrMask(&instructionIDWithNewOpcode, insn,

1471 attrMask)) {

1472 insn->opcode = 0x90;

1473

1474 insn->instructionID = instructionID;

1475 insn->spec = spec;

1476 return 0;

1477 }

1478

1479 specWithNewOpcode = &INSTRUCTIONS_SYM[instructionIDWithNewOpcode];

1480

1481 // Change back

1482 insn->opcode = 0x90;

1483

1484 insn->instructionID = instructionIDWithNewOpcode;

1485 insn->spec = specWithNewOpcode;

1486

1487 return 0;

1488 }

1489

1490 insn->instructionID = instructionID;

1491 insn->spec = &INSTRUCTIONS_SYM[insn->instructionID];

1492

1493 return 0;

1494}

1495

1496// Read an operand from the opcode field of an instruction and interprets it

1497// appropriately given the operand width. Handles AddRegFrm instructions.

1498//

1499// @param insn - the instruction whose opcode field is to be read.

1500// @param size - The width (in bytes) of the register being specified.

1501// 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means

1502// RAX.

1503// @return - 0 on success; nonzero otherwise.

1504 static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) {

1505 LLVM_DEBUG(dbgs() << "readOpcodeRegister()");

1506

1507 if (size == 0)

1508 size = insn->registerSize;

1509

1510 auto setOpcodeRegister = [&](unsigned base) {

1511 insn->opcodeRegister =

1512 (Reg)(base + ((bFromREX(insn->rexPrefix) << 3) |

1513 (b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4) |

1514 (insn->opcode & 7)));

1515 };

1516

1517 switch (size) {

1518 case 1:

1519 setOpcodeRegister(MODRM_REG_AL);

1520 if (insn->rexPrefix && insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&

1521 insn->opcodeRegister < MODRM_REG_AL + 0x8) {

1522 insn->opcodeRegister =

1523 (Reg)(MODRM_REG_SPL + (insn->opcodeRegister - MODRM_REG_AL - 4));

1524 }

1525

1526 break;

1527 case 2:

1528 setOpcodeRegister(MODRM_REG_AX);

1529 break;

1530 case 4:

1531 setOpcodeRegister(MODRM_REG_EAX);

1532 break;

1533 case 8:

1534 setOpcodeRegister(MODRM_REG_RAX);

1535 break;

1536 }

1537

1538 return 0;

1539}

1540

1541// Consume an immediate operand from an instruction, given the desired operand

1542// size.

1543//

1544// @param insn - The instruction whose operand is to be read.

1545// @param size - The width (in bytes) of the operand.

1546// @return - 0 if the immediate was successfully consumed; nonzero

1547// otherwise.

1548 static int readImmediate(struct InternalInstruction *insn, uint8_t size) {

1549 uint8_t imm8;

1550 uint16_t imm16;

1551 uint32_t imm32;

1552 uint64_t imm64;

1553

1554 LLVM_DEBUG(dbgs() << "readImmediate()");

1555

1556 assert(insn->numImmediatesConsumed < 2 && "Already consumed two immediates");

1557

1558 insn->immediateSize = size;

1559 insn->immediateOffset = insn->readerCursor - insn->startLocation;

1560

1561 switch (size) {

1562 case 1:

1563 if (consume(insn, imm8))

1564 return -1;

1565 insn->immediates[insn->numImmediatesConsumed] = imm8;

1566 break;

1567 case 2:

1568 if (consume(insn, imm16))

1569 return -1;

1570 insn->immediates[insn->numImmediatesConsumed] = imm16;

1571 break;

1572 case 4:

1573 if (consume(insn, imm32))

1574 return -1;

1575 insn->immediates[insn->numImmediatesConsumed] = imm32;

1576 break;

1577 case 8:

1578 if (consume(insn, imm64))

1579 return -1;

1580 insn->immediates[insn->numImmediatesConsumed] = imm64;

1581 break;

1582 default:

1583 llvm_unreachable("invalid size");

1584 }

1585

1586 insn->numImmediatesConsumed++;

1587

1588 return 0;

1589}

1590

1591// Consume vvvv from an instruction if it has a VEX prefix.

1592 static int readVVVV(struct InternalInstruction *insn) {

1593 LLVM_DEBUG(dbgs() << "readVVVV()");

1594

1595 int vvvv;

1596 if (insn->vectorExtensionType == TYPE_EVEX)

1597 vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 |

1598 vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2]));

1599 else if (insn->vectorExtensionType == TYPE_VEX_3B)

1600 vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);

1601 else if (insn->vectorExtensionType == TYPE_VEX_2B)

1602 vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]);

1603 else if (insn->vectorExtensionType == TYPE_XOP)

1604 vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);

1605 else

1606 return -1;

1607

1608 if (insn->mode != MODE_64BIT)

1609 vvvv &= 0xf; // Can only clear bit 4. Bit 3 must be cleared later.

1610

1611 insn->vvvv = static_cast<Reg >(vvvv);

1612 return 0;

1613}

1614

1615// Read an mask register from the opcode field of an instruction.

1616//

1617// @param insn - The instruction whose opcode field is to be read.

1618// @return - 0 on success; nonzero otherwise.

1619 static int readMaskRegister(struct InternalInstruction *insn) {

1620 LLVM_DEBUG(dbgs() << "readMaskRegister()");

1621

1622 if (insn->vectorExtensionType != TYPE_EVEX)

1623 return -1;

1624

1625 insn->writemask =

1626 static_cast<Reg >(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]));

1627 return 0;

1628}

1629

1630// Consults the specifier for an instruction and consumes all

1631// operands for that instruction, interpreting them as it goes.

1632 static int readOperands(struct InternalInstruction *insn) {

1633 int hasVVVV, needVVVV;

1634 int sawRegImm = 0;

1635

1636 LLVM_DEBUG(dbgs() << "readOperands()");

1637

1638 // If non-zero vvvv specified, make sure one of the operands uses it.

1639 hasVVVV = !readVVVV(insn);

1640 needVVVV = hasVVVV && (insn->vvvv != 0);

1641

1642 for (const auto &Op : x86OperandSets[insn->spec->operands]) {

1643 switch (Op.encoding) {

1644 case ENCODING_NONE:

1645 case ENCODING_SI:

1646 case ENCODING_DI:

1647 break;

1648 CASE_ENCODING_VSIB:

1649 // VSIB can use the V2 bit so check only the other bits.

1650 if (needVVVV)

1651 needVVVV = hasVVVV & ((insn->vvvv & 0xf) != 0);

1652 if (readModRM(insn))

1653 return -1;

1654

1655 // Reject if SIB wasn't used.

1656 if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)

1657 return -1;

1658

1659 // If sibIndex was set to SIB_INDEX_NONE, index offset is 4.

1660 if (insn->sibIndex == SIB_INDEX_NONE)

1661 insn->sibIndex = (SIBIndex)(insn->sibIndexBase + 4);

1662

1663 // If EVEX.v2 is set this is one of the 16-31 registers.

1664 if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&

1665 v2FromEVEX4of4(insn->vectorExtensionPrefix[3]))

1666 insn->sibIndex = (SIBIndex)(insn->sibIndex + 16);

1667

1668 // Adjust the index register to the correct size.

1669 switch ((OperandType)Op.type) {

1670 default:

1671 debug("Unhandled VSIB index type");

1672 return -1;

1673 case TYPE_MVSIBX:

1674 insn->sibIndex =

1675 (SIBIndex)(SIB_INDEX_XMM0 + (insn->sibIndex - insn->sibIndexBase));

1676 break;

1677 case TYPE_MVSIBY:

1678 insn->sibIndex =

1679 (SIBIndex)(SIB_INDEX_YMM0 + (insn->sibIndex - insn->sibIndexBase));

1680 break;

1681 case TYPE_MVSIBZ:

1682 insn->sibIndex =

1683 (SIBIndex)(SIB_INDEX_ZMM0 + (insn->sibIndex - insn->sibIndexBase));

1684 break;

1685 }

1686

1687 // Apply the AVX512 compressed displacement scaling factor.

1688 if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)

1689 insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB);

1690 break;

1691 case ENCODING_SIB:

1692 // Reject if SIB wasn't used.

1693 if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)

1694 return -1;

1695 if (readModRM(insn))

1696 return -1;

1697 if (fixupReg(insn, &Op))

1698 return -1;

1699 break;

1700 case ENCODING_REG:

1701 CASE_ENCODING_RM:

1702 if (readModRM(insn))

1703 return -1;

1704 if (fixupReg(insn, &Op))

1705 return -1;

1706 // Apply the AVX512 compressed displacement scaling factor.

1707 if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)

1708 insn->displacement *= 1 << (Op.encoding - ENCODING_RM);

1709 break;

1710 case ENCODING_IB:

1711 if (sawRegImm) {

1712 // Saw a register immediate so don't read again and instead split the

1713 // previous immediate. FIXME: This is a hack.

1714 insn->immediates[insn->numImmediatesConsumed] =

1715 insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;

1716 ++insn->numImmediatesConsumed;

1717 break;

1718 }

1719 if (readImmediate(insn, 1))

1720 return -1;

1721 if (Op.type == TYPE_XMM || Op.type == TYPE_YMM)

1722 sawRegImm = 1;

1723 break;

1724 case ENCODING_IW:

1725 if (readImmediate(insn, 2))

1726 return -1;

1727 break;

1728 case ENCODING_ID:

1729 if (readImmediate(insn, 4))

1730 return -1;

1731 break;

1732 case ENCODING_IO:

1733 if (readImmediate(insn, 8))

1734 return -1;

1735 break;

1736 case ENCODING_Iv:

1737 if (readImmediate(insn, insn->immediateSize))

1738 return -1;

1739 break;

1740 case ENCODING_Ia:

1741 if (readImmediate(insn, insn->addressSize))

1742 return -1;

1743 break;

1744 case ENCODING_IRC:

1745 insn->RC = (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 1) |

1746 lFromEVEX4of4(insn->vectorExtensionPrefix[3]);

1747 break;

1748 case ENCODING_RB:

1749 if (readOpcodeRegister(insn, 1))

1750 return -1;

1751 break;

1752 case ENCODING_RW:

1753 if (readOpcodeRegister(insn, 2))

1754 return -1;

1755 break;

1756 case ENCODING_RD:

1757 if (readOpcodeRegister(insn, 4))

1758 return -1;

1759 break;

1760 case ENCODING_RO:

1761 if (readOpcodeRegister(insn, 8))

1762 return -1;

1763 break;

1764 case ENCODING_Rv:

1765 if (readOpcodeRegister(insn, 0))

1766 return -1;

1767 break;

1768 case ENCODING_CF:

1769 insn->immediates[1] = oszcFromEVEX3of4(insn->vectorExtensionPrefix[2]);

1770 needVVVV = false; // oszc shares the same bits with VVVV

1771 break;

1772 case ENCODING_CC:

1773 if (isCCMPOrCTEST(insn))

1774 insn->immediates[2] = scFromEVEX4of4(insn->vectorExtensionPrefix[3]);

1775 else

1776 insn->immediates[1] = insn->opcode & 0xf;

1777 break;

1778 case ENCODING_FP:

1779 break;

1780 case ENCODING_VVVV:

1781 needVVVV = 0; // Mark that we have found a VVVV operand.

1782 if (!hasVVVV)

1783 return -1;

1784 if (insn->mode != MODE_64BIT)

1785 insn->vvvv = static_cast<Reg >(insn->vvvv & 0x7);

1786 if (fixupReg(insn, &Op))

1787 return -1;

1788 break;

1789 case ENCODING_WRITEMASK:

1790 if (readMaskRegister(insn))

1791 return -1;

1792 break;

1793 case ENCODING_DUP:

1794 break;

1795 default:

1796 LLVM_DEBUG(dbgs() << "Encountered an operand with an unknown encoding.");

1797 return -1;

1798 }

1799 }

1800

1801 // If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail

1802 if (needVVVV)

1803 return -1;

1804

1805 return 0;

1806}

1807

namespace llvm {

// Fill-ins to make the compiler happy. These constants are never actually
// assigned; they are just filler to make an automatically-generated switch
// statement work.
namespace X86 {
  enum {
    // Consecutive placeholder values starting at 500.
    BX_SI = 500,
    BX_DI, // 501
    BP_SI, // 502
    BP_DI, // 503
    sib,   // 504
    sib64  // 505
  };
} // namespace X86

} // namespace llvm

1825

1826static bool translateInstruction(MCInst &target,

1827 InternalInstruction &source,

1828 const MCDisassembler *Dis);

1829

1830namespace {

1831

1832/// Generic disassembler for all X86 platforms. All each platform class should

1833/// have to do is subclass the constructor, and provide a different

1834/// disassemblerMode value.

1835class X86GenericDisassembler : public MCDisassembler {

1836 std::unique_ptr<const MCInstrInfo> MII;

1837public:

1838 X86GenericDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,

1839 std::unique_ptr<const MCInstrInfo> MII);

1840public:

1841 DecodeStatus getInstruction(MCInst &instr, uint64_t &size,

1842 ArrayRef<uint8_t> Bytes, uint64_t Address,

1843 raw_ostream &cStream) const override;

1844

1845private:

1846 DisassemblerMode fMode;

1847};

1848

1849} // namespace

1850

1851X86GenericDisassembler::X86GenericDisassembler(

1852 const MCSubtargetInfo &STI,

1853 MCContext &Ctx,

1854 std::unique_ptr<const MCInstrInfo> MII)

1855 : MCDisassembler(STI, Ctx), MII(std::move(MII)) {

1856 const FeatureBitset &FB = STI.getFeatureBits();

1857 if (FB[X86::Is16Bit]) {

1858 fMode = MODE_16BIT;

1859 return;

1860 } else if (FB[X86::Is32Bit]) {

1861 fMode = MODE_32BIT;

1862 return;

1863 } else if (FB[X86::Is64Bit]) {

1864 fMode = MODE_64BIT;

1865 return;

1866 }

1867

1868 llvm_unreachable("Invalid CPU mode");

1869}

1870

1871MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction(

1872 MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,

1873 raw_ostream &CStream) const {

1874 CommentStream = &CStream;

1875

1876 InternalInstruction Insn;

1877 memset(&Insn, 0, sizeof(InternalInstruction));

1878 Insn.bytes = Bytes;

1879 Insn.startLocation = Address;

1880 Insn.readerCursor = Address;

1881 Insn.mode = fMode;

1882

1883 if (Bytes.empty() || readPrefixes(&Insn) || readOpcode(&Insn) ||

1884 getInstructionID(&Insn, MII.get()) || Insn.instructionID == 0 ||

1885 readOperands(&Insn)) {

1886 Size = Insn.readerCursor - Address;

1887 return Fail;

1888 }

1889

1890 Insn.operands = x86OperandSets[Insn.spec->operands];

1891 Insn.length = Insn.readerCursor - Insn.startLocation;

1892 Size = Insn.length;

1893 if (Size > 15)

1894 LLVM_DEBUG(dbgs() << "Instruction exceeds 15-byte limit");

1895

1896 bool Ret = translateInstruction(Instr, Insn, this);

1897 if (!Ret) {

1898 unsigned Flags = X86::IP_NO_PREFIX;

1899 if (Insn.hasAdSize)

1900 Flags |= X86::IP_HAS_AD_SIZE;

1901 if (!Insn.mandatoryPrefix) {

1902 if (Insn.hasOpSize)

1903 Flags |= X86::IP_HAS_OP_SIZE;

1904 if (Insn.repeatPrefix == 0xf2)

1905 Flags |= X86::IP_HAS_REPEAT_NE;

1906 else if (Insn.repeatPrefix == 0xf3 &&

1907 // It should not be 'pause' f3 90

1908 Insn.opcode != 0x90)

1909 Flags |= X86::IP_HAS_REPEAT;

1910 if (Insn.hasLockPrefix)

1911 Flags |= X86::IP_HAS_LOCK;

1912 }

1913 Instr.setFlags(Flags);

1914 }

1915 return (!Ret) ? Success : Fail;

1916}

1917

1918//

1919// Private code that translates from struct InternalInstructions to MCInsts.

1920//

1921

1922/// translateRegister - Translates an internal register to the appropriate LLVM

1923/// register, and appends it as an operand to an MCInst.

1924///

1925/// @param mcInst - The MCInst to append to.

1926/// @param reg - The Reg to append.

1927 static void translateRegister(MCInst &mcInst, Reg reg) {

1928#define ENTRY(x) X86::x,

1929 static constexpr MCPhysReg llvmRegnums[] = {ALL_REGS};

1930#undef ENTRY

1931

1932 MCPhysReg llvmRegnum = llvmRegnums[reg];

1933 mcInst.addOperand(MCOperand::createReg(llvmRegnum));

1934}

1935

1936 static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {

1937 0, // SEG_OVERRIDE_NONE

1938 X86::CS,

1939 X86::SS,

1940 X86::DS,

1941 X86::ES,

1942 X86::FS,

1943 X86::GS

1944};

1945

1946/// translateSrcIndex - Appends a source index operand to an MCInst.

1947///

1948/// @param mcInst - The MCInst to append to.

1949/// @param insn - The internal instruction.

1950 static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) {

1951 unsigned baseRegNo;

1952

1953 if (insn.mode == MODE_64BIT)

1954 baseRegNo = insn.hasAdSize ? X86::ESI : X86::RSI;

1955 else if (insn.mode == MODE_32BIT)

1956 baseRegNo = insn.hasAdSize ? X86::SI : X86::ESI;

1957 else {

1958 assert(insn.mode == MODE_16BIT);

1959 baseRegNo = insn.hasAdSize ? X86::ESI : X86::SI;

1960 }

1961 MCOperand baseReg = MCOperand::createReg(baseRegNo);

1962 mcInst.addOperand(baseReg);

1963

1964 MCOperand segmentReg;

1965 segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]);

1966 mcInst.addOperand(segmentReg);

1967 return false;

1968}

1969

1970/// translateDstIndex - Appends a destination index operand to an MCInst.

1971///

1972/// @param mcInst - The MCInst to append to.

1973/// @param insn - The internal instruction.

1974

1975 static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) {

1976 unsigned baseRegNo;

1977

1978 if (insn.mode == MODE_64BIT)

1979 baseRegNo = insn.hasAdSize ? X86::EDI : X86::RDI;

1980 else if (insn.mode == MODE_32BIT)

1981 baseRegNo = insn.hasAdSize ? X86::DI : X86::EDI;

1982 else {

1983 assert(insn.mode == MODE_16BIT);

1984 baseRegNo = insn.hasAdSize ? X86::EDI : X86::DI;

1985 }

1986 MCOperand baseReg = MCOperand::createReg(baseRegNo);

1987 mcInst.addOperand(baseReg);

1988 return false;

1989}

1990

1991/// translateImmediate - Appends an immediate operand to an MCInst.

1992///

1993/// @param mcInst - The MCInst to append to.

1994/// @param immediate - The immediate value to append.

1995/// @param operand - The operand, as stored in the descriptor table.

1996/// @param insn - The internal instruction.

1997 static void translateImmediate(MCInst &mcInst, uint64_t immediate,

1998 const OperandSpecifier &operand,

1999 InternalInstruction &insn,

2000 const MCDisassembler *Dis) {

2001 // Sign-extend the immediate if necessary.

2002

2003 OperandType type = (OperandType)operand.type;

2004

2005 bool isBranch = false;

2006 uint64_t pcrel = 0;

2007 if (type == TYPE_REL) {

2008 isBranch = true;

2009 pcrel = insn.startLocation + insn.length;

2010 switch (operand.encoding) {

2011 default:

2012 break;

2013 case ENCODING_Iv:

2014 switch (insn.displacementSize) {

2015 default:

2016 break;

2017 case 1:

2018 if(immediate & 0x80)

2019 immediate |= ~(0xffull);

2020 break;

2021 case 2:

2022 if(immediate & 0x8000)

2023 immediate |= ~(0xffffull);

2024 break;

2025 case 4:

2026 if(immediate & 0x80000000)

2027 immediate |= ~(0xffffffffull);

2028 break;

2029 case 8:

2030 break;

2031 }

2032 break;

2033 case ENCODING_IB:

2034 if(immediate & 0x80)

2035 immediate |= ~(0xffull);

2036 break;

2037 case ENCODING_IW:

2038 if(immediate & 0x8000)

2039 immediate |= ~(0xffffull);

2040 break;

2041 case ENCODING_ID:

2042 if(immediate & 0x80000000)

2043 immediate |= ~(0xffffffffull);

2044 break;

2045 }

2046 }

2047 // By default sign-extend all X86 immediates based on their encoding.

2048 else if (type == TYPE_IMM) {

2049 switch (operand.encoding) {

2050 default:

2051 break;

2052 case ENCODING_IB:

2053 if(immediate & 0x80)

2054 immediate |= ~(0xffull);

2055 break;

2056 case ENCODING_IW:

2057 if(immediate & 0x8000)

2058 immediate |= ~(0xffffull);

2059 break;

2060 case ENCODING_ID:

2061 if(immediate & 0x80000000)

2062 immediate |= ~(0xffffffffull);

2063 break;

2064 case ENCODING_IO:

2065 break;

2066 }

2067 }

2068

2069 switch (type) {

2070 case TYPE_XMM:

2071 mcInst.addOperand(MCOperand::createReg(X86::XMM0 + (immediate >> 4)));

2072 return;

2073 case TYPE_YMM:

2074 mcInst.addOperand(MCOperand::createReg(X86::YMM0 + (immediate >> 4)));

2075 return;

2076 case TYPE_ZMM:

2077 mcInst.addOperand(MCOperand::createReg(X86::ZMM0 + (immediate >> 4)));

2078 return;

2079 default:

2080 // operand is 64 bits wide. Do nothing.

2081 break;

2082 }

2083

2084 if (!Dis->tryAddingSymbolicOperand(

2085 mcInst, immediate + pcrel, insn.startLocation, isBranch,

2086 insn.immediateOffset, insn.immediateSize, insn.length))

2087 mcInst.addOperand(MCOperand::createImm(immediate));

2088

2089 if (type == TYPE_MOFFS) {

2090 MCOperand segmentReg;

2091 segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]);

2092 mcInst.addOperand(segmentReg);

2093 }

2094}

2095

2096/// translateRMRegister - Translates a register stored in the R/M field of the

2097/// ModR/M byte to its LLVM equivalent and appends it to an MCInst.

2098/// @param mcInst - The MCInst to append to.

2099/// @param insn - The internal instruction to extract the R/M field

2100/// from.

2101/// @return - 0 on success; -1 otherwise

2102 static bool translateRMRegister(MCInst &mcInst,

2103 InternalInstruction &insn) {

2104 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {

2105 debug("A R/M register operand may not have a SIB byte");

2106 return true;

2107 }

2108

2109 switch (insn.eaBase) {

2110 default:

2111 debug("Unexpected EA base register");

2112 return true;

2113 case EA_BASE_NONE:

2114 debug("EA_BASE_NONE for ModR/M base");

2115 return true;

2116#define ENTRY(x) case EA_BASE_##x:

2117 ALL_EA_BASES

2118#undef ENTRY

2119 debug("A R/M register operand may not have a base; "

2120 "the operand must be a register.");

2121 return true;

2122#define ENTRY(x) \

2123 case EA_REG_##x: \

2124 mcInst.addOperand(MCOperand::createReg(X86::x)); break;

2125 ALL_REGS

2126#undef ENTRY

2127 }

2128

2129 return false;

2130}

2131

2132/// translateRMMemory - Translates a memory operand stored in the Mod and R/M

2133/// fields of an internal instruction (and possibly its SIB byte) to a memory

2134/// operand in LLVM's format, and appends it to an MCInst.

2135///

2136/// @param mcInst - The MCInst to append to.

2137/// @param insn - The instruction to extract Mod, R/M, and SIB fields

2138/// from.

2139/// @param ForceSIB - The instruction must use SIB.

2140/// @return - 0 on success; nonzero otherwise

2141 static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,

2142 const MCDisassembler *Dis,

2143 bool ForceSIB = false) {

2144 // Addresses in an MCInst are represented as five operands:

2145 // 1. basereg (register) The R/M base, or (if there is a SIB) the

2146 // SIB base

2147 // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified

2148 // scale amount

2149 // 3. indexreg (register) x86_registerNONE, or (if there is a SIB)

2150 // the index (which is multiplied by the

2151 // scale amount)

2152 // 4. displacement (immediate) 0, or the displacement if there is one

2153 // 5. segmentreg (register) x86_registerNONE for now, but could be set

2154 // if we have segment overrides

2155

2156 MCOperand baseReg;

2157 MCOperand scaleAmount;

2158 MCOperand indexReg;

2159 MCOperand displacement;

2160 MCOperand segmentReg;

2161 uint64_t pcrel = 0;

2162

2163 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {

2164 if (insn.sibBase != SIB_BASE_NONE) {

2165 switch (insn.sibBase) {

2166 default:

2167 debug("Unexpected sibBase");

2168 return true;

2169#define ENTRY(x) \

2170 case SIB_BASE_##x: \

2171 baseReg = MCOperand::createReg(X86::x); break;

2172 ALL_SIB_BASES

2173#undef ENTRY

2174 }

2175 } else {

2176 baseReg = MCOperand::createReg(X86::NoRegister);

2177 }

2178

2179 if (insn.sibIndex != SIB_INDEX_NONE) {

2180 switch (insn.sibIndex) {

2181 default:

2182 debug("Unexpected sibIndex");

2183 return true;

2184#define ENTRY(x) \

2185 case SIB_INDEX_##x: \

2186 indexReg = MCOperand::createReg(X86::x); break;

2187 EA_BASES_32BIT

2188 EA_BASES_64BIT

2189 REGS_XMM

2190 REGS_YMM

2191 REGS_ZMM

2192#undef ENTRY

2193 }

2194 } else {

2195 // Use EIZ/RIZ for a few ambiguous cases where the SIB byte is present,

2196 // but no index is used and modrm alone should have been enough.

2197 // -No base register in 32-bit mode. In 64-bit mode this is used to

2198 // avoid rip-relative addressing.

2199 // -Any base register used other than ESP/RSP/R12D/R12. Using these as a

2200 // base always requires a SIB byte.

2201 // -A scale other than 1 is used.

2202 if (!ForceSIB &&

2203 (insn.sibScale != 1 ||

2204 (insn.sibBase == SIB_BASE_NONE && insn.mode != MODE_64BIT) ||

2205 (insn.sibBase != SIB_BASE_NONE &&

2206 insn.sibBase != SIB_BASE_ESP && insn.sibBase != SIB_BASE_RSP &&

2207 insn.sibBase != SIB_BASE_R12D && insn.sibBase != SIB_BASE_R12))) {

2208 indexReg = MCOperand::createReg(insn.addressSize == 4 ? X86::EIZ :

2209 X86::RIZ);

2210 } else

2211 indexReg = MCOperand::createReg(X86::NoRegister);

2212 }

2213

2214 scaleAmount = MCOperand::createImm(insn.sibScale);

2215 } else {

2216 switch (insn.eaBase) {

2217 case EA_BASE_NONE:

2218 if (insn.eaDisplacement == EA_DISP_NONE) {

2219 debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");

2220 return true;

2221 }

2222 if (insn.mode == MODE_64BIT){

2223 pcrel = insn.startLocation + insn.length;

2224 Dis->tryAddingPcLoadReferenceComment(insn.displacement + pcrel,

2225 insn.startLocation +

2226 insn.displacementOffset);

2227 // Section 2.2.1.6

2228 baseReg = MCOperand::createReg(insn.addressSize == 4 ? X86::EIP :

2229 X86::RIP);

2230 }

2231 else

2232 baseReg = MCOperand::createReg(X86::NoRegister);

2233

2234 indexReg = MCOperand::createReg(X86::NoRegister);

2235 break;

2236 case EA_BASE_BX_SI:

2237 baseReg = MCOperand::createReg(X86::BX);

2238 indexReg = MCOperand::createReg(X86::SI);

2239 break;

2240 case EA_BASE_BX_DI:

2241 baseReg = MCOperand::createReg(X86::BX);

2242 indexReg = MCOperand::createReg(X86::DI);

2243 break;

2244 case EA_BASE_BP_SI:

2245 baseReg = MCOperand::createReg(X86::BP);

2246 indexReg = MCOperand::createReg(X86::SI);

2247 break;

2248 case EA_BASE_BP_DI:

2249 baseReg = MCOperand::createReg(X86::BP);

2250 indexReg = MCOperand::createReg(X86::DI);

2251 break;

2252 default:

2253 indexReg = MCOperand::createReg(X86::NoRegister);

2254 switch (insn.eaBase) {

2255 default:

2256 debug("Unexpected eaBase");

2257 return true;

2258 // Here, we will use the fill-ins defined above. However,

2259 // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and

2260 // sib and sib64 were handled in the top-level if, so they're only

2261 // placeholders to keep the compiler happy.

2262#define ENTRY(x) \

2263 case EA_BASE_##x: \

2264 baseReg = MCOperand::createReg(X86::x); break;

2265 ALL_EA_BASES

2266#undef ENTRY

2267#define ENTRY(x) case EA_REG_##x:

2268 ALL_REGS

2269#undef ENTRY

2270 debug("A R/M memory operand may not be a register; "

2271 "the base field must be a base.");

2272 return true;

2273 }

2274 }

2275

2276 scaleAmount = MCOperand::createImm(1);

2277 }

2278

2279 displacement = MCOperand::createImm(insn.displacement);

2280

2281 segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]);

2282

2283 mcInst.addOperand(baseReg);

2284 mcInst.addOperand(scaleAmount);

2285 mcInst.addOperand(indexReg);

2286

2287 const uint8_t dispSize =

2288 (insn.eaDisplacement == EA_DISP_NONE) ? 0 : insn.displacementSize;

2289

2290 if (!Dis->tryAddingSymbolicOperand(

2291 mcInst, insn.displacement + pcrel, insn.startLocation, false,

2292 insn.displacementOffset, dispSize, insn.length))

2293 mcInst.addOperand(displacement);

2294 mcInst.addOperand(segmentReg);

2295 return false;

2296}

2297

2298/// translateRM - Translates an operand stored in the R/M (and possibly SIB)

2299/// byte of an instruction to LLVM form, and appends it to an MCInst.

2300///

2301/// @param mcInst - The MCInst to append to.

2302/// @param operand - The operand, as stored in the descriptor table.

2303/// @param insn - The instruction to extract Mod, R/M, and SIB fields

2304/// from.

2305/// @return - 0 on success; nonzero otherwise

2306 static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,

2307 InternalInstruction &insn, const MCDisassembler *Dis) {

2308 switch (operand.type) {

2309 default:

2310 debug("Unexpected type for a R/M operand");

2311 return true;

2312 case TYPE_R8:

2313 case TYPE_R16:

2314 case TYPE_R32:

2315 case TYPE_R64:

2316 case TYPE_Rv:

2317 case TYPE_MM64:

2318 case TYPE_XMM:

2319 case TYPE_YMM:

2320 case TYPE_ZMM:

2321 case TYPE_TMM:

2322 case TYPE_VK_PAIR:

2323 case TYPE_VK:

2324 case TYPE_DEBUGREG:

2325 case TYPE_CONTROLREG:

2326 case TYPE_BNDR:

2327 return translateRMRegister(mcInst, insn);

2328 case TYPE_M:

2329 case TYPE_MVSIBX:

2330 case TYPE_MVSIBY:

2331 case TYPE_MVSIBZ:

2332 return translateRMMemory(mcInst, insn, Dis);

2333 case TYPE_MSIB:

2334 return translateRMMemory(mcInst, insn, Dis, true);

2335 }

2336}

2337

2338/// translateFPRegister - Translates a stack position on the FPU stack to its

2339/// LLVM form, and appends it to an MCInst.

2340///

2341/// @param mcInst - The MCInst to append to.

2342/// @param stackPos - The stack position to translate.

2343 static void translateFPRegister(MCInst &mcInst,

2344 uint8_t stackPos) {

2345 mcInst.addOperand(MCOperand::createReg(X86::ST0 + stackPos));

2346}

2347

2348/// translateMaskRegister - Translates a 3-bit mask register number to

2349/// LLVM form, and appends it to an MCInst.

2350///

2351/// @param mcInst - The MCInst to append to.

2352/// @param maskRegNum - Number of mask register from 0 to 7.

2353/// @return - false on success; true otherwise.

2354 static bool translateMaskRegister(MCInst &mcInst,

2355 uint8_t maskRegNum) {

2356 if (maskRegNum >= 8) {

2357 debug("Invalid mask register number");

2358 return true;

2359 }

2360

2361 mcInst.addOperand(MCOperand::createReg(X86::K0 + maskRegNum));

2362 return false;

2363}

2364

2365/// translateOperand - Translates an operand stored in an internal instruction

2366/// to LLVM's format and appends it to an MCInst.

2367///

2368/// @param mcInst - The MCInst to append to.

2369/// @param operand - The operand, as stored in the descriptor table.

2370/// @param insn - The internal instruction.

2371/// @return - false on success; true otherwise.

2372 static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,

2373 InternalInstruction &insn,

2374 const MCDisassembler *Dis) {

2375 switch (operand.encoding) {

2376 default:

2377 debug("Unhandled operand encoding during translation");

2378 return true;

2379 case ENCODING_REG:

2380 translateRegister(mcInst, insn.reg);

2381 return false;

2382 case ENCODING_WRITEMASK:

2383 return translateMaskRegister(mcInst, insn.writemask);

2384 case ENCODING_SIB:

2385 CASE_ENCODING_RM:

2386 CASE_ENCODING_VSIB:

2387 return translateRM(mcInst, operand, insn, Dis);

2388 case ENCODING_IB:

2389 case ENCODING_IW:

2390 case ENCODING_ID:

2391 case ENCODING_IO:

2392 case ENCODING_Iv:

2393 case ENCODING_Ia:

2394 translateImmediate(mcInst,

2395 insn.immediates[insn.numImmediatesTranslated++],

2396 operand,

2397 insn,

2398 Dis);

2399 return false;

2400 case ENCODING_IRC:

2401 mcInst.addOperand(MCOperand::createImm(insn.RC));

2402 return false;

2403 case ENCODING_SI:

2404 return translateSrcIndex(mcInst, insn);

2405 case ENCODING_DI:

2406 return translateDstIndex(mcInst, insn);

2407 case ENCODING_RB:

2408 case ENCODING_RW:

2409 case ENCODING_RD:

2410 case ENCODING_RO:

2411 case ENCODING_Rv:

2412 translateRegister(mcInst, insn.opcodeRegister);

2413 return false;

2414 case ENCODING_CF:

2415 mcInst.addOperand(MCOperand::createImm(insn.immediates[1]));

2416 return false;

2417 case ENCODING_CC:

2418 if (isCCMPOrCTEST(&insn))

2419 mcInst.addOperand(MCOperand::createImm(insn.immediates[2]));

2420 else

2421 mcInst.addOperand(MCOperand::createImm(insn.immediates[1]));

2422 return false;

2423 case ENCODING_FP:

2424 translateFPRegister(mcInst, insn.modRM & 7);

2425 return false;

2426 case ENCODING_VVVV:

2427 translateRegister(mcInst, insn.vvvv);

2428 return false;

2429 case ENCODING_DUP:

2430 return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0],

2431 insn, Dis);

2432 }

2433}

2434

2435/// translateInstruction - Translates an internal instruction and all its

2436/// operands to an MCInst.

2437///

2438/// @param mcInst - The MCInst to populate with the instruction's data.

2439/// @param insn - The internal instruction.

2440/// @return - false on success; true otherwise.

2441 static bool translateInstruction(MCInst &mcInst,

2442 InternalInstruction &insn,

2443 const MCDisassembler *Dis) {

2444 if (!insn.spec) {

2445 debug("Instruction has no specification");

2446 return true;

2447 }

2448

2449 mcInst.clear();

2450 mcInst.setOpcode(insn.instructionID);

2451 // If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3

2452 // prefix bytes should be disassembled as xrelease and xacquire then set the

2453 // opcode to those instead of the rep and repne opcodes.

2454 if (insn.xAcquireRelease) {

2455 if(mcInst.getOpcode() == X86::REP_PREFIX)

2456 mcInst.setOpcode(X86::XRELEASE_PREFIX);

2457 else if(mcInst.getOpcode() == X86::REPNE_PREFIX)

2458 mcInst.setOpcode(X86::XACQUIRE_PREFIX);

2459 }

2460

2461 insn.numImmediatesTranslated = 0;

2462

2463 for (const auto &Op : insn.operands) {

2464 if (Op.encoding != ENCODING_NONE) {

2465 if (translateOperand(mcInst, Op, insn, Dis)) {

2466 return true;

2467 }

2468 }

2469 }

2470

2471 return false;

2472}

2473

2474 static MCDisassembler *createX86Disassembler(const Target &T,

2475 const MCSubtargetInfo &STI,

2476 MCContext &Ctx) {

2477 std::unique_ptr<const MCInstrInfo> MII(T.createMCInstrInfo());

2478 return new X86GenericDisassembler(STI, Ctx, std::move(MII));

2479}

2480

2481 extern "C" LLVM_C_ABI void LLVMInitializeX86Disassembler() {

2482 // Register the disassembler.

2483 TargetRegistry::RegisterMCDisassembler(getTheX86_32Target(),

2484 createX86Disassembler);

2485 TargetRegistry::RegisterMCDisassembler(getTheX86_64Target(),

2486 createX86Disassembler);

2487}

Fail

#define Fail

Definition AArch64Disassembler.cpp:43

DecodeStatus

MCDisassembler::DecodeStatus DecodeStatus

Definition AArch64Disassembler.cpp:36

assert

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

const

aarch64 promote const

Definition AArch64PromoteConstant.cpp:228

instr

@ instr

Definition AArch64StackTagging.cpp:85

Format.h

op

#define op(i)

InlinePriorityMode::Size

@ Size

Definition InlineOrder.cpp:25

T

#define T

Definition Mips16ISelLowering.cpp:353

isBranch

static bool isBranch(unsigned Opcode)

Definition R600InstrInfo.cpp:632

name

static const char * name

Definition SMEABIPass.cpp:52

Debug.h

LLVM_DEBUG

#define LLVM_DEBUG(...)

Definition Debug.h:114

TargetRegistry.h

Visibility.h

LLVM_C_ABI

#define LLVM_C_ABI

LLVM_C_ABI is the export/visibility macro used to mark symbols declared in llvm-c as exported when bu...

Definition Visibility.h:40

readOpcode

static uint8_t readOpcode(WasmObjectFile::ReadContext &Ctx)

Definition WasmObjectFile.cpp:171

nextByte

static int nextByte(ArrayRef< uint8_t > Bytes, uint64_t &Size)

Definition WebAssemblyDisassembler.cpp:78

isPrefix

static bool isPrefix(unsigned Opcode, const MCInstrInfo &MCII)

Check if the instruction is a prefix.

Definition X86AsmBackend.cpp:264

X86BaseInfo.h

TWOBYTE_SYM

#define TWOBYTE_SYM

Definition X86DisassemblerDecoderCommon.h:26

MAP4_SYM

#define MAP4_SYM

Definition X86DisassemblerDecoderCommon.h:33

CASE_ENCODING_VSIB

#define CASE_ENCODING_VSIB

Definition X86DisassemblerDecoderCommon.h:432

XOP9_MAP_SYM

#define XOP9_MAP_SYM

Definition X86DisassemblerDecoderCommon.h:30

CASE_ENCODING_RM

#define CASE_ENCODING_RM

Definition X86DisassemblerDecoderCommon.h:423

THREEDNOW_MAP_SYM

#define THREEDNOW_MAP_SYM

Definition X86DisassemblerDecoderCommon.h:32

INSTRUCTIONS_SYM

#define INSTRUCTIONS_SYM

Definition X86DisassemblerDecoderCommon.h:23

THREEBYTE3A_SYM

#define THREEBYTE3A_SYM

Definition X86DisassemblerDecoderCommon.h:28

XOP8_MAP_SYM

#define XOP8_MAP_SYM

Definition X86DisassemblerDecoderCommon.h:29

THREEBYTE38_SYM

#define THREEBYTE38_SYM

Definition X86DisassemblerDecoderCommon.h:27

XOPA_MAP_SYM

#define XOPA_MAP_SYM

Definition X86DisassemblerDecoderCommon.h:31

MAP6_SYM

#define MAP6_SYM

Definition X86DisassemblerDecoderCommon.h:35

MAP7_SYM

#define MAP7_SYM

Definition X86DisassemblerDecoderCommon.h:36

ONEBYTE_SYM

#define ONEBYTE_SYM

Definition X86DisassemblerDecoderCommon.h:25

MAP5_SYM

#define MAP5_SYM

Definition X86DisassemblerDecoderCommon.h:34

X86DisassemblerDecoder.h

rFromEVEX2of4

#define rFromEVEX2of4(evex)

Definition X86DisassemblerDecoder.h:92

lFromEVEX4of4

#define lFromEVEX4of4(evex)

Definition X86DisassemblerDecoder.h:105

l2FromEVEX4of4

#define l2FromEVEX4of4(evex)

Definition X86DisassemblerDecoder.h:104

rFromVEX2of3

#define rFromVEX2of3(vex)

Definition X86DisassemblerDecoder.h:83

zFromEVEX4of4

#define zFromEVEX4of4(evex)

Definition X86DisassemblerDecoder.h:103

wFromREX2

#define wFromREX2(rex2)

Definition X86DisassemblerDecoder.h:64

rFromREX

#define rFromREX(rex)

Definition X86DisassemblerDecoder.h:56

bFromXOP2of3

#define bFromXOP2of3(xop)

Definition X86DisassemblerDecoder.h:71

xFromVEX2of3

#define xFromVEX2of3(vex)

Definition X86DisassemblerDecoder.h:84

mmmmmFromVEX2of3

#define mmmmmFromVEX2of3(vex)

Definition X86DisassemblerDecoder.h:86

rmFromModRM

#define rmFromModRM(modRM)

Definition X86DisassemblerDecoder.h:49

bFromREX2

#define bFromREX2(rex2)

Definition X86DisassemblerDecoder.h:67

baseFromSIB

#define baseFromSIB(sib)

Definition X86DisassemblerDecoder.h:53

bFromEVEX4of4

#define bFromEVEX4of4(evex)

Definition X86DisassemblerDecoder.h:106

rFromVEX2of2

#define rFromVEX2of2(vex)

Definition X86DisassemblerDecoder.h:78

ppFromEVEX3of4

#define ppFromEVEX3of4(evex)

Definition X86DisassemblerDecoder.h:101

v2FromEVEX4of4

#define v2FromEVEX4of4(evex)

Definition X86DisassemblerDecoder.h:107

modFromModRM

#define modFromModRM(modRM)

Definition X86DisassemblerDecoder.h:47

rFromXOP2of3

#define rFromXOP2of3(xop)

Definition X86DisassemblerDecoder.h:69

wFromREX

#define wFromREX(rex)

Definition X86DisassemblerDecoder.h:55

lFromXOP3of3

#define lFromXOP3of3(xop)

Definition X86DisassemblerDecoder.h:75

EA_BASES_64BIT

#define EA_BASES_64BIT

Definition X86DisassemblerDecoder.h:287

lFromVEX2of2

#define lFromVEX2of2(vex)

Definition X86DisassemblerDecoder.h:80

REGS_YMM

#define REGS_YMM

Definition X86DisassemblerDecoder.h:399

x2FromREX2

#define x2FromREX2(rex2)

Definition X86DisassemblerDecoder.h:62

scFromEVEX4of4

#define scFromEVEX4of4(evex)

Definition X86DisassemblerDecoder.h:110

scaleFromSIB

#define scaleFromSIB(sib)

Definition X86DisassemblerDecoder.h:51

REGS_XMM

#define REGS_XMM

Definition X86DisassemblerDecoder.h:365

rFromREX2

#define rFromREX2(rex2)

Definition X86DisassemblerDecoder.h:65

regFromModRM

#define regFromModRM(modRM)

Definition X86DisassemblerDecoder.h:48

b2FromEVEX2of4

#define b2FromEVEX2of4(evex)

Definition X86DisassemblerDecoder.h:96

b2FromREX2

#define b2FromREX2(rex2)

Definition X86DisassemblerDecoder.h:63

vvvvFromVEX2of2

#define vvvvFromVEX2of2(vex)

Definition X86DisassemblerDecoder.h:79

nfFromEVEX4of4

#define nfFromEVEX4of4(evex)

Definition X86DisassemblerDecoder.h:109

ALL_REGS

#define ALL_REGS

Definition X86DisassemblerDecoder.h:547

ppFromXOP3of3

#define ppFromXOP3of3(xop)

Definition X86DisassemblerDecoder.h:76

ALL_SIB_BASES

#define ALL_SIB_BASES

Definition X86DisassemblerDecoder.h:543

vvvvFromVEX3of3

#define vvvvFromVEX3of3(vex)

Definition X86DisassemblerDecoder.h:88

r2FromEVEX2of4

#define r2FromEVEX2of4(evex)

Definition X86DisassemblerDecoder.h:95

uFromEVEX3of4

#define uFromEVEX3of4(evex)

Definition X86DisassemblerDecoder.h:100

xFromREX2

#define xFromREX2(rex2)

Definition X86DisassemblerDecoder.h:66

EA_BASES_32BIT

#define EA_BASES_32BIT

Definition X86DisassemblerDecoder.h:219

xFromXOP2of3

#define xFromXOP2of3(xop)

Definition X86DisassemblerDecoder.h:70

wFromEVEX3of4

#define wFromEVEX3of4(evex)

Definition X86DisassemblerDecoder.h:98

bFromVEX2of3

#define bFromVEX2of3(vex)

Definition X86DisassemblerDecoder.h:85

wFromVEX3of3

#define wFromVEX3of3(vex)

Definition X86DisassemblerDecoder.h:87

mmmmmFromXOP2of3

#define mmmmmFromXOP2of3(xop)

Definition X86DisassemblerDecoder.h:72

aaaFromEVEX4of4

#define aaaFromEVEX4of4(evex)

Definition X86DisassemblerDecoder.h:108

lFromVEX3of3

#define lFromVEX3of3(vex)

Definition X86DisassemblerDecoder.h:89

mmmFromEVEX2of4

#define mmmFromEVEX2of4(evex)

Definition X86DisassemblerDecoder.h:97

ppFromVEX3of3

#define ppFromVEX3of3(vex)

Definition X86DisassemblerDecoder.h:90

bFromEVEX2of4

#define bFromEVEX2of4(evex)

Definition X86DisassemblerDecoder.h:94

xFromEVEX2of4

#define xFromEVEX2of4(evex)

Definition X86DisassemblerDecoder.h:93

REGS_ZMM

#define REGS_ZMM

Definition X86DisassemblerDecoder.h:433

ppFromVEX2of2

#define ppFromVEX2of2(vex)

Definition X86DisassemblerDecoder.h:81

indexFromSIB

#define indexFromSIB(sib)

Definition X86DisassemblerDecoder.h:52

ALL_EA_BASES

#define ALL_EA_BASES

Definition X86DisassemblerDecoder.h:538

mFromREX2

#define mFromREX2(rex2)

Definition X86DisassemblerDecoder.h:60

vvvvFromXOP3of3

#define vvvvFromXOP3of3(xop)

Definition X86DisassemblerDecoder.h:74

wFromXOP3of3

#define wFromXOP3of3(xop)

Definition X86DisassemblerDecoder.h:73

r2FromREX2

#define r2FromREX2(rex2)

Definition X86DisassemblerDecoder.h:61

oszcFromEVEX3of4

#define oszcFromEVEX3of4(evex)

Definition X86DisassemblerDecoder.h:102

vvvvFromEVEX3of4

#define vvvvFromEVEX3of4(evex)

Definition X86DisassemblerDecoder.h:99

xFromREX

#define xFromREX(rex)

Definition X86DisassemblerDecoder.h:57

bFromREX

#define bFromREX(rex)

Definition X86DisassemblerDecoder.h:58

translateRegister

static void translateRegister(MCInst &mcInst, Reg reg)

translateRegister - Translates an internal register to the appropriate LLVM register,...

Definition X86Disassembler.cpp:1927

isREX2

static bool isREX2(struct InternalInstruction *insn, uint8_t prefix)

Definition X86Disassembler.cpp:213

getInstructionID

static int getInstructionID(struct InternalInstruction *insn, const MCInstrInfo *mii)

Definition X86Disassembler.cpp:1194

readOpcode

static bool readOpcode(struct InternalInstruction *insn)

Definition X86Disassembler.cpp:925

createX86Disassembler

static MCDisassembler * createX86Disassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx)

Definition X86Disassembler.cpp:2474

translateMaskRegister

static bool translateMaskRegister(MCInst &mcInst, uint8_t maskRegNum)

translateMaskRegister - Translates a 3-bit mask register number to LLVM form, and appends it to an MC...

Definition X86Disassembler.cpp:2354

translateDstIndex

static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn)

translateDstIndex - Appends a destination index operand to an MCInst.

Definition X86Disassembler.cpp:1975

translateImmediate

static void translateImmediate(MCInst &mcInst, uint64_t immediate, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)

translateImmediate - Appends an immediate operand to an MCInst.

Definition X86Disassembler.cpp:1997

readOperands

static int readOperands(struct InternalInstruction *insn)

Definition X86Disassembler.cpp:1632

translateFPRegister

static void translateFPRegister(MCInst &mcInst, uint8_t stackPos)

translateFPRegister - Translates a stack position on the FPU stack to its LLVM form,...

Definition X86Disassembler.cpp:2343

is64Bit

static bool is64Bit(const char *name)

Definition X86Disassembler.cpp:1082

segmentRegnums

static const uint8_t segmentRegnums[SEG_OVERRIDE_max]

Definition X86Disassembler.cpp:1936

readImmediate

static int readImmediate(struct InternalInstruction *insn, uint8_t size)

Definition X86Disassembler.cpp:1548

getInstructionIDWithAttrMask

static int getInstructionIDWithAttrMask(uint16_t *instructionID, struct InternalInstruction *insn, uint16_t attrMask)

Definition X86Disassembler.cpp:1093

readSIB

static int readSIB(struct InternalInstruction *insn)

Definition X86Disassembler.cpp:549

isREX

static bool isREX(struct InternalInstruction *insn, uint8_t prefix)

Definition X86Disassembler.cpp:209

readVVVV

static int readVVVV(struct InternalInstruction *insn)

Definition X86Disassembler.cpp:1592

isNF

static bool isNF(InternalInstruction *insn)

Definition X86Disassembler.cpp:1170

translateSrcIndex

static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn)

translateSrcIndex - Appends a source index operand to an MCInst.

Definition X86Disassembler.cpp:1950

GENERIC_FIXUP_FUNC

#define GENERIC_FIXUP_FUNC(name, base, prefix)

Definition X86Disassembler.cpp:781

readMaskRegister

static int readMaskRegister(struct InternalInstruction *insn)

Definition X86Disassembler.cpp:1619

translateRM

static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)

translateRM - Translates an operand stored in the R/M (and possibly SIB) byte of an instruction to LL...

Definition X86Disassembler.cpp:2306

decode

static InstrUID decode(OpcodeType type, InstructionContext insnContext, uint8_t opcode, uint8_t modRM)

Definition X86Disassembler.cpp:124

readOpcodeRegister

static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size)

Definition X86Disassembler.cpp:1504

readDisplacement

static int readDisplacement(struct InternalInstruction *insn)

Definition X86Disassembler.cpp:613

isCCMPOrCTEST

static bool isCCMPOrCTEST(InternalInstruction *insn)

Definition X86Disassembler.cpp:1151

LLVMInitializeX86Disassembler

LLVM_C_ABI void LLVMInitializeX86Disassembler()

Definition X86Disassembler.cpp:2481

fixupReg

static int fixupReg(struct InternalInstruction *insn, const struct OperandSpecifier *op)

Definition X86Disassembler.cpp:865

debug

#define debug(s)

Definition X86Disassembler.cpp:97

readModRM

static int readModRM(struct InternalInstruction *insn)

Definition X86Disassembler.cpp:644

is16BitEquivalent

static bool is16BitEquivalent(const char *orig, const char *equiv)

Definition X86Disassembler.cpp:1063

translateRMMemory

static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, const MCDisassembler *Dis, bool ForceSIB=false)

translateRMMemory - Translates a memory operand stored in the Mod and R/M fields of an internal instr...

Definition X86Disassembler.cpp:2141

translateInstruction

static bool translateInstruction(MCInst &target, InternalInstruction &source, const MCDisassembler *Dis)

translateInstruction - Translates an internal instruction and all its operands to an MCInst.

Definition X86Disassembler.cpp:2441

translateRMRegister

static bool translateRMRegister(MCInst &mcInst, InternalInstruction &insn)

translateRMRegister - Translates a register stored in the R/M field of the ModR/M byte to its LLVM eq...

Definition X86Disassembler.cpp:2102

translateOperand

static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)

translateOperand - Translates an operand stored in an internal instruction to LLVM's format and appen...

Definition X86Disassembler.cpp:2372

readPrefixes

static int readPrefixes(struct InternalInstruction *insn)

Definition X86Disassembler.cpp:222

peek

static bool peek(struct InternalInstruction *insn, uint8_t &byte)

Definition X86Disassembler.cpp:191

X86MCTargetDesc.h

X86TargetInfo.h

llvm::ArrayRef

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.

Definition ArrayRef.h:41

llvm::ArrayRef::size

size_t size() const

size - Get the array size.

Definition ArrayRef.h:143

llvm::ArrayRef::empty

bool empty() const

empty - Check if the array is empty.

Definition ArrayRef.h:138

llvm::MCContext

Context object for machine code objects.

Definition MCContext.h:83

llvm::MCDisassembler

Superclass for all disassemblers.

Definition MCDisassembler.h:85

llvm::MCDisassembler::tryAddingSymbolicOperand

bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) const

Definition MCDisassembler.cpp:28

llvm::MCDisassembler::tryAddingPcLoadReferenceComment

void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const

Definition MCDisassembler.cpp:41

llvm::MCDisassembler::DecodeStatus

DecodeStatus

Ternary decode status.

Definition MCDisassembler.h:109

llvm::MCInst

Instances of this class represent a single low-level machine instruction.

Definition MCInst.h:188

llvm::MCInst::getOpcode

unsigned getOpcode() const

Definition MCInst.h:202

llvm::MCInst::addOperand

void addOperand(const MCOperand Op)

Definition MCInst.h:215

llvm::MCInst::setOpcode

void setOpcode(unsigned Op)

Definition MCInst.h:201

llvm::MCInst::clear

void clear()

Definition MCInst.h:223

llvm::MCInstrInfo

Interface to description of machine instruction set.

Definition MCInstrInfo.h:27

llvm::MCInstrInfo::getName

StringRef getName(unsigned Opcode) const

Returns the name for the instructions with the given opcode.

Definition MCInstrInfo.h:97

llvm::MCOperand

Instances of this class represent operands of the MCInst class.

Definition MCInst.h:40

llvm::MCOperand::createReg

static MCOperand createReg(MCRegister Reg)

Definition MCInst.h:138

llvm::MCOperand::createImm

static MCOperand createImm(int64_t Val)

Definition MCInst.h:145

llvm::MCSubtargetInfo

Generic base class for all target subtargets.

Definition MCSubtargetInfo.h:77

llvm::MCSubtargetInfo::getFeatureBits

const FeatureBitset & getFeatureBits() const

Definition MCSubtargetInfo.h:115

llvm::StringRef

StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.

Definition StringRef.h:55

llvm::StringRef::data

constexpr const char * data() const

data - Get a pointer to the start of the string (which may not be null terminated).

Definition StringRef.h:140

llvm::Target

Target - Wrapper for Target specific information.

Definition TargetRegistry.h:146

llvm::raw_ostream

This class implements an extremely fast bulk output stream that can only output to a stream.

Definition raw_ostream.h:53

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

Definition ErrorHandling.h:164

llvm::MipsISD::Ret

@ Ret

Definition MipsISelLowering.h:117

llvm::X86Disassembler

Definition X86DisassemblerDecoderCommon.h:21

llvm::X86Disassembler::EABase

EABase

All possible values of the base field for effective-address computations, a.k.a. the Mod and R/M fields of the ModR/M byte.

Definition X86DisassemblerDecoder.h:568

llvm::X86Disassembler::EA_BASE_NONE

@ EA_BASE_NONE

Definition X86DisassemblerDecoder.h:570

llvm::X86Disassembler::VEX_LOB_MAP4

@ VEX_LOB_MAP4

Definition X86DisassemblerDecoder.h:639

llvm::X86Disassembler::VEX_LOB_MAP6

@ VEX_LOB_MAP6

Definition X86DisassemblerDecoder.h:641

llvm::X86Disassembler::VEX_LOB_0F

@ VEX_LOB_0F

Definition X86DisassemblerDecoder.h:636

llvm::X86Disassembler::VEX_LOB_MAP5

@ VEX_LOB_MAP5

Definition X86DisassemblerDecoder.h:640

llvm::X86Disassembler::VEX_LOB_0F3A

@ VEX_LOB_0F3A

Definition X86DisassemblerDecoder.h:638

llvm::X86Disassembler::VEX_LOB_MAP7

@ VEX_LOB_MAP7

Definition X86DisassemblerDecoder.h:642

llvm::X86Disassembler::VEX_LOB_0F38

@ VEX_LOB_0F38

Definition X86DisassemblerDecoder.h:637

llvm::X86Disassembler::Reg

Reg

All possible values of the reg field in the ModR/M byte.

Definition X86DisassemblerDecoder.h:614

llvm::X86Disassembler::DisassemblerMode

DisassemblerMode

Decoding mode for the Intel disassembler.

Definition X86DisassemblerDecoderCommon.h:541

llvm::X86Disassembler::MODE_64BIT

@ MODE_64BIT

Definition X86DisassemblerDecoderCommon.h:541

llvm::X86Disassembler::MODE_16BIT

@ MODE_16BIT

Definition X86DisassemblerDecoderCommon.h:541

llvm::X86Disassembler::MODE_32BIT

@ MODE_32BIT

Definition X86DisassemblerDecoderCommon.h:541

llvm::X86Disassembler::InstrUID

uint16_t InstrUID

Definition X86DisassemblerDecoderCommon.h:394

llvm::X86Disassembler::ATTR_OPSIZE

@ ATTR_OPSIZE

Definition X86DisassemblerDecoderCommon.h:62

llvm::X86Disassembler::ATTR_EVEXKZ

@ ATTR_EVEXKZ

Definition X86DisassemblerDecoderCommon.h:69

llvm::X86Disassembler::ATTR_64BIT

@ ATTR_64BIT

Definition X86DisassemblerDecoderCommon.h:58

llvm::X86Disassembler::ATTR_REX2

@ ATTR_REX2

Definition X86DisassemblerDecoderCommon.h:71

llvm::X86Disassembler::ATTR_EVEXU

@ ATTR_EVEXU

Definition X86DisassemblerDecoderCommon.h:73

llvm::X86Disassembler::ATTR_NONE

@ ATTR_NONE

Definition X86DisassemblerDecoderCommon.h:57

llvm::X86Disassembler::ATTR_EVEX

@ ATTR_EVEX

Definition X86DisassemblerDecoderCommon.h:66

llvm::X86Disassembler::ATTR_EVEXL2

@ ATTR_EVEXL2

Definition X86DisassemblerDecoderCommon.h:67

llvm::X86Disassembler::ATTR_VEX

@ ATTR_VEX

Definition X86DisassemblerDecoderCommon.h:64

llvm::X86Disassembler::ATTR_EVEXK

@ ATTR_EVEXK

Definition X86DisassemblerDecoderCommon.h:68

llvm::X86Disassembler::ATTR_EVEXB

@ ATTR_EVEXB

Definition X86DisassemblerDecoderCommon.h:70

llvm::X86Disassembler::ATTR_EVEXNF

@ ATTR_EVEXNF

Definition X86DisassemblerDecoderCommon.h:72

llvm::X86Disassembler::ATTR_REXW

@ ATTR_REXW

Definition X86DisassemblerDecoderCommon.h:61

llvm::X86Disassembler::ATTR_XS

@ ATTR_XS

Definition X86DisassemblerDecoderCommon.h:59

llvm::X86Disassembler::ATTR_ADSIZE

@ ATTR_ADSIZE

Definition X86DisassemblerDecoderCommon.h:63

llvm::X86Disassembler::ATTR_XD

@ ATTR_XD

Definition X86DisassemblerDecoderCommon.h:60

llvm::X86Disassembler::ATTR_VEXL

@ ATTR_VEXL

Definition X86DisassemblerDecoderCommon.h:65

llvm::X86Disassembler::TYPE_XOP

@ TYPE_XOP

Definition X86DisassemblerDecoder.h:664

llvm::X86Disassembler::TYPE_VEX_2B

@ TYPE_VEX_2B

Definition X86DisassemblerDecoder.h:661

llvm::X86Disassembler::TYPE_NO_VEX_XOP

@ TYPE_NO_VEX_XOP

Definition X86DisassemblerDecoder.h:660

llvm::X86Disassembler::TYPE_VEX_3B

@ TYPE_VEX_3B

Definition X86DisassemblerDecoder.h:662

llvm::X86Disassembler::TYPE_EVEX

@ TYPE_EVEX

Definition X86DisassemblerDecoder.h:663

llvm::X86Disassembler::InstructionContext

InstructionContext

Definition X86DisassemblerDecoderCommon.h:367

llvm::X86Disassembler::IC_max

@ IC_max

Definition X86DisassemblerDecoderCommon.h:367

llvm::X86Disassembler::OpcodeType

OpcodeType

Definition X86DisassemblerDecoderCommon.h:372

llvm::X86Disassembler::THREEDNOW_MAP

@ THREEDNOW_MAP

Definition X86DisassemblerDecoderCommon.h:380

llvm::X86Disassembler::MAP6

@ MAP6

Definition X86DisassemblerDecoderCommon.h:383

llvm::X86Disassembler::THREEBYTE_38

@ THREEBYTE_38

Definition X86DisassemblerDecoderCommon.h:375

llvm::X86Disassembler::XOPA_MAP

@ XOPA_MAP

Definition X86DisassemblerDecoderCommon.h:379

llvm::X86Disassembler::TWOBYTE

@ TWOBYTE

Definition X86DisassemblerDecoderCommon.h:374

llvm::X86Disassembler::MAP7

@ MAP7

Definition X86DisassemblerDecoderCommon.h:384

llvm::X86Disassembler::MAP5

@ MAP5

Definition X86DisassemblerDecoderCommon.h:382

llvm::X86Disassembler::MAP4

@ MAP4

Definition X86DisassemblerDecoderCommon.h:381

llvm::X86Disassembler::XOP9_MAP

@ XOP9_MAP

Definition X86DisassemblerDecoderCommon.h:378

llvm::X86Disassembler::XOP8_MAP

@ XOP8_MAP

Definition X86DisassemblerDecoderCommon.h:377

llvm::X86Disassembler::THREEBYTE_3A

@ THREEBYTE_3A

Definition X86DisassemblerDecoderCommon.h:376

llvm::X86Disassembler::ONEBYTE

@ ONEBYTE

Definition X86DisassemblerDecoderCommon.h:373

llvm::X86Disassembler::OperandType

OperandType

Definition X86DisassemblerDecoderCommon.h:527

llvm::X86Disassembler::SIBBase

SIBBase

All possible values of the SIB base field.

Definition X86DisassemblerDecoder.h:599

llvm::X86Disassembler::SIB_BASE_NONE

@ SIB_BASE_NONE

Definition X86DisassemblerDecoder.h:601

llvm::X86Disassembler::OperandEncoding

OperandEncoding

Definition X86DisassemblerDecoderCommon.h:485

llvm::X86Disassembler::VEX_PREFIX_66

@ VEX_PREFIX_66

Definition X86DisassemblerDecoder.h:654

llvm::X86Disassembler::VEX_PREFIX_F3

@ VEX_PREFIX_F3

Definition X86DisassemblerDecoder.h:655

llvm::X86Disassembler::VEX_PREFIX_NONE

@ VEX_PREFIX_NONE

Definition X86DisassemblerDecoder.h:653

llvm::X86Disassembler::VEX_PREFIX_F2

@ VEX_PREFIX_F2

Definition X86DisassemblerDecoder.h:656

llvm::X86Disassembler::EA_DISP_32

@ EA_DISP_32

Definition X86DisassemblerDecoder.h:610

llvm::X86Disassembler::EA_DISP_NONE

@ EA_DISP_NONE

Definition X86DisassemblerDecoder.h:610

llvm::X86Disassembler::EA_DISP_8

@ EA_DISP_8

Definition X86DisassemblerDecoder.h:610

llvm::X86Disassembler::EA_DISP_16

@ EA_DISP_16

Definition X86DisassemblerDecoder.h:610

llvm::X86Disassembler::SIBIndex

SIBIndex

All possible values of the SIB index field.

Definition X86DisassemblerDecoder.h:585

llvm::X86Disassembler::SIB_INDEX_NONE

@ SIB_INDEX_NONE

Definition X86DisassemblerDecoder.h:587

llvm::X86Disassembler::XOP_MAP_SELECT_8

@ XOP_MAP_SELECT_8

Definition X86DisassemblerDecoder.h:646

llvm::X86Disassembler::XOP_MAP_SELECT_9

@ XOP_MAP_SELECT_9

Definition X86DisassemblerDecoder.h:647

llvm::X86Disassembler::XOP_MAP_SELECT_A

@ XOP_MAP_SELECT_A

Definition X86DisassemblerDecoder.h:648

llvm::X86Disassembler::SEG_OVERRIDE_max

@ SEG_OVERRIDE_max

Definition X86DisassemblerDecoder.h:631

llvm::X86Disassembler::SEG_OVERRIDE_FS

@ SEG_OVERRIDE_FS

Definition X86DisassemblerDecoder.h:629

llvm::X86Disassembler::SEG_OVERRIDE_SS

@ SEG_OVERRIDE_SS

Definition X86DisassemblerDecoder.h:626

llvm::X86Disassembler::SEG_OVERRIDE_GS

@ SEG_OVERRIDE_GS

Definition X86DisassemblerDecoder.h:630

llvm::X86Disassembler::SEG_OVERRIDE_CS

@ SEG_OVERRIDE_CS

Definition X86DisassemblerDecoder.h:625

llvm::X86Disassembler::SEG_OVERRIDE_ES

@ SEG_OVERRIDE_ES

Definition X86DisassemblerDecoder.h:628

llvm::X86Disassembler::SEG_OVERRIDE_DS

@ SEG_OVERRIDE_DS

Definition X86DisassemblerDecoder.h:627

llvm::X86

Define some predicates that are used for node matching.

Definition X86TargetParser.h:25

llvm::X86::BX_SI

@ BX_SI

Definition X86Disassembler.cpp:1815

llvm::X86::BP_DI

@ BP_DI

Definition X86Disassembler.cpp:1818

llvm::X86::sib64

@ sib64

Definition X86Disassembler.cpp:1820

llvm::X86::BX_DI

@ BX_DI

Definition X86Disassembler.cpp:1816

llvm::X86::BP_SI

@ BP_SI

Definition X86Disassembler.cpp:1817

llvm::X86::sib

@ sib

Definition X86Disassembler.cpp:1819

llvm::X86::IP_HAS_LOCK

@ IP_HAS_LOCK

Definition X86BaseInfo.h:57

llvm::X86::IP_HAS_AD_SIZE

@ IP_HAS_AD_SIZE

Definition X86BaseInfo.h:54

llvm::X86::IP_HAS_REPEAT

@ IP_HAS_REPEAT

Definition X86BaseInfo.h:56

llvm::X86::IP_HAS_OP_SIZE

@ IP_HAS_OP_SIZE

Definition X86BaseInfo.h:53

llvm::X86::IP_NO_PREFIX

@ IP_NO_PREFIX

Definition X86BaseInfo.h:52

llvm::X86::IP_HAS_REPEAT_NE

@ IP_HAS_REPEAT_NE

Definition X86BaseInfo.h:55

llvm::rdf::Instr

NodeAddr< InstrNode * > Instr

Definition RDFGraph.h:389

llvm::sframe::Flags

Flags

Definition SFrame.h:39

llvm::support::endian::read

value_type read(const void *memory, endianness endian)

Read a value of a particular endianness from memory.

Definition Endian.h:60

llvm

This is an optimization pass for GlobalISel generic memory operations.

Definition AddressRanges.h:18

llvm::mod

LLVM_ATTRIBUTE_ALWAYS_INLINE DynamicAPInt mod(const DynamicAPInt &LHS, const DynamicAPInt &RHS)

Returns LHS mod RHS; the result is always non-negative.

Definition DynamicAPInt.h:395

llvm::size

auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)

Get the size of a range.

Definition STLExtras.h:1655

llvm::getTheX86_32Target

Target & getTheX86_32Target()

Definition X86TargetInfo.cpp:14

llvm::AtomicOrderingCABI::consume

@ consume

Definition AtomicOrdering.h:30

llvm::dbgs

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

Definition Debug.cpp:207

llvm::CaptureComponents::Address

@ Address

Definition ModRef.h:308

llvm::format

format_object< Ts... > format(const char *Fmt, const Ts &... Vals)

These are helper functions used to produce formatted output.

Definition Format.h:129

llvm::WaitForUnlockResult::Success

@ Success

The lock was released successfully.

Definition AdvisoryLock.h:20

llvm::MCPhysReg

uint16_t MCPhysReg

An unsigned integer type large enough to represent all physical registers, but not necessarily virtual registers.

Definition MCRegister.h:21

llvm::Op

DWARFExpression::Operation Op

Definition DWARFExpressionPrinter.cpp:22

llvm::move

OutputIt move(R &&Range, OutputIt Out)

Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.

Definition STLExtras.h:1867

llvm::endianness::little

@ little

Definition bit.h:73

llvm::getTheX86_64Target

Target & getTheX86_64Target()

Definition X86TargetInfo.cpp:18

std

Implement std::hash so that hash_code can be used in STL containers.

Definition BitVector.h:867

raw_ostream.h

ContextDecision

Definition X86Disassembler.cpp:118

ContextDecision::opcodeDecisions

OpcodeDecision opcodeDecisions[IC_max]

Definition X86Disassembler.cpp:119

ModRMDecision

Definition X86Disassembler.cpp:102

ModRMDecision::instructionIDs

uint16_t instructionIDs

Definition X86Disassembler.cpp:104

ModRMDecision::modrm_type

uint8_t modrm_type

Definition X86Disassembler.cpp:103

OpcodeDecision

Definition X86Disassembler.cpp:109

OpcodeDecision::modRMDecisions

ModRMDecision modRMDecisions[256]

Definition X86Disassembler.cpp:110

llvm::TargetRegistry::RegisterMCDisassembler

static void RegisterMCDisassembler(Target &T, Target::MCDisassemblerCtorTy Fn)

RegisterMCDisassembler - Register a MCDisassembler implementation for the given target.

Definition TargetRegistry.h:930

llvm::X86Disassembler::InstructionSpecifier

The specification for how to extract and interpret a full instruction and its operands.

Definition X86DisassemblerDecoder.h:669

llvm::X86Disassembler::InstructionSpecifier::operands

uint16_t operands

Definition X86DisassemblerDecoder.h:670

llvm::X86Disassembler::InternalInstruction

The x86 internal instruction, which is produced by the decoder.

Definition X86DisassemblerDecoder.h:674

llvm::X86Disassembler::InternalInstruction::reg

Reg reg

Definition X86DisassemblerDecoder.h:782

llvm::X86Disassembler::InternalInstruction::eaBase

EABase eaBase

Definition X86DisassemblerDecoder.h:779

llvm::X86Disassembler::InternalInstruction::modRM

uint8_t modRM

Definition X86DisassemblerDecoder.h:754

llvm::X86Disassembler::InternalInstruction::opcodeRegister

Reg opcodeRegister

Definition X86DisassemblerDecoder.h:768

llvm::X86Disassembler::InternalInstruction::RC

uint8_t RC

Definition X86DisassemblerDecoder.h:791

llvm::X86Disassembler::InternalInstruction::regBase

Reg regBase

Definition X86DisassemblerDecoder.h:775

llvm::X86Disassembler::InternalInstruction::operands

ArrayRef< OperandSpecifier > operands

Definition X86DisassemblerDecoder.h:793

llvm::X86Disassembler::InternalInstruction::eaDisplacement

EADisplacement eaDisplacement

Definition X86DisassemblerDecoder.h:780

llvm::X86Disassembler::InternalInstruction::eaRegBase

EABase eaRegBase

Definition X86DisassemblerDecoder.h:774

llvm::X86Disassembler::InternalInstruction::rex2ExtensionPrefix

uint8_t rex2ExtensionPrefix[2]

Definition X86DisassemblerDecoder.h:698

llvm::X86Disassembler::InternalInstruction::writemask

Reg writemask

Definition X86DisassemblerDecoder.h:749

llvm::X86Disassembler::InternalInstruction::opcode

uint8_t opcode

Definition X86DisassemblerDecoder.h:729

llvm::X86Disassembler::InternalInstruction::vectorExtensionPrefix

uint8_t vectorExtensionPrefix[4]

Definition X86DisassemblerDecoder.h:694

llvm::X86Disassembler::InternalInstruction::segmentOverride

SegmentOverride segmentOverride

Definition X86DisassemblerDecoder.h:702

llvm::X86Disassembler::InternalInstruction::numImmediatesConsumed

uint8_t numImmediatesConsumed

Definition X86DisassemblerDecoder.h:763

llvm::X86Disassembler::InternalInstruction::immediateSize

uint8_t immediateSize

Definition X86DisassemblerDecoder.h:719

llvm::X86Disassembler::InternalInstruction::hasAdSize

bool hasAdSize

Definition X86DisassemblerDecoder.h:707

llvm::X86Disassembler::InternalInstruction::hasOpSize

bool hasOpSize

Definition X86DisassemblerDecoder.h:709

llvm::X86Disassembler::InternalInstruction::addressSize

uint8_t addressSize

Definition X86DisassemblerDecoder.h:717

llvm::X86Disassembler::InternalInstruction::sibIndexBase

SIBIndex sibIndexBase

Definition X86DisassemblerDecoder.h:785

llvm::X86Disassembler::InternalInstruction::bytes

llvm::ArrayRef< uint8_t > bytes

Definition X86DisassemblerDecoder.h:676

llvm::X86Disassembler::InternalInstruction::xAcquireRelease

bool xAcquireRelease

Definition X86DisassemblerDecoder.h:704

llvm::X86Disassembler::InternalInstruction::sib

uint8_t sib

Definition X86DisassemblerDecoder.h:757

llvm::X86Disassembler::InternalInstruction::displacement

int32_t displacement

Definition X86DisassemblerDecoder.h:760

llvm::X86Disassembler::InternalInstruction::rexPrefix

uint8_t rexPrefix

Definition X86DisassemblerDecoder.h:700

llvm::X86Disassembler::InternalInstruction::consumedModRM

bool consumedModRM

Definition X86DisassemblerDecoder.h:753

llvm::X86Disassembler::InternalInstruction::sibScale

uint8_t sibScale

Definition X86DisassemblerDecoder.h:787

llvm::X86Disassembler::InternalInstruction::numImmediatesTranslated

uint8_t numImmediatesTranslated

Definition X86DisassemblerDecoder.h:764

llvm::X86Disassembler::InternalInstruction::hasLockPrefix

bool hasLockPrefix

Definition X86DisassemblerDecoder.h:711

llvm::X86Disassembler::InternalInstruction::startLocation

uint64_t startLocation

Definition X86DisassemblerDecoder.h:685

llvm::X86Disassembler::InternalInstruction::readerCursor

uint64_t readerCursor

Definition X86DisassemblerDecoder.h:678

llvm::X86Disassembler::InternalInstruction::sibIndex

SIBIndex sibIndex

Definition X86DisassemblerDecoder.h:786

llvm::X86Disassembler::InternalInstruction::registerSize

uint8_t registerSize

Definition X86DisassemblerDecoder.h:716

llvm::X86Disassembler::InternalInstruction::spec

const InstructionSpecifier * spec

Definition X86DisassemblerDecoder.h:738

llvm::X86Disassembler::InternalInstruction::displacementSize

uint8_t displacementSize

Definition X86DisassemblerDecoder.h:718

llvm::X86Disassembler::InternalInstruction::instructionID

uint16_t instructionID

Definition X86DisassemblerDecoder.h:736

llvm::X86Disassembler::InternalInstruction::mode

DisassemblerMode mode

Definition X86DisassemblerDecoder.h:683

llvm::X86Disassembler::InternalInstruction::sibBase

SIBBase sibBase

Definition X86DisassemblerDecoder.h:788

llvm::X86Disassembler::InternalInstruction::repeatPrefix

uint8_t repeatPrefix

Definition X86DisassemblerDecoder.h:713

llvm::X86Disassembler::InternalInstruction::vectorExtensionType

VectorExtensionType vectorExtensionType

Definition X86DisassemblerDecoder.h:696

llvm::X86Disassembler::InternalInstruction::vvvv

Reg vvvv

Definition X86DisassemblerDecoder.h:746

llvm::X86Disassembler::InternalInstruction::immediates

uint64_t immediates[3]

Definition X86DisassemblerDecoder.h:765

llvm::X86Disassembler::InternalInstruction::opcodeType

OpcodeType opcodeType

Definition X86DisassemblerDecoder.h:734

llvm::X86Disassembler::InternalInstruction::displacementOffset

uint8_t displacementOffset

Definition X86DisassemblerDecoder.h:723

llvm::X86Disassembler::InternalInstruction::mandatoryPrefix

uint8_t mandatoryPrefix

Definition X86DisassemblerDecoder.h:692

llvm::X86Disassembler::InternalInstruction::immediateOffset

uint8_t immediateOffset

Definition X86DisassemblerDecoder.h:724

llvm::X86Disassembler::InternalInstruction::length

size_t length

Definition X86DisassemblerDecoder.h:687

llvm::X86Disassembler::OperandSpecifier

The specification for how to extract and interpret one operand.

Definition X86DisassemblerDecoderCommon.h:531

llvm::X86Disassembler::OperandSpecifier::encoding

uint8_t encoding

Definition X86DisassemblerDecoderCommon.h:532

llvm::X86Disassembler::OperandSpecifier::type

uint8_t type

Definition X86DisassemblerDecoderCommon.h:533

Generated for LLVM by doxygen 1.14.0

LLVM: lib/Target/X86/Disassembler/X86Disassembler.cpp Source File