//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// This file contains the definition of the AMDGPU ISA disassembler.
//
//===----------------------------------------------------------------------===//

// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?

#include "Disassembler/AMDGPUDisassembler.h"
#include "AMDGPU.h"
#include "AMDGPURegisterInfo.h"
#include "SIDefines.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm-c/Disassembler.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <tuple>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "amdgpu-disassembler"

using DecodeStatus = llvm::MCDisassembler::DecodeStatus;

inline static MCDisassembler::DecodeStatus
addOperand(MCInst &Inst, const MCOperand &Opnd) {
  Inst.addOperand(Opnd);
  return Opnd.isValid() ? MCDisassembler::Success
                        : MCDisassembler::SoftFail;
}
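// Insert Op into MI at the position that the named-operand table assigns to
// NameIdx and return that index, or -1 if the opcode has no such operand.
// Illustrative use (see getInstruction below): V_MAC_F32_e64 is decoded
// without src2_modifiers, so a zero immediate is inserted here to keep the
// operand list in sync with the MCInstrDesc.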
static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
                                uint16_t NameIdx) {
  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx);
  if (OpIdx != -1) {
    auto I = MI.begin();
    std::advance(I, OpIdx);
    MI.insert(I, Op);
  }
  return OpIdx;
}

static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm,
                                       uint64_t Addr, const void *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);

  // The branch offset is a signed 16-bit word offset relative to the end of
  // this 4-byte instruction, so Imm * 4 needs 18 bits before sign extension.
  APInt SignedOffset(18, Imm * 4, true);
  int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2))
    return MCDisassembler::Success;
  return addOperand(Inst, MCOperand::createImm(Imm));
}

#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
static DecodeStatus StaticDecoderName(MCInst &Inst, \
                                      unsigned Imm, \
                                      uint64_t /*Addr*/, \
                                      const void *Decoder) { \
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); \
  return addOperand(Inst, DAsm->DecoderName(Imm)); \
}

#define DECODE_OPERAND_REG(RegClass) \
DECODE_OPERAND(Decode##RegClass##RegisterClass, decodeOperand_##RegClass)

DECODE_OPERAND_REG(VGPR_32)
DECODE_OPERAND_REG(VS_32)
DECODE_OPERAND_REG(VS_64)
DECODE_OPERAND_REG(VS_128)

DECODE_OPERAND_REG(VReg_64)
DECODE_OPERAND_REG(VReg_96)
DECODE_OPERAND_REG(VReg_128)

DECODE_OPERAND_REG(SReg_32)
DECODE_OPERAND_REG(SReg_32_XM0_XEXEC)
DECODE_OPERAND_REG(SReg_32_XEXEC_HI)
DECODE_OPERAND_REG(SReg_64)
DECODE_OPERAND_REG(SReg_64_XEXEC)
DECODE_OPERAND_REG(SReg_128)
DECODE_OPERAND_REG(SReg_256)
DECODE_OPERAND_REG(SReg_512)

static DecodeStatus decodeOperand_VSrc16(MCInst &Inst,
                                         unsigned Imm,
                                         uint64_t Addr,
                                         const void *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm));
}

static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst,
                                           unsigned Imm,
                                           uint64_t Addr,
                                           const void *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm));
}

#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)

#include "AMDGPUGenDisassemblerTables.inc"

//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

// Read a value of type T from the front of Bytes (little-endian, as AMDGPU
// instruction words are stored) and advance Bytes past it.
template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
  assert(Bytes.size() >= sizeof(T));
  const auto Res =
      support::endian::read<T, support::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));
  return Res;
}

DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t* Table,
                                               MCInst &MI,
                                               uint64_t Inst,
                                               uint64_t Address) const {
  assert(MI.getOpcode() == 0);
  assert(MI.getNumOperands() == 0);
  MCInst TmpInst;
  HasLiteral = false;
  // Save Bytes: operand decoders may consume a trailing literal dword, which
  // must be given back if this table fails to match.
  const auto SavedBytes = Bytes;
  if (decodeInstruction(Table, TmpInst, Inst, Address, this, STI)) {
    MI = TmpInst;
    return MCDisassembler::Success;
  }
  Bytes = SavedBytes;
  return MCDisassembler::Fail;
}
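// Top-level decode entry point. The encoding length is not self-describing,
// so tables are tried from most to least specific: the 64-bit DPP and SDWA
// tables first (those encodings are VOP1/VOP2 words extended by a second
// dword, signalled by a magic src0 value, and would otherwise match the
// 32-bit tables on their first dword alone), then the 32-bit tables, then
// the remaining 64-bit tables.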
DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                ArrayRef<uint8_t> Bytes_,
                                                uint64_t Address,
                                                raw_ostream &WS,
                                                raw_ostream &CS) const {
  CommentStream = &CS;
  bool IsSDWA = false;

  // ToDo: AMDGPUDisassembler supports only VI ISA.
  if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding])
    report_fatal_error("Disassembly not yet supported for subtarget");

  const unsigned MaxInstBytesNum = (std::min)((size_t)8, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  DecodeStatus Res = MCDisassembler::Fail;
  do {
    // ToDo: it would be better to switch on an encoding-length bit predicate,
    // but none is known yet, so try all the tables we can.

    // Try to decode DPP and SDWA first to solve the conflict with the VOP1
    // and VOP2 encodings.
    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);
      Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address);
      if (Res) break;

      Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address);
      if (Res) { IsSDWA = true; break; }

      Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address);
      if (Res) { IsSDWA = true; break; }

      if (STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem]) {
        Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address);
        if (Res)
          break;
      }

      // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16
      // and v_mad_mixhi_f16 opcodes for FMA variants. Try to decode using
      // this special table first so we print the correct name.
      if (STI.getFeatureBits()[AMDGPU::FeatureFmaMixInsts]) {
        Res = tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address);
        if (Res)
          break;
      }
    }

    // Reinitialize Bytes as DPP64 could have eaten too much.
    Bytes = Bytes_.slice(0, MaxInstBytesNum);

    // Try to decode as a 32-bit instruction.
    if (Bytes.size() < 4) break;
    const uint32_t DW = eatBytes<uint32_t>(Bytes);
    Res = tryDecodeInst(DecoderTableVI32, MI, DW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address);
    if (Res) break;

    if (Bytes.size() < 4) break;
    const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;
    Res = tryDecodeInst(DecoderTableVI64, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address);
  } while (false);

  if (Res && (MI.getOpcode() == AMDGPU::V_MAC_F32_e64_vi ||
              MI.getOpcode() == AMDGPU::V_MAC_F32_e64_si ||
              MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi ||
              MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_vi)) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) {
    Res = convertMIMGInst(MI);
  }

  if (Res && IsSDWA)
    Res = convertSDWAInst(MI);

  // If the opcode was not recognized, we'll assume a Size of 4 bytes (unless
  // there are fewer bytes left).
  Size = Res ? (MaxInstBytesNum - Bytes.size())
             : std::min((size_t)4, Bytes_.size());
  return Res;
}
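// SDWA encodings drop some operands, so the generated decoder leaves them out
// of the MCInst. A sketch of what is restored below: on GFX9 a VOPC SDWA
// gains an implicit zero 'clamp'; on VI a VOPC SDWA always writes VCC, so VCC
// is inserted as sdst, while VOP1/VOP2 gain an implicit zero 'omod'.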
DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
  if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) {
    if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst) != -1)
      // VOPC - insert clamp
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
  } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
    if (SDst != -1) {
      // VOPC - insert VCC register as sdst
      insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
                           AMDGPU::OpName::sdst);
    } else {
      // VOP1/2 - insert omod if present in instruction
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
    }
  }
  return MCDisassembler::Success;
}
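// Worked example for the conversion below (illustrative values): an
// image_load with dmask = 0b0111 enables three channels, so the single
// 32-bit vdata register produced by the generated decoder, say v4, is
// widened to the matching VReg_96 tuple v[4:6] and the opcode is switched to
// the 3-channel variant via getMaskedMIMGOp.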
// Note that the MIMG format provides no information about VADDR size.
// Consequently, decoded instructions always show the address as if it were
// one dword, which may not really be the case.
DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4) {
    return MCDisassembler::Success;
  }

  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);

  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::tfe);

  assert(VDataIdx != -1);
  assert(DMaskIdx != -1);
  assert(TFEIdx != -1);

  bool IsAtomic = (VDstIdx != -1);

  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  if (DMask == 0)
    return MCDisassembler::Success;

  unsigned DstSize = countPopulation(DMask);
  if (DstSize == 1)
    return MCDisassembler::Success;

  bool D16 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::D16;
  if (D16 && AMDGPU::hasPackedD16(STI)) {
    DstSize = (DstSize + 1) / 2;
  }

  // FIXME: Add tfe support.
  if (MI.getOperand(TFEIdx).getImm())
    return MCDisassembler::Success;

  int NewOpcode = -1;

  if (IsAtomic) {
    if (DMask == 0x1 || DMask == 0x3 || DMask == 0xF) {
      NewOpcode = AMDGPU::getMaskedMIMGAtomicOp(*MCII, MI.getOpcode(), DstSize);
    }
    if (NewOpcode == -1) return MCDisassembler::Success;
  } else {
    NewOpcode = AMDGPU::getMaskedMIMGOp(*MCII, MI.getOpcode(), DstSize);
    assert(NewOpcode != -1 &&
           "could not find matching mimg channel instruction");
  }

  auto RCID = MCII->get(NewOpcode).OpInfo[VDataIdx].RegClass;

  // Get the first subregister of VData.
  unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
  unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
  Vdata0 = (VdataSub0 != 0) ? VdataSub0 : Vdata0;

  // Widen the register to the correct number of enabled channels.
  auto NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
                                          &MRI.getRegClass(RCID));
  if (NewVdata == AMDGPU::NoRegister) {
    // It's possible to encode this such that the low register + enabled
    // components exceeds the register count.
    return MCDisassembler::Success;
  }

  MI.setOpcode(NewOpcode);
  // VAddr will always appear as a single VGPR. This will look different from
  // how it is usually emitted because the number of register components is
  // not in the instruction encoding.
  MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);

  if (IsAtomic) {
    // Atomic operations have an additional operand (a copy of data).
    MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
  }

  return MCDisassembler::Success;
}

const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
  return getContext().getRegisterInfo()->
    getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
}

inline
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine& ErrMsg) const {
  *CommentStream << "Error: " + ErrMsg;

  // ToDo: add support for error operands to MCInst.h
  // return MCOperand::createError(V);
  return MCOperand();
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
  return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
                                               unsigned Val) const {
  const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
                           ": unknown register " + Twine(Val));
  return createRegOperand(RegCl.getRegister(Val));
}

inline
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI has 102.
  // Here we accept as much as we can and let the assembler sort it out.
  // Val counts 32-bit register units, so the shift converts it to an index
  // into the wider register class (e.g. for SGPR_64, Val = 4 names s[4:5]).
  int shift = 0;
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
    // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR
    // in this bundle?
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
    // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR
    // in this bundle?
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR
  // in this bundle?
  default:
    llvm_unreachable("unhandled register class");
  }

  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }

  return createRegOperand(SRegClassID, Val >> shift);
}
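// The decodeOperand_* entry points below differ mostly in the operand width
// they forward to decodeSrcOp/decodeDstOp. For example, VS_64 and SReg_64
// operands with the same encoded value decode identically; which register
// class is chosen depends on the width, not on the entry point.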
MCOperand AMDGPUDisassembler::decodeOperand_VS_32(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VS_64(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VS_128(unsigned Val) const {
  return decodeSrcOp(OPW128, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VSrc16(unsigned Val) const {
  return decodeSrcOp(OPW16, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VSrcV216(unsigned Val) const {
  return decodeSrcOp(OPWV216, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const {
  // Some instructions have operand restrictions beyond what the encoding
  // allows: some ordinarily VSrc_32 operands are actually VGPR_32, so clear
  // the extra high bit.
  Val &= 255;

  return createRegOperand(AMDGPU::VGPR_32RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_64(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_64RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_96(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_96RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_128(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_128RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32(unsigned Val) const {
  // The table-gen'erated disassembler doesn't care about operand types and
  // keeps only the register class, so an SSrc_32 operand turns into SReg_32.
  // We therefore accept immediates and literals here as well.
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XM0_XEXEC(
  unsigned Val) const {
  // SReg_32_XM0_XEXEC is SReg_32 without M0, EXEC_LO, or EXEC_HI.
  return decodeOperand_SReg_32(Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XEXEC_HI(
  unsigned Val) const {
  // SReg_32_XEXEC_HI is SReg_32 without EXEC_HI.
  return decodeOperand_SReg_32(Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_64_XEXEC(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_128(unsigned Val) const {
  return decodeSrcOp(OPW128, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_256(unsigned Val) const {
  return decodeDstOp(OPW256, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const {
  return decodeDstOp(OPW512, Val);
}

MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
  // For now, all literal constants are treated as unsigned 32-bit integers.
  // ToDo: deal with signed/unsigned 64-bit integer constants.
  // ToDo: deal with float/double constants.
  if (!HasLiteral) {
    if (Bytes.size() < 4) {
      return errOperand(0, "cannot read literal, inst bytes left " +
                           Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = eatBytes<uint32_t>(Bytes);
  }
  return MCOperand::createImm(Literal);
}

MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
  using namespace AMDGPU::EncValues;

  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  // The casts to int64_t prevent unsigned wraparound for negative constants.
  return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
    (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
    (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
}
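// The helpers below map the floating-point inline-constant codes 240..248 to
// the bit patterns of +-0.5, +-1.0, +-2.0, +-4.0 and 1/(2*pi) at the
// requested width; the 16-bit table returns IEEE half bit patterns (e.g.
// 0x3C00 is 1.0).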
static int64_t getInlineImmVal32(unsigned Imm) {
  switch (Imm) {
  case 240:
    return FloatToBits(0.5f);
  case 241:
    return FloatToBits(-0.5f);
  case 242:
    return FloatToBits(1.0f);
  case 243:
    return FloatToBits(-1.0f);
  case 244:
    return FloatToBits(2.0f);
  case 245:
    return FloatToBits(-2.0f);
  case 246:
    return FloatToBits(4.0f);
  case 247:
    return FloatToBits(-4.0f);
  case 248: // 1 / (2 * PI)
    return 0x3e22f983;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmVal64(unsigned Imm) {
  switch (Imm) {
  case 240:
    return DoubleToBits(0.5);
  case 241:
    return DoubleToBits(-0.5);
  case 242:
    return DoubleToBits(1.0);
  case 243:
    return DoubleToBits(-1.0);
  case 244:
    return DoubleToBits(2.0);
  case 245:
    return DoubleToBits(-2.0);
  case 246:
    return DoubleToBits(4.0);
  case 247:
    return DoubleToBits(-4.0);
  case 248: // 1 / (2 * PI)
    return 0x3fc45f306dc9c882;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmVal16(unsigned Imm) {
  switch (Imm) {
  case 240:
    return 0x3800;
  case 241:
    return 0xB800;
  case 242:
    return 0x3C00;
  case 243:
    return 0xBC00;
  case 244:
    return 0x4000;
  case 245:
    return 0xC000;
  case 246:
    return 0x4400;
  case 247:
    return 0xC400;
  case 248: // 1 / (2 * PI)
    return 0x3118;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

MCOperand AMDGPUDisassembler::decodeFPImmed(OpWidthTy Width, unsigned Imm) {
  assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN
      && Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);

  // ToDo: case 248: 1/(2*PI) is allowed only on VI.
  switch (Width) {
  case OPW32:
    return MCOperand::createImm(getInlineImmVal32(Imm));
  case OPW64:
    return MCOperand::createImm(getInlineImmVal64(Imm));
  case OPW16:
  case OPWV216:
    return MCOperand::createImm(getInlineImmVal16(Imm));
  default:
    llvm_unreachable("implement me");
  }
}

unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall through
  case OPW32:
  case OPW16:
  case OPWV216:
    return VGPR_32RegClassID;
  case OPW64: return VReg_64RegClassID;
  case OPW128: return VReg_128RegClassID;
  }
}

unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall through
  case OPW32:
  case OPW16:
  case OPWV216:
    return SGPR_32RegClassID;
  case OPW64: return SGPR_64RegClassID;
  case OPW128: return SGPR_128RegClassID;
  case OPW256: return SGPR_256RegClassID;
  case OPW512: return SGPR_512RegClassID;
  }
}

unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall through
  case OPW32:
  case OPW16:
  case OPWV216:
    return TTMP_32RegClassID;
  case OPW64: return TTMP_64RegClassID;
  case OPW128: return TTMP_128RegClassID;
  case OPW256: return TTMP_256RegClassID;
  case OPW512: return TTMP_512RegClassID;
  }
}
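// Trap-temporary (ttmp) registers occupy different ranges of the scalar
// operand encoding on VI and GFX9 (TTMP_VI_MIN/MAX vs. TTMP_GFX9_MIN/MAX), so
// the ttmp index is computed relative to the subtarget's base value. Returns
// -1 if Val does not encode a ttmp register.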
int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
  using namespace AMDGPU::EncValues;

  unsigned TTmpMin = isGFX9() ? TTMP_GFX9_MIN : TTMP_VI_MIN;
  unsigned TTmpMax = isGFX9() ? TTMP_GFX9_MAX : TTMP_VI_MAX;

  return (TTmpMin <= Val && Val <= TTmpMax) ? Val - TTmpMin : -1;
}
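// decodeSrcOp handles the shared 9-bit source-operand encoding. In the order
// checked below the space covers: VGPRs, SGPRs, ttmps, inline integer
// constants (assuming the GCN3 values INLINE_INTEGER_C_MIN = 128 and
// INLINE_INTEGER_C_POSITIVE_MAX = 192, an encoded 129 decodes to the
// immediate 1 and 193 to -1), inline float constants, the literal-constant
// marker, and finally special registers such as VCC, EXEC and M0.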
MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width,
                                          unsigned Val) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 512); // enum9

  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
    return createRegOperand(getVgprClassId(Width), Val - VGPR_MIN);
  }
  if (Val <= SGPR_MAX) {
    assert(SGPR_MIN == 0); // "SGPR_MIN <= Val" is always true and would cause
                           // a compilation warning.
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
    return decodeIntImmed(Val);

  if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
    return decodeFPImmed(Width, Val);

  if (Val == LITERAL_CONST)
    return decodeLiteralConstant();

  switch (Width) {
  case OPW32:
  case OPW16:
  case OPWV216:
    return decodeSpecialReg32(Val);
  case OPW64:
    return decodeSpecialReg64(Val);
  default:
    llvm_unreachable("unexpected immediate type");
  }
}

MCOperand AMDGPUDisassembler::decodeDstOp(const OpWidthTy Width,
                                          unsigned Val) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 128);
  assert(Width == OPW256 || Width == OPW512);

  if (Val <= SGPR_MAX) {
    assert(SGPR_MIN == 0); // "SGPR_MIN <= Val" is always true and would cause
                           // a compilation warning.
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  llvm_unreachable("unknown dst register");
}

MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(FLAT_SCR_LO);
  case 103: return createRegOperand(FLAT_SCR_HI);
  case 104: return createRegOperand(XNACK_MASK_LO);
  case 105: return createRegOperand(XNACK_MASK_HI);
  case 106: return createRegOperand(VCC_LO);
  case 107: return createRegOperand(VCC_HI);
  case 108: assert(!isGFX9()); return createRegOperand(TBA_LO);
  case 109: assert(!isGFX9()); return createRegOperand(TBA_HI);
  case 110: assert(!isGFX9()); return createRegOperand(TMA_LO);
  case 111: assert(!isGFX9()); return createRegOperand(TMA_HI);
  case 124: return createRegOperand(M0);
  case 126: return createRegOperand(EXEC_LO);
  case 127: return createRegOperand(EXEC_HI);
  case 235: return createRegOperand(SRC_SHARED_BASE);
  case 236: return createRegOperand(SRC_SHARED_LIMIT);
  case 237: return createRegOperand(SRC_PRIVATE_BASE);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
  // TODO: SRC_POPS_EXITING_WAVE_ID
  // ToDo: no support for vccz register
  case 251: break;
  // ToDo: no support for execz register
  case 252: break;
  case 253: return createRegOperand(SCC);
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(FLAT_SCR);
  case 104: return createRegOperand(XNACK_MASK);
  case 106: return createRegOperand(VCC);
  case 108: assert(!isGFX9()); return createRegOperand(TBA);
  case 110: assert(!isGFX9()); return createRegOperand(TMA);
  case 126: return createRegOperand(EXEC);
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}
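// GFX9 SDWA sources use their own 9-bit encoding in which VGPRs come first
// and all scalar values are offset by SRC_SGPR_MIN; after that subtraction
// the value mirrors the ordinary scalar-source encoding, which is why the
// inline-constant and special-register paths below reuse the same decoders.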
MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
                                            const unsigned Val) const {
  using namespace AMDGPU::SDWA;
  using namespace AMDGPU::EncValues;

  if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) {
    // XXX: the static_cast<int> is needed to silence a "comparison with
    // unsigned is always true" warning.
    if (SDWA9EncValues::SRC_VGPR_MIN <= static_cast<int>(Val) &&
        Val <= SDWA9EncValues::SRC_VGPR_MAX) {
      return createRegOperand(getVgprClassId(Width),
                              Val - SDWA9EncValues::SRC_VGPR_MIN);
    }
    if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_SGPR_MAX) {
      return createSRegOperand(getSgprClassId(Width),
                               Val - SDWA9EncValues::SRC_SGPR_MIN);
    }
    if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_TTMP_MAX) {
      return createSRegOperand(getTtmpClassId(Width),
                               Val - SDWA9EncValues::SRC_TTMP_MIN);
    }

    const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

    if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
      return decodeIntImmed(SVal);

    if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
      return decodeFPImmed(Width, SVal);

    return decodeSpecialReg32(SVal);
  } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
    // On VI the SDWA src field can only name a VGPR.
    return createRegOperand(getVgprClassId(Width), Val);
  }
  llvm_unreachable("unsupported target");
}

MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
  return decodeSDWASrc(OPW16, Val);
}

MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
  return decodeSDWASrc(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
  using namespace AMDGPU::SDWA;

  assert(STI.getFeatureBits()[AMDGPU::FeatureGFX9] &&
         "SDWAVopcDst should be present only on GFX9");
  // The VOPC_DST_VCC_MASK bit selects an explicitly encoded scalar
  // destination; when it is clear, the compare implicitly writes VCC.
  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;

    int TTmpIdx = getTTmpIdx(Val);
    if (TTmpIdx >= 0) {
      return createSRegOperand(getTtmpClassId(OPW64), TTmpIdx);
    } else if (Val > AMDGPU::EncValues::SGPR_MAX) {
      return decodeSpecialReg64(Val);
    } else {
      return createSRegOperand(getSgprClassId(OPW64), Val);
    }
  } else {
    return createRegOperand(AMDGPU::VCC);
  }
}

bool AMDGPUDisassembler::isVI() const {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

bool AMDGPUDisassembler::isGFX9() const {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}

//===----------------------------------------------------------------------===//
// AMDGPUSymbolizer
//===----------------------------------------------------------------------===//

// Try to find a symbol name for the specified label.
bool AMDGPUSymbolizer::tryAddingSymbolicOperand(MCInst &Inst,
                                raw_ostream &/*cStream*/, int64_t Value,
                                uint64_t /*Address*/, bool IsBranch,
                                uint64_t /*Offset*/, uint64_t /*InstSize*/) {
  using SymbolInfoTy = std::tuple<uint64_t, StringRef, uint8_t>;
  using SectionSymbolsTy = std::vector<SymbolInfoTy>;

  if (!IsBranch) {
    return false;
  }

  auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
  if (!Symbols)
    return false;

  auto Result = std::find_if(Symbols->begin(), Symbols->end(),
    [Value](const SymbolInfoTy& Val) {
      return std::get<0>(Val) == static_cast<uint64_t>(Value)
          && std::get<2>(Val) == ELF::STT_NOTYPE;
    });
  if (Result != Symbols->end()) {
    auto *Sym = Ctx.getOrCreateSymbol(std::get<1>(*Result));
    const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
    Inst.addOperand(MCOperand::createExpr(Add));
    return true;
  }
  return false;
}

void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
                                                       int64_t Value,
                                                       uint64_t Address) {
  llvm_unreachable("unimplemented");
}

//===----------------------------------------------------------------------===//
// Initialization
//===----------------------------------------------------------------------===//

static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
                              LLVMOpInfoCallback /*GetOpInfo*/,
                              LLVMSymbolLookupCallback /*SymbolLookUp*/,
                              void *DisInfo,
                              MCContext *Ctx,
                              std::unique_ptr<MCRelocationInfo> &&RelInfo) {
  return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
}

static MCDisassembler *createAMDGPUDisassembler(const Target &T,
                                                const MCSubtargetInfo &STI,
                                                MCContext &Ctx) {
  return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
}
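// extern "C" entry point invoked by LLVM's initialization machinery (e.g.
// InitializeAllDisassemblers) to hook the disassembler and symbolizer
// factories up to the GCN target.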
extern "C" void LLVMInitializeAMDGPUDisassembler() {
  TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
                                         createAMDGPUDisassembler);
  TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
                                       createAMDGPUSymbolizer);
}