//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// This file contains definition for AMDGPU ISA disassembler
//
//===----------------------------------------------------------------------===//

// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?

#include "Disassembler/AMDGPUDisassembler.h"
#include "AMDGPU.h"
#include "AMDGPURegisterInfo.h"
#include "SIDefines.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm-c/Disassembler.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <tuple>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "amdgpu-disassembler"

using DecodeStatus = llvm::MCDisassembler::DecodeStatus;

// Append \p Opnd to \p Inst. An invalid operand (the sentinel returned by a
// failed member decoder, e.g. errOperand) downgrades the decode result to
// SoftFail instead of aborting, so the caller can keep disassembling.
inline static MCDisassembler::DecodeStatus
addOperand(MCInst &Inst, const MCOperand& Opnd) {
  Inst.addOperand(Opnd);
  return Opnd.isValid() ?
    MCDisassembler::Success :
    MCDisassembler::SoftFail;
}

// Insert \p Op into \p MI at the position of the named operand \p NameIdx
// (an AMDGPU::OpName value). Returns the operand index, or -1 when the
// opcode has no such named operand, in which case MI is left unchanged.
static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
                                uint16_t NameIdx) {
  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx);
  if (OpIdx != -1) {
    auto I = MI.begin();
    std::advance(I, OpIdx);
    MI.insert(I, Op);
  }
  return OpIdx;
}

// Decode the branch offset of an SOPP instruction. The encoded immediate is
// a signed count of dwords (hence * 4) relative to the address of the next
// instruction (hence + 4). Prefer a symbolic operand when the target address
// resolves to a known symbol; otherwise fall back to the raw immediate.
static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm,
                                       uint64_t Addr, const void *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);

  // 18 bits is enough to hold the 16-bit immediate scaled by 4, kept signed.
  APInt SignedOffset(18, Imm * 4, true);
  int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2))
    return MCDisassembler::Success;
  return addOperand(Inst, MCOperand::createImm(Imm));
}

// Emit a static trampoline with the signature tablegen'd decoder tables
// expect, forwarding the raw encoded value to the corresponding
// AMDGPUDisassembler member decoder.
#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
static DecodeStatus StaticDecoderName(MCInst &Inst, \
                                       unsigned Imm, \
                                       uint64_t /*Addr*/, \
                                       const void *Decoder) { \
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); \
  return addOperand(Inst, DAsm->DecoderName(Imm)); \
}

// Shorthand for register-class operand decoders named after the class.
#define DECODE_OPERAND_REG(RegClass) \
DECODE_OPERAND(Decode##RegClass##RegisterClass, decodeOperand_##RegClass)

DECODE_OPERAND_REG(VGPR_32)
DECODE_OPERAND_REG(VS_32)
DECODE_OPERAND_REG(VS_64)
DECODE_OPERAND_REG(VS_128)

DECODE_OPERAND_REG(VReg_64)
DECODE_OPERAND_REG(VReg_96)
DECODE_OPERAND_REG(VReg_128)

DECODE_OPERAND_REG(SReg_32)
DECODE_OPERAND_REG(SReg_32_XM0_XEXEC)
DECODE_OPERAND_REG(SReg_32_XEXEC_HI)
DECODE_OPERAND_REG(SReg_64)
DECODE_OPERAND_REG(SReg_64_XEXEC)
DECODE_OPERAND_REG(SReg_128)
DECODE_OPERAND_REG(SReg_256)
DECODE_OPERAND_REG(SReg_512)

// Hand-written trampoline (not via DECODE_OPERAND) for 16-bit VSrc operands.
static DecodeStatus decodeOperand_VSrc16(MCInst &Inst,
                                         unsigned Imm,
                                         uint64_t Addr,
                                         const void *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst,
DAsm->decodeOperand_VSrc16(Imm));
}

// Trampoline for packed 16-bit vector VSrc operands.
static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst,
                                           unsigned Imm,
                                           uint64_t Addr,
                                           const void *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm));
}

// SDWA operand trampolines; static and member decoders share a name here.
#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)

#include "AMDGPUGenDisassemblerTables.inc"

//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

// Read a little-endian T from the front of Bytes and advance Bytes past it.
// The caller must guarantee at least sizeof(T) bytes remain.
template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
  assert(Bytes.size() >= sizeof(T));
  const auto Res = support::endian::read<T, support::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));
  return Res;
}

// Attempt to decode Inst with one tablegen'd decoder table. On failure the
// Bytes cursor is restored, so a literal consumed by decodeLiteralConstant
// during the failed attempt is not lost for the next attempt.
DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t* Table,
                                               MCInst &MI,
                                               uint64_t Inst,
                                               uint64_t Address) const {
  assert(MI.getOpcode() == 0);
  assert(MI.getNumOperands() == 0);
  MCInst TmpInst;
  HasLiteral = false; // reset literal-cache state for this attempt
  const auto SavedBytes = Bytes;
  if (decodeInstruction(Table, TmpInst, Inst, Address, this, STI)) {
    MI = TmpInst;
    return MCDisassembler::Success;
  }
  Bytes = SavedBytes;
  return MCDisassembler::Fail;
}

// Main entry point: decode one instruction from Bytes_ at Address.
// Tries 64-bit encodings (DPP/SDWA first, since they conflict with VOP1/2),
// then 32-bit tables, then the 64-bit VI/AMDGPU/GFX9 tables. Sets Size to
// the number of bytes consumed (4 on failure, clamped to what remains).
DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                ArrayRef<uint8_t> Bytes_,
                                                uint64_t Address,
                                                raw_ostream &WS,
                                                raw_ostream &CS) const {
  CommentStream = &CS;
  bool IsSDWA = false;

  // ToDo: AMDGPUDisassembler supports only VI ISA.
  if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding])
    report_fatal_error("Disassembly not yet supported for subtarget");

  // Longest encoding (opcode + literal) is 8 bytes.
  const unsigned MaxInstBytesNum = (std::min)((size_t)8, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  DecodeStatus Res = MCDisassembler::Fail;
  do {
    // ToDo: better to switch encoding length using some bit predicate
    // but it is unknown yet, so try all we can

    // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
    // encodings
    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);
      Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address);
      if (Res) break;

      Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address);
      if (Res) { IsSDWA = true; break; }

      Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address);
      if (Res) { IsSDWA = true; break; }

      if (STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem]) {
        Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address);
        if (Res)
          break;
      }

      // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
      // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
      // table first so we print the correct name.
      if (STI.getFeatureBits()[AMDGPU::FeatureFmaMixInsts]) {
        Res = tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address);
        if (Res)
          break;
      }
    }

    // Reinitialize Bytes as DPP64 could have eaten too much
    Bytes = Bytes_.slice(0, MaxInstBytesNum);

    // Try decode 32-bit instruction
    if (Bytes.size() < 4) break;
    const uint32_t DW = eatBytes<uint32_t>(Bytes);
    Res = tryDecodeInst(DecoderTableVI32, MI, DW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address);
    if (Res) break;

    // Extend to a 64-bit encoding: DW already holds the low half.
    if (Bytes.size() < 4) break;
    const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;
    Res = tryDecodeInst(DecoderTableVI64, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address);
  } while (false);

  if (Res && (MI.getOpcode() == AMDGPU::V_MAC_F32_e64_vi ||
              MI.getOpcode() == AMDGPU::V_MAC_F32_e64_si ||
              MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi ||
              MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_vi)) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) {
    Res = convertMIMGInst(MI);
  }

  if (Res && IsSDWA)
    Res = convertSDWAInst(MI);

  // if the opcode was not recognized we'll assume a Size of 4 bytes
  // (unless there are fewer bytes left)
  Size = Res ?
    (MaxInstBytesNum - Bytes.size())
         : std::min((size_t)4, Bytes_.size());
  return Res;
}

// Post-process an SDWA instruction: the encoding omits operands that the
// MC instruction definitions expect, so synthesize them here.
DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
  if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) {
    if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst) != -1)
      // VOPC - insert clamp
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
  } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
    if (SDst != -1) {
      // VOPC - insert VCC register as sdst
      insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
                           AMDGPU::OpName::sdst);
    } else {
      // VOP1/2 - insert omod if present in instruction
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
    }
  }
  return MCDisassembler::Success;
}

// Note that MIMG format provides no information about VADDR size.
// Consequently, decoded instructions always show address
// as if it has 1 dword, which could be not really so.
// Post-process a MIMG instruction: the tablegen decoder always produces the
// 1-channel variant, so re-derive the real opcode/vdata width from dmask
// (and d16 packing), then widen the vdata (and atomic vdst) register.
DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {

  // Gather4 always writes 4 channels regardless of dmask; nothing to fix up.
  if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4) {
    return MCDisassembler::Success;
  }

  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);

  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::tfe);

  assert(VDataIdx != -1);
  assert(DMaskIdx != -1);
  assert(TFEIdx != -1);

  // Only atomics have a separate vdst (the returned pre-op value).
  bool IsAtomic = (VDstIdx != -1);

  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  if (DMask == 0)
    return MCDisassembler::Success;

  // One enabled dmask bit per written dword.
  unsigned DstSize = countPopulation(DMask);
  if (DstSize == 1)
    return MCDisassembler::Success;

  // Packed d16 stores two 16-bit channels per dword.
  bool D16 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::D16;
  if (D16 && AMDGPU::hasPackedD16(STI)) {
    DstSize = (DstSize + 1) / 2;
  }

  // FIXME: Add tfe support
  if (MI.getOperand(TFEIdx).getImm())
    return MCDisassembler::Success;

  int NewOpcode = -1;

  if (IsAtomic) {
    if (DMask == 0x1 || DMask == 0x3 || DMask == 0xF) {
      NewOpcode = AMDGPU::getMaskedMIMGAtomicOp(*MCII, MI.getOpcode(), DstSize);
    }
    if (NewOpcode == -1) return MCDisassembler::Success;
  } else {
    NewOpcode = AMDGPU::getMaskedMIMGOp(*MCII, MI.getOpcode(), DstSize);
    assert(NewOpcode != -1 && "could not find matching mimg channel instruction");
  }

  auto RCID = MCII->get(NewOpcode).OpInfo[VDataIdx].RegClass;

  // Get first subregister of VData
  unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
  unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
  Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;

  // Widen the register to the correct number of enabled channels.
  auto NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
                                          &MRI.getRegClass(RCID));
  if (NewVdata == AMDGPU::NoRegister) {
    // It's possible to encode this such that the low register + enabled
    // components exceeds the register count.
    return MCDisassembler::Success;
  }

  MI.setOpcode(NewOpcode);
  // vaddr will always appear as a single VGPR. This will look different than
  // how it is usually emitted because the number of register components is not
  // in the instruction encoding.
  MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);

  if (IsAtomic) {
    // Atomic operations have an additional operand (a copy of data)
    MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
  }

  return MCDisassembler::Success;
}

// Human-readable name of a register class, used in diagnostics.
const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
  return getContext().getRegisterInfo()->
    getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
}

// Report a decode error on the comment stream and return an invalid
// MCOperand sentinel (addOperand turns it into SoftFail).
inline
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine& ErrMsg) const {
  *CommentStream << "Error: " + ErrMsg;

  // ToDo: add support for error operands to MCInst.h
  // return MCOperand::createError(V);
  return MCOperand();
}

// Wrap a target register id into an MCOperand, mapping to the
// subtarget-specific MC register number.
inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
  return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
}

// Create a register operand for register number Val within RegClassID,
// or an error operand when Val is out of range for the class.
inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
                                               unsigned Val) const {
  const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
                      ": unknown register " + Twine(Val));
  return createRegOperand(RegCl.getRegister(Val));
}

// Create a scalar register operand; Val is the raw encoded register number,
// which is divided by the class's dword width (see shift below).
inline
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI - 102
  // Valery: here we accepting
// as much as we can, let assembler sort it out
  // shift converts the raw encoded number into an index into the tuple
  // register class: 64-bit pairs step by 2, 128/256/512-bit tuples by 4.
  int shift = 0;
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
    // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
    // this bundle?
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  default:
    llvm_unreachable("unhandled register class");
  }

  // Misaligned encodings are accepted but flagged on the comment stream.
  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }

  return createRegOperand(SRegClassID, Val >> shift);
}

// VS_* operands may be a VGPR, SGPR, inline constant, or literal; defer to
// the generic source-operand decoder with the appropriate width.
MCOperand AMDGPUDisassembler::decodeOperand_VS_32(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VS_64(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VS_128(unsigned Val) const {
  return decodeSrcOp(OPW128, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VSrc16(unsigned Val) const {
  return decodeSrcOp(OPW16, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VSrcV216(unsigned Val) const {
  return decodeSrcOp(OPWV216, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const {
  // Some instructions have operand restrictions beyond what the encoding
  // allows. Some ordinarily VSrc_32 operands are VGPR_32, so clear the extra
  // high bit.
Val &= 255;

  return createRegOperand(AMDGPU::VGPR_32RegClassID, Val);
}

// Plain vector register tuples: Val is already a class-relative index.
MCOperand AMDGPUDisassembler::decodeOperand_VReg_64(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_64RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_96(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_96RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_128(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_128RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32(unsigned Val) const {
  // table-gen generated disassembler doesn't care about operand types
  // leaving only registry class so SSrc_32 operand turns into SReg_32
  // and therefore we accept immediates and literals here as well
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XM0_XEXEC(
  unsigned Val) const {
  // SReg_32_XM0 is SReg_32 without M0 or EXEC_LO/EXEC_HI
  return decodeOperand_SReg_32(Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XEXEC_HI(
  unsigned Val) const {
  // SReg_32_XEXEC_HI is SReg_32 without EXEC_HI
  return decodeOperand_SReg_32(Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_64_XEXEC(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_128(unsigned Val) const {
  return decodeSrcOp(OPW128, Val);
}

// Wide SReg tuples are destinations only: registers, never immediates.
MCOperand AMDGPUDisassembler::decodeOperand_SReg_256(unsigned Val) const {
  return decodeDstOp(OPW256, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const {
  return decodeDstOp(OPW512, Val);
}

// Read (once) and cache the 32-bit literal that trails the instruction.
// HasLiteral/Literal are reset per decode attempt in tryDecodeInst.
MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
  // For now all literal constants are supposed to be unsigned integer
  // ToDo: deal with signed/unsigned 64-bit integer constants
  // ToDo: deal with float/double constants
  if (!HasLiteral) {
    if (Bytes.size() < 4) {
      return errOperand(0, "cannot read literal, inst bytes left " +
                        Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = eatBytes<uint32_t>(Bytes);
  }
  return MCOperand::createImm(Literal);
}

// Map an inline-integer encoding to its value: the low range encodes
// 0..64, the high range encodes -1..-16.
MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
  using namespace AMDGPU::EncValues;

  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
    (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
    (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
  // Cast prevents negative overflow.
}

// Inline FP constant encodings (240..248) as IEEE-754 single bit patterns.
static int64_t getInlineImmVal32(unsigned Imm) {
  switch (Imm) {
  case 240:
    return FloatToBits(0.5f);
  case 241:
    return FloatToBits(-0.5f);
  case 242:
    return FloatToBits(1.0f);
  case 243:
    return FloatToBits(-1.0f);
  case 244:
    return FloatToBits(2.0f);
  case 245:
    return FloatToBits(-2.0f);
  case 246:
    return FloatToBits(4.0f);
  case 247:
    return FloatToBits(-4.0f);
  case 248: // 1 / (2 * PI)
    return 0x3e22f983;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

// Same encodings as IEEE-754 double bit patterns.
static int64_t getInlineImmVal64(unsigned Imm) {
  switch (Imm) {
  case 240:
    return DoubleToBits(0.5);
  case 241:
    return DoubleToBits(-0.5);
  case 242:
    return DoubleToBits(1.0);
  case 243:
    return DoubleToBits(-1.0);
  case 244:
    return DoubleToBits(2.0);
  case 245:
    return DoubleToBits(-2.0);
  case 246:
    return DoubleToBits(4.0);
  case 247:
    return DoubleToBits(-4.0);
  case 248: // 1 / (2 * PI)
    return 0x3fc45f306dc9c882;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

// Same encodings as IEEE-754 half bit patterns.
static int64_t
getInlineImmVal16(unsigned Imm) {
  switch (Imm) {
  case 240:
    return 0x3800; //  0.5 in fp16
  case 241:
    return 0xB800; // -0.5
  case 242:
    return 0x3C00; //  1.0
  case 243:
    return 0xBC00; // -1.0
  case 244:
    return 0x4000; //  2.0
  case 245:
    return 0xC000; // -2.0
  case 246:
    return 0x4400; //  4.0
  case 247:
    return 0xC400; // -4.0
  case 248: // 1 / (2 * PI)
    return 0x3118;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

// Decode an inline floating-point constant at the requested operand width.
MCOperand AMDGPUDisassembler::decodeFPImmed(OpWidthTy Width, unsigned Imm) {
  assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN
      && Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);

  // ToDo: case 248: 1/(2*PI) - is allowed only on VI
  switch (Width) {
  case OPW32:
    return MCOperand::createImm(getInlineImmVal32(Imm));
  case OPW64:
    return MCOperand::createImm(getInlineImmVal64(Imm));
  case OPW16:
  case OPWV216:
    return MCOperand::createImm(getInlineImmVal16(Imm));
  default:
    llvm_unreachable("implement me");
  }
}

// Vector register class for an operand width; sub-dword widths still use
// full 32-bit VGPRs.
unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return VGPR_32RegClassID;
  case OPW64: return VReg_64RegClassID;
  case OPW128: return VReg_128RegClassID;
  }
}

// Scalar register class for an operand width.
unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return SGPR_32RegClassID;
  case OPW64: return SGPR_64RegClassID;
  case OPW128: return SGPR_128RegClassID;
  case OPW256: return SGPR_256RegClassID;
  case OPW512: return SGPR_512RegClassID;
  }
}

// Trap-temporary register class for an operand width.
unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall
  case OPW32:
  case OPW16:
  case OPWV216:
    return TTMP_32RegClassID;
  case OPW64: return TTMP_64RegClassID;
  case OPW128: return TTMP_128RegClassID;
  case OPW256: return TTMP_256RegClassID;
  case OPW512: return TTMP_512RegClassID;
  }
}

// Translate an encoded source value into a ttmp index, or -1 when it is not
// in the ttmp range (which differs between VI and GFX9).
int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
  using namespace AMDGPU::EncValues;

  unsigned TTmpMin = isGFX9() ? TTMP_GFX9_MIN : TTMP_VI_MIN;
  unsigned TTmpMax = isGFX9() ? TTMP_GFX9_MAX : TTMP_VI_MAX;

  return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
}

// Decode a 9-bit source operand: VGPR, SGPR, ttmp, inline constant, trailing
// literal, or one of the special registers, in that priority order.
MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 512); // enum9

  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
    return createRegOperand(getVgprClassId(Width), Val - VGPR_MIN);
  }
  if (Val <= SGPR_MAX) {
    assert(SGPR_MIN == 0); // "SGPR_MIN <= Val" is always true and causes compilation warning.
return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
    return decodeIntImmed(Val);

  if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
    return decodeFPImmed(Width, Val);

  if (Val == LITERAL_CONST)
    return decodeLiteralConstant();

  // Remaining encodings are special (named) registers, 32- or 64-bit.
  switch (Width) {
  case OPW32:
  case OPW16:
  case OPWV216:
    return decodeSpecialReg32(Val);
  case OPW64:
    return decodeSpecialReg64(Val);
  default:
    llvm_unreachable("unexpected immediate type");
  }
}

// Decode a 7-bit scalar destination (wide SReg tuples only): SGPR or ttmp,
// never an immediate.
MCOperand AMDGPUDisassembler::decodeDstOp(const OpWidthTy Width, unsigned Val) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 128);
  assert(Width == OPW256 || Width == OPW512);

  if (Val <= SGPR_MAX) {
    assert(SGPR_MIN == 0); // "SGPR_MIN <= Val" is always true and causes compilation warning.
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  llvm_unreachable("unknown dst register");
}

// Map a special-register source encoding to a named 32-bit register.
// TBA/TMA encodings were removed on GFX9, hence the asserts.
MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(FLAT_SCR_LO);
  case 103: return createRegOperand(FLAT_SCR_HI);
  case 104: return createRegOperand(XNACK_MASK_LO);
  case 105: return createRegOperand(XNACK_MASK_HI);
  case 106: return createRegOperand(VCC_LO);
  case 107: return createRegOperand(VCC_HI);
  case 108: assert(!isGFX9()); return createRegOperand(TBA_LO);
  case 109: assert(!isGFX9()); return createRegOperand(TBA_HI);
  case 110: assert(!isGFX9()); return createRegOperand(TMA_LO);
  case 111: assert(!isGFX9()); return createRegOperand(TMA_HI);
  case 124: return createRegOperand(M0);
  case 126: return createRegOperand(EXEC_LO);
  case 127: return createRegOperand(EXEC_HI);
  case 235: return createRegOperand(SRC_SHARED_BASE);
  case 236: return createRegOperand(SRC_SHARED_LIMIT);
  case 237: return createRegOperand(SRC_PRIVATE_BASE);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
    // TODO: SRC_POPS_EXITING_WAVE_ID
    // ToDo: no support for vccz register
  case 251: break;
    // ToDo: no support for execz register
  case 252: break;
  case 253: return createRegOperand(SCC);
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

// Map a special-register source encoding to a named 64-bit register pair.
MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(FLAT_SCR);
  case 104: return createRegOperand(XNACK_MASK);
  case 106: return createRegOperand(VCC);
  case 108: assert(!isGFX9()); return createRegOperand(TBA);
  case 110:
assert(!isGFX9()); return createRegOperand(TMA);
  case 126: return createRegOperand(EXEC);
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

// Decode an SDWA source operand. On GFX9 the 9-bit field partitions into
// VGPR / SGPR / ttmp ranges followed by inline constants and special
// registers; on VI the field is always a VGPR number.
MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
                                            const unsigned Val) const {
  using namespace AMDGPU::SDWA;
  using namespace AMDGPU::EncValues;

  if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) {
    // XXX: static_cast<int> is needed to avoid stupid warning:
    // compare with unsigned is always true
    if (SDWA9EncValues::SRC_VGPR_MIN <= static_cast<int>(Val) &&
        Val <= SDWA9EncValues::SRC_VGPR_MAX) {
      return createRegOperand(getVgprClassId(Width),
                              Val - SDWA9EncValues::SRC_VGPR_MIN);
    }
    if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_SGPR_MAX) {
      return createSRegOperand(getSgprClassId(Width),
                               Val - SDWA9EncValues::SRC_SGPR_MIN);
    }
    if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_TTMP_MAX) {
      return createSRegOperand(getTtmpClassId(Width),
                               Val - SDWA9EncValues::SRC_TTMP_MIN);
    }

    // Beyond the register ranges the field is biased by SRC_SGPR_MIN and
    // then follows the regular 9-bit source encoding.
    const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

    if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
      return decodeIntImmed(SVal);

    if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
      return decodeFPImmed(Width, SVal);

    return decodeSpecialReg32(SVal);
  } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
    return createRegOperand(getVgprClassId(Width), Val);
  }
  llvm_unreachable("unsupported target");
}

MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
  return decodeSDWASrc(OPW16, Val);
}

MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
  return decodeSDWASrc(OPW32, Val);
}

// Decode the GFX9 SDWA VOPC destination: either VCC (mask bit clear) or an
// explicit SGPR pair / ttmp / special register selected by the low bits.
MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
  using namespace AMDGPU::SDWA;

  assert(STI.getFeatureBits()[AMDGPU::FeatureGFX9] &&
         "SDWAVopcDst should be present only on GFX9");
  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;

    int TTmpIdx = getTTmpIdx(Val);
    if (TTmpIdx >= 0) {
      return createSRegOperand(getTtmpClassId(OPW64), TTmpIdx);
    } else if (Val > AMDGPU::EncValues::SGPR_MAX) {
      return decodeSpecialReg64(Val);
    } else {
      return createSRegOperand(getSgprClassId(OPW64), Val);
    }
  } else {
    return createRegOperand(AMDGPU::VCC);
  }
}

bool AMDGPUDisassembler::isVI() const {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

bool AMDGPUDisassembler::isGFX9() const {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}

//===----------------------------------------------------------------------===//
// AMDGPUSymbolizer
//===----------------------------------------------------------------------===//

// Try to find symbol name for specified label
bool AMDGPUSymbolizer::tryAddingSymbolicOperand(MCInst &Inst,
                                raw_ostream &/*cStream*/, int64_t Value,
                                uint64_t /*Address*/, bool IsBranch,
                                uint64_t /*Offset*/, uint64_t /*InstSize*/) {
  using SymbolInfoTy = std::tuple<uint64_t, StringRef, uint8_t>;
  using SectionSymbolsTy = std::vector<SymbolInfoTy>;

  // Only branch targets are symbolized.
  if (!IsBranch) {
    return false;
  }

  // DisInfo is the section symbol table supplied by the disassembler client.
  auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
  if (!Symbols)
    return false;

  // Match an untyped (STT_NOTYPE) symbol at exactly the target address.
  auto Result = std::find_if(Symbols->begin(), Symbols->end(),
                             [Value](const SymbolInfoTy& Val) {
                                return std::get<0>(Val) == static_cast<uint64_t>(Value)
                                    && std::get<2>(Val) == ELF::STT_NOTYPE;
                             });
  if (Result != Symbols->end()) {
    auto *Sym = Ctx.getOrCreateSymbol(std::get<1>(*Result));
    const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
    Inst.addOperand(MCOperand::createExpr(Add));
    return true;
  }
  return false;
}

// PC-relative load annotations are not supported for AMDGPU.
void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
                                                       int64_t Value,
                                                       uint64_t Address) {
  llvm_unreachable("unimplemented");
}

//===----------------------------------------------------------------------===//
// Initialization
//===----------------------------------------------------------------------===//

// Factory callback for TargetRegistry: create the AMDGPU symbolizer.
static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
                              LLVMOpInfoCallback /*GetOpInfo*/,
                              LLVMSymbolLookupCallback /*SymbolLookUp*/,
                              void *DisInfo,
                              MCContext *Ctx,
                              std::unique_ptr<MCRelocationInfo> &&RelInfo) {
  return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
}

// Factory callback for TargetRegistry: create the AMDGPU disassembler.
static MCDisassembler *createAMDGPUDisassembler(const Target &T,
                                                const MCSubtargetInfo &STI,
                                                MCContext &Ctx) {
  return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
}

// Plugin entry point: register disassembler and symbolizer for the GCN target.
extern "C" void LLVMInitializeAMDGPUDisassembler() {
  TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
                                         createAMDGPUDisassembler);
  TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
                                       createAMDGPUSymbolizer);
}