//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA --------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// This file contains definitions for the AMDGPU ISA disassembler.
//
//===----------------------------------------------------------------------===//

// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?

#include "Disassembler/AMDGPUDisassembler.h"
#include "AMDGPU.h"
#include "AMDGPURegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm-c/Disassembler.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <tuple>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "amdgpu-disassembler"

using DecodeStatus = llvm::MCDisassembler::DecodeStatus;

inline static MCDisassembler::DecodeStatus
addOperand(MCInst &Inst, const MCOperand& Opnd) {
  Inst.addOperand(Opnd);
  return Opnd.isValid() ?
    MCDisassembler::Success :
    MCDisassembler::SoftFail;
}

static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
                                uint16_t NameIdx) {
  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx);
  if (OpIdx != -1) {
    auto I = MI.begin();
    std::advance(I, OpIdx);
    MI.insert(I, Op);
  }
  return OpIdx;
}
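// SOPP branch targets are 16-bit signed word offsets, relative to the
// instruction that follows the branch (Addr + 4). For example, an encoded
// Imm of 0xFFFF (-1 word) at address A resolves to A + 4 + (-1 * 4) == A,
// i.e. a branch to the instruction itself.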
static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm,
                                       uint64_t Addr, const void *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);

  APInt SignedOffset(18, Imm * 4, true);
  int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2))
    return MCDisassembler::Success;
  return addOperand(Inst, MCOperand::createImm(Imm));
}

#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
static DecodeStatus StaticDecoderName(MCInst &Inst, \
                                      unsigned Imm, \
                                      uint64_t /*Addr*/, \
                                      const void *Decoder) { \
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); \
  return addOperand(Inst, DAsm->DecoderName(Imm)); \
}

#define DECODE_OPERAND_REG(RegClass) \
DECODE_OPERAND(Decode##RegClass##RegisterClass, decodeOperand_##RegClass)
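// For example, DECODE_OPERAND_REG(VGPR_32) expands to a static callback named
// DecodeVGPR_32RegisterClass that forwards to
// AMDGPUDisassembler::decodeOperand_VGPR_32. The generated decoder tables in
// AMDGPUGenDisassemblerTables.inc reference the callbacks by these names.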
DECODE_OPERAND_REG(VGPR_32)
DECODE_OPERAND_REG(VS_32)
DECODE_OPERAND_REG(VS_64)
DECODE_OPERAND_REG(VS_128)

DECODE_OPERAND_REG(VReg_64)
DECODE_OPERAND_REG(VReg_96)
DECODE_OPERAND_REG(VReg_128)

DECODE_OPERAND_REG(SReg_32)
DECODE_OPERAND_REG(SReg_32_XM0_XEXEC)
DECODE_OPERAND_REG(SReg_32_XEXEC_HI)
DECODE_OPERAND_REG(SReg_64)
DECODE_OPERAND_REG(SReg_64_XEXEC)
DECODE_OPERAND_REG(SReg_128)
DECODE_OPERAND_REG(SReg_256)
DECODE_OPERAND_REG(SReg_512)

static DecodeStatus decodeOperand_VSrc16(MCInst &Inst,
                                         unsigned Imm,
                                         uint64_t Addr,
                                         const void *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm));
}

static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst,
                                           unsigned Imm,
                                           uint64_t Addr,
                                           const void *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm));
}

#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)

#include "AMDGPUGenDisassemblerTables.inc"

//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
  assert(Bytes.size() >= sizeof(T));
  const auto Res = support::endian::read<T, support::endianness::little>(Bytes.data());
  Bytes = Bytes.slice(sizeof(T));
  return Res;
}
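// Speculatively decode Inst against one generated table. HasLiteral is reset
// because decoding an operand may consume a trailing literal dword from Bytes
// (see decodeLiteralConstant); on failure Bytes is restored so the next table
// can retry from the same position.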
DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t* Table,
                                               MCInst &MI,
                                               uint64_t Inst,
                                               uint64_t Address) const {
  assert(MI.getOpcode() == 0);
  assert(MI.getNumOperands() == 0);
  MCInst TmpInst;
  HasLiteral = false;
  const auto SavedBytes = Bytes;
  if (decodeInstruction(Table, TmpInst, Inst, Address, this, STI)) {
    MI = TmpInst;
    return MCDisassembler::Success;
  }
  Bytes = SavedBytes;
  return MCDisassembler::Fail;
}

DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                ArrayRef<uint8_t> Bytes_,
                                                uint64_t Address,
                                                raw_ostream &WS,
                                                raw_ostream &CS) const {
  CommentStream = &CS;
  bool IsSDWA = false;

  // ToDo: AMDGPUDisassembler supports only VI ISA.
  if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding])
    report_fatal_error("Disassembly not yet supported for subtarget");

  const unsigned MaxInstBytesNum = (std::min)((size_t)8, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  DecodeStatus Res = MCDisassembler::Fail;
  do {
    // ToDo: it would be better to select the encoding length from some bit
    // predicate, but no such predicate is known yet, so try everything we can.

    // Try to decode DPP and SDWA first to solve the conflict with the VOP1
    // and VOP2 encodings.
    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);
      Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address);
      if (Res) break;

      Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address);
      if (Res) { IsSDWA = true; break; }

      Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address);
      if (Res) { IsSDWA = true; break; }

      if (STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem]) {
        Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address);
        if (Res)
          break;
      }

      // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16
      // and v_mad_mixhi_f16 encodings for FMA variants. Try to decode using
      // this special table first so we print the correct name.
      if (STI.getFeatureBits()[AMDGPU::FeatureFmaMixInsts]) {
        Res = tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address);
        if (Res)
          break;
      }
    }

    // Reinitialize Bytes as DPP64 could have eaten too much.
    Bytes = Bytes_.slice(0, MaxInstBytesNum);

    // Try to decode a 32-bit instruction.
    if (Bytes.size() < 4) break;
    const uint32_t DW = eatBytes<uint32_t>(Bytes);
    Res = tryDecodeInst(DecoderTableVI32, MI, DW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address);
    if (Res) break;

    // Otherwise fall back to the 64-bit encodings.
    if (Bytes.size() < 4) break;
    const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;
    Res = tryDecodeInst(DecoderTableVI64, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address);
  } while (false);

  if (Res && (MI.getOpcode() == AMDGPU::V_MAC_F32_e64_vi ||
              MI.getOpcode() == AMDGPU::V_MAC_F32_e64_si ||
              MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi ||
              MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_vi)) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) {
    Res = convertMIMGInst(MI);
  }

  if (Res && IsSDWA)
    Res = convertSDWAInst(MI);

  // If the opcode was not recognized, assume a Size of 4 bytes (unless there
  // are fewer bytes left).
  Size = Res ? (MaxInstBytesNum - Bytes.size())
             : std::min((size_t)4, Bytes_.size());
  return Res;
}

DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
  if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) {
    if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst) != -1)
      // VOPC - insert clamp
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
  } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
    if (SDst != -1) {
      // VOPC - insert VCC register as sdst
      insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
                           AMDGPU::OpName::sdst);
    } else {
      // VOP1/2 - insert omod if present in instruction
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
    }
  }
  return MCDisassembler::Success;
}
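// convertMIMGInst rewrites the vdata (and, for atomics, vdst) operand to the
// register tuple implied by dmask. For example, an image load encoded with
// dmask = 0b0111 has three enabled channels, so the encoded single-dword
// vdata is widened to the three-dword tuple starting at the same VGPR
// (provided such a tuple exists).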
// Note that the MIMG format provides no information about VADDR size.
// Consequently, decoded instructions always show the address as if it were
// one dword, which may not actually be the case.
DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {

  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);

  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::tfe);
  int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::d16);

  assert(VDataIdx != -1);
  assert(DMaskIdx != -1);
  assert(TFEIdx != -1);

  bool IsAtomic = (VDstIdx != -1);
  bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4;

  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  if (DMask == 0)
    return MCDisassembler::Success;

  unsigned DstSize = IsGather4 ? 4 : countPopulation(DMask);
  if (DstSize == 1)
    return MCDisassembler::Success;

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI)) {
    DstSize = (DstSize + 1) / 2;
  }

  // FIXME: Add tfe support.
  if (MI.getOperand(TFEIdx).getImm())
    return MCDisassembler::Success;

  int NewOpcode = -1;

  if (IsGather4) {
    if (D16 && AMDGPU::hasPackedD16(STI))
      NewOpcode = AMDGPU::getMaskedMIMGOp(MI.getOpcode(), 2);
    else
      return MCDisassembler::Success;
  } else {
    NewOpcode = AMDGPU::getMaskedMIMGOp(MI.getOpcode(), DstSize);
    if (NewOpcode == -1)
      return MCDisassembler::Success;
  }

  auto RCID = MCII->get(NewOpcode).OpInfo[VDataIdx].RegClass;

  // Get the first subregister of VData.
  unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
  unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
  Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;

  // Widen the register to the correct number of enabled channels.
  auto NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
                                          &MRI.getRegClass(RCID));
  if (NewVdata == AMDGPU::NoRegister) {
    // It's possible to encode this such that the low register + enabled
    // components exceeds the register count.
    return MCDisassembler::Success;
  }

  MI.setOpcode(NewOpcode);
  // vaddr will always appear as a single VGPR. This will look different from
  // how it is usually emitted because the number of register components is
  // not in the instruction encoding.
  MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);

  if (IsAtomic) {
    // Atomic operations have an additional operand (a copy of data).
    MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
  }

  return MCDisassembler::Success;
}

const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
  return getContext().getRegisterInfo()->
    getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
}

inline
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine& ErrMsg) const {
  *CommentStream << "Error: " + ErrMsg;

  // ToDo: add support for error operands to MCInst.h
  // return MCOperand::createError(V);
  return MCOperand();
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
  return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
                                               unsigned Val) const {
  const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
                           ": unknown register " + Twine(Val));
  return createRegOperand(RegCl.getRegister(Val));
}

inline
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI has 102.
  // Valery: here we accept as much as we can; let the assembler sort it out.
  int shift = 0;
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
  // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  default:
    llvm_unreachable("unhandled register class");
  }

  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }

  return createRegOperand(SRegClassID, Val >> shift);
}
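// Most of the decodeOperand_* entry points below just pick an operand width
// and defer to decodeSrcOp, which resolves the 9-bit source encoding to a
// VGPR, SGPR, TTMP, inline constant, literal, or special register; the pure
// VGPR classes map directly to a register.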
MCOperand AMDGPUDisassembler::decodeOperand_VS_32(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VS_64(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VS_128(unsigned Val) const {
  return decodeSrcOp(OPW128, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VSrc16(unsigned Val) const {
  return decodeSrcOp(OPW16, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VSrcV216(unsigned Val) const {
  return decodeSrcOp(OPWV216, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const {
  // Some instructions have operand restrictions beyond what the encoding
  // allows. Some ordinarily VSrc_32 operands are VGPR_32, so clear the extra
  // high bit.
  Val &= 255;

  return createRegOperand(AMDGPU::VGPR_32RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_64(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_64RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_96(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_96RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_128(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_128RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32(unsigned Val) const {
  // The TableGen'd disassembler doesn't care about operand types, leaving
  // only the register class, so an SSrc_32 operand turns into SReg_32.
  // Therefore we accept immediates and literals here as well.
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XM0_XEXEC(
  unsigned Val) const {
  // SReg_32_XM0_XEXEC is SReg_32 without M0 or EXEC_LO/EXEC_HI.
  return decodeOperand_SReg_32(Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XEXEC_HI(
  unsigned Val) const {
  // SReg_32_XEXEC_HI is SReg_32 without EXEC_HI.
  return decodeOperand_SReg_32(Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_64_XEXEC(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_128(unsigned Val) const {
  return decodeSrcOp(OPW128, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_256(unsigned Val) const {
  return decodeDstOp(OPW256, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const {
  return decodeDstOp(OPW512, Val);
}
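// A literal constant occupies the trailing 32-bit dword of the instruction.
// HasLiteral caches the value so that repeated uses of the literal slot
// within one instruction consume the dword only once.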
MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
  // For now all literal constants are assumed to be unsigned integers.
  // ToDo: deal with signed/unsigned 64-bit integer constants
  // ToDo: deal with float/double constants
  if (!HasLiteral) {
    if (Bytes.size() < 4) {
      return errOperand(0, "cannot read literal, inst bytes left " +
                        Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = eatBytes<uint32_t>(Bytes);
  }
  return MCOperand::createImm(Literal);
}

MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
  using namespace AMDGPU::EncValues;

  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  // The casts to int64_t prevent unsigned wraparound in the subtraction.
  return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
    (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
    (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
}
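// Inline constant encodings, as handled above and below: integer encodings
// from INLINE_INTEGER_C_MIN up to INLINE_INTEGER_C_POSITIVE_MAX map to
// 0..64, and the remaining integer encodings map to -1..-16 (e.g. 193
// decodes to -1). The floating-point encodings 240..247 select +-0.5,
// +-1.0, +-2.0 and +-4.0, and 248 selects 1/(2*pi); the helpers below
// return the matching IEEE bit pattern for each operand width, e.g. 0x3C00
// is 1.0 in half precision.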
static int64_t getInlineImmVal32(unsigned Imm) {
  switch (Imm) {
  case 240:
    return FloatToBits(0.5f);
  case 241:
    return FloatToBits(-0.5f);
  case 242:
    return FloatToBits(1.0f);
  case 243:
    return FloatToBits(-1.0f);
  case 244:
    return FloatToBits(2.0f);
  case 245:
    return FloatToBits(-2.0f);
  case 246:
    return FloatToBits(4.0f);
  case 247:
    return FloatToBits(-4.0f);
  case 248: // 1 / (2 * PI)
    return 0x3e22f983;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmVal64(unsigned Imm) {
  switch (Imm) {
  case 240:
    return DoubleToBits(0.5);
  case 241:
    return DoubleToBits(-0.5);
  case 242:
    return DoubleToBits(1.0);
  case 243:
    return DoubleToBits(-1.0);
  case 244:
    return DoubleToBits(2.0);
  case 245:
    return DoubleToBits(-2.0);
  case 246:
    return DoubleToBits(4.0);
  case 247:
    return DoubleToBits(-4.0);
  case 248: // 1 / (2 * PI)
    return 0x3fc45f306dc9c882;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmVal16(unsigned Imm) {
  switch (Imm) {
  case 240:
    return 0x3800;
  case 241:
    return 0xB800;
  case 242:
    return 0x3C00;
  case 243:
    return 0xBC00;
  case 244:
    return 0x4000;
  case 245:
    return 0xC000;
  case 246:
    return 0x4400;
  case 247:
    return 0xC400;
  case 248: // 1 / (2 * PI)
    return 0x3118;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

MCOperand AMDGPUDisassembler::decodeFPImmed(OpWidthTy Width, unsigned Imm) {
  assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN
      && Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);

  // ToDo: case 248: 1/(2*PI) is allowed only on VI.
  switch (Width) {
  case OPW32:
    return MCOperand::createImm(getInlineImmVal32(Imm));
  case OPW64:
    return MCOperand::createImm(getInlineImmVal64(Imm));
  case OPW16:
  case OPWV216:
    return MCOperand::createImm(getInlineImmVal16(Imm));
  default:
    llvm_unreachable("implement me");
  }
}

unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall through
  case OPW32:
  case OPW16:
  case OPWV216:
    return VGPR_32RegClassID;
  case OPW64: return VReg_64RegClassID;
  case OPW128: return VReg_128RegClassID;
  }
}

unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall through
  case OPW32:
  case OPW16:
  case OPWV216:
    return SGPR_32RegClassID;
  case OPW64: return SGPR_64RegClassID;
  case OPW128: return SGPR_128RegClassID;
  case OPW256: return SGPR_256RegClassID;
  case OPW512: return SGPR_512RegClassID;
  }
}

unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall through
  case OPW32:
  case OPW16:
  case OPWV216:
    return TTMP_32RegClassID;
  case OPW64: return TTMP_64RegClassID;
  case OPW128: return TTMP_128RegClassID;
  case OPW256: return TTMP_256RegClassID;
  case OPW512: return TTMP_512RegClassID;
  }
}
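// Trap temporary (TTMP) registers occupy a subtarget-dependent encoding
// range; getTTmpIdx returns the index within that range, or -1 if Val does
// not encode a TTMP register.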
int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
  using namespace AMDGPU::EncValues;

  unsigned TTmpMin = isGFX9() ? TTMP_GFX9_MIN : TTMP_VI_MIN;
  unsigned TTmpMax = isGFX9() ? TTMP_GFX9_MAX : TTMP_VI_MAX;

  return (TTmpMin <= Val && Val <= TTmpMax) ? Val - TTmpMin : -1;
}

MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width,
                                          unsigned Val) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 512); // 9-bit encoding (enum9)

  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
    return createRegOperand(getVgprClassId(Width), Val - VGPR_MIN);
  }
  if (Val <= SGPR_MAX) {
    // SGPR_MIN is zero, so "SGPR_MIN <= Val" is always true and would cause
    // a compile warning.
    assert(SGPR_MIN == 0);
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
    return decodeIntImmed(Val);

  if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
    return decodeFPImmed(Width, Val);

  if (Val == LITERAL_CONST)
    return decodeLiteralConstant();

  switch (Width) {
  case OPW32:
  case OPW16:
  case OPWV216:
    return decodeSpecialReg32(Val);
  case OPW64:
    return decodeSpecialReg64(Val);
  default:
    llvm_unreachable("unexpected immediate type");
  }
}

MCOperand AMDGPUDisassembler::decodeDstOp(const OpWidthTy Width,
                                          unsigned Val) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 128);
  assert(Width == OPW256 || Width == OPW512);

  if (Val <= SGPR_MAX) {
    // SGPR_MIN is zero, so "SGPR_MIN <= Val" is always true and would cause
    // a compile warning.
    assert(SGPR_MIN == 0);
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  llvm_unreachable("unknown dst register");
}
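// Source encodings that are not VGPRs, SGPRs, TTMPs, inline constants, or a
// literal name individual hardware registers. A 32-bit use selects one half
// of a register pair (e.g. encoding 106 is VCC_LO), while the same encoding
// in a 64-bit operand selects the full pair (VCC).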
MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(FLAT_SCR_LO);
  case 103: return createRegOperand(FLAT_SCR_HI);
  case 104: return createRegOperand(XNACK_MASK_LO);
  case 105: return createRegOperand(XNACK_MASK_HI);
  case 106: return createRegOperand(VCC_LO);
  case 107: return createRegOperand(VCC_HI);
  case 108: assert(!isGFX9()); return createRegOperand(TBA_LO);
  case 109: assert(!isGFX9()); return createRegOperand(TBA_HI);
  case 110: assert(!isGFX9()); return createRegOperand(TMA_LO);
  case 111: assert(!isGFX9()); return createRegOperand(TMA_HI);
  case 124: return createRegOperand(M0);
  case 126: return createRegOperand(EXEC_LO);
  case 127: return createRegOperand(EXEC_HI);
  case 235: return createRegOperand(SRC_SHARED_BASE);
  case 236: return createRegOperand(SRC_SHARED_LIMIT);
  case 237: return createRegOperand(SRC_PRIVATE_BASE);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
    // TODO: SRC_POPS_EXITING_WAVE_ID
  // ToDo: no support for vccz register
  case 251: break;
  // ToDo: no support for execz register
  case 252: break;
  case 253: return createRegOperand(SCC);
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(FLAT_SCR);
  case 104: return createRegOperand(XNACK_MASK);
  case 106: return createRegOperand(VCC);
  case 108: assert(!isGFX9()); return createRegOperand(TBA);
  case 110: assert(!isGFX9()); return createRegOperand(TMA);
  case 126: return createRegOperand(EXEC);
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
                                            const unsigned Val) const {
  using namespace AMDGPU::SDWA;
  using namespace AMDGPU::EncValues;

  if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) {
    // XXX: the static_cast<int> avoids a "comparison with unsigned is always
    // true" warning.
    if (SDWA9EncValues::SRC_VGPR_MIN <= static_cast<int>(Val) &&
        Val <= SDWA9EncValues::SRC_VGPR_MAX) {
      return createRegOperand(getVgprClassId(Width),
                              Val - SDWA9EncValues::SRC_VGPR_MIN);
    }
    if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_SGPR_MAX) {
      return createSRegOperand(getSgprClassId(Width),
                               Val - SDWA9EncValues::SRC_SGPR_MIN);
    }
    if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_TTMP_MAX) {
      return createSRegOperand(getTtmpClassId(Width),
                               Val - SDWA9EncValues::SRC_TTMP_MIN);
    }

    const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

    if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
      return decodeIntImmed(SVal);

    if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
      return decodeFPImmed(Width, SVal);

    return decodeSpecialReg32(SVal);
  } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
    return createRegOperand(getVgprClassId(Width), Val);
  }
  llvm_unreachable("unsupported target");
}

MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
  return decodeSDWASrc(OPW16, Val);
}

MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
  return decodeSDWASrc(OPW32, Val);
}
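// On GFX9 the SDWA VOPC destination field carries a flag bit
// (VOPC_DST_VCC_MASK): when the bit is clear the destination is the implicit
// VCC; when it is set, the masked low bits select an SGPR pair, TTMP pair,
// or special 64-bit register.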
MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
  using namespace AMDGPU::SDWA;

  assert(STI.getFeatureBits()[AMDGPU::FeatureGFX9] &&
         "SDWAVopcDst should be present only on GFX9");
  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;

    int TTmpIdx = getTTmpIdx(Val);
    if (TTmpIdx >= 0) {
      return createSRegOperand(getTtmpClassId(OPW64), TTmpIdx);
    } else if (Val > AMDGPU::EncValues::SGPR_MAX) {
      return decodeSpecialReg64(Val);
    } else {
      return createSRegOperand(getSgprClassId(OPW64), Val);
    }
  } else {
    return createRegOperand(AMDGPU::VCC);
  }
}

bool AMDGPUDisassembler::isVI() const {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

bool AMDGPUDisassembler::isGFX9() const {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}

//===----------------------------------------------------------------------===//
// AMDGPUSymbolizer
//===----------------------------------------------------------------------===//

// Try to find a symbol name for the specified label.
bool AMDGPUSymbolizer::tryAddingSymbolicOperand(MCInst &Inst,
                                raw_ostream &/*cStream*/, int64_t Value,
                                uint64_t /*Address*/, bool IsBranch,
                                uint64_t /*Offset*/, uint64_t /*InstSize*/) {
  using SymbolInfoTy = std::tuple<uint64_t, StringRef, uint8_t>;
  using SectionSymbolsTy = std::vector<SymbolInfoTy>;

  if (!IsBranch) {
    return false;
  }

  auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
  if (!Symbols)
    return false;

  auto Result = std::find_if(Symbols->begin(), Symbols->end(),
                             [Value](const SymbolInfoTy& Val) {
                               return std::get<0>(Val) == static_cast<uint64_t>(Value)
                                   && std::get<2>(Val) == ELF::STT_NOTYPE;
                             });
  if (Result != Symbols->end()) {
    auto *Sym = Ctx.getOrCreateSymbol(std::get<1>(*Result));
    const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
    Inst.addOperand(MCOperand::createExpr(Add));
    return true;
  }
  return false;
}

void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
                                                       int64_t Value,
                                                       uint64_t Address) {
  llvm_unreachable("unimplemented");
}

//===----------------------------------------------------------------------===//
// Initialization
//===----------------------------------------------------------------------===//

static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
                              LLVMOpInfoCallback /*GetOpInfo*/,
                              LLVMSymbolLookupCallback /*SymbolLookUp*/,
                              void *DisInfo,
                              MCContext *Ctx,
                              std::unique_ptr<MCRelocationInfo> &&RelInfo) {
  return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
}

static MCDisassembler *createAMDGPUDisassembler(const Target &T,
                                                const MCSubtargetInfo &STI,
                                                MCContext &Ctx) {
  return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
}

extern "C" void LLVMInitializeAMDGPUDisassembler() {
  TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
                                         createAMDGPUDisassembler);
  TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
                                       createAMDGPUSymbolizer);
}