//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// This file contains the definition of the AMDGPU ISA disassembler.
//
//===----------------------------------------------------------------------===//

// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?

#include "Disassembler/AMDGPUDisassembler.h"
#include "AMDGPU.h"
#include "AMDGPURegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm-c/Disassembler.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <tuple>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "amdgpu-disassembler"

using DecodeStatus = llvm::MCDisassembler::DecodeStatus;

inline static MCDisassembler::DecodeStatus
addOperand(MCInst &Inst, const MCOperand& Opnd) {
  Inst.addOperand(Opnd);
  return Opnd.isValid() ?
    MCDisassembler::Success :
    MCDisassembler::SoftFail;
}

static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
                                uint16_t NameIdx) {
  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx);
  if (OpIdx != -1) {
    auto I = MI.begin();
    std::advance(I, OpIdx);
    MI.insert(I, Op);
  }
  return OpIdx;
}
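// Illustrative example for the decoder below: a raw Imm field of 0xFFFF
// encodes simm16 == -1. Scaled by the instruction width it becomes
// sext18(0xFFFF * 4) == -4, so the computed target is
// Addr + 4 + (-4) == Addr, i.e. a branch to its own address.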
static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm,
                                       uint64_t Addr, const void *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);

  // Our branches take a simm16, but we need two extra bits to account for the
  // factor of 4.
  APInt SignedOffset(18, Imm * 4, true);
  int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();

  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2))
    return MCDisassembler::Success;
  return addOperand(Inst, MCOperand::createImm(Imm));
}

#define DECODE_OPERAND(StaticDecoderName, DecoderName) \
static DecodeStatus StaticDecoderName(MCInst &Inst, \
                                      unsigned Imm, \
                                      uint64_t /*Addr*/, \
                                      const void *Decoder) { \
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder); \
  return addOperand(Inst, DAsm->DecoderName(Imm)); \
}

#define DECODE_OPERAND_REG(RegClass) \
DECODE_OPERAND(Decode##RegClass##RegisterClass, decodeOperand_##RegClass)
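// For illustration: DECODE_OPERAND_REG(VGPR_32) expands to a static function
// DecodeVGPR_32RegisterClass that forwards to
// AMDGPUDisassembler::decodeOperand_VGPR_32. These are the decoder callbacks
// referenced by name from the TableGen'erated tables included below.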
DECODE_OPERAND_REG(VGPR_32)
DECODE_OPERAND_REG(VRegOrLds_32)
DECODE_OPERAND_REG(VS_32)
DECODE_OPERAND_REG(VS_64)
DECODE_OPERAND_REG(VS_128)

DECODE_OPERAND_REG(VReg_64)
DECODE_OPERAND_REG(VReg_96)
DECODE_OPERAND_REG(VReg_128)

DECODE_OPERAND_REG(SReg_32)
DECODE_OPERAND_REG(SReg_32_XM0_XEXEC)
DECODE_OPERAND_REG(SReg_32_XEXEC_HI)
DECODE_OPERAND_REG(SRegOrLds_32)
DECODE_OPERAND_REG(SReg_64)
DECODE_OPERAND_REG(SReg_64_XEXEC)
DECODE_OPERAND_REG(SReg_128)
DECODE_OPERAND_REG(SReg_256)
DECODE_OPERAND_REG(SReg_512)

static DecodeStatus decodeOperand_VSrc16(MCInst &Inst,
                                         unsigned Imm,
                                         uint64_t Addr,
                                         const void *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm));
}

static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst,
                                           unsigned Imm,
                                           uint64_t Addr,
                                           const void *Decoder) {
  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
  return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm));
}

#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)

DECODE_SDWA(Src32)
DECODE_SDWA(Src16)
DECODE_SDWA(VopcDst)

#include "AMDGPUGenDisassemblerTables.inc"

//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
  assert(Bytes.size() >= sizeof(T));
  const auto Res = support::endian::read<T, support::endianness::little>(
      Bytes.data());
  Bytes = Bytes.slice(sizeof(T));
  return Res;
}

DecodeStatus AMDGPUDisassembler::tryDecodeInst(const uint8_t* Table,
                                               MCInst &MI,
                                               uint64_t Inst,
                                               uint64_t Address) const {
  assert(MI.getOpcode() == 0);
  assert(MI.getNumOperands() == 0);
  MCInst TmpInst;
  HasLiteral = false;
  const auto SavedBytes = Bytes;
  if (decodeInstruction(Table, TmpInst, Inst, Address, this, STI)) {
    MI = TmpInst;
    return MCDisassembler::Success;
  }
  Bytes = SavedBytes;
  return MCDisassembler::Fail;
}
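// Illustrative driver (hypothetical, not part of this file): clients usually
// call getInstruction in a loop, advancing by the reported Size whether or
// not decoding succeeded:
//
//   ArrayRef<uint8_t> Data = ...;  // code bytes to disassemble
//   uint64_t Addr = 0;
//   while (!Data.empty()) {
//     MCInst MI;
//     uint64_t Size;
//     Disasm->getInstruction(MI, Size, Data, Addr, nulls(), nulls());
//     Data = Data.drop_front(Size);
//     Addr += Size;
//   }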
DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                                                ArrayRef<uint8_t> Bytes_,
                                                uint64_t Address,
                                                raw_ostream &WS,
                                                raw_ostream &CS) const {
  CommentStream = &CS;
  bool IsSDWA = false;

  // ToDo: AMDGPUDisassembler supports only VI ISA.
  if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding])
    report_fatal_error("Disassembly not yet supported for subtarget");

  const unsigned MaxInstBytesNum = (std::min)((size_t)8, Bytes_.size());
  Bytes = Bytes_.slice(0, MaxInstBytesNum);

  DecodeStatus Res = MCDisassembler::Fail;
  do {
    // ToDo: it would be better to switch encoding length using some bit
    // predicate, but such a predicate is not known yet, so try everything
    // we can.

    // Try to decode DPP and SDWA first to solve the conflict with the VOP1
    // and VOP2 encodings.
    if (Bytes.size() >= 8) {
      const uint64_t QW = eatBytes<uint64_t>(Bytes);
      Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address);
      if (Res) break;

      Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address);
      if (Res) { IsSDWA = true; break; }

      Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address);
      if (Res) { IsSDWA = true; break; }

      if (STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem]) {
        Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address);
        if (Res)
          break;
      }

      // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16
      // and v_mad_mixhi_f16 opcodes for FMA variants. Try to decode using
      // this special table first so we print the correct name.
      if (STI.getFeatureBits()[AMDGPU::FeatureFmaMixInsts]) {
        Res = tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address);
        if (Res)
          break;
      }
    }

    // Reinitialize Bytes, as DPP64 could have eaten too much.
    Bytes = Bytes_.slice(0, MaxInstBytesNum);

    // Try to decode a 32-bit instruction.
    if (Bytes.size() < 4) break;
    const uint32_t DW = eatBytes<uint32_t>(Bytes);
    Res = tryDecodeInst(DecoderTableVI32, MI, DW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address);
    if (Res) break;

    // Otherwise try the 64-bit encodings built from two dwords.
    if (Bytes.size() < 4) break;
    const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;
    Res = tryDecodeInst(DecoderTableVI64, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address);
    if (Res) break;

    Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address);
  } while (false);

  if (Res && (MI.getOpcode() == AMDGPU::V_MAC_F32_e64_vi ||
              MI.getOpcode() == AMDGPU::V_MAC_F32_e64_si ||
              MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi ||
              MI.getOpcode() == AMDGPU::V_FMAC_F32_e64_vi)) {
    // Insert dummy unused src2_modifiers.
    insertNamedMCOperand(MI, MCOperand::createImm(0),
                         AMDGPU::OpName::src2_modifiers);
  }

  if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG)) {
    Res = convertMIMGInst(MI);
  }

  if (Res && IsSDWA)
    Res = convertSDWAInst(MI);

  // If the opcode was not recognized, we'll assume a Size of 4 bytes
  // (unless there are fewer bytes left).
  Size = Res ? (MaxInstBytesNum - Bytes.size())
             : std::min((size_t)4, Bytes_.size());
  return Res;
}
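// Note: on GFX9 a VOPC SDWA instruction decodes with an explicit sdst but
// without a clamp operand, so convertSDWAInst inserts a zero clamp to match
// the operand layout of the MCInstrDesc; on VI, the VCC implied by the
// encoding is materialized as the sdst operand instead.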
DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
  if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) {
    if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst) != -1)
      // VOPC - insert clamp
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
  } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
    int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
    if (SDst != -1) {
      // VOPC - insert VCC register as sdst
      insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
                           AMDGPU::OpName::sdst);
    } else {
      // VOP1/2 - insert omod if present in instruction
      insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
    }
  }
  return MCDisassembler::Success;
}

// Note that the MIMG format provides no information about the VADDR size.
// Consequently, decoded instructions always show the address as if it were a
// single dword, which may not match the actual encoding.
DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
  int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                           AMDGPU::OpName::vdst);

  int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::vdata);

  int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                            AMDGPU::OpName::dmask);

  int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::tfe);
  int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                          AMDGPU::OpName::d16);

  assert(VDataIdx != -1);
  assert(DMaskIdx != -1);
  assert(TFEIdx != -1);

  bool IsAtomic = (VDstIdx != -1);
  bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4;

  unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
  if (DMask == 0)
    return MCDisassembler::Success;

  unsigned DstSize = IsGather4 ? 4 : countPopulation(DMask);
  if (DstSize == 1)
    return MCDisassembler::Success;

  bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
  if (D16 && AMDGPU::hasPackedD16(STI)) {
    DstSize = (DstSize + 1) / 2;
  }

  // FIXME: Add tfe support.
  if (MI.getOperand(TFEIdx).getImm())
    return MCDisassembler::Success;
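  // For example, an image load with dmask == 0b0111 enables three channels,
  // so DstSize == 3 and the opcode is remapped below to the variant whose
  // vdata register class covers three dwords (e.g. VReg_96).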
  int NewOpcode = -1;

  if (IsGather4) {
    if (D16 && AMDGPU::hasPackedD16(STI))
      NewOpcode = AMDGPU::getMaskedMIMGOp(MI.getOpcode(), 2);
    else
      return MCDisassembler::Success;
  } else {
    NewOpcode = AMDGPU::getMaskedMIMGOp(MI.getOpcode(), DstSize);
    if (NewOpcode == -1)
      return MCDisassembler::Success;
  }

  auto RCID = MCII->get(NewOpcode).OpInfo[VDataIdx].RegClass;

  // Get the first subregister of VData.
  unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
  unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
  Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;

  // Widen the register to the correct number of enabled channels.
  auto NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
                                          &MRI.getRegClass(RCID));
  if (NewVdata == AMDGPU::NoRegister) {
    // It's possible to encode this such that the low register + enabled
    // components exceeds the register count.
    return MCDisassembler::Success;
  }

  MI.setOpcode(NewOpcode);
  // vaddr will always appear as a single VGPR. This will look different from
  // how it is usually emitted because the number of register components is
  // not in the instruction encoding.
  MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);

  if (IsAtomic) {
    // Atomic operations have an additional operand (a copy of data).
    MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
  }

  return MCDisassembler::Success;
}

const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
  return getContext().getRegisterInfo()->
    getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
}

inline
MCOperand AMDGPUDisassembler::errOperand(unsigned V,
                                         const Twine& ErrMsg) const {
  *CommentStream << "Error: " + ErrMsg;

  // ToDo: add support for error operands to MCInst.h
  // return MCOperand::createError(V);
  return MCOperand();
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
  return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
}

inline
MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
                                               unsigned Val) const {
  const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
  if (Val >= RegCl.getNumRegs())
    return errOperand(Val, Twine(getRegClassName(RegClassID)) +
                           ": unknown register " + Twine(Val));
  return createRegOperand(RegCl.getRegister(Val));
}
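// Scalar register operands are encoded with an alignment matching their
// size: the raw value is shifted right below, so, for example, an odd Val
// for a 64-bit class (an SGPR pair such as s[3:4]) triggers the alignment
// warning.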
inline
MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
                                                unsigned Val) const {
  // ToDo: SI/CI have 104 SGPRs, VI has 102.
  // Valery: here we accept as much as we can and let the assembler sort it
  // out.
  int shift = 0;
  switch (SRegClassID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::TTMP_32RegClassID:
    break;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::TTMP_64RegClassID:
    shift = 1;
    break;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::TTMP_128RegClassID:
  // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::TTMP_256RegClassID:
  // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::TTMP_512RegClassID:
    shift = 2;
    break;
  // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
  // this bundle?
  default:
    llvm_unreachable("unhandled register class");
  }

  if (Val % (1 << shift)) {
    *CommentStream << "Warning: " << getRegClassName(SRegClassID)
                   << ": scalar reg isn't aligned " << Val;
  }

  return createRegOperand(SRegClassID, Val >> shift);
}

MCOperand AMDGPUDisassembler::decodeOperand_VS_32(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VS_64(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VS_128(unsigned Val) const {
  return decodeSrcOp(OPW128, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VSrc16(unsigned Val) const {
  return decodeSrcOp(OPW16, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VSrcV216(unsigned Val) const {
  return decodeSrcOp(OPWV216, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const {
  // Some instructions have operand restrictions beyond what the encoding
  // allows: some operands that are ordinarily VSrc_32 are really VGPR_32, so
  // clear the extra high bit.
  Val &= 255;

  return createRegOperand(AMDGPU::VGPR_32RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VRegOrLds_32(unsigned Val) const {
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_64(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_64RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_96(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_96RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_VReg_128(unsigned Val) const {
  return createRegOperand(AMDGPU::VReg_128RegClassID, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32(unsigned Val) const {
  // The table-gen'erated disassembler doesn't care about operand types,
  // leaving only the register class, so an SSrc_32 operand turns into
  // SReg_32; therefore we accept immediates and literals here as well.
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XM0_XEXEC(
  unsigned Val) const {
  // SReg_32_XM0_XEXEC is SReg_32 without M0 or EXEC_LO/EXEC_HI.
  return decodeOperand_SReg_32(Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XEXEC_HI(
  unsigned Val) const {
  // SReg_32_XEXEC_HI is SReg_32 without EXEC_HI.
  return decodeOperand_SReg_32(Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SRegOrLds_32(unsigned Val) const {
  // See the comment in decodeOperand_SReg_32: register-class-only matching
  // means we accept immediates and literals here as well.
  return decodeSrcOp(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_64_XEXEC(unsigned Val) const {
  return decodeSrcOp(OPW64, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_128(unsigned Val) const {
  return decodeSrcOp(OPW128, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_256(unsigned Val) const {
  return decodeDstOp(OPW256, Val);
}

MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const {
  return decodeDstOp(OPW512, Val);
}

MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
  // For now all literal constants are assumed to be unsigned integers.
  // ToDo: deal with signed/unsigned 64-bit integer constants.
  // ToDo: deal with float/double constants.
  if (!HasLiteral) {
    if (Bytes.size() < 4) {
      return errOperand(0, "cannot read literal, inst bytes left " +
                        Twine(Bytes.size()));
    }
    HasLiteral = true;
    Literal = eatBytes<uint32_t>(Bytes);
  }
  return MCOperand::createImm(Literal);
}

MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
  using namespace AMDGPU::EncValues;

  assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
  // The casts prevent negative overflow.
  return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
    (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
    (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
}
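// The getInlineImmVal* helpers below map the inline-constant encodings
// 240..248 to the bit patterns of the corresponding values. The 16-bit table
// returns IEEE half-precision patterns, e.g. 0x3800 == 0.5, 0xBC00 == -1.0,
// 0x4400 == 4.0, and 0x3118 ~= 0.1592 == 1/(2*PI).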
static int64_t getInlineImmVal32(unsigned Imm) {
  switch (Imm) {
  case 240:
    return FloatToBits(0.5f);
  case 241:
    return FloatToBits(-0.5f);
  case 242:
    return FloatToBits(1.0f);
  case 243:
    return FloatToBits(-1.0f);
  case 244:
    return FloatToBits(2.0f);
  case 245:
    return FloatToBits(-2.0f);
  case 246:
    return FloatToBits(4.0f);
  case 247:
    return FloatToBits(-4.0f);
  case 248: // 1 / (2 * PI)
    return 0x3e22f983;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmVal64(unsigned Imm) {
  switch (Imm) {
  case 240:
    return DoubleToBits(0.5);
  case 241:
    return DoubleToBits(-0.5);
  case 242:
    return DoubleToBits(1.0);
  case 243:
    return DoubleToBits(-1.0);
  case 244:
    return DoubleToBits(2.0);
  case 245:
    return DoubleToBits(-2.0);
  case 246:
    return DoubleToBits(4.0);
  case 247:
    return DoubleToBits(-4.0);
  case 248: // 1 / (2 * PI)
    return 0x3fc45f306dc9c882;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

static int64_t getInlineImmVal16(unsigned Imm) {
  switch (Imm) {
  case 240:
    return 0x3800;
  case 241:
    return 0xB800;
  case 242:
    return 0x3C00;
  case 243:
    return 0xBC00;
  case 244:
    return 0x4000;
  case 245:
    return 0xC000;
  case 246:
    return 0x4400;
  case 247:
    return 0xC400;
  case 248: // 1 / (2 * PI)
    return 0x3118;
  default:
    llvm_unreachable("invalid fp inline imm");
  }
}

MCOperand AMDGPUDisassembler::decodeFPImmed(OpWidthTy Width, unsigned Imm) {
  assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN
      && Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);

  // ToDo: case 248 (1/(2*PI)) is allowed only on VI.
  switch (Width) {
  case OPW32:
    return MCOperand::createImm(getInlineImmVal32(Imm));
  case OPW64:
    return MCOperand::createImm(getInlineImmVal64(Imm));
  case OPW16:
  case OPWV216:
    return MCOperand::createImm(getInlineImmVal16(Imm));
  default:
    llvm_unreachable("implement me");
  }
}

unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall through
  case OPW32:
  case OPW16:
  case OPWV216:
    return VGPR_32RegClassID;
  case OPW64: return VReg_64RegClassID;
  case OPW128: return VReg_128RegClassID;
  }
}

unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall through
  case OPW32:
  case OPW16:
  case OPWV216:
    return SGPR_32RegClassID;
  case OPW64: return SGPR_64RegClassID;
  case OPW128: return SGPR_128RegClassID;
  case OPW256: return SGPR_256RegClassID;
  case OPW512: return SGPR_512RegClassID;
  }
}

unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
  using namespace AMDGPU;

  assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
  switch (Width) {
  default: // fall through
  case OPW32:
  case OPW16:
  case OPWV216:
    return TTMP_32RegClassID;
  case OPW64: return TTMP_64RegClassID;
  case OPW128: return TTMP_128RegClassID;
  case OPW256: return TTMP_256RegClassID;
  case OPW512: return TTMP_512RegClassID;
  }
}

int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
  using namespace AMDGPU::EncValues;

  unsigned TTmpMin = isGFX9() ? TTMP_GFX9_MIN : TTMP_VI_MIN;
  unsigned TTmpMax = isGFX9() ? TTMP_GFX9_MAX : TTMP_VI_MAX;

  return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
}
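// decodeSrcOp handles the 9-bit source operand encoding. Per the named
// constants in AMDGPU::EncValues (SIDefines.h): SGPRs occupy the low values
// up to SGPR_MAX, TTMPs sit in a subtarget-dependent range handled by
// getTTmpIdx, inline integer constants occupy 128..208, inline float
// constants 240..248, value 255 selects a literal that follows the
// instruction, 256..511 select VGPRs, and the remaining values
// (e.g. 106 == VCC_LO, 124 == M0) are special registers.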
MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width,
                                          unsigned Val) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 512); // enum9

  if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
    return createRegOperand(getVgprClassId(Width), Val - VGPR_MIN);
  }
  if (Val <= SGPR_MAX) {
    // "SGPR_MIN <= Val" is always true and causes a compilation warning.
    assert(SGPR_MIN == 0);
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
    return decodeIntImmed(Val);

  if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
    return decodeFPImmed(Width, Val);

  if (Val == LITERAL_CONST)
    return decodeLiteralConstant();

  switch (Width) {
  case OPW32:
  case OPW16:
  case OPWV216:
    return decodeSpecialReg32(Val);
  case OPW64:
    return decodeSpecialReg64(Val);
  default:
    llvm_unreachable("unexpected immediate type");
  }
}

MCOperand AMDGPUDisassembler::decodeDstOp(const OpWidthTy Width,
                                          unsigned Val) const {
  using namespace AMDGPU::EncValues;

  assert(Val < 128);
  assert(Width == OPW256 || Width == OPW512);

  if (Val <= SGPR_MAX) {
    // "SGPR_MIN <= Val" is always true and causes a compilation warning.
    assert(SGPR_MIN == 0);
    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
  }

  int TTmpIdx = getTTmpIdx(Val);
  if (TTmpIdx >= 0) {
    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
  }

  llvm_unreachable("unknown dst register");
}

MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(FLAT_SCR_LO);
  case 103: return createRegOperand(FLAT_SCR_HI);
  case 104: return createRegOperand(XNACK_MASK_LO);
  case 105: return createRegOperand(XNACK_MASK_HI);
  case 106: return createRegOperand(VCC_LO);
  case 107: return createRegOperand(VCC_HI);
  case 108: return createRegOperand(TBA_LO);
  case 109: return createRegOperand(TBA_HI);
  case 110: return createRegOperand(TMA_LO);
  case 111: return createRegOperand(TMA_HI);
  case 124: return createRegOperand(M0);
  case 126: return createRegOperand(EXEC_LO);
  case 127: return createRegOperand(EXEC_HI);
  case 235: return createRegOperand(SRC_SHARED_BASE);
  case 236: return createRegOperand(SRC_SHARED_LIMIT);
  case 237: return createRegOperand(SRC_PRIVATE_BASE);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
  // ToDo: no support for vccz register
  case 251: break;
  // ToDo: no support for execz register
  case 252: break;
  case 253: return createRegOperand(SCC);
  case 254: return createRegOperand(LDS_DIRECT);
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}

MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
  using namespace AMDGPU;

  switch (Val) {
  case 102: return createRegOperand(FLAT_SCR);
  case 104: return createRegOperand(XNACK_MASK);
  case 106: return createRegOperand(VCC);
  case 108: return createRegOperand(TBA);
  case 110: return createRegOperand(TMA);
  case 126: return createRegOperand(EXEC);
  case 235: return createRegOperand(SRC_SHARED_BASE);
  case 236: return createRegOperand(SRC_SHARED_LIMIT);
  case 237: return createRegOperand(SRC_PRIVATE_BASE);
  case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
  case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
  default: break;
  }
  return errOperand(Val, "unknown operand encoding " + Twine(Val));
}
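// On GFX9 the SDWA src field is wider than a plain VGPR number: values in
// the SRC_VGPR_* range select VGPRs, the SRC_SGPR_* and SRC_TTMP_* ranges
// select scalar registers, and any other value is rebased by SRC_SGPR_MIN
// and then interpreted like an ordinary scalar source (inline constant or
// special register). On VI the field can only name a VGPR.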
MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
                                            const unsigned Val) const {
  using namespace AMDGPU::SDWA;
  using namespace AMDGPU::EncValues;

  if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) {
    // XXX: the cast to int is needed to avoid a tautological-compare
    // warning ("comparison with unsigned is always true").
    if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
        Val <= SDWA9EncValues::SRC_VGPR_MAX) {
      return createRegOperand(getVgprClassId(Width),
                              Val - SDWA9EncValues::SRC_VGPR_MIN);
    }
    if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_SGPR_MAX) {
      return createSRegOperand(getSgprClassId(Width),
                               Val - SDWA9EncValues::SRC_SGPR_MIN);
    }
    if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
        Val <= SDWA9EncValues::SRC_TTMP_MAX) {
      return createSRegOperand(getTtmpClassId(Width),
                               Val - SDWA9EncValues::SRC_TTMP_MIN);
    }

    const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;

    if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
      return decodeIntImmed(SVal);

    if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
      return decodeFPImmed(Width, SVal);

    return decodeSpecialReg32(SVal);
  } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
    return createRegOperand(getVgprClassId(Width), Val);
  }
  llvm_unreachable("unsupported target");
}

MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
  return decodeSDWASrc(OPW16, Val);
}

MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
  return decodeSDWASrc(OPW32, Val);
}

MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
  using namespace AMDGPU::SDWA;

  assert(STI.getFeatureBits()[AMDGPU::FeatureGFX9] &&
         "SDWAVopcDst should be present only on GFX9");
  if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
    Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;

    int TTmpIdx = getTTmpIdx(Val);
    if (TTmpIdx >= 0) {
      return createSRegOperand(getTtmpClassId(OPW64), TTmpIdx);
    } else if (Val > AMDGPU::EncValues::SGPR_MAX) {
      return decodeSpecialReg64(Val);
    } else {
      return createSRegOperand(getSgprClassId(OPW64), Val);
    }
  } else {
    return createRegOperand(AMDGPU::VCC);
  }
}

bool AMDGPUDisassembler::isVI() const {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

bool AMDGPUDisassembler::isGFX9() const {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}

//===----------------------------------------------------------------------===//
// AMDGPUSymbolizer
//===----------------------------------------------------------------------===//

// Try to find a symbol name for the specified label.
bool AMDGPUSymbolizer::tryAddingSymbolicOperand(MCInst &Inst,
                                raw_ostream &/*cStream*/, int64_t Value,
                                uint64_t /*Address*/, bool IsBranch,
                                uint64_t /*Offset*/, uint64_t /*InstSize*/) {
  using SymbolInfoTy = std::tuple<uint64_t, StringRef, uint8_t>;
  using SectionSymbolsTy = std::vector<SymbolInfoTy>;

  if (!IsBranch) {
    return false;
  }

  auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
  if (!Symbols)
    return false;

  auto Result = std::find_if(Symbols->begin(), Symbols->end(),
                             [Value](const SymbolInfoTy& Val) {
                               return std::get<0>(Val) ==
                                          static_cast<uint64_t>(Value) &&
                                      std::get<2>(Val) == ELF::STT_NOTYPE;
                             });
  if (Result != Symbols->end()) {
    auto *Sym = Ctx.getOrCreateSymbol(std::get<1>(*Result));
    const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
    Inst.addOperand(MCOperand::createExpr(Add));
    return true;
  }
  return false;
}

void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
                                                       int64_t Value,
                                                       uint64_t Address) {
  llvm_unreachable("unimplemented");
}
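// Once registered below, this disassembler is reachable through the normal
// MC machinery; for example (illustrative):
//
//   llvm-mc --disassemble -triple=amdgcn -mcpu=gfx900 encodings.txt
//
// where encodings.txt is a hypothetical file of instruction byte lists.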
//===----------------------------------------------------------------------===//
// Initialization
//===----------------------------------------------------------------------===//

static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
                              LLVMOpInfoCallback /*GetOpInfo*/,
                              LLVMSymbolLookupCallback /*SymbolLookUp*/,
                              void *DisInfo,
                              MCContext *Ctx,
                              std::unique_ptr<MCRelocationInfo> &&RelInfo) {
  return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
}

static MCDisassembler *createAMDGPUDisassembler(const Target &T,
                                                const MCSubtargetInfo &STI,
                                                MCContext &Ctx) {
  return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
}

extern "C" void LLVMInitializeAMDGPUDisassembler() {
  TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
                                         createAMDGPUDisassembler);
  TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
                                       createAMDGPUSymbolizer);
}