xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp (revision 81ad626541db97eb356e2c1d4a20eb2a26a766ab)
10b57cec5SDimitry Andric //===- AArch64ExternalSymbolizer.cpp - Symbolizer for AArch64 ---*- C++ -*-===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric 
90b57cec5SDimitry Andric #include "AArch64ExternalSymbolizer.h"
100b57cec5SDimitry Andric #include "MCTargetDesc/AArch64AddressingModes.h"
110b57cec5SDimitry Andric #include "Utils/AArch64BaseInfo.h"
120b57cec5SDimitry Andric #include "llvm/MC/MCContext.h"
130b57cec5SDimitry Andric #include "llvm/MC/MCExpr.h"
140b57cec5SDimitry Andric #include "llvm/MC/MCInst.h"
150b57cec5SDimitry Andric #include "llvm/MC/MCRegisterInfo.h"
160b57cec5SDimitry Andric #include "llvm/Support/Format.h"
170b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
180b57cec5SDimitry Andric 
190b57cec5SDimitry Andric using namespace llvm;
200b57cec5SDimitry Andric 
210b57cec5SDimitry Andric #define DEBUG_TYPE "aarch64-disassembler"
220b57cec5SDimitry Andric 
230b57cec5SDimitry Andric static MCSymbolRefExpr::VariantKind
getVariant(uint64_t LLVMDisassembler_VariantKind)240b57cec5SDimitry Andric getVariant(uint64_t LLVMDisassembler_VariantKind) {
250b57cec5SDimitry Andric   switch (LLVMDisassembler_VariantKind) {
260b57cec5SDimitry Andric   case LLVMDisassembler_VariantKind_None:
270b57cec5SDimitry Andric     return MCSymbolRefExpr::VK_None;
280b57cec5SDimitry Andric   case LLVMDisassembler_VariantKind_ARM64_PAGE:
290b57cec5SDimitry Andric     return MCSymbolRefExpr::VK_PAGE;
300b57cec5SDimitry Andric   case LLVMDisassembler_VariantKind_ARM64_PAGEOFF:
310b57cec5SDimitry Andric     return MCSymbolRefExpr::VK_PAGEOFF;
320b57cec5SDimitry Andric   case LLVMDisassembler_VariantKind_ARM64_GOTPAGE:
330b57cec5SDimitry Andric     return MCSymbolRefExpr::VK_GOTPAGE;
340b57cec5SDimitry Andric   case LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF:
350b57cec5SDimitry Andric     return MCSymbolRefExpr::VK_GOTPAGEOFF;
360b57cec5SDimitry Andric   case LLVMDisassembler_VariantKind_ARM64_TLVP:
37349cc55cSDimitry Andric     return MCSymbolRefExpr::VK_TLVPPAGE;
380b57cec5SDimitry Andric   case LLVMDisassembler_VariantKind_ARM64_TLVOFF:
39349cc55cSDimitry Andric     return MCSymbolRefExpr::VK_TLVPPAGEOFF;
400b57cec5SDimitry Andric   default:
410b57cec5SDimitry Andric     llvm_unreachable("bad LLVMDisassembler_VariantKind");
420b57cec5SDimitry Andric   }
430b57cec5SDimitry Andric }
440b57cec5SDimitry Andric 
450b57cec5SDimitry Andric /// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic
460b57cec5SDimitry Andric /// operand in place of the immediate Value in the MCInst.  The immediate
470b57cec5SDimitry Andric /// Value has not had any PC adjustment made by the caller. If the instruction
480b57cec5SDimitry Andric /// is a branch that adds the PC to the immediate Value then isBranch is
490b57cec5SDimitry Andric /// Success, else Fail. If GetOpInfo is non-null, then it is called to get any
500b57cec5SDimitry Andric /// symbolic information at the Address for this instrution.  If that returns
510b57cec5SDimitry Andric /// non-zero then the symbolic information it returns is used to create an
520b57cec5SDimitry Andric /// MCExpr and that is added as an operand to the MCInst.  If GetOpInfo()
530b57cec5SDimitry Andric /// returns zero and isBranch is Success then a symbol look up for
540b57cec5SDimitry Andric /// Address + Value is done and if a symbol is found an MCExpr is created with
550b57cec5SDimitry Andric /// that, else an MCExpr with Address + Value is created.  If GetOpInfo()
560b57cec5SDimitry Andric /// returns zero and isBranch is Fail then the Opcode of the MCInst is
570b57cec5SDimitry Andric /// tested and for ADRP an other instructions that help to load of pointers
580b57cec5SDimitry Andric /// a symbol look up is done to see it is returns a specific reference type
590b57cec5SDimitry Andric /// to add to the comment stream.  This function returns Success if it adds
600b57cec5SDimitry Andric /// an operand to the MCInst and Fail otherwise.
tryAddingSymbolicOperand(MCInst & MI,raw_ostream & CommentStream,int64_t Value,uint64_t Address,bool IsBranch,uint64_t Offset,uint64_t OpSize,uint64_t InstSize)610b57cec5SDimitry Andric bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(
620b57cec5SDimitry Andric     MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address,
63*81ad6265SDimitry Andric     bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) {
640b57cec5SDimitry Andric   if (!SymbolLookUp)
650b57cec5SDimitry Andric     return false;
660b57cec5SDimitry Andric   // FIXME: This method shares a lot of code with
670b57cec5SDimitry Andric   //        MCExternalSymbolizer::tryAddingSymbolicOperand. It may be possible
680b57cec5SDimitry Andric   //        refactor the MCExternalSymbolizer interface to allow more of this
690b57cec5SDimitry Andric   //        implementation to be shared.
700b57cec5SDimitry Andric   //
710b57cec5SDimitry Andric   struct LLVMOpInfo1 SymbolicOp;
720b57cec5SDimitry Andric   memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
730b57cec5SDimitry Andric   SymbolicOp.Value = Value;
740b57cec5SDimitry Andric   uint64_t ReferenceType;
750b57cec5SDimitry Andric   const char *ReferenceName;
76*81ad6265SDimitry Andric   if (!GetOpInfo || !GetOpInfo(DisInfo, Address, /*Offset=*/0, OpSize, InstSize,
77*81ad6265SDimitry Andric                                1, &SymbolicOp)) {
780b57cec5SDimitry Andric     if (IsBranch) {
790b57cec5SDimitry Andric       ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
800b57cec5SDimitry Andric       const char *Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType,
810b57cec5SDimitry Andric                                       Address, &ReferenceName);
820b57cec5SDimitry Andric       if (Name) {
830b57cec5SDimitry Andric         SymbolicOp.AddSymbol.Name = Name;
840b57cec5SDimitry Andric         SymbolicOp.AddSymbol.Present = true;
850b57cec5SDimitry Andric         SymbolicOp.Value = 0;
860b57cec5SDimitry Andric       } else {
870b57cec5SDimitry Andric         SymbolicOp.Value = Address + Value;
880b57cec5SDimitry Andric       }
890b57cec5SDimitry Andric       if (ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
900b57cec5SDimitry Andric         CommentStream << "symbol stub for: " << ReferenceName;
910b57cec5SDimitry Andric       else if (ReferenceType ==
920b57cec5SDimitry Andric                LLVMDisassembler_ReferenceType_Out_Objc_Message)
930b57cec5SDimitry Andric         CommentStream << "Objc message: " << ReferenceName;
940b57cec5SDimitry Andric     } else if (MI.getOpcode() == AArch64::ADRP) {
950b57cec5SDimitry Andric         ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADRP;
960b57cec5SDimitry Andric         // otool expects the fully encoded ADRP instruction to be passed in as
970b57cec5SDimitry Andric         // the value here, so reconstruct it:
980b57cec5SDimitry Andric         const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo();
990b57cec5SDimitry Andric         uint32_t EncodedInst = 0x90000000;
1000b57cec5SDimitry Andric         EncodedInst |= (Value & 0x3) << 29; // immlo
1010b57cec5SDimitry Andric         EncodedInst |= ((Value >> 2) & 0x7FFFF) << 5; // immhi
1020b57cec5SDimitry Andric         EncodedInst |= MCRI.getEncodingValue(MI.getOperand(0).getReg()); // reg
1030b57cec5SDimitry Andric         SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address,
1040b57cec5SDimitry Andric                      &ReferenceName);
1050b57cec5SDimitry Andric         CommentStream << format("0x%llx", (0xfffffffffffff000LL & Address) +
1060b57cec5SDimitry Andric                                               Value * 0x1000);
1070b57cec5SDimitry Andric     } else if (MI.getOpcode() == AArch64::ADDXri ||
1080b57cec5SDimitry Andric                MI.getOpcode() == AArch64::LDRXui ||
1090b57cec5SDimitry Andric                MI.getOpcode() == AArch64::LDRXl ||
1100b57cec5SDimitry Andric                MI.getOpcode() == AArch64::ADR) {
1110b57cec5SDimitry Andric       if (MI.getOpcode() == AArch64::ADDXri)
1120b57cec5SDimitry Andric         ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADDXri;
1130b57cec5SDimitry Andric       else if (MI.getOpcode() == AArch64::LDRXui)
1140b57cec5SDimitry Andric         ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXui;
1150b57cec5SDimitry Andric       if (MI.getOpcode() == AArch64::LDRXl) {
1160b57cec5SDimitry Andric         ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXl;
1170b57cec5SDimitry Andric         SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
1180b57cec5SDimitry Andric                      &ReferenceName);
1190b57cec5SDimitry Andric       } else if (MI.getOpcode() == AArch64::ADR) {
1200b57cec5SDimitry Andric         ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADR;
1210b57cec5SDimitry Andric         SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
1220b57cec5SDimitry Andric                             &ReferenceName);
1230b57cec5SDimitry Andric       } else {
1240b57cec5SDimitry Andric         const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo();
1250b57cec5SDimitry Andric         // otool expects the fully encoded ADD/LDR instruction to be passed in
1260b57cec5SDimitry Andric         // as the value here, so reconstruct it:
1270b57cec5SDimitry Andric         unsigned EncodedInst =
1280b57cec5SDimitry Andric           MI.getOpcode() == AArch64::ADDXri ? 0x91000000: 0xF9400000;
1290b57cec5SDimitry Andric         EncodedInst |= Value << 10; // imm12 [+ shift:2 for ADD]
1300b57cec5SDimitry Andric         EncodedInst |=
1310b57cec5SDimitry Andric           MCRI.getEncodingValue(MI.getOperand(1).getReg()) << 5; // Rn
1320b57cec5SDimitry Andric         EncodedInst |= MCRI.getEncodingValue(MI.getOperand(0).getReg()); // Rd
1330b57cec5SDimitry Andric 
1340b57cec5SDimitry Andric         SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address,
1350b57cec5SDimitry Andric                      &ReferenceName);
1360b57cec5SDimitry Andric       }
1370b57cec5SDimitry Andric       if (ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr)
1380b57cec5SDimitry Andric         CommentStream << "literal pool symbol address: " << ReferenceName;
1390b57cec5SDimitry Andric       else if (ReferenceType ==
1400b57cec5SDimitry Andric                LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) {
1410b57cec5SDimitry Andric         CommentStream << "literal pool for: \"";
1420b57cec5SDimitry Andric         CommentStream.write_escaped(ReferenceName);
1430b57cec5SDimitry Andric         CommentStream << "\"";
1440b57cec5SDimitry Andric       } else if (ReferenceType ==
1450b57cec5SDimitry Andric                LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref)
1460b57cec5SDimitry Andric         CommentStream << "Objc cfstring ref: @\"" << ReferenceName << "\"";
1470b57cec5SDimitry Andric       else if (ReferenceType ==
1480b57cec5SDimitry Andric                LLVMDisassembler_ReferenceType_Out_Objc_Message)
1490b57cec5SDimitry Andric         CommentStream << "Objc message: " << ReferenceName;
1500b57cec5SDimitry Andric       else if (ReferenceType ==
1510b57cec5SDimitry Andric                LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref)
1520b57cec5SDimitry Andric         CommentStream << "Objc message ref: " << ReferenceName;
1530b57cec5SDimitry Andric       else if (ReferenceType ==
1540b57cec5SDimitry Andric                LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref)
1550b57cec5SDimitry Andric         CommentStream << "Objc selector ref: " << ReferenceName;
1560b57cec5SDimitry Andric       else if (ReferenceType ==
1570b57cec5SDimitry Andric                LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref)
1580b57cec5SDimitry Andric         CommentStream << "Objc class ref: " << ReferenceName;
1590b57cec5SDimitry Andric       // For these instructions, the SymbolLookUp() above is just to get the
1600b57cec5SDimitry Andric       // ReferenceType and ReferenceName.  We want to make sure not to
1610b57cec5SDimitry Andric       // fall through so we don't build an MCExpr to leave the disassembly
1620b57cec5SDimitry Andric       // of the immediate values of these instructions to the InstPrinter.
1630b57cec5SDimitry Andric       return false;
1640b57cec5SDimitry Andric     } else {
1650b57cec5SDimitry Andric       return false;
1660b57cec5SDimitry Andric     }
1670b57cec5SDimitry Andric   }
1680b57cec5SDimitry Andric 
1690b57cec5SDimitry Andric   const MCExpr *Add = nullptr;
1700b57cec5SDimitry Andric   if (SymbolicOp.AddSymbol.Present) {
1710b57cec5SDimitry Andric     if (SymbolicOp.AddSymbol.Name) {
1720b57cec5SDimitry Andric       StringRef Name(SymbolicOp.AddSymbol.Name);
1730b57cec5SDimitry Andric       MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
1740b57cec5SDimitry Andric       MCSymbolRefExpr::VariantKind Variant = getVariant(SymbolicOp.VariantKind);
1750b57cec5SDimitry Andric       if (Variant != MCSymbolRefExpr::VK_None)
1760b57cec5SDimitry Andric         Add = MCSymbolRefExpr::create(Sym, Variant, Ctx);
1770b57cec5SDimitry Andric       else
1780b57cec5SDimitry Andric         Add = MCSymbolRefExpr::create(Sym, Ctx);
1790b57cec5SDimitry Andric     } else {
1800b57cec5SDimitry Andric       Add = MCConstantExpr::create(SymbolicOp.AddSymbol.Value, Ctx);
1810b57cec5SDimitry Andric     }
1820b57cec5SDimitry Andric   }
1830b57cec5SDimitry Andric 
1840b57cec5SDimitry Andric   const MCExpr *Sub = nullptr;
1850b57cec5SDimitry Andric   if (SymbolicOp.SubtractSymbol.Present) {
1860b57cec5SDimitry Andric     if (SymbolicOp.SubtractSymbol.Name) {
1870b57cec5SDimitry Andric       StringRef Name(SymbolicOp.SubtractSymbol.Name);
1880b57cec5SDimitry Andric       MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
1890b57cec5SDimitry Andric       Sub = MCSymbolRefExpr::create(Sym, Ctx);
1900b57cec5SDimitry Andric     } else {
1910b57cec5SDimitry Andric       Sub = MCConstantExpr::create(SymbolicOp.SubtractSymbol.Value, Ctx);
1920b57cec5SDimitry Andric     }
1930b57cec5SDimitry Andric   }
1940b57cec5SDimitry Andric 
1950b57cec5SDimitry Andric   const MCExpr *Off = nullptr;
1960b57cec5SDimitry Andric   if (SymbolicOp.Value != 0)
1970b57cec5SDimitry Andric     Off = MCConstantExpr::create(SymbolicOp.Value, Ctx);
1980b57cec5SDimitry Andric 
1990b57cec5SDimitry Andric   const MCExpr *Expr;
2000b57cec5SDimitry Andric   if (Sub) {
2010b57cec5SDimitry Andric     const MCExpr *LHS;
2020b57cec5SDimitry Andric     if (Add)
2030b57cec5SDimitry Andric       LHS = MCBinaryExpr::createSub(Add, Sub, Ctx);
2040b57cec5SDimitry Andric     else
2050b57cec5SDimitry Andric       LHS = MCUnaryExpr::createMinus(Sub, Ctx);
2060b57cec5SDimitry Andric     if (Off)
2070b57cec5SDimitry Andric       Expr = MCBinaryExpr::createAdd(LHS, Off, Ctx);
2080b57cec5SDimitry Andric     else
2090b57cec5SDimitry Andric       Expr = LHS;
2100b57cec5SDimitry Andric   } else if (Add) {
2110b57cec5SDimitry Andric     if (Off)
2120b57cec5SDimitry Andric       Expr = MCBinaryExpr::createAdd(Add, Off, Ctx);
2130b57cec5SDimitry Andric     else
2140b57cec5SDimitry Andric       Expr = Add;
2150b57cec5SDimitry Andric   } else {
2160b57cec5SDimitry Andric     if (Off)
2170b57cec5SDimitry Andric       Expr = Off;
2180b57cec5SDimitry Andric     else
2190b57cec5SDimitry Andric       Expr = MCConstantExpr::create(0, Ctx);
2200b57cec5SDimitry Andric   }
2210b57cec5SDimitry Andric 
2220b57cec5SDimitry Andric   MI.addOperand(MCOperand::createExpr(Expr));
2230b57cec5SDimitry Andric 
2240b57cec5SDimitry Andric   return true;
2250b57cec5SDimitry Andric }
226