1 //===- AArch64ExternalSymbolizer.cpp - Symbolizer for AArch64 ---*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AArch64ExternalSymbolizer.h" 10 #include "Utils/AArch64BaseInfo.h" 11 #include "llvm/MC/MCContext.h" 12 #include "llvm/MC/MCExpr.h" 13 #include "llvm/MC/MCInst.h" 14 #include "llvm/MC/MCRegisterInfo.h" 15 #include "llvm/Support/Format.h" 16 #include "llvm/Support/raw_ostream.h" 17 18 using namespace llvm; 19 20 #define DEBUG_TYPE "aarch64-disassembler" 21 22 static MCSymbolRefExpr::VariantKind 23 getVariant(uint64_t LLVMDisassembler_VariantKind) { 24 switch (LLVMDisassembler_VariantKind) { 25 case LLVMDisassembler_VariantKind_None: 26 return MCSymbolRefExpr::VK_None; 27 case LLVMDisassembler_VariantKind_ARM64_PAGE: 28 return MCSymbolRefExpr::VK_PAGE; 29 case LLVMDisassembler_VariantKind_ARM64_PAGEOFF: 30 return MCSymbolRefExpr::VK_PAGEOFF; 31 case LLVMDisassembler_VariantKind_ARM64_GOTPAGE: 32 return MCSymbolRefExpr::VK_GOTPAGE; 33 case LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF: 34 return MCSymbolRefExpr::VK_GOTPAGEOFF; 35 case LLVMDisassembler_VariantKind_ARM64_TLVP: 36 return MCSymbolRefExpr::VK_TLVPPAGE; 37 case LLVMDisassembler_VariantKind_ARM64_TLVOFF: 38 return MCSymbolRefExpr::VK_TLVPPAGEOFF; 39 default: 40 llvm_unreachable("bad LLVMDisassembler_VariantKind"); 41 } 42 } 43 44 /// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic 45 /// operand in place of the immediate Value in the MCInst. The immediate 46 /// Value has not had any PC adjustment made by the caller. If the instruction 47 /// is a branch that adds the PC to the immediate Value then isBranch is 48 /// Success, else Fail. If GetOpInfo is non-null, then it is called to get any 49 /// symbolic information at the Address for this instrution. If that returns 50 /// non-zero then the symbolic information it returns is used to create an 51 /// MCExpr and that is added as an operand to the MCInst. If GetOpInfo() 52 /// returns zero and isBranch is Success then a symbol look up for 53 /// Address + Value is done and if a symbol is found an MCExpr is created with 54 /// that, else an MCExpr with Address + Value is created. If GetOpInfo() 55 /// returns zero and isBranch is Fail then the Opcode of the MCInst is 56 /// tested and for ADRP an other instructions that help to load of pointers 57 /// a symbol look up is done to see it is returns a specific reference type 58 /// to add to the comment stream. This function returns Success if it adds 59 /// an operand to the MCInst and Fail otherwise. 60 bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand( 61 MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address, 62 bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) { 63 if (!SymbolLookUp) 64 return false; 65 // FIXME: This method shares a lot of code with 66 // MCExternalSymbolizer::tryAddingSymbolicOperand. It may be possible 67 // refactor the MCExternalSymbolizer interface to allow more of this 68 // implementation to be shared. 69 // 70 struct LLVMOpInfo1 SymbolicOp; 71 memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); 72 SymbolicOp.Value = Value; 73 uint64_t ReferenceType; 74 const char *ReferenceName; 75 if (!GetOpInfo || !GetOpInfo(DisInfo, Address, /*Offset=*/0, OpSize, InstSize, 76 1, &SymbolicOp)) { 77 if (IsBranch) { 78 ReferenceType = LLVMDisassembler_ReferenceType_In_Branch; 79 const char *Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType, 80 Address, &ReferenceName); 81 if (Name) { 82 SymbolicOp.AddSymbol.Name = Name; 83 SymbolicOp.AddSymbol.Present = true; 84 SymbolicOp.Value = 0; 85 } else { 86 SymbolicOp.Value = Address + Value; 87 } 88 if (ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub) 89 CommentStream << "symbol stub for: " << ReferenceName; 90 else if (ReferenceType == 91 LLVMDisassembler_ReferenceType_Out_Objc_Message) 92 CommentStream << "Objc message: " << ReferenceName; 93 } else if (MI.getOpcode() == AArch64::ADRP) { 94 ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADRP; 95 // otool expects the fully encoded ADRP instruction to be passed in as 96 // the value here, so reconstruct it: 97 const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo(); 98 uint32_t EncodedInst = 0x90000000; 99 EncodedInst |= (Value & 0x3) << 29; // immlo 100 EncodedInst |= ((Value >> 2) & 0x7FFFF) << 5; // immhi 101 EncodedInst |= MCRI.getEncodingValue(MI.getOperand(0).getReg()); // reg 102 SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address, 103 &ReferenceName); 104 CommentStream << format("0x%llx", (0xfffffffffffff000LL & Address) + 105 Value * 0x1000); 106 } else if (MI.getOpcode() == AArch64::ADDXri || 107 MI.getOpcode() == AArch64::LDRXui || 108 MI.getOpcode() == AArch64::LDRXl || 109 MI.getOpcode() == AArch64::ADR) { 110 if (MI.getOpcode() == AArch64::ADDXri) 111 ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADDXri; 112 else if (MI.getOpcode() == AArch64::LDRXui) 113 ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXui; 114 if (MI.getOpcode() == AArch64::LDRXl) { 115 ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXl; 116 SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address, 117 &ReferenceName); 118 } else if (MI.getOpcode() == AArch64::ADR) { 119 ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADR; 120 SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address, 121 &ReferenceName); 122 } else { 123 const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo(); 124 // otool expects the fully encoded ADD/LDR instruction to be passed in 125 // as the value here, so reconstruct it: 126 unsigned EncodedInst = 127 MI.getOpcode() == AArch64::ADDXri ? 0x91000000: 0xF9400000; 128 EncodedInst |= Value << 10; // imm12 [+ shift:2 for ADD] 129 EncodedInst |= 130 MCRI.getEncodingValue(MI.getOperand(1).getReg()) << 5; // Rn 131 EncodedInst |= MCRI.getEncodingValue(MI.getOperand(0).getReg()); // Rd 132 133 SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address, 134 &ReferenceName); 135 } 136 if (ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr) 137 CommentStream << "literal pool symbol address: " << ReferenceName; 138 else if (ReferenceType == 139 LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) { 140 CommentStream << "literal pool for: \""; 141 CommentStream.write_escaped(ReferenceName); 142 CommentStream << "\""; 143 } else if (ReferenceType == 144 LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref) 145 CommentStream << "Objc cfstring ref: @\"" << ReferenceName << "\""; 146 else if (ReferenceType == 147 LLVMDisassembler_ReferenceType_Out_Objc_Message) 148 CommentStream << "Objc message: " << ReferenceName; 149 else if (ReferenceType == 150 LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref) 151 CommentStream << "Objc message ref: " << ReferenceName; 152 else if (ReferenceType == 153 LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref) 154 CommentStream << "Objc selector ref: " << ReferenceName; 155 else if (ReferenceType == 156 LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref) 157 CommentStream << "Objc class ref: " << ReferenceName; 158 // For these instructions, the SymbolLookUp() above is just to get the 159 // ReferenceType and ReferenceName. We want to make sure not to 160 // fall through so we don't build an MCExpr to leave the disassembly 161 // of the immediate values of these instructions to the InstPrinter. 162 return false; 163 } else { 164 return false; 165 } 166 } 167 168 const MCExpr *Add = nullptr; 169 if (SymbolicOp.AddSymbol.Present) { 170 if (SymbolicOp.AddSymbol.Name) { 171 StringRef Name(SymbolicOp.AddSymbol.Name); 172 MCSymbol *Sym = Ctx.getOrCreateSymbol(Name); 173 MCSymbolRefExpr::VariantKind Variant = getVariant(SymbolicOp.VariantKind); 174 if (Variant != MCSymbolRefExpr::VK_None) 175 Add = MCSymbolRefExpr::create(Sym, Variant, Ctx); 176 else 177 Add = MCSymbolRefExpr::create(Sym, Ctx); 178 } else { 179 Add = MCConstantExpr::create(SymbolicOp.AddSymbol.Value, Ctx); 180 } 181 } 182 183 const MCExpr *Sub = nullptr; 184 if (SymbolicOp.SubtractSymbol.Present) { 185 if (SymbolicOp.SubtractSymbol.Name) { 186 StringRef Name(SymbolicOp.SubtractSymbol.Name); 187 MCSymbol *Sym = Ctx.getOrCreateSymbol(Name); 188 Sub = MCSymbolRefExpr::create(Sym, Ctx); 189 } else { 190 Sub = MCConstantExpr::create(SymbolicOp.SubtractSymbol.Value, Ctx); 191 } 192 } 193 194 const MCExpr *Off = nullptr; 195 if (SymbolicOp.Value != 0) 196 Off = MCConstantExpr::create(SymbolicOp.Value, Ctx); 197 198 const MCExpr *Expr; 199 if (Sub) { 200 const MCExpr *LHS; 201 if (Add) 202 LHS = MCBinaryExpr::createSub(Add, Sub, Ctx); 203 else 204 LHS = MCUnaryExpr::createMinus(Sub, Ctx); 205 if (Off) 206 Expr = MCBinaryExpr::createAdd(LHS, Off, Ctx); 207 else 208 Expr = LHS; 209 } else if (Add) { 210 if (Off) 211 Expr = MCBinaryExpr::createAdd(Add, Off, Ctx); 212 else 213 Expr = Add; 214 } else { 215 if (Off) 216 Expr = Off; 217 else 218 Expr = MCConstantExpr::create(0, Ctx); 219 } 220 221 MI.addOperand(MCOperand::createExpr(Expr)); 222 223 return true; 224 } 225