xref: /llvm-project/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp (revision a41922ad7530ef5e311afbff2721e69cbf520890)
1 //===- AArch64ExternalSymbolizer.cpp - Symbolizer for AArch64 ---*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AArch64ExternalSymbolizer.h"
10 #include "Utils/AArch64BaseInfo.h"
11 #include "llvm/MC/MCContext.h"
12 #include "llvm/MC/MCExpr.h"
13 #include "llvm/MC/MCInst.h"
14 #include "llvm/MC/MCRegisterInfo.h"
15 #include "llvm/Support/Format.h"
16 #include "llvm/Support/raw_ostream.h"
17 
18 using namespace llvm;
19 
20 #define DEBUG_TYPE "aarch64-disassembler"
21 
22 static MCSymbolRefExpr::VariantKind
23 getVariant(uint64_t LLVMDisassembler_VariantKind) {
24   switch (LLVMDisassembler_VariantKind) {
25   case LLVMDisassembler_VariantKind_None:
26     return MCSymbolRefExpr::VK_None;
27   case LLVMDisassembler_VariantKind_ARM64_PAGE:
28     return MCSymbolRefExpr::VK_PAGE;
29   case LLVMDisassembler_VariantKind_ARM64_PAGEOFF:
30     return MCSymbolRefExpr::VK_PAGEOFF;
31   case LLVMDisassembler_VariantKind_ARM64_GOTPAGE:
32     return MCSymbolRefExpr::VK_GOTPAGE;
33   case LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF:
34     return MCSymbolRefExpr::VK_GOTPAGEOFF;
35   case LLVMDisassembler_VariantKind_ARM64_TLVP:
36     return MCSymbolRefExpr::VK_TLVPPAGE;
37   case LLVMDisassembler_VariantKind_ARM64_TLVOFF:
38     return MCSymbolRefExpr::VK_TLVPPAGEOFF;
39   default:
40     llvm_unreachable("bad LLVMDisassembler_VariantKind");
41   }
42 }
43 
44 /// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic
45 /// operand in place of the immediate Value in the MCInst.  The immediate
46 /// Value has not had any PC adjustment made by the caller. If the instruction
47 /// is a branch that adds the PC to the immediate Value then isBranch is
48 /// Success, else Fail. If GetOpInfo is non-null, then it is called to get any
49 /// symbolic information at the Address for this instrution.  If that returns
50 /// non-zero then the symbolic information it returns is used to create an
51 /// MCExpr and that is added as an operand to the MCInst.  If GetOpInfo()
52 /// returns zero and isBranch is Success then a symbol look up for
53 /// Address + Value is done and if a symbol is found an MCExpr is created with
54 /// that, else an MCExpr with Address + Value is created.  If GetOpInfo()
55 /// returns zero and isBranch is Fail then the Opcode of the MCInst is
56 /// tested and for ADRP an other instructions that help to load of pointers
57 /// a symbol look up is done to see it is returns a specific reference type
58 /// to add to the comment stream.  This function returns Success if it adds
59 /// an operand to the MCInst and Fail otherwise.
60 bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(
61     MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address,
62     bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) {
63   if (!SymbolLookUp)
64     return false;
65   // FIXME: This method shares a lot of code with
66   //        MCExternalSymbolizer::tryAddingSymbolicOperand. It may be possible
67   //        refactor the MCExternalSymbolizer interface to allow more of this
68   //        implementation to be shared.
69   //
70   struct LLVMOpInfo1 SymbolicOp;
71   memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
72   SymbolicOp.Value = Value;
73   uint64_t ReferenceType;
74   const char *ReferenceName;
75   if (!GetOpInfo || !GetOpInfo(DisInfo, Address, /*Offset=*/0, OpSize, InstSize,
76                                1, &SymbolicOp)) {
77     if (IsBranch) {
78       ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
79       const char *Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType,
80                                       Address, &ReferenceName);
81       if (Name) {
82         SymbolicOp.AddSymbol.Name = Name;
83         SymbolicOp.AddSymbol.Present = true;
84         SymbolicOp.Value = 0;
85       } else {
86         SymbolicOp.Value = Address + Value;
87       }
88       if (ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
89         CommentStream << "symbol stub for: " << ReferenceName;
90       else if (ReferenceType ==
91                LLVMDisassembler_ReferenceType_Out_Objc_Message)
92         CommentStream << "Objc message: " << ReferenceName;
93     } else if (MI.getOpcode() == AArch64::ADRP) {
94         ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADRP;
95         // otool expects the fully encoded ADRP instruction to be passed in as
96         // the value here, so reconstruct it:
97         const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo();
98         uint32_t EncodedInst = 0x90000000;
99         EncodedInst |= (Value & 0x3) << 29; // immlo
100         EncodedInst |= ((Value >> 2) & 0x7FFFF) << 5; // immhi
101         EncodedInst |= MCRI.getEncodingValue(MI.getOperand(0).getReg()); // reg
102         SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address,
103                      &ReferenceName);
104         CommentStream << format("0x%llx", (0xfffffffffffff000LL & Address) +
105                                               Value * 0x1000);
106     } else if (MI.getOpcode() == AArch64::ADDXri ||
107                MI.getOpcode() == AArch64::LDRXui ||
108                MI.getOpcode() == AArch64::LDRXl ||
109                MI.getOpcode() == AArch64::ADR) {
110       if (MI.getOpcode() == AArch64::ADDXri)
111         ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADDXri;
112       else if (MI.getOpcode() == AArch64::LDRXui)
113         ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXui;
114       if (MI.getOpcode() == AArch64::LDRXl) {
115         ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXl;
116         SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
117                      &ReferenceName);
118       } else if (MI.getOpcode() == AArch64::ADR) {
119         ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADR;
120         SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
121                             &ReferenceName);
122       } else {
123         const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo();
124         // otool expects the fully encoded ADD/LDR instruction to be passed in
125         // as the value here, so reconstruct it:
126         unsigned EncodedInst =
127           MI.getOpcode() == AArch64::ADDXri ? 0x91000000: 0xF9400000;
128         EncodedInst |= Value << 10; // imm12 [+ shift:2 for ADD]
129         EncodedInst |=
130           MCRI.getEncodingValue(MI.getOperand(1).getReg()) << 5; // Rn
131         EncodedInst |= MCRI.getEncodingValue(MI.getOperand(0).getReg()); // Rd
132 
133         SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address,
134                      &ReferenceName);
135       }
136       if (ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr)
137         CommentStream << "literal pool symbol address: " << ReferenceName;
138       else if (ReferenceType ==
139                LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) {
140         CommentStream << "literal pool for: \"";
141         CommentStream.write_escaped(ReferenceName);
142         CommentStream << "\"";
143       } else if (ReferenceType ==
144                LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref)
145         CommentStream << "Objc cfstring ref: @\"" << ReferenceName << "\"";
146       else if (ReferenceType ==
147                LLVMDisassembler_ReferenceType_Out_Objc_Message)
148         CommentStream << "Objc message: " << ReferenceName;
149       else if (ReferenceType ==
150                LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref)
151         CommentStream << "Objc message ref: " << ReferenceName;
152       else if (ReferenceType ==
153                LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref)
154         CommentStream << "Objc selector ref: " << ReferenceName;
155       else if (ReferenceType ==
156                LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref)
157         CommentStream << "Objc class ref: " << ReferenceName;
158       // For these instructions, the SymbolLookUp() above is just to get the
159       // ReferenceType and ReferenceName.  We want to make sure not to
160       // fall through so we don't build an MCExpr to leave the disassembly
161       // of the immediate values of these instructions to the InstPrinter.
162       return false;
163     } else {
164       return false;
165     }
166   }
167 
168   const MCExpr *Add = nullptr;
169   if (SymbolicOp.AddSymbol.Present) {
170     if (SymbolicOp.AddSymbol.Name) {
171       StringRef Name(SymbolicOp.AddSymbol.Name);
172       MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
173       MCSymbolRefExpr::VariantKind Variant = getVariant(SymbolicOp.VariantKind);
174       if (Variant != MCSymbolRefExpr::VK_None)
175         Add = MCSymbolRefExpr::create(Sym, Variant, Ctx);
176       else
177         Add = MCSymbolRefExpr::create(Sym, Ctx);
178     } else {
179       Add = MCConstantExpr::create(SymbolicOp.AddSymbol.Value, Ctx);
180     }
181   }
182 
183   const MCExpr *Sub = nullptr;
184   if (SymbolicOp.SubtractSymbol.Present) {
185     if (SymbolicOp.SubtractSymbol.Name) {
186       StringRef Name(SymbolicOp.SubtractSymbol.Name);
187       MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
188       Sub = MCSymbolRefExpr::create(Sym, Ctx);
189     } else {
190       Sub = MCConstantExpr::create(SymbolicOp.SubtractSymbol.Value, Ctx);
191     }
192   }
193 
194   const MCExpr *Off = nullptr;
195   if (SymbolicOp.Value != 0)
196     Off = MCConstantExpr::create(SymbolicOp.Value, Ctx);
197 
198   const MCExpr *Expr;
199   if (Sub) {
200     const MCExpr *LHS;
201     if (Add)
202       LHS = MCBinaryExpr::createSub(Add, Sub, Ctx);
203     else
204       LHS = MCUnaryExpr::createMinus(Sub, Ctx);
205     if (Off)
206       Expr = MCBinaryExpr::createAdd(LHS, Off, Ctx);
207     else
208       Expr = LHS;
209   } else if (Add) {
210     if (Off)
211       Expr = MCBinaryExpr::createAdd(Add, Off, Ctx);
212     else
213       Expr = Add;
214   } else {
215     if (Off)
216       Expr = Off;
217     else
218       Expr = MCConstantExpr::create(0, Ctx);
219   }
220 
221   MI.addOperand(MCOperand::createExpr(Expr));
222 
223   return true;
224 }
225