xref: /llvm-project/bolt/lib/Target/X86/X86MCSymbolizer.cpp (revision 52cf07116bf0a8cab87b0f55176d198bcaa02575)
1 //===- bolt/Target/X86/X86MCSymbolizer.cpp --------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "X86MCSymbolizer.h"
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "bolt/Core/BinaryContext.h"
12 #include "bolt/Core/BinaryFunction.h"
13 #include "bolt/Core/MCPlusBuilder.h"
14 #include "bolt/Core/Relocation.h"
15 #include "llvm/MC/MCInst.h"
16 #include "llvm/MC/MCRegisterInfo.h"
17 
18 #define DEBUG_TYPE "bolt-symbolizer"
19 
20 namespace llvm {
21 namespace bolt {
22 
~X86MCSymbolizer()23 X86MCSymbolizer::~X86MCSymbolizer() {}
24 
tryAddingSymbolicOperand(MCInst & Inst,raw_ostream & CStream,int64_t Value,uint64_t InstAddress,bool IsBranch,uint64_t ImmOffset,uint64_t ImmSize,uint64_t InstSize)25 bool X86MCSymbolizer::tryAddingSymbolicOperand(
26     MCInst &Inst, raw_ostream &CStream, int64_t Value, uint64_t InstAddress,
27     bool IsBranch, uint64_t ImmOffset, uint64_t ImmSize, uint64_t InstSize) {
28   if (IsBranch)
29     return false;
30 
31   // Ignore implicit operands.
32   if (ImmSize == 0)
33     return false;
34 
35   BinaryContext &BC = Function.getBinaryContext();
36   MCContext *Ctx = BC.Ctx.get();
37 
38   if (BC.MIB->isBranch(Inst) || BC.MIB->isCall(Inst))
39     return false;
40 
41   /// Add symbolic operand to the instruction with an optional addend.
42   auto addOperand = [&](const MCSymbol *Symbol, uint64_t Addend) {
43     const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, *Ctx);
44     if (Addend)
45       Expr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Addend, *Ctx),
46                                      *Ctx);
47     Inst.addOperand(MCOperand::createExpr(Expr));
48   };
49 
50   // Check if the operand being added is a displacement part of a compound
51   // memory operand that uses PC-relative addressing. If it is, try to symbolize
52   // it without relocations. Return true on success, false otherwise.
53   auto processPCRelOperandNoRel = [&]() {
54     const int MemOp = BC.MIB->getMemoryOperandNo(Inst);
55     if (MemOp == -1)
56       return false;
57 
58     const unsigned DispOp = MemOp + X86::AddrDisp;
59     if (Inst.getNumOperands() != DispOp)
60       return false;
61 
62     const MCOperand &Base = Inst.getOperand(MemOp + X86::AddrBaseReg);
63     if (Base.getReg() != BC.MRI->getProgramCounter())
64       return false;
65 
66     const MCOperand &Scale = Inst.getOperand(MemOp + X86::AddrScaleAmt);
67     const MCOperand &Index = Inst.getOperand(MemOp + X86::AddrIndexReg);
68     if (Scale.getImm() != 0 && Index.getReg() != MCRegister::NoRegister)
69       return false;
70 
71     const MCSymbol *TargetSymbol;
72     uint64_t TargetOffset;
73 
74     if (!CreateNewSymbols) {
75       if (BinaryData *BD = BC.getBinaryDataContainingAddress(Value)) {
76         TargetSymbol = BD->getSymbol();
77         TargetOffset = Value - BD->getAddress();
78       } else {
79         return false;
80       }
81     } else {
82       std::tie(TargetSymbol, TargetOffset) =
83           BC.handleAddressRef(Value, Function, /*IsPCRel=*/true);
84     }
85 
86     addOperand(TargetSymbol, TargetOffset);
87 
88     return true;
89   };
90 
91   // Check for GOTPCRELX relocations first. Because these relocations allow the
92   // linker to modify the instruction, we have to check the offset range
93   // corresponding to the instruction, not the offset of the operand.
94   // Note that if there is GOTPCRELX relocation against the instruction, there
95   // will be no other relocation in this range, since GOTPCRELX applies only to
96   // certain instruction types.
97   const uint64_t InstOffset = InstAddress - Function.getAddress();
98   const Relocation *Relocation =
99       Function.getRelocationInRange(InstOffset, InstOffset + InstSize);
100   if (Relocation && Relocation::isX86GOTPCRELX(Relocation->Type)) {
101     // If the operand is PC-relative, convert it without using the relocation
102     // information. For GOTPCRELX, it is safe to use the absolute address
103     // instead of extracting the addend from the relocation, as non-standard
104     // forms will be rejected by linker conversion process and the operand
105     // will always reference GOT which we don't rewrite.
106     if (processPCRelOperandNoRel())
107       return true;
108 
109     // The linker converted the PC-relative address to an absolute one.
110     // Symbolize this address.
111     if (CreateNewSymbols)
112       BC.handleAddressRef(Value, Function, /*IsPCRel=*/false);
113 
114     const BinaryData *Target = BC.getBinaryDataAtAddress(Value);
115     if (!Target) {
116       assert(!CreateNewSymbols &&
117              "BinaryData should exist at converted GOTPCRELX destination");
118       return false;
119     }
120 
121     addOperand(Target->getSymbol(), /*Addend=*/0);
122 
123     return true;
124   }
125 
126   // Check for relocations against the operand.
127   if (!Relocation || Relocation->Offset != InstOffset + ImmOffset)
128     Relocation = Function.getRelocationAt(InstOffset + ImmOffset);
129 
130   if (!Relocation)
131     return processPCRelOperandNoRel();
132 
133   // GOTPC64 is special because the X86 Assembler doesn't know how to emit
134   // a PC-relative 8-byte fixup, which is what we need to cover this. The
135   // only way to do this is to use the symbol name _GLOBAL_OFFSET_TABLE_.
136   if (Relocation::isX86GOTPC64(Relocation->Type)) {
137     auto PairOrErr = handleGOTPC64(*Relocation, InstAddress);
138     if (auto E = PairOrErr.takeError()) {
139       Function.setSimple(false);
140       BC.logBOLTErrorsAndQuitOnFatal(std::move(E));
141       return false;
142     }
143     auto [Sym, Addend] = *PairOrErr;
144     addOperand(Sym, Addend);
145     return true;
146   }
147 
148   uint64_t SymbolValue = Relocation->Value - Relocation->Addend;
149   if (Relocation->isPCRelative())
150     SymbolValue += InstAddress + ImmOffset;
151 
152   // Process reference to the symbol.
153   if (CreateNewSymbols)
154     BC.handleAddressRef(SymbolValue, Function, Relocation->isPCRelative());
155 
156   uint64_t Addend = Relocation->Addend;
157   // Real addend for pc-relative targets is adjusted with a delta from
158   // the relocation placement to the next instruction.
159   if (Relocation->isPCRelative())
160     Addend += InstOffset + InstSize - Relocation->Offset;
161 
162   addOperand(Relocation->Symbol, Addend);
163 
164   return true;
165 }
166 
167 Expected<std::pair<MCSymbol *, uint64_t>>
handleGOTPC64(const Relocation & R,uint64_t InstrAddr)168 X86MCSymbolizer::handleGOTPC64(const Relocation &R, uint64_t InstrAddr) {
169   BinaryContext &BC = Function.getBinaryContext();
170   const BinaryData *GOTSymBD = BC.getGOTSymbol();
171   if (!GOTSymBD || !GOTSymBD->getAddress()) {
172     // This error is pretty serious but we can't kill the disassembler
173     // because of it, so don't make it fatal. Log it and warn the user.
174     return createNonFatalBOLTError(
175         "R_X86_GOTPC64 relocation is present but we did not detect "
176         "a valid  _GLOBAL_OFFSET_TABLE_ in symbol table\n");
177   }
178   // R_X86_GOTPC64 are not relative to the Reloc nor end of instruction,
179   // but the start of the MOVABSQ instruction. So the Target Address is
180   // whatever is encoded in the original operand when we disassembled
181   // the binary (here, R.Value) plus MOVABSQ address (InstrAddr).
182   // Here we extract the intended Addend by subtracting the real
183   // GOT addr.
184   const int64_t Addend = R.Value + InstrAddr - GOTSymBD->getAddress();
185   return std::make_pair(BC.Ctx->getOrCreateSymbol("_GLOBAL_OFFSET_TABLE_"),
186                         Addend);
187 }
188 
tryAddingPcLoadReferenceComment(raw_ostream & CStream,int64_t Value,uint64_t Address)189 void X86MCSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &CStream,
190                                                       int64_t Value,
191                                                       uint64_t Address) {}
192 
193 } // namespace bolt
194 } // namespace llvm
195