xref: /llvm-project/bolt/lib/Target/X86/X86MCSymbolizer.cpp (revision 52cf07116bf0a8cab87b0f55176d198bcaa02575)
//===- bolt/Target/X86/X86MCSymbolizer.cpp --------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8e290133cSMaksim Panchenko 
9e290133cSMaksim Panchenko #include "X86MCSymbolizer.h"
10e290133cSMaksim Panchenko #include "MCTargetDesc/X86BaseInfo.h"
11e290133cSMaksim Panchenko #include "bolt/Core/BinaryContext.h"
12e290133cSMaksim Panchenko #include "bolt/Core/BinaryFunction.h"
13e290133cSMaksim Panchenko #include "bolt/Core/MCPlusBuilder.h"
14e290133cSMaksim Panchenko #include "bolt/Core/Relocation.h"
15e290133cSMaksim Panchenko #include "llvm/MC/MCInst.h"
16e290133cSMaksim Panchenko #include "llvm/MC/MCRegisterInfo.h"
17e290133cSMaksim Panchenko 
18e290133cSMaksim Panchenko #define DEBUG_TYPE "bolt-symbolizer"
19e290133cSMaksim Panchenko 
20e290133cSMaksim Panchenko namespace llvm {
21e290133cSMaksim Panchenko namespace bolt {
22e290133cSMaksim Panchenko 
~X86MCSymbolizer()23e290133cSMaksim Panchenko X86MCSymbolizer::~X86MCSymbolizer() {}
24e290133cSMaksim Panchenko 
tryAddingSymbolicOperand(MCInst & Inst,raw_ostream & CStream,int64_t Value,uint64_t InstAddress,bool IsBranch,uint64_t ImmOffset,uint64_t ImmSize,uint64_t InstSize)25e290133cSMaksim Panchenko bool X86MCSymbolizer::tryAddingSymbolicOperand(
26e290133cSMaksim Panchenko     MCInst &Inst, raw_ostream &CStream, int64_t Value, uint64_t InstAddress,
27e290133cSMaksim Panchenko     bool IsBranch, uint64_t ImmOffset, uint64_t ImmSize, uint64_t InstSize) {
28e290133cSMaksim Panchenko   if (IsBranch)
29e290133cSMaksim Panchenko     return false;
30e290133cSMaksim Panchenko 
31e290133cSMaksim Panchenko   // Ignore implicit operands.
32e290133cSMaksim Panchenko   if (ImmSize == 0)
33e290133cSMaksim Panchenko     return false;
34e290133cSMaksim Panchenko 
35e290133cSMaksim Panchenko   BinaryContext &BC = Function.getBinaryContext();
36e290133cSMaksim Panchenko   MCContext *Ctx = BC.Ctx.get();
37e290133cSMaksim Panchenko 
38e290133cSMaksim Panchenko   if (BC.MIB->isBranch(Inst) || BC.MIB->isCall(Inst))
39e290133cSMaksim Panchenko     return false;
40e290133cSMaksim Panchenko 
41e290133cSMaksim Panchenko   /// Add symbolic operand to the instruction with an optional addend.
42e290133cSMaksim Panchenko   auto addOperand = [&](const MCSymbol *Symbol, uint64_t Addend) {
43e290133cSMaksim Panchenko     const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, *Ctx);
44e290133cSMaksim Panchenko     if (Addend)
45e290133cSMaksim Panchenko       Expr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Addend, *Ctx),
46e290133cSMaksim Panchenko                                      *Ctx);
47e290133cSMaksim Panchenko     Inst.addOperand(MCOperand::createExpr(Expr));
48e290133cSMaksim Panchenko   };
49e290133cSMaksim Panchenko 
50e290133cSMaksim Panchenko   // Check if the operand being added is a displacement part of a compound
51e290133cSMaksim Panchenko   // memory operand that uses PC-relative addressing. If it is, try to symbolize
5218176426SMaksim Panchenko   // it without relocations. Return true on success, false otherwise.
5318176426SMaksim Panchenko   auto processPCRelOperandNoRel = [&]() {
54e290133cSMaksim Panchenko     const int MemOp = BC.MIB->getMemoryOperandNo(Inst);
55e290133cSMaksim Panchenko     if (MemOp == -1)
56e290133cSMaksim Panchenko       return false;
57e290133cSMaksim Panchenko 
58e290133cSMaksim Panchenko     const unsigned DispOp = MemOp + X86::AddrDisp;
59e290133cSMaksim Panchenko     if (Inst.getNumOperands() != DispOp)
60e290133cSMaksim Panchenko       return false;
61e290133cSMaksim Panchenko 
62e290133cSMaksim Panchenko     const MCOperand &Base = Inst.getOperand(MemOp + X86::AddrBaseReg);
63e290133cSMaksim Panchenko     if (Base.getReg() != BC.MRI->getProgramCounter())
64e290133cSMaksim Panchenko       return false;
65e290133cSMaksim Panchenko 
66e290133cSMaksim Panchenko     const MCOperand &Scale = Inst.getOperand(MemOp + X86::AddrScaleAmt);
67e290133cSMaksim Panchenko     const MCOperand &Index = Inst.getOperand(MemOp + X86::AddrIndexReg);
68e290133cSMaksim Panchenko     if (Scale.getImm() != 0 && Index.getReg() != MCRegister::NoRegister)
69e290133cSMaksim Panchenko       return false;
70e290133cSMaksim Panchenko 
71e290133cSMaksim Panchenko     const MCSymbol *TargetSymbol;
72e290133cSMaksim Panchenko     uint64_t TargetOffset;
7343f56a2fSMaksim Panchenko 
7443f56a2fSMaksim Panchenko     if (!CreateNewSymbols) {
7543f56a2fSMaksim Panchenko       if (BinaryData *BD = BC.getBinaryDataContainingAddress(Value)) {
7643f56a2fSMaksim Panchenko         TargetSymbol = BD->getSymbol();
7743f56a2fSMaksim Panchenko         TargetOffset = Value - BD->getAddress();
7843f56a2fSMaksim Panchenko       } else {
7943f56a2fSMaksim Panchenko         return false;
8043f56a2fSMaksim Panchenko       }
8143f56a2fSMaksim Panchenko     } else {
82e290133cSMaksim Panchenko       std::tie(TargetSymbol, TargetOffset) =
8318176426SMaksim Panchenko           BC.handleAddressRef(Value, Function, /*IsPCRel=*/true);
8443f56a2fSMaksim Panchenko     }
85e290133cSMaksim Panchenko 
86e290133cSMaksim Panchenko     addOperand(TargetSymbol, TargetOffset);
87e290133cSMaksim Panchenko 
88e290133cSMaksim Panchenko     return true;
8918176426SMaksim Panchenko   };
9018176426SMaksim Panchenko 
9118176426SMaksim Panchenko   // Check for GOTPCRELX relocations first. Because these relocations allow the
9218176426SMaksim Panchenko   // linker to modify the instruction, we have to check the offset range
9318176426SMaksim Panchenko   // corresponding to the instruction, not the offset of the operand.
9418176426SMaksim Panchenko   // Note that if there is GOTPCRELX relocation against the instruction, there
9518176426SMaksim Panchenko   // will be no other relocation in this range, since GOTPCRELX applies only to
9618176426SMaksim Panchenko   // certain instruction types.
9718176426SMaksim Panchenko   const uint64_t InstOffset = InstAddress - Function.getAddress();
9818176426SMaksim Panchenko   const Relocation *Relocation =
9918176426SMaksim Panchenko       Function.getRelocationInRange(InstOffset, InstOffset + InstSize);
10018176426SMaksim Panchenko   if (Relocation && Relocation::isX86GOTPCRELX(Relocation->Type)) {
10118176426SMaksim Panchenko     // If the operand is PC-relative, convert it without using the relocation
10218176426SMaksim Panchenko     // information. For GOTPCRELX, it is safe to use the absolute address
10318176426SMaksim Panchenko     // instead of extracting the addend from the relocation, as non-standard
10418176426SMaksim Panchenko     // forms will be rejected by linker conversion process and the operand
10518176426SMaksim Panchenko     // will always reference GOT which we don't rewrite.
10618176426SMaksim Panchenko     if (processPCRelOperandNoRel())
10718176426SMaksim Panchenko       return true;
10818176426SMaksim Panchenko 
10918176426SMaksim Panchenko     // The linker converted the PC-relative address to an absolute one.
11018176426SMaksim Panchenko     // Symbolize this address.
11143f56a2fSMaksim Panchenko     if (CreateNewSymbols)
11218176426SMaksim Panchenko       BC.handleAddressRef(Value, Function, /*IsPCRel=*/false);
11343f56a2fSMaksim Panchenko 
11418176426SMaksim Panchenko     const BinaryData *Target = BC.getBinaryDataAtAddress(Value);
11543f56a2fSMaksim Panchenko     if (!Target) {
11643f56a2fSMaksim Panchenko       assert(!CreateNewSymbols &&
11718176426SMaksim Panchenko              "BinaryData should exist at converted GOTPCRELX destination");
11843f56a2fSMaksim Panchenko       return false;
11943f56a2fSMaksim Panchenko     }
12018176426SMaksim Panchenko 
12118176426SMaksim Panchenko     addOperand(Target->getSymbol(), /*Addend=*/0);
12218176426SMaksim Panchenko 
12318176426SMaksim Panchenko     return true;
12418176426SMaksim Panchenko   }
12518176426SMaksim Panchenko 
12618176426SMaksim Panchenko   // Check for relocations against the operand.
12718176426SMaksim Panchenko   if (!Relocation || Relocation->Offset != InstOffset + ImmOffset)
12818176426SMaksim Panchenko     Relocation = Function.getRelocationAt(InstOffset + ImmOffset);
12918176426SMaksim Panchenko 
13018176426SMaksim Panchenko   if (!Relocation)
13118176426SMaksim Panchenko     return processPCRelOperandNoRel();
13218176426SMaksim Panchenko 
133853e126cSRafael Auler   // GOTPC64 is special because the X86 Assembler doesn't know how to emit
134853e126cSRafael Auler   // a PC-relative 8-byte fixup, which is what we need to cover this. The
135853e126cSRafael Auler   // only way to do this is to use the symbol name _GLOBAL_OFFSET_TABLE_.
136853e126cSRafael Auler   if (Relocation::isX86GOTPC64(Relocation->Type)) {
13713d60ce2SAmir Ayupov     auto PairOrErr = handleGOTPC64(*Relocation, InstAddress);
13813d60ce2SAmir Ayupov     if (auto E = PairOrErr.takeError()) {
13913d60ce2SAmir Ayupov       Function.setSimple(false);
140*52cf0711SAmir Ayupov       BC.logBOLTErrorsAndQuitOnFatal(std::move(E));
14113d60ce2SAmir Ayupov       return false;
14213d60ce2SAmir Ayupov     }
14313d60ce2SAmir Ayupov     auto [Sym, Addend] = *PairOrErr;
144853e126cSRafael Auler     addOperand(Sym, Addend);
145853e126cSRafael Auler     return true;
146853e126cSRafael Auler   }
147853e126cSRafael Auler 
14818176426SMaksim Panchenko   uint64_t SymbolValue = Relocation->Value - Relocation->Addend;
14918176426SMaksim Panchenko   if (Relocation->isPCRelative())
15018176426SMaksim Panchenko     SymbolValue += InstAddress + ImmOffset;
15118176426SMaksim Panchenko 
15218176426SMaksim Panchenko   // Process reference to the symbol.
15343f56a2fSMaksim Panchenko   if (CreateNewSymbols)
15418176426SMaksim Panchenko     BC.handleAddressRef(SymbolValue, Function, Relocation->isPCRelative());
15518176426SMaksim Panchenko 
15618176426SMaksim Panchenko   uint64_t Addend = Relocation->Addend;
15718176426SMaksim Panchenko   // Real addend for pc-relative targets is adjusted with a delta from
15818176426SMaksim Panchenko   // the relocation placement to the next instruction.
15918176426SMaksim Panchenko   if (Relocation->isPCRelative())
16018176426SMaksim Panchenko     Addend += InstOffset + InstSize - Relocation->Offset;
16118176426SMaksim Panchenko 
16218176426SMaksim Panchenko   addOperand(Relocation->Symbol, Addend);
16318176426SMaksim Panchenko 
16418176426SMaksim Panchenko   return true;
165e290133cSMaksim Panchenko }
166e290133cSMaksim Panchenko 
16713d60ce2SAmir Ayupov Expected<std::pair<MCSymbol *, uint64_t>>
handleGOTPC64(const Relocation & R,uint64_t InstrAddr)168853e126cSRafael Auler X86MCSymbolizer::handleGOTPC64(const Relocation &R, uint64_t InstrAddr) {
169853e126cSRafael Auler   BinaryContext &BC = Function.getBinaryContext();
170853e126cSRafael Auler   const BinaryData *GOTSymBD = BC.getGOTSymbol();
171853e126cSRafael Auler   if (!GOTSymBD || !GOTSymBD->getAddress()) {
17213d60ce2SAmir Ayupov     // This error is pretty serious but we can't kill the disassembler
17313d60ce2SAmir Ayupov     // because of it, so don't make it fatal. Log it and warn the user.
17413d60ce2SAmir Ayupov     return createNonFatalBOLTError(
17513d60ce2SAmir Ayupov         "R_X86_GOTPC64 relocation is present but we did not detect "
17613d60ce2SAmir Ayupov         "a valid  _GLOBAL_OFFSET_TABLE_ in symbol table\n");
177853e126cSRafael Auler   }
178853e126cSRafael Auler   // R_X86_GOTPC64 are not relative to the Reloc nor end of instruction,
179853e126cSRafael Auler   // but the start of the MOVABSQ instruction. So the Target Address is
180853e126cSRafael Auler   // whatever is encoded in the original operand when we disassembled
181853e126cSRafael Auler   // the binary (here, R.Value) plus MOVABSQ address (InstrAddr).
182853e126cSRafael Auler   // Here we extract the intended Addend by subtracting the real
183853e126cSRafael Auler   // GOT addr.
184853e126cSRafael Auler   const int64_t Addend = R.Value + InstrAddr - GOTSymBD->getAddress();
185853e126cSRafael Auler   return std::make_pair(BC.Ctx->getOrCreateSymbol("_GLOBAL_OFFSET_TABLE_"),
186853e126cSRafael Auler                         Addend);
187853e126cSRafael Auler }
188853e126cSRafael Auler 
tryAddingPcLoadReferenceComment(raw_ostream & CStream,int64_t Value,uint64_t Address)189e290133cSMaksim Panchenko void X86MCSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &CStream,
190e290133cSMaksim Panchenko                                                       int64_t Value,
191e290133cSMaksim Panchenko                                                       uint64_t Address) {}
192e290133cSMaksim Panchenko 
193e290133cSMaksim Panchenko } // namespace bolt
194e290133cSMaksim Panchenko } // namespace llvm
195