1 //===- Relocations.h -------------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLD_ELF_RELOCATIONS_H 10 #define LLD_ELF_RELOCATIONS_H 11 12 #include "lld/Common/LLVM.h" 13 #include "llvm/ADT/DenseMap.h" 14 #include "llvm/ADT/STLExtras.h" 15 #include "llvm/Object/ELFTypes.h" 16 #include <vector> 17 18 namespace lld::elf { 19 struct Ctx; 20 class Defined; 21 class Symbol; 22 class InputSection; 23 class InputSectionBase; 24 class OutputSection; 25 class RelocationBaseSection; 26 class SectionBase; 27 28 // Represents a relocation type, such as R_X86_64_PC32 or R_ARM_THM_CALL. 29 struct RelType { 30 uint32_t v = 0; 31 /*implicit*/ constexpr RelType(uint32_t v = 0) : v(v) {} 32 /*implicit*/ operator uint32_t() const { return v; } 33 }; 34 35 using JumpModType = uint32_t; 36 37 // List of target-independent relocation types. Relocations read 38 // from files are converted to these types so that the main code 39 // doesn't have to know about architecture-specific details. 40 enum RelExpr { 41 R_ABS, 42 R_ADDEND, 43 R_DTPREL, 44 R_GOT, 45 R_GOT_OFF, 46 R_GOT_PC, 47 R_GOTONLY_PC, 48 R_GOTPLTONLY_PC, 49 R_GOTPLT, 50 R_GOTPLTREL, 51 R_GOTREL, 52 R_GOTPLT_GOTREL, 53 R_GOTPLT_PC, 54 R_NONE, 55 R_PC, 56 R_PLT, 57 R_PLT_PC, 58 R_PLT_GOTPLT, 59 R_PLT_GOTREL, 60 R_RELAX_HINT, 61 R_RELAX_GOT_PC, 62 R_RELAX_GOT_PC_NOPIC, 63 R_RELAX_TLS_GD_TO_IE, 64 R_RELAX_TLS_GD_TO_IE_ABS, 65 R_RELAX_TLS_GD_TO_IE_GOT_OFF, 66 R_RELAX_TLS_GD_TO_IE_GOTPLT, 67 R_RELAX_TLS_GD_TO_LE, 68 R_RELAX_TLS_GD_TO_LE_NEG, 69 R_RELAX_TLS_IE_TO_LE, 70 R_RELAX_TLS_LD_TO_LE, 71 R_RELAX_TLS_LD_TO_LE_ABS, 72 R_SIZE, 73 R_TPREL, 74 R_TPREL_NEG, 75 R_TLSDESC, 76 R_TLSDESC_CALL, 77 R_TLSDESC_PC, 78 R_TLSDESC_GOTPLT, 79 R_TLSGD_GOT, 80 R_TLSGD_GOTPLT, 81 R_TLSGD_PC, 82 R_TLSIE_HINT, 83 R_TLSLD_GOT, 84 R_TLSLD_GOTPLT, 85 R_TLSLD_GOT_OFF, 86 R_TLSLD_HINT, 87 R_TLSLD_PC, 88 89 // The following is abstract relocation types used for only one target. 90 // 91 // Even though RelExpr is intended to be a target-neutral representation 92 // of a relocation type, there are some relocations whose semantics are 93 // unique to a target. Such relocation are marked with RE_<TARGET_NAME>. 94 RE_AARCH64_GOT_PAGE_PC, 95 RE_AARCH64_AUTH_GOT_PAGE_PC, 96 RE_AARCH64_GOT_PAGE, 97 RE_AARCH64_AUTH_GOT, 98 RE_AARCH64_AUTH_GOT_PC, 99 RE_AARCH64_PAGE_PC, 100 RE_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC, 101 RE_AARCH64_TLSDESC_PAGE, 102 RE_AARCH64_AUTH_TLSDESC_PAGE, 103 RE_AARCH64_AUTH_TLSDESC, 104 RE_AARCH64_AUTH, 105 RE_ARM_PCA, 106 RE_ARM_SBREL, 107 RE_MIPS_GOTREL, 108 RE_MIPS_GOT_GP, 109 RE_MIPS_GOT_GP_PC, 110 RE_MIPS_GOT_LOCAL_PAGE, 111 RE_MIPS_GOT_OFF, 112 RE_MIPS_GOT_OFF32, 113 RE_MIPS_TLSGD, 114 RE_MIPS_TLSLD, 115 RE_PPC32_PLTREL, 116 RE_PPC64_CALL, 117 RE_PPC64_CALL_PLT, 118 RE_PPC64_RELAX_TOC, 119 RE_PPC64_TOCBASE, 120 RE_PPC64_RELAX_GOT_PC, 121 RE_RISCV_ADD, 122 RE_RISCV_LEB128, 123 RE_RISCV_PC_INDIRECT, 124 // Same as R_PC but with page-aligned semantics. 125 RE_LOONGARCH_PAGE_PC, 126 // Same as R_PLT_PC but with page-aligned semantics. 127 RE_LOONGARCH_PLT_PAGE_PC, 128 // In addition to having page-aligned semantics, LoongArch GOT relocs are 129 // also reused for TLS, making the semantics differ from other architectures. 130 RE_LOONGARCH_GOT, 131 RE_LOONGARCH_GOT_PAGE_PC, 132 RE_LOONGARCH_TLSGD_PAGE_PC, 133 RE_LOONGARCH_TLSDESC_PAGE_PC, 134 }; 135 136 // Architecture-neutral representation of relocation. 137 struct Relocation { 138 RelExpr expr; 139 RelType type; 140 uint64_t offset; 141 int64_t addend; 142 Symbol *sym; 143 }; 144 145 // Manipulate jump instructions with these modifiers. These are used to relax 146 // jump instruction opcodes at basic block boundaries and are particularly 147 // useful when basic block sections are enabled. 148 struct JumpInstrMod { 149 uint64_t offset; 150 JumpModType original; 151 unsigned size; 152 }; 153 154 // This function writes undefined symbol diagnostics to an internal buffer. 155 // Call reportUndefinedSymbols() after calling scanRelocations() to emit 156 // the diagnostics. 157 template <class ELFT> void scanRelocations(Ctx &ctx); 158 template <class ELFT> void checkNoCrossRefs(Ctx &ctx); 159 void reportUndefinedSymbols(Ctx &); 160 void postScanRelocations(Ctx &ctx); 161 void addGotEntry(Ctx &ctx, Symbol &sym); 162 163 void hexagonTLSSymbolUpdate(Ctx &ctx); 164 bool hexagonNeedsTLSSymbol(ArrayRef<OutputSection *> outputSections); 165 166 class ThunkSection; 167 class Thunk; 168 class InputSectionDescription; 169 170 class ThunkCreator { 171 public: 172 // Thunk may be incomplete. Avoid inline ctor/dtor. 173 ThunkCreator(Ctx &ctx); 174 ~ThunkCreator(); 175 // Return true if Thunks have been added to OutputSections 176 bool createThunks(uint32_t pass, ArrayRef<OutputSection *> outputSections); 177 178 private: 179 void mergeThunks(ArrayRef<OutputSection *> outputSections); 180 181 ThunkSection *getISDThunkSec(OutputSection *os, InputSection *isec, 182 InputSectionDescription *isd, 183 const Relocation &rel, uint64_t src); 184 185 ThunkSection *getISThunkSec(InputSection *isec); 186 187 void createInitialThunkSections(ArrayRef<OutputSection *> outputSections); 188 189 std::pair<Thunk *, bool> getThunk(InputSection *isec, Relocation &rel, 190 uint64_t src); 191 192 std::pair<Thunk *, bool> getSyntheticLandingPad(Defined &d, int64_t a); 193 194 ThunkSection *addThunkSection(OutputSection *os, InputSectionDescription *, 195 uint64_t off); 196 197 bool normalizeExistingThunk(Relocation &rel, uint64_t src); 198 199 bool addSyntheticLandingPads(); 200 201 Ctx &ctx; 202 203 // Record all the available Thunks for a (Symbol, addend) pair, where Symbol 204 // is represented as a (section, offset) pair. There may be multiple 205 // relocations sharing the same (section, offset + addend) pair. We may revert 206 // a relocation back to its original non-Thunk target, and restore the 207 // original addend, so we cannot fold offset + addend. A nested pair is used 208 // because DenseMapInfo is not specialized for std::tuple. 209 llvm::DenseMap<std::pair<std::pair<SectionBase *, uint64_t>, int64_t>, 210 SmallVector<std::unique_ptr<Thunk>, 0>> 211 thunkedSymbolsBySectionAndAddend; 212 llvm::DenseMap<std::pair<Symbol *, int64_t>, 213 SmallVector<std::unique_ptr<Thunk>, 0>> 214 thunkedSymbols; 215 216 // Find a Thunk from the Thunks symbol definition, we can use this to find 217 // the Thunk from a relocation to the Thunks symbol definition. 218 llvm::DenseMap<Symbol *, Thunk *> thunks; 219 220 // Track InputSections that have an inline ThunkSection placed in front 221 // an inline ThunkSection may have control fall through to the section below 222 // so we need to make sure that there is only one of them. 223 // The Mips LA25 Thunk is an example of an inline ThunkSection, as is 224 // the AArch64BTLandingPadThunk. 225 llvm::DenseMap<InputSection *, ThunkSection *> thunkedSections; 226 227 // Record landing pads, generated for a section + offset destination. 228 // Landling pads are alternative entry points for destinations that need 229 // to be reached via thunks that use indirect branches. A destination 230 // needs at most one landing pad as that can be reused by all callers. 231 llvm::DenseMap<std::pair<std::pair<SectionBase *, uint64_t>, int64_t>, 232 std::unique_ptr<Thunk>> 233 landingPadsBySectionAndAddend; 234 235 // All the nonLandingPad thunks that have been created, in order of creation. 236 std::vector<Thunk *> allThunks; 237 238 // The number of completed passes of createThunks this permits us 239 // to do one time initialization on Pass 0 and put a limit on the 240 // number of times it can be called to prevent infinite loops. 241 uint32_t pass = 0; 242 }; 243 244 // Decode LEB128 without error checking. Only used by performance critical code 245 // like RelocsCrel. 246 inline uint64_t readLEB128(const uint8_t *&p, uint64_t leb) { 247 uint64_t acc = 0, shift = 0, byte; 248 do { 249 byte = *p++; 250 acc |= (byte - 128 * (byte >= leb)) << shift; 251 shift += 7; 252 } while (byte >= 128); 253 return acc; 254 } 255 inline uint64_t readULEB128(const uint8_t *&p) { return readLEB128(p, 128); } 256 inline int64_t readSLEB128(const uint8_t *&p) { return readLEB128(p, 64); } 257 258 // This class implements a CREL iterator that does not allocate extra memory. 259 template <bool is64> struct RelocsCrel { 260 using uint = std::conditional_t<is64, uint64_t, uint32_t>; 261 struct const_iterator { 262 using iterator_category = std::forward_iterator_tag; 263 using value_type = llvm::object::Elf_Crel_Impl<is64>; 264 using difference_type = ptrdiff_t; 265 using pointer = value_type *; 266 using reference = const value_type &; 267 uint32_t count; 268 uint8_t flagBits, shift; 269 const uint8_t *p; 270 llvm::object::Elf_Crel_Impl<is64> crel{}; 271 const_iterator(size_t hdr, const uint8_t *p) 272 : count(hdr / 8), flagBits(hdr & 4 ? 3 : 2), shift(hdr % 4), p(p) { 273 if (count) 274 step(); 275 } 276 void step() { 277 // See object::decodeCrel. 278 const uint8_t b = *p++; 279 crel.r_offset += b >> flagBits << shift; 280 if (b >= 0x80) 281 crel.r_offset += 282 ((readULEB128(p) << (7 - flagBits)) - (0x80 >> flagBits)) << shift; 283 if (b & 1) 284 crel.r_symidx += readSLEB128(p); 285 if (b & 2) 286 crel.r_type += readSLEB128(p); 287 if (b & 4 && flagBits == 3) 288 crel.r_addend += static_cast<uint>(readSLEB128(p)); 289 } 290 llvm::object::Elf_Crel_Impl<is64> operator*() const { return crel; }; 291 const llvm::object::Elf_Crel_Impl<is64> *operator->() const { 292 return &crel; 293 } 294 // For llvm::enumerate. 295 bool operator==(const const_iterator &r) const { return count == r.count; } 296 bool operator!=(const const_iterator &r) const { return count != r.count; } 297 const_iterator &operator++() { 298 if (--count) 299 step(); 300 return *this; 301 } 302 // For RelocationScanner::scanOne. 303 void operator+=(size_t n) { 304 for (; n; --n) 305 operator++(); 306 } 307 }; 308 309 size_t hdr = 0; 310 const uint8_t *p = nullptr; 311 312 constexpr RelocsCrel() = default; 313 RelocsCrel(const uint8_t *p) : hdr(readULEB128(p)) { this->p = p; } 314 size_t size() const { return hdr / 8; } 315 const_iterator begin() const { return {hdr, p}; } 316 const_iterator end() const { return {0, nullptr}; } 317 }; 318 319 template <class RelTy> struct Relocs : ArrayRef<RelTy> { 320 Relocs() = default; 321 Relocs(ArrayRef<RelTy> a) : ArrayRef<RelTy>(a) {} 322 }; 323 324 template <bool is64> 325 struct Relocs<llvm::object::Elf_Crel_Impl<is64>> : RelocsCrel<is64> { 326 using RelocsCrel<is64>::RelocsCrel; 327 }; 328 329 // Return a int64_t to make sure we get the sign extension out of the way as 330 // early as possible. 331 template <class ELFT> 332 static inline int64_t getAddend(const typename ELFT::Rel &rel) { 333 return 0; 334 } 335 template <class ELFT> 336 static inline int64_t getAddend(const typename ELFT::Rela &rel) { 337 return rel.r_addend; 338 } 339 template <class ELFT> 340 static inline int64_t getAddend(const typename ELFT::Crel &rel) { 341 return rel.r_addend; 342 } 343 344 template <typename RelTy> 345 inline Relocs<RelTy> sortRels(Relocs<RelTy> rels, 346 SmallVector<RelTy, 0> &storage) { 347 auto cmp = [](const RelTy &a, const RelTy &b) { 348 return a.r_offset < b.r_offset; 349 }; 350 if (!llvm::is_sorted(rels, cmp)) { 351 storage.assign(rels.begin(), rels.end()); 352 llvm::stable_sort(storage, cmp); 353 rels = Relocs<RelTy>(storage); 354 } 355 return rels; 356 } 357 358 template <bool is64> 359 inline Relocs<llvm::object::Elf_Crel_Impl<is64>> 360 sortRels(Relocs<llvm::object::Elf_Crel_Impl<is64>> rels, 361 SmallVector<llvm::object::Elf_Crel_Impl<is64>, 0> &storage) { 362 return {}; 363 } 364 365 RelocationBaseSection &getIRelativeSection(Ctx &ctx); 366 367 // Returns true if Expr refers a GOT entry. Note that this function returns 368 // false for TLS variables even though they need GOT, because TLS variables uses 369 // GOT differently than the regular variables. 370 bool needsGot(RelExpr expr); 371 } // namespace lld::elf 372 373 #endif 374