1 //===- Symbols.cpp --------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "Symbols.h" 10 #include "InputFiles.h" 11 #include "InputSection.h" 12 #include "OutputSections.h" 13 #include "SyntheticSections.h" 14 #include "Target.h" 15 #include "Writer.h" 16 #include "lld/Common/ErrorHandler.h" 17 #include "lld/Common/Strings.h" 18 #include "llvm/ADT/STLExtras.h" 19 #include "llvm/Support/Path.h" 20 #include <cstring> 21 22 using namespace llvm; 23 using namespace llvm::object; 24 using namespace llvm::ELF; 25 26 namespace lld { 27 // Returns a symbol for an error message. 28 static std::string demangle(StringRef symName) { 29 if (elf::config->demangle) 30 return demangleItanium(symName); 31 return symName; 32 } 33 34 std::string toString(const elf::Symbol &b) { return demangle(b.getName()); } 35 std::string toELFString(const Archive::Symbol &b) { 36 return demangle(b.getName()); 37 } 38 39 namespace elf { 40 Defined *ElfSym::bss; 41 Defined *ElfSym::data; 42 Defined *ElfSym::etext1; 43 Defined *ElfSym::etext2; 44 Defined *ElfSym::edata1; 45 Defined *ElfSym::edata2; 46 Defined *ElfSym::end1; 47 Defined *ElfSym::end2; 48 Defined *ElfSym::globalOffsetTable; 49 Defined *ElfSym::mipsGp; 50 Defined *ElfSym::mipsGpDisp; 51 Defined *ElfSym::mipsLocalGp; 52 Defined *ElfSym::relaIpltStart; 53 Defined *ElfSym::relaIpltEnd; 54 Defined *ElfSym::riscvGlobalPointer; 55 Defined *ElfSym::tlsModuleBase; 56 57 static uint64_t getSymVA(const Symbol &sym, int64_t &addend) { 58 switch (sym.kind()) { 59 case Symbol::DefinedKind: { 60 auto &d = cast<Defined>(sym); 61 SectionBase *isec = d.section; 62 63 // This is an absolute symbol. 64 if (!isec) 65 return d.value; 66 67 assert(isec != &InputSection::discarded); 68 isec = isec->repl; 69 70 uint64_t offset = d.value; 71 72 // An object in an SHF_MERGE section might be referenced via a 73 // section symbol (as a hack for reducing the number of local 74 // symbols). 75 // Depending on the addend, the reference via a section symbol 76 // refers to a different object in the merge section. 77 // Since the objects in the merge section are not necessarily 78 // contiguous in the output, the addend can thus affect the final 79 // VA in a non-linear way. 80 // To make this work, we incorporate the addend into the section 81 // offset (and zero out the addend for later processing) so that 82 // we find the right object in the section. 83 if (d.isSection()) { 84 offset += addend; 85 addend = 0; 86 } 87 88 // In the typical case, this is actually very simple and boils 89 // down to adding together 3 numbers: 90 // 1. The address of the output section. 91 // 2. The offset of the input section within the output section. 92 // 3. The offset within the input section (this addition happens 93 // inside InputSection::getOffset). 94 // 95 // If you understand the data structures involved with this next 96 // line (and how they get built), then you have a pretty good 97 // understanding of the linker. 98 uint64_t va = isec->getVA(offset); 99 100 // MIPS relocatable files can mix regular and microMIPS code. 101 // Linker needs to distinguish such code. To do so microMIPS 102 // symbols has the `STO_MIPS_MICROMIPS` flag in the `st_other` 103 // field. Unfortunately, the `MIPS::relocateOne()` method has 104 // a symbol value only. To pass type of the symbol (regular/microMIPS) 105 // to that routine as well as other places where we write 106 // a symbol value as-is (.dynamic section, `Elf_Ehdr::e_entry` 107 // field etc) do the same trick as compiler uses to mark microMIPS 108 // for CPU - set the less-significant bit. 109 if (config->emachine == EM_MIPS && isMicroMips() && 110 ((sym.stOther & STO_MIPS_MICROMIPS) || sym.needsPltAddr)) 111 va |= 1; 112 113 if (d.isTls() && !config->relocatable) { 114 // Use the address of the TLS segment's first section rather than the 115 // segment's address, because segment addresses aren't initialized until 116 // after sections are finalized. (e.g. Measuring the size of .rela.dyn 117 // for Android relocation packing requires knowing TLS symbol addresses 118 // during section finalization.) 119 if (!Out::tlsPhdr || !Out::tlsPhdr->firstSec) 120 fatal(toString(d.file) + 121 " has an STT_TLS symbol but doesn't have an SHF_TLS section"); 122 return va - Out::tlsPhdr->firstSec->addr; 123 } 124 return va; 125 } 126 case Symbol::SharedKind: 127 case Symbol::UndefinedKind: 128 return 0; 129 case Symbol::LazyArchiveKind: 130 case Symbol::LazyObjectKind: 131 assert(sym.isUsedInRegularObj && "lazy symbol reached writer"); 132 return 0; 133 case Symbol::CommonKind: 134 llvm_unreachable("common symbol reached writer"); 135 case Symbol::PlaceholderKind: 136 llvm_unreachable("placeholder symbol reached writer"); 137 } 138 llvm_unreachable("invalid symbol kind"); 139 } 140 141 uint64_t Symbol::getVA(int64_t addend) const { 142 uint64_t outVA = getSymVA(*this, addend); 143 return outVA + addend; 144 } 145 146 uint64_t Symbol::getGotVA() const { 147 if (gotInIgot) 148 return in.igotPlt->getVA() + getGotPltOffset(); 149 return in.got->getVA() + getGotOffset(); 150 } 151 152 uint64_t Symbol::getGotOffset() const { return gotIndex * config->wordsize; } 153 154 uint64_t Symbol::getGotPltVA() const { 155 if (isInIplt) 156 return in.igotPlt->getVA() + getGotPltOffset(); 157 return in.gotPlt->getVA() + getGotPltOffset(); 158 } 159 160 uint64_t Symbol::getGotPltOffset() const { 161 if (isInIplt) 162 return pltIndex * config->wordsize; 163 return (pltIndex + target->gotPltHeaderEntriesNum) * config->wordsize; 164 } 165 166 uint64_t Symbol::getPltVA() const { 167 uint64_t outVA = isInIplt 168 ? in.iplt->getVA() + pltIndex * target->ipltEntrySize 169 : in.plt->getVA() + in.plt->headerSize + 170 pltIndex * target->pltEntrySize; 171 172 // While linking microMIPS code PLT code are always microMIPS 173 // code. Set the less-significant bit to track that fact. 174 // See detailed comment in the `getSymVA` function. 175 if (config->emachine == EM_MIPS && isMicroMips()) 176 outVA |= 1; 177 return outVA; 178 } 179 180 uint64_t Symbol::getSize() const { 181 if (const auto *dr = dyn_cast<Defined>(this)) 182 return dr->size; 183 return cast<SharedSymbol>(this)->size; 184 } 185 186 OutputSection *Symbol::getOutputSection() const { 187 if (auto *s = dyn_cast<Defined>(this)) { 188 if (auto *sec = s->section) 189 return sec->repl->getOutputSection(); 190 return nullptr; 191 } 192 return nullptr; 193 } 194 195 // If a symbol name contains '@', the characters after that is 196 // a symbol version name. This function parses that. 197 void Symbol::parseSymbolVersion() { 198 StringRef s = getName(); 199 size_t pos = s.find('@'); 200 if (pos == 0 || pos == StringRef::npos) 201 return; 202 StringRef verstr = s.substr(pos + 1); 203 if (verstr.empty()) 204 return; 205 206 // Truncate the symbol name so that it doesn't include the version string. 207 nameSize = pos; 208 209 // If this is not in this DSO, it is not a definition. 210 if (!isDefined()) 211 return; 212 213 // '@@' in a symbol name means the default version. 214 // It is usually the most recent one. 215 bool isDefault = (verstr[0] == '@'); 216 if (isDefault) 217 verstr = verstr.substr(1); 218 219 for (const VersionDefinition &ver : namedVersionDefs()) { 220 if (ver.name != verstr) 221 continue; 222 223 if (isDefault) 224 versionId = ver.id; 225 else 226 versionId = ver.id | VERSYM_HIDDEN; 227 return; 228 } 229 230 // It is an error if the specified version is not defined. 231 // Usually version script is not provided when linking executable, 232 // but we may still want to override a versioned symbol from DSO, 233 // so we do not report error in this case. We also do not error 234 // if the symbol has a local version as it won't be in the dynamic 235 // symbol table. 236 if (config->shared && versionId != VER_NDX_LOCAL) 237 error(toString(file) + ": symbol " + s + " has undefined version " + 238 verstr); 239 } 240 241 void Symbol::fetch() const { 242 if (auto *sym = dyn_cast<LazyArchive>(this)) { 243 cast<ArchiveFile>(sym->file)->fetch(sym->sym); 244 return; 245 } 246 247 if (auto *sym = dyn_cast<LazyObject>(this)) { 248 dyn_cast<LazyObjFile>(sym->file)->fetch(); 249 return; 250 } 251 252 llvm_unreachable("Symbol::fetch() is called on a non-lazy symbol"); 253 } 254 255 MemoryBufferRef LazyArchive::getMemberBuffer() { 256 Archive::Child c = 257 CHECK(sym.getMember(), 258 "could not get the member for symbol " + toELFString(sym)); 259 260 return CHECK(c.getMemoryBufferRef(), 261 "could not get the buffer for the member defining symbol " + 262 toELFString(sym)); 263 } 264 265 uint8_t Symbol::computeBinding() const { 266 if (config->relocatable) 267 return binding; 268 if ((visibility != STV_DEFAULT && visibility != STV_PROTECTED) || 269 versionId == VER_NDX_LOCAL) 270 return STB_LOCAL; 271 if (!config->gnuUnique && binding == STB_GNU_UNIQUE) 272 return STB_GLOBAL; 273 return binding; 274 } 275 276 bool Symbol::includeInDynsym() const { 277 if (!config->hasDynSymTab) 278 return false; 279 if (computeBinding() == STB_LOCAL) 280 return false; 281 if (!isDefined() && !isCommon()) 282 // This should unconditionally return true, unfortunately glibc -static-pie 283 // expects undefined weak symbols not to exist in .dynsym, e.g. 284 // __pthread_mutex_lock reference in _dl_add_to_namespace_list, 285 // __pthread_initialize_minimal reference in csu/libc-start.c. 286 return !(config->noDynamicLinker && isUndefWeak()); 287 288 return exportDynamic || inDynamicList; 289 } 290 291 // Print out a log message for --trace-symbol. 292 void printTraceSymbol(const Symbol *sym) { 293 std::string s; 294 if (sym->isUndefined()) 295 s = ": reference to "; 296 else if (sym->isLazy()) 297 s = ": lazy definition of "; 298 else if (sym->isShared()) 299 s = ": shared definition of "; 300 else if (sym->isCommon()) 301 s = ": common definition of "; 302 else 303 s = ": definition of "; 304 305 message(toString(sym->file) + s + sym->getName()); 306 } 307 308 void maybeWarnUnorderableSymbol(const Symbol *sym) { 309 if (!config->warnSymbolOrdering) 310 return; 311 312 // If UnresolvedPolicy::Ignore is used, no "undefined symbol" error/warning 313 // is emitted. It makes sense to not warn on undefined symbols. 314 // 315 // Note, ld.bfd --symbol-ordering-file= does not warn on undefined symbols, 316 // but we don't have to be compatible here. 317 if (sym->isUndefined() && 318 config->unresolvedSymbols == UnresolvedPolicy::Ignore) 319 return; 320 321 const InputFile *file = sym->file; 322 auto *d = dyn_cast<Defined>(sym); 323 324 auto report = [&](StringRef s) { warn(toString(file) + s + sym->getName()); }; 325 326 if (sym->isUndefined()) 327 report(": unable to order undefined symbol: "); 328 else if (sym->isShared()) 329 report(": unable to order shared symbol: "); 330 else if (d && !d->section) 331 report(": unable to order absolute symbol: "); 332 else if (d && isa<OutputSection>(d->section)) 333 report(": unable to order synthetic symbol: "); 334 else if (d && !d->section->repl->isLive()) 335 report(": unable to order discarded symbol: "); 336 } 337 338 // Returns true if a symbol can be replaced at load-time by a symbol 339 // with the same name defined in other ELF executable or DSO. 340 bool computeIsPreemptible(const Symbol &sym) { 341 assert(!sym.isLocal()); 342 343 // Only symbols with default visibility that appear in dynsym can be 344 // preempted. Symbols with protected visibility cannot be preempted. 345 if (!sym.includeInDynsym() || sym.visibility != STV_DEFAULT) 346 return false; 347 348 // At this point copy relocations have not been created yet, so any 349 // symbol that is not defined locally is preemptible. 350 if (!sym.isDefined()) 351 return true; 352 353 if (!config->shared) 354 return false; 355 356 // If the dynamic list is present, it specifies preemptable symbols in a DSO. 357 if (config->hasDynamicList) 358 return sym.inDynamicList; 359 360 // -Bsymbolic means that definitions are not preempted. 361 if (config->bsymbolic || (config->bsymbolicFunctions && sym.isFunc())) 362 return false; 363 return true; 364 } 365 366 static uint8_t getMinVisibility(uint8_t va, uint8_t vb) { 367 if (va == STV_DEFAULT) 368 return vb; 369 if (vb == STV_DEFAULT) 370 return va; 371 return std::min(va, vb); 372 } 373 374 // Merge symbol properties. 375 // 376 // When we have many symbols of the same name, we choose one of them, 377 // and that's the result of symbol resolution. However, symbols that 378 // were not chosen still affect some symbol properties. 379 void Symbol::mergeProperties(const Symbol &other) { 380 if (other.exportDynamic) 381 exportDynamic = true; 382 if (other.isUsedInRegularObj) 383 isUsedInRegularObj = true; 384 385 // DSO symbols do not affect visibility in the output. 386 if (!other.isShared()) 387 visibility = getMinVisibility(visibility, other.visibility); 388 } 389 390 void Symbol::resolve(const Symbol &other) { 391 mergeProperties(other); 392 393 if (isPlaceholder()) { 394 replace(other); 395 return; 396 } 397 398 switch (other.kind()) { 399 case Symbol::UndefinedKind: 400 resolveUndefined(cast<Undefined>(other)); 401 break; 402 case Symbol::CommonKind: 403 resolveCommon(cast<CommonSymbol>(other)); 404 break; 405 case Symbol::DefinedKind: 406 resolveDefined(cast<Defined>(other)); 407 break; 408 case Symbol::LazyArchiveKind: 409 resolveLazy(cast<LazyArchive>(other)); 410 break; 411 case Symbol::LazyObjectKind: 412 resolveLazy(cast<LazyObject>(other)); 413 break; 414 case Symbol::SharedKind: 415 resolveShared(cast<SharedSymbol>(other)); 416 break; 417 case Symbol::PlaceholderKind: 418 llvm_unreachable("bad symbol kind"); 419 } 420 } 421 422 void Symbol::resolveUndefined(const Undefined &other) { 423 // An undefined symbol with non default visibility must be satisfied 424 // in the same DSO. 425 // 426 // If this is a non-weak defined symbol in a discarded section, override the 427 // existing undefined symbol for better error message later. 428 if ((isShared() && other.visibility != STV_DEFAULT) || 429 (isUndefined() && other.binding != STB_WEAK && other.discardedSecIdx)) { 430 replace(other); 431 return; 432 } 433 434 if (traced) 435 printTraceSymbol(&other); 436 437 if (isLazy()) { 438 // An undefined weak will not fetch archive members. See comment on Lazy in 439 // Symbols.h for the details. 440 if (other.binding == STB_WEAK) { 441 binding = STB_WEAK; 442 type = other.type; 443 return; 444 } 445 446 // Do extra check for --warn-backrefs. 447 // 448 // --warn-backrefs is an option to prevent an undefined reference from 449 // fetching an archive member written earlier in the command line. It can be 450 // used to keep compatibility with GNU linkers to some degree. 451 // I'll explain the feature and why you may find it useful in this comment. 452 // 453 // lld's symbol resolution semantics is more relaxed than traditional Unix 454 // linkers. For example, 455 // 456 // ld.lld foo.a bar.o 457 // 458 // succeeds even if bar.o contains an undefined symbol that has to be 459 // resolved by some object file in foo.a. Traditional Unix linkers don't 460 // allow this kind of backward reference, as they visit each file only once 461 // from left to right in the command line while resolving all undefined 462 // symbols at the moment of visiting. 463 // 464 // In the above case, since there's no undefined symbol when a linker visits 465 // foo.a, no files are pulled out from foo.a, and because the linker forgets 466 // about foo.a after visiting, it can't resolve undefined symbols in bar.o 467 // that could have been resolved otherwise. 468 // 469 // That lld accepts more relaxed form means that (besides it'd make more 470 // sense) you can accidentally write a command line or a build file that 471 // works only with lld, even if you have a plan to distribute it to wider 472 // users who may be using GNU linkers. With --warn-backrefs, you can detect 473 // a library order that doesn't work with other Unix linkers. 474 // 475 // The option is also useful to detect cyclic dependencies between static 476 // archives. Again, lld accepts 477 // 478 // ld.lld foo.a bar.a 479 // 480 // even if foo.a and bar.a depend on each other. With --warn-backrefs, it is 481 // handled as an error. 482 // 483 // Here is how the option works. We assign a group ID to each file. A file 484 // with a smaller group ID can pull out object files from an archive file 485 // with an equal or greater group ID. Otherwise, it is a reverse dependency 486 // and an error. 487 // 488 // A file outside --{start,end}-group gets a fresh ID when instantiated. All 489 // files within the same --{start,end}-group get the same group ID. E.g. 490 // 491 // ld.lld A B --start-group C D --end-group E 492 // 493 // A forms group 0. B form group 1. C and D (including their member object 494 // files) form group 2. E forms group 3. I think that you can see how this 495 // group assignment rule simulates the traditional linker's semantics. 496 bool backref = config->warnBackrefs && other.file && 497 file->groupId < other.file->groupId; 498 fetch(); 499 500 // We don't report backward references to weak symbols as they can be 501 // overridden later. 502 if (backref && !isWeak()) 503 warn("backward reference detected: " + other.getName() + " in " + 504 toString(other.file) + " refers to " + toString(file)); 505 return; 506 } 507 508 // Undefined symbols in a SharedFile do not change the binding. 509 if (dyn_cast_or_null<SharedFile>(other.file)) 510 return; 511 512 if (isUndefined() || isShared()) { 513 // The binding will be weak if there is at least one reference and all are 514 // weak. The binding has one opportunity to change to weak: if the first 515 // reference is weak. 516 if (other.binding != STB_WEAK || !referenced) 517 binding = other.binding; 518 referenced = true; 519 } 520 } 521 522 // Using .symver foo,foo@@VER unfortunately creates two symbols: foo and 523 // foo@@VER. We want to effectively ignore foo, so give precedence to 524 // foo@@VER. 525 // FIXME: If users can transition to using 526 // .symver foo,foo@@@VER 527 // we can delete this hack. 528 static int compareVersion(StringRef a, StringRef b) { 529 bool x = a.contains("@@"); 530 bool y = b.contains("@@"); 531 if (!x && y) 532 return 1; 533 if (x && !y) 534 return -1; 535 return 0; 536 } 537 538 // Compare two symbols. Return 1 if the new symbol should win, -1 if 539 // the new symbol should lose, or 0 if there is a conflict. 540 int Symbol::compare(const Symbol *other) const { 541 assert(other->isDefined() || other->isCommon()); 542 543 if (!isDefined() && !isCommon()) 544 return 1; 545 546 if (int cmp = compareVersion(getName(), other->getName())) 547 return cmp; 548 549 if (other->isWeak()) 550 return -1; 551 552 if (isWeak()) 553 return 1; 554 555 if (isCommon() && other->isCommon()) { 556 if (config->warnCommon) 557 warn("multiple common of " + getName()); 558 return 0; 559 } 560 561 if (isCommon()) { 562 if (config->warnCommon) 563 warn("common " + getName() + " is overridden"); 564 return 1; 565 } 566 567 if (other->isCommon()) { 568 if (config->warnCommon) 569 warn("common " + getName() + " is overridden"); 570 return -1; 571 } 572 573 auto *oldSym = cast<Defined>(this); 574 auto *newSym = cast<Defined>(other); 575 576 if (dyn_cast_or_null<BitcodeFile>(other->file)) 577 return 0; 578 579 if (!oldSym->section && !newSym->section && oldSym->value == newSym->value && 580 newSym->binding == STB_GLOBAL) 581 return -1; 582 583 return 0; 584 } 585 586 static void reportDuplicate(Symbol *sym, InputFile *newFile, 587 InputSectionBase *errSec, uint64_t errOffset) { 588 if (config->allowMultipleDefinition) 589 return; 590 591 Defined *d = cast<Defined>(sym); 592 if (!d->section || !errSec) { 593 error("duplicate symbol: " + toString(*sym) + "\n>>> defined in " + 594 toString(sym->file) + "\n>>> defined in " + toString(newFile)); 595 return; 596 } 597 598 // Construct and print an error message in the form of: 599 // 600 // ld.lld: error: duplicate symbol: foo 601 // >>> defined at bar.c:30 602 // >>> bar.o (/home/alice/src/bar.o) 603 // >>> defined at baz.c:563 604 // >>> baz.o in archive libbaz.a 605 auto *sec1 = cast<InputSectionBase>(d->section); 606 std::string src1 = sec1->getSrcMsg(*sym, d->value); 607 std::string obj1 = sec1->getObjMsg(d->value); 608 std::string src2 = errSec->getSrcMsg(*sym, errOffset); 609 std::string obj2 = errSec->getObjMsg(errOffset); 610 611 std::string msg = "duplicate symbol: " + toString(*sym) + "\n>>> defined at "; 612 if (!src1.empty()) 613 msg += src1 + "\n>>> "; 614 msg += obj1 + "\n>>> defined at "; 615 if (!src2.empty()) 616 msg += src2 + "\n>>> "; 617 msg += obj2; 618 error(msg); 619 } 620 621 void Symbol::resolveCommon(const CommonSymbol &other) { 622 int cmp = compare(&other); 623 if (cmp < 0) 624 return; 625 626 if (cmp > 0) { 627 if (auto *s = dyn_cast<SharedSymbol>(this)) { 628 // Increase st_size if the shared symbol has a larger st_size. The shared 629 // symbol may be created from common symbols. The fact that some object 630 // files were linked into a shared object first should not change the 631 // regular rule that picks the largest st_size. 632 uint64_t size = s->size; 633 replace(other); 634 if (size > cast<CommonSymbol>(this)->size) 635 cast<CommonSymbol>(this)->size = size; 636 } else { 637 replace(other); 638 } 639 return; 640 } 641 642 CommonSymbol *oldSym = cast<CommonSymbol>(this); 643 644 oldSym->alignment = std::max(oldSym->alignment, other.alignment); 645 if (oldSym->size < other.size) { 646 oldSym->file = other.file; 647 oldSym->size = other.size; 648 } 649 } 650 651 void Symbol::resolveDefined(const Defined &other) { 652 int cmp = compare(&other); 653 if (cmp > 0) 654 replace(other); 655 else if (cmp == 0) 656 reportDuplicate(this, other.file, 657 dyn_cast_or_null<InputSectionBase>(other.section), 658 other.value); 659 } 660 661 template <class LazyT> void Symbol::resolveLazy(const LazyT &other) { 662 if (!isUndefined()) 663 return; 664 665 // An undefined weak will not fetch archive members. See comment on Lazy in 666 // Symbols.h for the details. 667 if (isWeak()) { 668 uint8_t ty = type; 669 replace(other); 670 type = ty; 671 binding = STB_WEAK; 672 return; 673 } 674 675 other.fetch(); 676 } 677 678 void Symbol::resolveShared(const SharedSymbol &other) { 679 if (isCommon()) { 680 // See the comment in resolveCommon() above. 681 if (other.size > cast<CommonSymbol>(this)->size) 682 cast<CommonSymbol>(this)->size = other.size; 683 return; 684 } 685 if (visibility == STV_DEFAULT && (isUndefined() || isLazy())) { 686 // An undefined symbol with non default visibility must be satisfied 687 // in the same DSO. 688 uint8_t bind = binding; 689 replace(other); 690 binding = bind; 691 referenced = true; 692 } 693 } 694 695 } // namespace elf 696 } // namespace lld 697