15ffd83dbSDimitry Andric //===- SymbolTable.cpp ----------------------------------------------------===// 25ffd83dbSDimitry Andric // 35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 65ffd83dbSDimitry Andric // 75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 85ffd83dbSDimitry Andric 95ffd83dbSDimitry Andric #include "SymbolTable.h" 10fe6060f1SDimitry Andric #include "ConcatOutputSection.h" 11e8d8bef9SDimitry Andric #include "Config.h" 125ffd83dbSDimitry Andric #include "InputFiles.h" 13349cc55cSDimitry Andric #include "InputSection.h" 145ffd83dbSDimitry Andric #include "Symbols.h" 15fe6060f1SDimitry Andric #include "SyntheticSections.h" 165ffd83dbSDimitry Andric #include "lld/Common/ErrorHandler.h" 175ffd83dbSDimitry Andric #include "lld/Common/Memory.h" 18bdd1243dSDimitry Andric #include "llvm/Demangle/Demangle.h" 195ffd83dbSDimitry Andric 205ffd83dbSDimitry Andric using namespace llvm; 215ffd83dbSDimitry Andric using namespace lld; 225ffd83dbSDimitry Andric using namespace lld::macho; 235ffd83dbSDimitry Andric 24fe6060f1SDimitry Andric Symbol *SymbolTable::find(CachedHashStringRef cachedName) { 25fe6060f1SDimitry Andric auto it = symMap.find(cachedName); 265ffd83dbSDimitry Andric if (it == symMap.end()) 275ffd83dbSDimitry Andric return nullptr; 285ffd83dbSDimitry Andric return symVector[it->second]; 295ffd83dbSDimitry Andric } 305ffd83dbSDimitry Andric 31fe6060f1SDimitry Andric std::pair<Symbol *, bool> SymbolTable::insert(StringRef name, 32fe6060f1SDimitry Andric const InputFile *file) { 335ffd83dbSDimitry Andric auto p = symMap.insert({CachedHashStringRef(name), (int)symVector.size()}); 345ffd83dbSDimitry Andric 35fe6060f1SDimitry Andric Symbol *sym; 36fe6060f1SDimitry Andric if (!p.second) { 375ffd83dbSDimitry Andric // Name already present in the symbol table. 38fe6060f1SDimitry Andric sym = symVector[p.first->second]; 39fe6060f1SDimitry Andric } else { 405ffd83dbSDimitry Andric // Name is a new symbol. 41fe6060f1SDimitry Andric sym = reinterpret_cast<Symbol *>(make<SymbolUnion>()); 425ffd83dbSDimitry Andric symVector.push_back(sym); 435ffd83dbSDimitry Andric } 445ffd83dbSDimitry Andric 45fe6060f1SDimitry Andric sym->isUsedInRegularObj |= !file || isa<ObjFile>(file); 46fe6060f1SDimitry Andric return {sym, p.second}; 47fe6060f1SDimitry Andric } 48fe6060f1SDimitry Andric 49bdd1243dSDimitry Andric namespace { 50bdd1243dSDimitry Andric struct DuplicateSymbolDiag { 51bdd1243dSDimitry Andric // Pair containing source location and source file 52bdd1243dSDimitry Andric const std::pair<std::string, std::string> src1; 53bdd1243dSDimitry Andric const std::pair<std::string, std::string> src2; 54bdd1243dSDimitry Andric const Symbol *sym; 55bdd1243dSDimitry Andric 56bdd1243dSDimitry Andric DuplicateSymbolDiag(const std::pair<std::string, std::string> src1, 57bdd1243dSDimitry Andric const std::pair<std::string, std::string> src2, 58bdd1243dSDimitry Andric const Symbol *sym) 59bdd1243dSDimitry Andric : src1(src1), src2(src2), sym(sym) {} 60bdd1243dSDimitry Andric }; 61bdd1243dSDimitry Andric SmallVector<DuplicateSymbolDiag> dupSymDiags; 62bdd1243dSDimitry Andric } // namespace 63bdd1243dSDimitry Andric 6406c3fb27SDimitry Andric // Move symbols at \p fromOff in \p fromIsec into \p toIsec, unless that symbol 6506c3fb27SDimitry Andric // is \p skip. 6606c3fb27SDimitry Andric static void transplantSymbolsAtOffset(InputSection *fromIsec, 6706c3fb27SDimitry Andric InputSection *toIsec, Defined *skip, 6806c3fb27SDimitry Andric uint64_t fromOff, uint64_t toOff) { 6906c3fb27SDimitry Andric // Ensure the symbols will still be in address order after our insertions. 7006c3fb27SDimitry Andric auto insertIt = llvm::upper_bound(toIsec->symbols, toOff, 7106c3fb27SDimitry Andric [](uint64_t off, const Symbol *s) { 7206c3fb27SDimitry Andric return cast<Defined>(s)->value < off; 7306c3fb27SDimitry Andric }); 7406c3fb27SDimitry Andric llvm::erase_if(fromIsec->symbols, [&](Symbol *s) { 7506c3fb27SDimitry Andric auto *d = cast<Defined>(s); 7606c3fb27SDimitry Andric if (d->value != fromOff) 7706c3fb27SDimitry Andric return false; 7806c3fb27SDimitry Andric if (d != skip) { 7906c3fb27SDimitry Andric // This repeated insertion will be quadratic unless insertIt is the end 8006c3fb27SDimitry Andric // iterator. However, that is typically the case for files that have 8106c3fb27SDimitry Andric // .subsections_via_symbols set. 8206c3fb27SDimitry Andric insertIt = toIsec->symbols.insert(insertIt, d); 83*0fca6ea1SDimitry Andric d->originalIsec = toIsec; 8406c3fb27SDimitry Andric d->value = toOff; 8506c3fb27SDimitry Andric // We don't want to have more than one unwindEntry at a given address, so 8606c3fb27SDimitry Andric // drop the redundant ones. We We can safely drop the unwindEntries of 8706c3fb27SDimitry Andric // the symbols in fromIsec since we will be adding another unwindEntry as 8806c3fb27SDimitry Andric // we finish parsing toIsec's file. (We can assume that toIsec has its 8906c3fb27SDimitry Andric // own unwindEntry because of the ODR.) 90*0fca6ea1SDimitry Andric d->originalUnwindEntry = nullptr; 9106c3fb27SDimitry Andric } 9206c3fb27SDimitry Andric return true; 9306c3fb27SDimitry Andric }); 9406c3fb27SDimitry Andric } 9506c3fb27SDimitry Andric 96fe6060f1SDimitry Andric Defined *SymbolTable::addDefined(StringRef name, InputFile *file, 97fe6060f1SDimitry Andric InputSection *isec, uint64_t value, 98fe6060f1SDimitry Andric uint64_t size, bool isWeakDef, 9906c3fb27SDimitry Andric bool isPrivateExtern, 100349cc55cSDimitry Andric bool isReferencedDynamically, bool noDeadStrip, 101349cc55cSDimitry Andric bool isWeakDefCanBeHidden) { 102e8d8bef9SDimitry Andric bool overridesWeakDef = false; 103bdd1243dSDimitry Andric auto [s, wasInserted] = insert(name, file); 104fe6060f1SDimitry Andric 105bdd1243dSDimitry Andric assert(!file || !isa<BitcodeFile>(file) || !isec); 1065ffd83dbSDimitry Andric 107e8d8bef9SDimitry Andric if (!wasInserted) { 108e8d8bef9SDimitry Andric if (auto *defined = dyn_cast<Defined>(s)) { 109e8d8bef9SDimitry Andric if (isWeakDef) { 110349cc55cSDimitry Andric // See further comment in createDefined() in InputFiles.cpp 111fe6060f1SDimitry Andric if (defined->isWeakDef()) { 112e8d8bef9SDimitry Andric defined->privateExtern &= isPrivateExtern; 113349cc55cSDimitry Andric defined->weakDefCanBeHidden &= isWeakDefCanBeHidden; 114fe6060f1SDimitry Andric defined->referencedDynamically |= isReferencedDynamically; 115fe6060f1SDimitry Andric defined->noDeadStrip |= noDeadStrip; 116349cc55cSDimitry Andric } 11706c3fb27SDimitry Andric if (auto concatIsec = dyn_cast_or_null<ConcatInputSection>(isec)) { 118fe6060f1SDimitry Andric concatIsec->wasCoalesced = true; 11906c3fb27SDimitry Andric // Any local symbols that alias the coalesced symbol should be moved 12006c3fb27SDimitry Andric // into the prevailing section. Note that we have sorted the symbols 12106c3fb27SDimitry Andric // in ObjFile::parseSymbols() such that extern weak symbols appear 12206c3fb27SDimitry Andric // last, so we don't need to worry about subsequent symbols being 12306c3fb27SDimitry Andric // added to an already-coalesced section. 124*0fca6ea1SDimitry Andric if (defined->isec()) 125*0fca6ea1SDimitry Andric transplantSymbolsAtOffset(concatIsec, defined->isec(), 12606c3fb27SDimitry Andric /*skip=*/nullptr, value, defined->value); 12706c3fb27SDimitry Andric } 128fe6060f1SDimitry Andric return defined; 129e8d8bef9SDimitry Andric } 130349cc55cSDimitry Andric 131349cc55cSDimitry Andric if (defined->isWeakDef()) { 132349cc55cSDimitry Andric if (auto concatIsec = 133*0fca6ea1SDimitry Andric dyn_cast_or_null<ConcatInputSection>(defined->isec())) { 134349cc55cSDimitry Andric concatIsec->wasCoalesced = true; 13506c3fb27SDimitry Andric if (isec) 13606c3fb27SDimitry Andric transplantSymbolsAtOffset(concatIsec, isec, defined, defined->value, 13706c3fb27SDimitry Andric value); 138349cc55cSDimitry Andric } 139349cc55cSDimitry Andric } else { 140bdd1243dSDimitry Andric std::string srcLoc1 = defined->getSourceLocation(); 141bdd1243dSDimitry Andric std::string srcLoc2 = isec ? isec->getSourceLocation(value) : ""; 142bdd1243dSDimitry Andric std::string srcFile1 = toString(defined->getFile()); 143bdd1243dSDimitry Andric std::string srcFile2 = toString(file); 14481ad6265SDimitry Andric 145bdd1243dSDimitry Andric dupSymDiags.push_back({make_pair(srcLoc1, srcFile1), 146bdd1243dSDimitry Andric make_pair(srcLoc2, srcFile2), defined}); 147349cc55cSDimitry Andric } 148349cc55cSDimitry Andric 149e8d8bef9SDimitry Andric } else if (auto *dysym = dyn_cast<DylibSymbol>(s)) { 150e8d8bef9SDimitry Andric overridesWeakDef = !isWeakDef && dysym->isWeakDef(); 151fe6060f1SDimitry Andric dysym->unreference(); 152bdd1243dSDimitry Andric } else if (auto *undef = dyn_cast<Undefined>(s)) { 1535f757f3fSDimitry Andric if (undef->wasBitcodeSymbol) { 1545f757f3fSDimitry Andric auto objFile = dyn_cast<ObjFile>(file); 1555f757f3fSDimitry Andric if (!objFile) { 1565f757f3fSDimitry Andric // The file must be a native object file, as opposed to potentially 1575f757f3fSDimitry Andric // being another bitcode file. A situation arises when some symbols 1585f757f3fSDimitry Andric // are defined thru `module asm` and thus they are not present in the 1595f757f3fSDimitry Andric // bitcode's symbol table. Consider bitcode modules `A`, `B`, and `C`. 1605f757f3fSDimitry Andric // LTO compiles only `A` and `C`, since there's no explicit symbol 1615f757f3fSDimitry Andric // reference to `B` other than a symbol from `A` via `module asm`. 1625f757f3fSDimitry Andric // After LTO is finished, the missing symbol now appears in the 1635f757f3fSDimitry Andric // resulting object file for `A`, which prematurely resolves another 1645f757f3fSDimitry Andric // prevailing symbol with `B` that hasn't been compiled, instead of 1655f757f3fSDimitry Andric // the resulting object for `C`. Consequently, an incorrect 1665f757f3fSDimitry Andric // relocation is generated for the prevailing symbol. 1675f757f3fSDimitry Andric assert(isa<BitcodeFile>(file) && "Bitcode file is expected."); 1685f757f3fSDimitry Andric std::string message = 1695f757f3fSDimitry Andric "The pending prevailing symbol(" + name.str() + 1705f757f3fSDimitry Andric ") in the bitcode file(" + toString(undef->getFile()) + 1715f757f3fSDimitry Andric ") is overridden by a non-native object (from bitcode): " + 1725f757f3fSDimitry Andric toString(file); 1735f757f3fSDimitry Andric error(message); 1745f757f3fSDimitry Andric } else if (!objFile->builtFromBitcode) { 1755f757f3fSDimitry Andric // Ideally, this should be an object file compiled from a bitcode 1765f757f3fSDimitry Andric // file. However, this might not hold true if a LC linker option is 1775f757f3fSDimitry Andric // used. In case LTO internalizes a prevailing hidden weak symbol, 1785f757f3fSDimitry Andric // there's a situation where an unresolved prevailing symbol might be 1795f757f3fSDimitry Andric // linked with the corresponding one from a native library, which is 1805f757f3fSDimitry Andric // loaded later after LTO. Although this could potentially result in 1815f757f3fSDimitry Andric // an ODR violation, we choose to permit this scenario as a warning. 1825f757f3fSDimitry Andric std::string message = "The pending prevailing symbol(" + name.str() + 1835f757f3fSDimitry Andric ") in the bitcode file(" + 1845f757f3fSDimitry Andric toString(undef->getFile()) + 1855f757f3fSDimitry Andric ") is overridden by a post-processed native " 1865f757f3fSDimitry Andric "object (from native archive): " + 1875f757f3fSDimitry Andric toString(file); 1885f757f3fSDimitry Andric warn(message); 1895f757f3fSDimitry Andric } else { 1905f757f3fSDimitry Andric // Preserve the original bitcode file name (instead of using the 1915f757f3fSDimitry Andric // object file name). 192bdd1243dSDimitry Andric file = undef->getFile(); 193e8d8bef9SDimitry Andric } 1945f757f3fSDimitry Andric } 1955f757f3fSDimitry Andric } 196e8d8bef9SDimitry Andric // Defined symbols take priority over other types of symbols, so in case 197e8d8bef9SDimitry Andric // of a name conflict, we fall through to the replaceSymbol() call below. 198e8d8bef9SDimitry Andric } 1995ffd83dbSDimitry Andric 20081ad6265SDimitry Andric // With -flat_namespace, all extern symbols in dylibs are interposable. 20181ad6265SDimitry Andric // FIXME: Add support for `-interposable` (PR53680). 20281ad6265SDimitry Andric bool interposable = config->namespaceKind == NamespaceKind::flat && 20381ad6265SDimitry Andric config->outputType != MachO::MH_EXECUTE && 20481ad6265SDimitry Andric !isPrivateExtern; 205fe6060f1SDimitry Andric Defined *defined = replaceSymbol<Defined>( 206fe6060f1SDimitry Andric s, name, file, isec, value, size, isWeakDef, /*isExternal=*/true, 20706c3fb27SDimitry Andric isPrivateExtern, /*includeInSymtab=*/true, isReferencedDynamically, 20806c3fb27SDimitry Andric noDeadStrip, overridesWeakDef, isWeakDefCanBeHidden, interposable); 209fe6060f1SDimitry Andric return defined; 2105ffd83dbSDimitry Andric } 2115ffd83dbSDimitry Andric 212bdd1243dSDimitry Andric Defined *SymbolTable::aliasDefined(Defined *src, StringRef target, 213bdd1243dSDimitry Andric InputFile *newFile, bool makePrivateExtern) { 214bdd1243dSDimitry Andric bool isPrivateExtern = makePrivateExtern || src->privateExtern; 215*0fca6ea1SDimitry Andric return addDefined(target, newFile, src->isec(), src->value, src->size, 21606c3fb27SDimitry Andric src->isWeakDef(), isPrivateExtern, 217fcaf7f86SDimitry Andric src->referencedDynamically, src->noDeadStrip, 218fcaf7f86SDimitry Andric src->weakDefCanBeHidden); 219fcaf7f86SDimitry Andric } 220fcaf7f86SDimitry Andric 221fe6060f1SDimitry Andric Symbol *SymbolTable::addUndefined(StringRef name, InputFile *file, 222fe6060f1SDimitry Andric bool isWeakRef) { 223bdd1243dSDimitry Andric auto [s, wasInserted] = insert(name, file); 2245ffd83dbSDimitry Andric 225fe6060f1SDimitry Andric RefState refState = isWeakRef ? RefState::Weak : RefState::Strong; 226e8d8bef9SDimitry Andric 2275ffd83dbSDimitry Andric if (wasInserted) 228bdd1243dSDimitry Andric replaceSymbol<Undefined>(s, name, file, refState, 229bdd1243dSDimitry Andric /*wasBitcodeSymbol=*/false); 23004eeddc0SDimitry Andric else if (auto *lazy = dyn_cast<LazyArchive>(s)) 2315ffd83dbSDimitry Andric lazy->fetchArchiveMember(); 23204eeddc0SDimitry Andric else if (isa<LazyObject>(s)) 23304eeddc0SDimitry Andric extract(*s->getFile(), s->getName()); 234e8d8bef9SDimitry Andric else if (auto *dynsym = dyn_cast<DylibSymbol>(s)) 235fe6060f1SDimitry Andric dynsym->reference(refState); 236e8d8bef9SDimitry Andric else if (auto *undefined = dyn_cast<Undefined>(s)) 237e8d8bef9SDimitry Andric undefined->refState = std::max(undefined->refState, refState); 2385ffd83dbSDimitry Andric return s; 2395ffd83dbSDimitry Andric } 2405ffd83dbSDimitry Andric 241e8d8bef9SDimitry Andric Symbol *SymbolTable::addCommon(StringRef name, InputFile *file, uint64_t size, 242e8d8bef9SDimitry Andric uint32_t align, bool isPrivateExtern) { 243bdd1243dSDimitry Andric auto [s, wasInserted] = insert(name, file); 2445ffd83dbSDimitry Andric 245e8d8bef9SDimitry Andric if (!wasInserted) { 246e8d8bef9SDimitry Andric if (auto *common = dyn_cast<CommonSymbol>(s)) { 247e8d8bef9SDimitry Andric if (size < common->size) 248e8d8bef9SDimitry Andric return s; 249e8d8bef9SDimitry Andric } else if (isa<Defined>(s)) { 250e8d8bef9SDimitry Andric return s; 251e8d8bef9SDimitry Andric } 252e8d8bef9SDimitry Andric // Common symbols take priority over all non-Defined symbols, so in case of 253e8d8bef9SDimitry Andric // a name conflict, we fall through to the replaceSymbol() call below. 254e8d8bef9SDimitry Andric } 255e8d8bef9SDimitry Andric 256e8d8bef9SDimitry Andric replaceSymbol<CommonSymbol>(s, name, file, size, align, isPrivateExtern); 257e8d8bef9SDimitry Andric return s; 258e8d8bef9SDimitry Andric } 259e8d8bef9SDimitry Andric 260e8d8bef9SDimitry Andric Symbol *SymbolTable::addDylib(StringRef name, DylibFile *file, bool isWeakDef, 261e8d8bef9SDimitry Andric bool isTlv) { 262bdd1243dSDimitry Andric auto [s, wasInserted] = insert(name, file); 263e8d8bef9SDimitry Andric 264fe6060f1SDimitry Andric RefState refState = RefState::Unreferenced; 265e8d8bef9SDimitry Andric if (!wasInserted) { 266e8d8bef9SDimitry Andric if (auto *defined = dyn_cast<Defined>(s)) { 267e8d8bef9SDimitry Andric if (isWeakDef && !defined->isWeakDef()) 268e8d8bef9SDimitry Andric defined->overridesWeakDef = true; 269e8d8bef9SDimitry Andric } else if (auto *undefined = dyn_cast<Undefined>(s)) { 270e8d8bef9SDimitry Andric refState = undefined->refState; 271e8d8bef9SDimitry Andric } else if (auto *dysym = dyn_cast<DylibSymbol>(s)) { 272fe6060f1SDimitry Andric refState = dysym->getRefState(); 273e8d8bef9SDimitry Andric } 274e8d8bef9SDimitry Andric } 275e8d8bef9SDimitry Andric 276fe6060f1SDimitry Andric bool isDynamicLookup = file == nullptr; 277e8d8bef9SDimitry Andric if (wasInserted || isa<Undefined>(s) || 278fe6060f1SDimitry Andric (isa<DylibSymbol>(s) && 279fe6060f1SDimitry Andric ((!isWeakDef && s->isWeakDef()) || 280fe6060f1SDimitry Andric (!isDynamicLookup && cast<DylibSymbol>(s)->isDynamicLookup())))) { 281fe6060f1SDimitry Andric if (auto *dynsym = dyn_cast<DylibSymbol>(s)) 282fe6060f1SDimitry Andric dynsym->unreference(); 283e8d8bef9SDimitry Andric replaceSymbol<DylibSymbol>(s, file, name, isWeakDef, refState, isTlv); 284fe6060f1SDimitry Andric } 285e8d8bef9SDimitry Andric 2865ffd83dbSDimitry Andric return s; 2875ffd83dbSDimitry Andric } 2885ffd83dbSDimitry Andric 289fe6060f1SDimitry Andric Symbol *SymbolTable::addDynamicLookup(StringRef name) { 290fe6060f1SDimitry Andric return addDylib(name, /*file=*/nullptr, /*isWeakDef=*/false, /*isTlv=*/false); 291fe6060f1SDimitry Andric } 292fe6060f1SDimitry Andric 29304eeddc0SDimitry Andric Symbol *SymbolTable::addLazyArchive(StringRef name, ArchiveFile *file, 294e8d8bef9SDimitry Andric const object::Archive::Symbol &sym) { 295bdd1243dSDimitry Andric auto [s, wasInserted] = insert(name, file); 2965ffd83dbSDimitry Andric 2970eae32dcSDimitry Andric if (wasInserted) { 29804eeddc0SDimitry Andric replaceSymbol<LazyArchive>(s, file, sym); 2990eae32dcSDimitry Andric } else if (isa<Undefined>(s)) { 3005ffd83dbSDimitry Andric file->fetch(sym); 3010eae32dcSDimitry Andric } else if (auto *dysym = dyn_cast<DylibSymbol>(s)) { 3020eae32dcSDimitry Andric if (dysym->isWeakDef()) { 3030eae32dcSDimitry Andric if (dysym->getRefState() != RefState::Unreferenced) 3040eae32dcSDimitry Andric file->fetch(sym); 3050eae32dcSDimitry Andric else 30604eeddc0SDimitry Andric replaceSymbol<LazyArchive>(s, file, sym); 30704eeddc0SDimitry Andric } 30804eeddc0SDimitry Andric } 30904eeddc0SDimitry Andric return s; 31004eeddc0SDimitry Andric } 31104eeddc0SDimitry Andric 31204eeddc0SDimitry Andric Symbol *SymbolTable::addLazyObject(StringRef name, InputFile &file) { 313bdd1243dSDimitry Andric auto [s, wasInserted] = insert(name, &file); 31404eeddc0SDimitry Andric 31504eeddc0SDimitry Andric if (wasInserted) { 31604eeddc0SDimitry Andric replaceSymbol<LazyObject>(s, file, name); 31704eeddc0SDimitry Andric } else if (isa<Undefined>(s)) { 31804eeddc0SDimitry Andric extract(file, name); 31904eeddc0SDimitry Andric } else if (auto *dysym = dyn_cast<DylibSymbol>(s)) { 32004eeddc0SDimitry Andric if (dysym->isWeakDef()) { 32104eeddc0SDimitry Andric if (dysym->getRefState() != RefState::Unreferenced) 32204eeddc0SDimitry Andric extract(file, name); 32304eeddc0SDimitry Andric else 32404eeddc0SDimitry Andric replaceSymbol<LazyObject>(s, file, name); 3250eae32dcSDimitry Andric } 3260eae32dcSDimitry Andric } 3275ffd83dbSDimitry Andric return s; 3285ffd83dbSDimitry Andric } 3295ffd83dbSDimitry Andric 330fe6060f1SDimitry Andric Defined *SymbolTable::addSynthetic(StringRef name, InputSection *isec, 331fe6060f1SDimitry Andric uint64_t value, bool isPrivateExtern, 332fe6060f1SDimitry Andric bool includeInSymtab, 333fe6060f1SDimitry Andric bool referencedDynamically) { 33481ad6265SDimitry Andric assert(!isec || !isec->getFile()); // See makeSyntheticInputSection(). 33506c3fb27SDimitry Andric Defined *s = addDefined(name, /*file=*/nullptr, isec, value, /*size=*/0, 33606c3fb27SDimitry Andric /*isWeakDef=*/false, isPrivateExtern, 33781ad6265SDimitry Andric referencedDynamically, /*noDeadStrip=*/false, 33881ad6265SDimitry Andric /*isWeakDefCanBeHidden=*/false); 339fe6060f1SDimitry Andric s->includeInSymtab = includeInSymtab; 340e8d8bef9SDimitry Andric return s; 341e8d8bef9SDimitry Andric } 342e8d8bef9SDimitry Andric 343fe6060f1SDimitry Andric enum class Boundary { 344fe6060f1SDimitry Andric Start, 345fe6060f1SDimitry Andric End, 346fe6060f1SDimitry Andric }; 347fe6060f1SDimitry Andric 348fe6060f1SDimitry Andric static Defined *createBoundarySymbol(const Undefined &sym) { 349fe6060f1SDimitry Andric return symtab->addSynthetic( 350fe6060f1SDimitry Andric sym.getName(), /*isec=*/nullptr, /*value=*/-1, /*isPrivateExtern=*/true, 351fe6060f1SDimitry Andric /*includeInSymtab=*/false, /*referencedDynamically=*/false); 352fe6060f1SDimitry Andric } 353fe6060f1SDimitry Andric 354fe6060f1SDimitry Andric static void handleSectionBoundarySymbol(const Undefined &sym, StringRef segSect, 355fe6060f1SDimitry Andric Boundary which) { 356bdd1243dSDimitry Andric auto [segName, sectName] = segSect.split('$'); 357fe6060f1SDimitry Andric 358fe6060f1SDimitry Andric // Attach the symbol to any InputSection that will end up in the right 359fe6060f1SDimitry Andric // OutputSection -- it doesn't matter which one we pick. 360fe6060f1SDimitry Andric // Don't bother looking through inputSections for a matching 361fe6060f1SDimitry Andric // ConcatInputSection -- we need to create ConcatInputSection for 362fe6060f1SDimitry Andric // non-existing sections anyways, and that codepath works even if we should 363fe6060f1SDimitry Andric // already have a ConcatInputSection with the right name. 364fe6060f1SDimitry Andric 365fe6060f1SDimitry Andric OutputSection *osec = nullptr; 366fe6060f1SDimitry Andric // This looks for __TEXT,__cstring etc. 367fe6060f1SDimitry Andric for (SyntheticSection *ssec : syntheticSections) 368fe6060f1SDimitry Andric if (ssec->segname == segName && ssec->name == sectName) { 369fe6060f1SDimitry Andric osec = ssec->isec->parent; 370e8d8bef9SDimitry Andric break; 371fe6060f1SDimitry Andric } 372fe6060f1SDimitry Andric 373fe6060f1SDimitry Andric if (!osec) { 37481ad6265SDimitry Andric ConcatInputSection *isec = makeSyntheticInputSection(segName, sectName); 375fe6060f1SDimitry Andric 376fe6060f1SDimitry Andric // This runs after markLive() and is only called for Undefineds that are 377fe6060f1SDimitry Andric // live. Marking the isec live ensures an OutputSection is created that the 378fe6060f1SDimitry Andric // start/end symbol can refer to. 379fe6060f1SDimitry Andric assert(sym.isLive()); 380*0fca6ea1SDimitry Andric assert(isec->live); 381fe6060f1SDimitry Andric 382fe6060f1SDimitry Andric // This runs after gatherInputSections(), so need to explicitly set parent 383fe6060f1SDimitry Andric // and add to inputSections. 384fe6060f1SDimitry Andric osec = isec->parent = ConcatOutputSection::getOrCreateForInput(isec); 385fe6060f1SDimitry Andric inputSections.push_back(isec); 386fe6060f1SDimitry Andric } 387fe6060f1SDimitry Andric 388fe6060f1SDimitry Andric if (which == Boundary::Start) 389fe6060f1SDimitry Andric osec->sectionStartSymbols.push_back(createBoundarySymbol(sym)); 390fe6060f1SDimitry Andric else 391fe6060f1SDimitry Andric osec->sectionEndSymbols.push_back(createBoundarySymbol(sym)); 392fe6060f1SDimitry Andric } 393fe6060f1SDimitry Andric 394fe6060f1SDimitry Andric static void handleSegmentBoundarySymbol(const Undefined &sym, StringRef segName, 395fe6060f1SDimitry Andric Boundary which) { 396fe6060f1SDimitry Andric OutputSegment *seg = getOrCreateOutputSegment(segName); 397fe6060f1SDimitry Andric if (which == Boundary::Start) 398fe6060f1SDimitry Andric seg->segmentStartSymbols.push_back(createBoundarySymbol(sym)); 399fe6060f1SDimitry Andric else 400fe6060f1SDimitry Andric seg->segmentEndSymbols.push_back(createBoundarySymbol(sym)); 401fe6060f1SDimitry Andric } 402fe6060f1SDimitry Andric 40381ad6265SDimitry Andric // Try to find a definition for an undefined symbol. 40481ad6265SDimitry Andric // Returns true if a definition was found and no diagnostics are needed. 40581ad6265SDimitry Andric static bool recoverFromUndefinedSymbol(const Undefined &sym) { 406fe6060f1SDimitry Andric // Handle start/end symbols. 407fe6060f1SDimitry Andric StringRef name = sym.getName(); 40881ad6265SDimitry Andric if (name.consume_front("section$start$")) { 40981ad6265SDimitry Andric handleSectionBoundarySymbol(sym, name, Boundary::Start); 41081ad6265SDimitry Andric return true; 41181ad6265SDimitry Andric } 41281ad6265SDimitry Andric if (name.consume_front("section$end$")) { 41381ad6265SDimitry Andric handleSectionBoundarySymbol(sym, name, Boundary::End); 41481ad6265SDimitry Andric return true; 41581ad6265SDimitry Andric } 41681ad6265SDimitry Andric if (name.consume_front("segment$start$")) { 41781ad6265SDimitry Andric handleSegmentBoundarySymbol(sym, name, Boundary::Start); 41881ad6265SDimitry Andric return true; 41981ad6265SDimitry Andric } 42081ad6265SDimitry Andric if (name.consume_front("segment$end$")) { 42181ad6265SDimitry Andric handleSegmentBoundarySymbol(sym, name, Boundary::End); 42281ad6265SDimitry Andric return true; 42381ad6265SDimitry Andric } 424fe6060f1SDimitry Andric 425753f127fSDimitry Andric // Leave dtrace symbols, since we will handle them when we do the relocation 42606c3fb27SDimitry Andric if (name.starts_with("___dtrace_")) 427753f127fSDimitry Andric return true; 428753f127fSDimitry Andric 429fe6060f1SDimitry Andric // Handle -U. 430fe6060f1SDimitry Andric if (config->explicitDynamicLookups.count(sym.getName())) { 431fe6060f1SDimitry Andric symtab->addDynamicLookup(sym.getName()); 43281ad6265SDimitry Andric return true; 433fe6060f1SDimitry Andric } 434fe6060f1SDimitry Andric 435fe6060f1SDimitry Andric // Handle -undefined. 43681ad6265SDimitry Andric if (config->undefinedSymbolTreatment == 43781ad6265SDimitry Andric UndefinedSymbolTreatment::dynamic_lookup || 43881ad6265SDimitry Andric config->undefinedSymbolTreatment == UndefinedSymbolTreatment::suppress) { 43981ad6265SDimitry Andric symtab->addDynamicLookup(sym.getName()); 44081ad6265SDimitry Andric return true; 44181ad6265SDimitry Andric } 44281ad6265SDimitry Andric 44381ad6265SDimitry Andric // We do not return true here, as we still need to print diagnostics. 44481ad6265SDimitry Andric if (config->undefinedSymbolTreatment == UndefinedSymbolTreatment::warning) 44581ad6265SDimitry Andric symtab->addDynamicLookup(sym.getName()); 44681ad6265SDimitry Andric 44781ad6265SDimitry Andric return false; 44881ad6265SDimitry Andric } 44981ad6265SDimitry Andric 45081ad6265SDimitry Andric namespace { 45181ad6265SDimitry Andric struct UndefinedDiag { 45281ad6265SDimitry Andric struct SectionAndOffset { 45381ad6265SDimitry Andric const InputSection *isec; 45481ad6265SDimitry Andric uint64_t offset; 45581ad6265SDimitry Andric }; 45681ad6265SDimitry Andric 45781ad6265SDimitry Andric std::vector<SectionAndOffset> codeReferences; 45881ad6265SDimitry Andric std::vector<std::string> otherReferences; 45981ad6265SDimitry Andric }; 46081ad6265SDimitry Andric 46181ad6265SDimitry Andric MapVector<const Undefined *, UndefinedDiag> undefs; 46206c3fb27SDimitry Andric } // namespace 46381ad6265SDimitry Andric 464bdd1243dSDimitry Andric void macho::reportPendingDuplicateSymbols() { 465bdd1243dSDimitry Andric for (const auto &duplicate : dupSymDiags) { 466bdd1243dSDimitry Andric if (!config->deadStripDuplicates || duplicate.sym->isLive()) { 467bdd1243dSDimitry Andric std::string message = 468bdd1243dSDimitry Andric "duplicate symbol: " + toString(*duplicate.sym) + "\n>>> defined in "; 469bdd1243dSDimitry Andric if (!duplicate.src1.first.empty()) 470bdd1243dSDimitry Andric message += duplicate.src1.first + "\n>>> "; 471bdd1243dSDimitry Andric message += duplicate.src1.second + "\n>>> defined in "; 472bdd1243dSDimitry Andric if (!duplicate.src2.first.empty()) 473bdd1243dSDimitry Andric message += duplicate.src2.first + "\n>>> "; 474bdd1243dSDimitry Andric error(message + duplicate.src2.second); 475bdd1243dSDimitry Andric } 476bdd1243dSDimitry Andric } 477bdd1243dSDimitry Andric } 47881ad6265SDimitry Andric 479bdd1243dSDimitry Andric // Check whether the definition name def is a mangled function name that matches 480bdd1243dSDimitry Andric // the reference name ref. 481bdd1243dSDimitry Andric static bool canSuggestExternCForCXX(StringRef ref, StringRef def) { 482bdd1243dSDimitry Andric llvm::ItaniumPartialDemangler d; 483bdd1243dSDimitry Andric std::string name = def.str(); 484bdd1243dSDimitry Andric if (d.partialDemangle(name.c_str())) 485bdd1243dSDimitry Andric return false; 486bdd1243dSDimitry Andric char *buf = d.getFunctionName(nullptr, nullptr); 487bdd1243dSDimitry Andric if (!buf) 488bdd1243dSDimitry Andric return false; 489bdd1243dSDimitry Andric bool ret = ref == buf; 490bdd1243dSDimitry Andric free(buf); 491bdd1243dSDimitry Andric return ret; 492bdd1243dSDimitry Andric } 493bdd1243dSDimitry Andric 494bdd1243dSDimitry Andric // Suggest an alternative spelling of an "undefined symbol" diagnostic. Returns 495bdd1243dSDimitry Andric // the suggested symbol, which is either in the symbol table, or in the same 496bdd1243dSDimitry Andric // file of sym. 497bdd1243dSDimitry Andric static const Symbol *getAlternativeSpelling(const Undefined &sym, 49806c3fb27SDimitry Andric std::string &preHint, 49906c3fb27SDimitry Andric std::string &postHint) { 500bdd1243dSDimitry Andric DenseMap<StringRef, const Symbol *> map; 501bdd1243dSDimitry Andric if (sym.getFile() && sym.getFile()->kind() == InputFile::ObjKind) { 502bdd1243dSDimitry Andric // Build a map of local defined symbols. 503bdd1243dSDimitry Andric for (const Symbol *s : sym.getFile()->symbols) 504bdd1243dSDimitry Andric if (auto *defined = dyn_cast_or_null<Defined>(s)) 505bdd1243dSDimitry Andric if (!defined->isExternal()) 506bdd1243dSDimitry Andric map.try_emplace(s->getName(), s); 507bdd1243dSDimitry Andric } 508bdd1243dSDimitry Andric 509bdd1243dSDimitry Andric auto suggest = [&](StringRef newName) -> const Symbol * { 510bdd1243dSDimitry Andric // If defined locally. 511bdd1243dSDimitry Andric if (const Symbol *s = map.lookup(newName)) 512bdd1243dSDimitry Andric return s; 513bdd1243dSDimitry Andric 514bdd1243dSDimitry Andric // If in the symbol table and not undefined. 515bdd1243dSDimitry Andric if (const Symbol *s = symtab->find(newName)) 516bdd1243dSDimitry Andric if (dyn_cast<Undefined>(s) == nullptr) 517bdd1243dSDimitry Andric return s; 518bdd1243dSDimitry Andric 519bdd1243dSDimitry Andric return nullptr; 520bdd1243dSDimitry Andric }; 521bdd1243dSDimitry Andric 522bdd1243dSDimitry Andric // This loop enumerates all strings of Levenshtein distance 1 as typo 523bdd1243dSDimitry Andric // correction candidates and suggests the one that exists as a non-undefined 524bdd1243dSDimitry Andric // symbol. 525bdd1243dSDimitry Andric StringRef name = sym.getName(); 526bdd1243dSDimitry Andric for (size_t i = 0, e = name.size(); i != e + 1; ++i) { 527bdd1243dSDimitry Andric // Insert a character before name[i]. 528bdd1243dSDimitry Andric std::string newName = (name.substr(0, i) + "0" + name.substr(i)).str(); 529bdd1243dSDimitry Andric for (char c = '0'; c <= 'z'; ++c) { 530bdd1243dSDimitry Andric newName[i] = c; 531bdd1243dSDimitry Andric if (const Symbol *s = suggest(newName)) 532bdd1243dSDimitry Andric return s; 533bdd1243dSDimitry Andric } 534bdd1243dSDimitry Andric if (i == e) 535bdd1243dSDimitry Andric break; 536bdd1243dSDimitry Andric 537bdd1243dSDimitry Andric // Substitute name[i]. 538bdd1243dSDimitry Andric newName = std::string(name); 539bdd1243dSDimitry Andric for (char c = '0'; c <= 'z'; ++c) { 540bdd1243dSDimitry Andric newName[i] = c; 541bdd1243dSDimitry Andric if (const Symbol *s = suggest(newName)) 542bdd1243dSDimitry Andric return s; 543bdd1243dSDimitry Andric } 544bdd1243dSDimitry Andric 545bdd1243dSDimitry Andric // Transpose name[i] and name[i+1]. This is of edit distance 2 but it is 546bdd1243dSDimitry Andric // common. 547bdd1243dSDimitry Andric if (i + 1 < e) { 548bdd1243dSDimitry Andric newName[i] = name[i + 1]; 549bdd1243dSDimitry Andric newName[i + 1] = name[i]; 550bdd1243dSDimitry Andric if (const Symbol *s = suggest(newName)) 551bdd1243dSDimitry Andric return s; 552bdd1243dSDimitry Andric } 553bdd1243dSDimitry Andric 554bdd1243dSDimitry Andric // Delete name[i]. 555bdd1243dSDimitry Andric newName = (name.substr(0, i) + name.substr(i + 1)).str(); 556bdd1243dSDimitry Andric if (const Symbol *s = suggest(newName)) 557bdd1243dSDimitry Andric return s; 558bdd1243dSDimitry Andric } 559bdd1243dSDimitry Andric 560bdd1243dSDimitry Andric // Case mismatch, e.g. Foo vs FOO. 561bdd1243dSDimitry Andric for (auto &it : map) 562bdd1243dSDimitry Andric if (name.equals_insensitive(it.first)) 563bdd1243dSDimitry Andric return it.second; 564bdd1243dSDimitry Andric for (Symbol *sym : symtab->getSymbols()) 565bdd1243dSDimitry Andric if (dyn_cast<Undefined>(sym) == nullptr && 566bdd1243dSDimitry Andric name.equals_insensitive(sym->getName())) 567bdd1243dSDimitry Andric return sym; 568bdd1243dSDimitry Andric 569bdd1243dSDimitry Andric // The reference may be a mangled name while the definition is not. Suggest a 570bdd1243dSDimitry Andric // missing extern "C". 57106c3fb27SDimitry Andric if (name.starts_with("__Z")) { 572bdd1243dSDimitry Andric std::string buf = name.str(); 573bdd1243dSDimitry Andric llvm::ItaniumPartialDemangler d; 574bdd1243dSDimitry Andric if (!d.partialDemangle(buf.c_str())) 575bdd1243dSDimitry Andric if (char *buf = d.getFunctionName(nullptr, nullptr)) { 576bdd1243dSDimitry Andric const Symbol *s = suggest((Twine("_") + buf).str()); 577bdd1243dSDimitry Andric free(buf); 578bdd1243dSDimitry Andric if (s) { 57906c3fb27SDimitry Andric preHint = ": extern \"C\" "; 580bdd1243dSDimitry Andric return s; 581bdd1243dSDimitry Andric } 582bdd1243dSDimitry Andric } 583bdd1243dSDimitry Andric } else { 58406c3fb27SDimitry Andric StringRef nameWithoutUnderscore = name; 58506c3fb27SDimitry Andric nameWithoutUnderscore.consume_front("_"); 586bdd1243dSDimitry Andric const Symbol *s = nullptr; 587bdd1243dSDimitry Andric for (auto &it : map) 58806c3fb27SDimitry Andric if (canSuggestExternCForCXX(nameWithoutUnderscore, it.first)) { 589bdd1243dSDimitry Andric s = it.second; 590bdd1243dSDimitry Andric break; 591bdd1243dSDimitry Andric } 592bdd1243dSDimitry Andric if (!s) 593bdd1243dSDimitry Andric for (Symbol *sym : symtab->getSymbols()) 59406c3fb27SDimitry Andric if (canSuggestExternCForCXX(nameWithoutUnderscore, sym->getName())) { 595bdd1243dSDimitry Andric s = sym; 596bdd1243dSDimitry Andric break; 597bdd1243dSDimitry Andric } 598bdd1243dSDimitry Andric if (s) { 59906c3fb27SDimitry Andric preHint = " to declare "; 60006c3fb27SDimitry Andric postHint = " as extern \"C\"?"; 601bdd1243dSDimitry Andric return s; 602bdd1243dSDimitry Andric } 603bdd1243dSDimitry Andric } 604bdd1243dSDimitry Andric 605bdd1243dSDimitry Andric return nullptr; 606bdd1243dSDimitry Andric } 607bdd1243dSDimitry Andric 608bdd1243dSDimitry Andric static void reportUndefinedSymbol(const Undefined &sym, 609bdd1243dSDimitry Andric const UndefinedDiag &locations, 610bdd1243dSDimitry Andric bool correctSpelling) { 611fe6060f1SDimitry Andric std::string message = "undefined symbol"; 612fe6060f1SDimitry Andric if (config->archMultiple) 613fe6060f1SDimitry Andric message += (" for arch " + getArchitectureName(config->arch())).str(); 614bdd1243dSDimitry Andric message += ": " + toString(sym); 61581ad6265SDimitry Andric 61681ad6265SDimitry Andric const size_t maxUndefinedReferences = 3; 61781ad6265SDimitry Andric size_t i = 0; 61881ad6265SDimitry Andric for (const std::string &loc : locations.otherReferences) { 61981ad6265SDimitry Andric if (i >= maxUndefinedReferences) 620e8d8bef9SDimitry Andric break; 62181ad6265SDimitry Andric message += "\n>>> referenced by " + loc; 62281ad6265SDimitry Andric ++i; 623e8d8bef9SDimitry Andric } 62481ad6265SDimitry Andric 625bdd1243dSDimitry Andric for (const UndefinedDiag::SectionAndOffset &loc : locations.codeReferences) { 62681ad6265SDimitry Andric if (i >= maxUndefinedReferences) 62781ad6265SDimitry Andric break; 62881ad6265SDimitry Andric message += "\n>>> referenced by "; 62981ad6265SDimitry Andric std::string src = loc.isec->getSourceLocation(loc.offset); 63081ad6265SDimitry Andric if (!src.empty()) 63181ad6265SDimitry Andric message += src + "\n>>> "; 63281ad6265SDimitry Andric message += loc.isec->getLocation(loc.offset); 63381ad6265SDimitry Andric ++i; 63481ad6265SDimitry Andric } 63581ad6265SDimitry Andric 63681ad6265SDimitry Andric size_t totalReferences = 63781ad6265SDimitry Andric locations.otherReferences.size() + locations.codeReferences.size(); 63881ad6265SDimitry Andric if (totalReferences > i) 63981ad6265SDimitry Andric message += 64081ad6265SDimitry Andric ("\n>>> referenced " + Twine(totalReferences - i) + " more times") 64181ad6265SDimitry Andric .str(); 64281ad6265SDimitry Andric 643bdd1243dSDimitry Andric if (correctSpelling) { 64406c3fb27SDimitry Andric std::string preHint = ": ", postHint; 645bdd1243dSDimitry Andric if (const Symbol *corrected = 64606c3fb27SDimitry Andric getAlternativeSpelling(sym, preHint, postHint)) { 647bdd1243dSDimitry Andric message += 64806c3fb27SDimitry Andric "\n>>> did you mean" + preHint + toString(*corrected) + postHint; 649bdd1243dSDimitry Andric if (corrected->getFile()) 650bdd1243dSDimitry Andric message += "\n>>> defined in: " + toString(corrected->getFile()); 651bdd1243dSDimitry Andric } 652bdd1243dSDimitry Andric } 653bdd1243dSDimitry Andric 65481ad6265SDimitry Andric if (config->undefinedSymbolTreatment == UndefinedSymbolTreatment::error) 65581ad6265SDimitry Andric error(message); 65681ad6265SDimitry Andric else if (config->undefinedSymbolTreatment == 65781ad6265SDimitry Andric UndefinedSymbolTreatment::warning) 65881ad6265SDimitry Andric warn(message); 65981ad6265SDimitry Andric else 660bdd1243dSDimitry Andric assert(false && "diagnostics make sense for -undefined error|warning only"); 66181ad6265SDimitry Andric } 66281ad6265SDimitry Andric 663bdd1243dSDimitry Andric void macho::reportPendingUndefinedSymbols() { 664bdd1243dSDimitry Andric // Enable spell corrector for the first 2 diagnostics. 665bdd1243dSDimitry Andric for (const auto &[i, undef] : llvm::enumerate(undefs)) 666bdd1243dSDimitry Andric reportUndefinedSymbol(*undef.first, undef.second, i < 2); 667bdd1243dSDimitry Andric 66881ad6265SDimitry Andric // This function is called multiple times during execution. Clear the printed 66981ad6265SDimitry Andric // diagnostics to avoid printing the same things again the next time. 67081ad6265SDimitry Andric undefs.clear(); 67181ad6265SDimitry Andric } 67281ad6265SDimitry Andric 67381ad6265SDimitry Andric void macho::treatUndefinedSymbol(const Undefined &sym, StringRef source) { 67481ad6265SDimitry Andric if (recoverFromUndefinedSymbol(sym)) 67581ad6265SDimitry Andric return; 67681ad6265SDimitry Andric 67781ad6265SDimitry Andric undefs[&sym].otherReferences.push_back(source.str()); 67881ad6265SDimitry Andric } 67981ad6265SDimitry Andric 68081ad6265SDimitry Andric void macho::treatUndefinedSymbol(const Undefined &sym, const InputSection *isec, 68181ad6265SDimitry Andric uint64_t offset) { 68281ad6265SDimitry Andric if (recoverFromUndefinedSymbol(sym)) 68381ad6265SDimitry Andric return; 68481ad6265SDimitry Andric 68581ad6265SDimitry Andric undefs[&sym].codeReferences.push_back({isec, offset}); 686e8d8bef9SDimitry Andric } 687e8d8bef9SDimitry Andric 68804eeddc0SDimitry Andric std::unique_ptr<SymbolTable> macho::symtab; 689