1e8d8bef9SDimitry Andric //===- ObjC.cpp -----------------------------------------------------------===// 2e8d8bef9SDimitry Andric // 3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6e8d8bef9SDimitry Andric // 7e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 8e8d8bef9SDimitry Andric 9e8d8bef9SDimitry Andric #include "ObjC.h" 100fca6ea1SDimitry Andric #include "ConcatOutputSection.h" 11e8d8bef9SDimitry Andric #include "InputFiles.h" 12fe6060f1SDimitry Andric #include "InputSection.h" 1306c3fb27SDimitry Andric #include "Layout.h" 14e8d8bef9SDimitry Andric #include "OutputSegment.h" 150fca6ea1SDimitry Andric #include "SyntheticSections.h" 16fe6060f1SDimitry Andric #include "Target.h" 17e8d8bef9SDimitry Andric 18bdd1243dSDimitry Andric #include "lld/Common/ErrorHandler.h" 1906c3fb27SDimitry Andric #include "llvm/ADT/DenseMap.h" 20e8d8bef9SDimitry Andric #include "llvm/BinaryFormat/MachO.h" 21349cc55cSDimitry Andric #include "llvm/Bitcode/BitcodeReader.h" 220fca6ea1SDimitry Andric #include "llvm/Support/TimeProfiler.h" 23e8d8bef9SDimitry Andric 24e8d8bef9SDimitry Andric using namespace llvm; 25e8d8bef9SDimitry Andric using namespace llvm::MachO; 26e8d8bef9SDimitry Andric using namespace lld; 27fe6060f1SDimitry Andric using namespace lld::macho; 28e8d8bef9SDimitry Andric 29349cc55cSDimitry Andric template <class LP> static bool objectHasObjCSection(MemoryBufferRef mb) { 30349cc55cSDimitry Andric using SectionHeader = typename LP::section; 31fe6060f1SDimitry Andric 32fe6060f1SDimitry Andric auto *hdr = 33fe6060f1SDimitry Andric reinterpret_cast<const typename LP::mach_header *>(mb.getBufferStart()); 34fe6060f1SDimitry Andric if (hdr->magic != LP::magic) 35fe6060f1SDimitry Andric return false; 36fe6060f1SDimitry Andric 37fe6060f1SDimitry Andric if (const auto *c = 38fe6060f1SDimitry Andric findCommand<typename LP::segment_command>(hdr, LP::segmentLCType)) { 39349cc55cSDimitry Andric auto sectionHeaders = ArrayRef<SectionHeader>{ 40349cc55cSDimitry Andric reinterpret_cast<const SectionHeader *>(c + 1), c->nsects}; 41349cc55cSDimitry Andric for (const SectionHeader &secHead : sectionHeaders) { 42349cc55cSDimitry Andric StringRef sectname(secHead.sectname, 43349cc55cSDimitry Andric strnlen(secHead.sectname, sizeof(secHead.sectname))); 44349cc55cSDimitry Andric StringRef segname(secHead.segname, 45349cc55cSDimitry Andric strnlen(secHead.segname, sizeof(secHead.segname))); 46fe6060f1SDimitry Andric if ((segname == segment_names::data && 47fe6060f1SDimitry Andric sectname == section_names::objcCatList) || 48fe6060f1SDimitry Andric (segname == segment_names::text && 4906c3fb27SDimitry Andric sectname.starts_with(section_names::swift))) { 50e8d8bef9SDimitry Andric return true; 51e8d8bef9SDimitry Andric } 52e8d8bef9SDimitry Andric } 53e8d8bef9SDimitry Andric } 54e8d8bef9SDimitry Andric return false; 55e8d8bef9SDimitry Andric } 56fe6060f1SDimitry Andric 57349cc55cSDimitry Andric static bool objectHasObjCSection(MemoryBufferRef mb) { 58fe6060f1SDimitry Andric if (target->wordSize == 8) 59349cc55cSDimitry Andric return ::objectHasObjCSection<LP64>(mb); 60fe6060f1SDimitry Andric else 61349cc55cSDimitry Andric return ::objectHasObjCSection<ILP32>(mb); 62349cc55cSDimitry Andric } 63349cc55cSDimitry Andric 64349cc55cSDimitry Andric bool macho::hasObjCSection(MemoryBufferRef mb) { 65349cc55cSDimitry Andric switch (identify_magic(mb.getBuffer())) { 66349cc55cSDimitry Andric case file_magic::macho_object: 67349cc55cSDimitry Andric return objectHasObjCSection(mb); 68349cc55cSDimitry Andric case file_magic::bitcode: 69349cc55cSDimitry Andric return check(isBitcodeContainingObjCCategory(mb)); 70349cc55cSDimitry Andric default: 71349cc55cSDimitry Andric return false; 72349cc55cSDimitry Andric } 73fe6060f1SDimitry Andric } 7406c3fb27SDimitry Andric 7506c3fb27SDimitry Andric namespace { 7606c3fb27SDimitry Andric 7706c3fb27SDimitry Andric #define FOR_EACH_CATEGORY_FIELD(DO) \ 7806c3fb27SDimitry Andric DO(Ptr, name) \ 7906c3fb27SDimitry Andric DO(Ptr, klass) \ 8006c3fb27SDimitry Andric DO(Ptr, instanceMethods) \ 8106c3fb27SDimitry Andric DO(Ptr, classMethods) \ 8206c3fb27SDimitry Andric DO(Ptr, protocols) \ 8306c3fb27SDimitry Andric DO(Ptr, instanceProps) \ 840fca6ea1SDimitry Andric DO(Ptr, classProps) \ 850fca6ea1SDimitry Andric DO(uint32_t, size) 8606c3fb27SDimitry Andric 8706c3fb27SDimitry Andric CREATE_LAYOUT_CLASS(Category, FOR_EACH_CATEGORY_FIELD); 8806c3fb27SDimitry Andric 8906c3fb27SDimitry Andric #undef FOR_EACH_CATEGORY_FIELD 9006c3fb27SDimitry Andric 9106c3fb27SDimitry Andric #define FOR_EACH_CLASS_FIELD(DO) \ 9206c3fb27SDimitry Andric DO(Ptr, metaClass) \ 9306c3fb27SDimitry Andric DO(Ptr, superClass) \ 9406c3fb27SDimitry Andric DO(Ptr, methodCache) \ 9506c3fb27SDimitry Andric DO(Ptr, vtable) \ 9606c3fb27SDimitry Andric DO(Ptr, roData) 9706c3fb27SDimitry Andric 9806c3fb27SDimitry Andric CREATE_LAYOUT_CLASS(Class, FOR_EACH_CLASS_FIELD); 9906c3fb27SDimitry Andric 10006c3fb27SDimitry Andric #undef FOR_EACH_CLASS_FIELD 10106c3fb27SDimitry Andric 10206c3fb27SDimitry Andric #define FOR_EACH_RO_CLASS_FIELD(DO) \ 10306c3fb27SDimitry Andric DO(uint32_t, flags) \ 10406c3fb27SDimitry Andric DO(uint32_t, instanceStart) \ 10506c3fb27SDimitry Andric DO(Ptr, instanceSize) \ 10606c3fb27SDimitry Andric DO(Ptr, ivarLayout) \ 10706c3fb27SDimitry Andric DO(Ptr, name) \ 10806c3fb27SDimitry Andric DO(Ptr, baseMethods) \ 10906c3fb27SDimitry Andric DO(Ptr, baseProtocols) \ 11006c3fb27SDimitry Andric DO(Ptr, ivars) \ 11106c3fb27SDimitry Andric DO(Ptr, weakIvarLayout) \ 11206c3fb27SDimitry Andric DO(Ptr, baseProperties) 11306c3fb27SDimitry Andric 11406c3fb27SDimitry Andric CREATE_LAYOUT_CLASS(ROClass, FOR_EACH_RO_CLASS_FIELD); 11506c3fb27SDimitry Andric 11606c3fb27SDimitry Andric #undef FOR_EACH_RO_CLASS_FIELD 11706c3fb27SDimitry Andric 11806c3fb27SDimitry Andric #define FOR_EACH_LIST_HEADER(DO) \ 1190fca6ea1SDimitry Andric DO(uint32_t, structSize) \ 1200fca6ea1SDimitry Andric DO(uint32_t, structCount) 12106c3fb27SDimitry Andric 12206c3fb27SDimitry Andric CREATE_LAYOUT_CLASS(ListHeader, FOR_EACH_LIST_HEADER); 12306c3fb27SDimitry Andric 12406c3fb27SDimitry Andric #undef FOR_EACH_LIST_HEADER 12506c3fb27SDimitry Andric 1260fca6ea1SDimitry Andric #define FOR_EACH_PROTOCOL_LIST_HEADER(DO) DO(Ptr, protocolCount) 1270fca6ea1SDimitry Andric 1280fca6ea1SDimitry Andric CREATE_LAYOUT_CLASS(ProtocolListHeader, FOR_EACH_PROTOCOL_LIST_HEADER); 1290fca6ea1SDimitry Andric 1300fca6ea1SDimitry Andric #undef FOR_EACH_PROTOCOL_LIST_HEADER 1310fca6ea1SDimitry Andric 13206c3fb27SDimitry Andric #define FOR_EACH_METHOD(DO) \ 13306c3fb27SDimitry Andric DO(Ptr, name) \ 13406c3fb27SDimitry Andric DO(Ptr, type) \ 13506c3fb27SDimitry Andric DO(Ptr, impl) 13606c3fb27SDimitry Andric 13706c3fb27SDimitry Andric CREATE_LAYOUT_CLASS(Method, FOR_EACH_METHOD); 13806c3fb27SDimitry Andric 13906c3fb27SDimitry Andric #undef FOR_EACH_METHOD 14006c3fb27SDimitry Andric 14106c3fb27SDimitry Andric enum MethodContainerKind { 14206c3fb27SDimitry Andric MCK_Class, 14306c3fb27SDimitry Andric MCK_Category, 14406c3fb27SDimitry Andric }; 14506c3fb27SDimitry Andric 14606c3fb27SDimitry Andric struct MethodContainer { 14706c3fb27SDimitry Andric MethodContainerKind kind; 14806c3fb27SDimitry Andric const ConcatInputSection *isec; 14906c3fb27SDimitry Andric }; 15006c3fb27SDimitry Andric 15106c3fb27SDimitry Andric enum MethodKind { 15206c3fb27SDimitry Andric MK_Instance, 15306c3fb27SDimitry Andric MK_Static, 15406c3fb27SDimitry Andric }; 15506c3fb27SDimitry Andric 15606c3fb27SDimitry Andric struct ObjcClass { 15706c3fb27SDimitry Andric DenseMap<CachedHashStringRef, MethodContainer> instanceMethods; 15806c3fb27SDimitry Andric DenseMap<CachedHashStringRef, MethodContainer> classMethods; 15906c3fb27SDimitry Andric }; 16006c3fb27SDimitry Andric 16106c3fb27SDimitry Andric } // namespace 16206c3fb27SDimitry Andric 16306c3fb27SDimitry Andric class ObjcCategoryChecker { 16406c3fb27SDimitry Andric public: 16506c3fb27SDimitry Andric ObjcCategoryChecker(); 16606c3fb27SDimitry Andric void parseCategory(const ConcatInputSection *catListIsec); 16706c3fb27SDimitry Andric 16806c3fb27SDimitry Andric private: 16906c3fb27SDimitry Andric void parseClass(const Defined *classSym); 17006c3fb27SDimitry Andric void parseMethods(const ConcatInputSection *methodsIsec, 17106c3fb27SDimitry Andric const Symbol *methodContainer, 17206c3fb27SDimitry Andric const ConcatInputSection *containerIsec, 17306c3fb27SDimitry Andric MethodContainerKind, MethodKind); 17406c3fb27SDimitry Andric 17506c3fb27SDimitry Andric CategoryLayout catLayout; 17606c3fb27SDimitry Andric ClassLayout classLayout; 17706c3fb27SDimitry Andric ROClassLayout roClassLayout; 17806c3fb27SDimitry Andric ListHeaderLayout listHeaderLayout; 17906c3fb27SDimitry Andric MethodLayout methodLayout; 18006c3fb27SDimitry Andric 18106c3fb27SDimitry Andric DenseMap<const Symbol *, ObjcClass> classMap; 18206c3fb27SDimitry Andric }; 18306c3fb27SDimitry Andric 18406c3fb27SDimitry Andric ObjcCategoryChecker::ObjcCategoryChecker() 18506c3fb27SDimitry Andric : catLayout(target->wordSize), classLayout(target->wordSize), 18606c3fb27SDimitry Andric roClassLayout(target->wordSize), listHeaderLayout(target->wordSize), 18706c3fb27SDimitry Andric methodLayout(target->wordSize) {} 18806c3fb27SDimitry Andric 1890fca6ea1SDimitry Andric // \p r must point to an offset within a CStringInputSection or a 1900fca6ea1SDimitry Andric // ConcatInputSection 19106c3fb27SDimitry Andric static StringRef getReferentString(const Reloc &r) { 19206c3fb27SDimitry Andric if (auto *isec = r.referent.dyn_cast<InputSection *>()) 19306c3fb27SDimitry Andric return cast<CStringInputSection>(isec)->getStringRefAtOffset(r.addend); 1940fca6ea1SDimitry Andric 19506c3fb27SDimitry Andric auto *sym = cast<Defined>(r.referent.get<Symbol *>()); 1960fca6ea1SDimitry Andric auto *symIsec = sym->isec(); 1970fca6ea1SDimitry Andric auto symOffset = sym->value + r.addend; 1980fca6ea1SDimitry Andric 1990fca6ea1SDimitry Andric if (auto *s = dyn_cast_or_null<CStringInputSection>(symIsec)) 2000fca6ea1SDimitry Andric return s->getStringRefAtOffset(symOffset); 2010fca6ea1SDimitry Andric 2020fca6ea1SDimitry Andric if (isa<ConcatInputSection>(symIsec)) { 2030fca6ea1SDimitry Andric auto strData = symIsec->data.slice(symOffset); 2040fca6ea1SDimitry Andric const char *pszData = reinterpret_cast<const char *>(strData.data()); 2050fca6ea1SDimitry Andric return StringRef(pszData, strnlen(pszData, strData.size())); 2060fca6ea1SDimitry Andric } 2070fca6ea1SDimitry Andric 2080fca6ea1SDimitry Andric llvm_unreachable("unknown reference section in getReferentString"); 20906c3fb27SDimitry Andric } 21006c3fb27SDimitry Andric 21106c3fb27SDimitry Andric void ObjcCategoryChecker::parseMethods(const ConcatInputSection *methodsIsec, 21206c3fb27SDimitry Andric const Symbol *methodContainerSym, 21306c3fb27SDimitry Andric const ConcatInputSection *containerIsec, 21406c3fb27SDimitry Andric MethodContainerKind mcKind, 21506c3fb27SDimitry Andric MethodKind mKind) { 21606c3fb27SDimitry Andric ObjcClass &klass = classMap[methodContainerSym]; 21706c3fb27SDimitry Andric for (const Reloc &r : methodsIsec->relocs) { 21806c3fb27SDimitry Andric if ((r.offset - listHeaderLayout.totalSize) % methodLayout.totalSize != 21906c3fb27SDimitry Andric methodLayout.nameOffset) 22006c3fb27SDimitry Andric continue; 22106c3fb27SDimitry Andric 22206c3fb27SDimitry Andric CachedHashStringRef methodName(getReferentString(r)); 22306c3fb27SDimitry Andric // +load methods are special: all implementations are called by the runtime 22406c3fb27SDimitry Andric // even if they are part of the same class. Thus there is no need to check 22506c3fb27SDimitry Andric // for duplicates. 22606c3fb27SDimitry Andric // NOTE: Instead of specifically checking for this method name, ld64 simply 22706c3fb27SDimitry Andric // checks whether a class / category is present in __objc_nlclslist / 22806c3fb27SDimitry Andric // __objc_nlcatlist respectively. This will be the case if the class / 22906c3fb27SDimitry Andric // category has a +load method. It skips optimizing the categories if there 23006c3fb27SDimitry Andric // are multiple +load methods. Since it does dupe checking as part of the 23106c3fb27SDimitry Andric // optimization process, this avoids spurious dupe messages around +load, 23206c3fb27SDimitry Andric // but it also means that legit dupe issues for other methods are ignored. 23306c3fb27SDimitry Andric if (mKind == MK_Static && methodName.val() == "load") 23406c3fb27SDimitry Andric continue; 23506c3fb27SDimitry Andric 23606c3fb27SDimitry Andric auto &methodMap = 23706c3fb27SDimitry Andric mKind == MK_Instance ? klass.instanceMethods : klass.classMethods; 23806c3fb27SDimitry Andric if (methodMap 23906c3fb27SDimitry Andric .try_emplace(methodName, MethodContainer{mcKind, containerIsec}) 24006c3fb27SDimitry Andric .second) 24106c3fb27SDimitry Andric continue; 24206c3fb27SDimitry Andric 24306c3fb27SDimitry Andric // We have a duplicate; generate a warning message. 24406c3fb27SDimitry Andric const auto &mc = methodMap.lookup(methodName); 24506c3fb27SDimitry Andric const Reloc *nameReloc = nullptr; 24606c3fb27SDimitry Andric if (mc.kind == MCK_Category) { 24706c3fb27SDimitry Andric nameReloc = mc.isec->getRelocAt(catLayout.nameOffset); 24806c3fb27SDimitry Andric } else { 24906c3fb27SDimitry Andric assert(mc.kind == MCK_Class); 25006c3fb27SDimitry Andric const auto *roIsec = mc.isec->getRelocAt(classLayout.roDataOffset) 25106c3fb27SDimitry Andric ->getReferentInputSection(); 25206c3fb27SDimitry Andric nameReloc = roIsec->getRelocAt(roClassLayout.nameOffset); 25306c3fb27SDimitry Andric } 25406c3fb27SDimitry Andric StringRef containerName = getReferentString(*nameReloc); 25506c3fb27SDimitry Andric StringRef methPrefix = mKind == MK_Instance ? "-" : "+"; 25606c3fb27SDimitry Andric 25706c3fb27SDimitry Andric // We should only ever encounter collisions when parsing category methods 25806c3fb27SDimitry Andric // (since the Class struct is parsed before any of its categories). 25906c3fb27SDimitry Andric assert(mcKind == MCK_Category); 26006c3fb27SDimitry Andric StringRef newCatName = 26106c3fb27SDimitry Andric getReferentString(*containerIsec->getRelocAt(catLayout.nameOffset)); 26206c3fb27SDimitry Andric 2630fca6ea1SDimitry Andric auto formatObjAndSrcFileName = [](const InputSection *section) { 2640fca6ea1SDimitry Andric lld::macho::InputFile *inputFile = section->getFile(); 2650fca6ea1SDimitry Andric std::string result = toString(inputFile); 2660fca6ea1SDimitry Andric 2670fca6ea1SDimitry Andric auto objFile = dyn_cast_or_null<ObjFile>(inputFile); 2680fca6ea1SDimitry Andric if (objFile && objFile->compileUnit) 2690fca6ea1SDimitry Andric result += " (" + objFile->sourceFile() + ")"; 2700fca6ea1SDimitry Andric 2710fca6ea1SDimitry Andric return result; 2720fca6ea1SDimitry Andric }; 2730fca6ea1SDimitry Andric 27406c3fb27SDimitry Andric StringRef containerType = mc.kind == MCK_Category ? "category" : "class"; 27506c3fb27SDimitry Andric warn("method '" + methPrefix + methodName.val() + 27606c3fb27SDimitry Andric "' has conflicting definitions:\n>>> defined in category " + 2770fca6ea1SDimitry Andric newCatName + " from " + formatObjAndSrcFileName(containerIsec) + 27806c3fb27SDimitry Andric "\n>>> defined in " + containerType + " " + containerName + " from " + 2790fca6ea1SDimitry Andric formatObjAndSrcFileName(mc.isec)); 28006c3fb27SDimitry Andric } 28106c3fb27SDimitry Andric } 28206c3fb27SDimitry Andric 28306c3fb27SDimitry Andric void ObjcCategoryChecker::parseCategory(const ConcatInputSection *catIsec) { 28406c3fb27SDimitry Andric auto *classReloc = catIsec->getRelocAt(catLayout.klassOffset); 28506c3fb27SDimitry Andric if (!classReloc) 28606c3fb27SDimitry Andric return; 28706c3fb27SDimitry Andric 28806c3fb27SDimitry Andric auto *classSym = classReloc->referent.get<Symbol *>(); 28906c3fb27SDimitry Andric if (auto *d = dyn_cast<Defined>(classSym)) 29006c3fb27SDimitry Andric if (!classMap.count(d)) 29106c3fb27SDimitry Andric parseClass(d); 29206c3fb27SDimitry Andric 29306c3fb27SDimitry Andric if (const auto *r = catIsec->getRelocAt(catLayout.classMethodsOffset)) { 29406c3fb27SDimitry Andric parseMethods(cast<ConcatInputSection>(r->getReferentInputSection()), 29506c3fb27SDimitry Andric classSym, catIsec, MCK_Category, MK_Static); 29606c3fb27SDimitry Andric } 29706c3fb27SDimitry Andric 29806c3fb27SDimitry Andric if (const auto *r = catIsec->getRelocAt(catLayout.instanceMethodsOffset)) { 29906c3fb27SDimitry Andric parseMethods(cast<ConcatInputSection>(r->getReferentInputSection()), 30006c3fb27SDimitry Andric classSym, catIsec, MCK_Category, MK_Instance); 30106c3fb27SDimitry Andric } 30206c3fb27SDimitry Andric } 30306c3fb27SDimitry Andric 30406c3fb27SDimitry Andric void ObjcCategoryChecker::parseClass(const Defined *classSym) { 30506c3fb27SDimitry Andric // Given a Class struct, get its corresponding Methods struct 30606c3fb27SDimitry Andric auto getMethodsIsec = 30706c3fb27SDimitry Andric [&](const InputSection *classIsec) -> ConcatInputSection * { 30806c3fb27SDimitry Andric if (const auto *r = classIsec->getRelocAt(classLayout.roDataOffset)) { 30906c3fb27SDimitry Andric if (const auto *roIsec = 31006c3fb27SDimitry Andric cast_or_null<ConcatInputSection>(r->getReferentInputSection())) { 31106c3fb27SDimitry Andric if (const auto *r = 31206c3fb27SDimitry Andric roIsec->getRelocAt(roClassLayout.baseMethodsOffset)) { 31306c3fb27SDimitry Andric if (auto *methodsIsec = cast_or_null<ConcatInputSection>( 31406c3fb27SDimitry Andric r->getReferentInputSection())) 31506c3fb27SDimitry Andric return methodsIsec; 31606c3fb27SDimitry Andric } 31706c3fb27SDimitry Andric } 31806c3fb27SDimitry Andric } 31906c3fb27SDimitry Andric return nullptr; 32006c3fb27SDimitry Andric }; 32106c3fb27SDimitry Andric 3220fca6ea1SDimitry Andric const auto *classIsec = cast<ConcatInputSection>(classSym->isec()); 32306c3fb27SDimitry Andric 32406c3fb27SDimitry Andric // Parse instance methods. 32506c3fb27SDimitry Andric if (const auto *instanceMethodsIsec = getMethodsIsec(classIsec)) 32606c3fb27SDimitry Andric parseMethods(instanceMethodsIsec, classSym, classIsec, MCK_Class, 32706c3fb27SDimitry Andric MK_Instance); 32806c3fb27SDimitry Andric 32906c3fb27SDimitry Andric // Class methods are contained in the metaclass. 3300fca6ea1SDimitry Andric if (const auto *r = classSym->isec()->getRelocAt(classLayout.metaClassOffset)) 33106c3fb27SDimitry Andric if (const auto *classMethodsIsec = getMethodsIsec( 33206c3fb27SDimitry Andric cast<ConcatInputSection>(r->getReferentInputSection()))) 33306c3fb27SDimitry Andric parseMethods(classMethodsIsec, classSym, classIsec, MCK_Class, MK_Static); 33406c3fb27SDimitry Andric } 33506c3fb27SDimitry Andric 33606c3fb27SDimitry Andric void objc::checkCategories() { 3370fca6ea1SDimitry Andric TimeTraceScope timeScope("ObjcCategoryChecker"); 3380fca6ea1SDimitry Andric 33906c3fb27SDimitry Andric ObjcCategoryChecker checker; 34006c3fb27SDimitry Andric for (const InputSection *isec : inputSections) { 34106c3fb27SDimitry Andric if (isec->getName() == section_names::objcCatList) 34206c3fb27SDimitry Andric for (const Reloc &r : isec->relocs) { 34306c3fb27SDimitry Andric auto *catIsec = cast<ConcatInputSection>(r.getReferentInputSection()); 34406c3fb27SDimitry Andric checker.parseCategory(catIsec); 34506c3fb27SDimitry Andric } 34606c3fb27SDimitry Andric } 34706c3fb27SDimitry Andric } 3480fca6ea1SDimitry Andric 3490fca6ea1SDimitry Andric namespace { 3500fca6ea1SDimitry Andric 3510fca6ea1SDimitry Andric class ObjcCategoryMerger { 3520fca6ea1SDimitry Andric // In which language was a particular construct originally defined 3530fca6ea1SDimitry Andric enum SourceLanguage { Unknown, ObjC, Swift }; 3540fca6ea1SDimitry Andric 3550fca6ea1SDimitry Andric // Information about an input category 3560fca6ea1SDimitry Andric struct InfoInputCategory { 3570fca6ea1SDimitry Andric ConcatInputSection *catListIsec; 3580fca6ea1SDimitry Andric ConcatInputSection *catBodyIsec; 3590fca6ea1SDimitry Andric uint32_t offCatListIsec = 0; 3600fca6ea1SDimitry Andric SourceLanguage sourceLanguage = SourceLanguage::Unknown; 3610fca6ea1SDimitry Andric 3620fca6ea1SDimitry Andric bool wasMerged = false; 3630fca6ea1SDimitry Andric }; 3640fca6ea1SDimitry Andric 3650fca6ea1SDimitry Andric // To write new (merged) categories or classes, we will try make limited 3660fca6ea1SDimitry Andric // assumptions about the alignment and the sections the various class/category 3670fca6ea1SDimitry Andric // info are stored in and . So we'll just reuse the same sections and 3680fca6ea1SDimitry Andric // alignment as already used in existing (input) categories. To do this we 3690fca6ea1SDimitry Andric // have InfoCategoryWriter which contains the various sections that the 3700fca6ea1SDimitry Andric // generated categories will be written to. 3710fca6ea1SDimitry Andric struct InfoWriteSection { 3720fca6ea1SDimitry Andric bool valid = false; // Data has been successfully collected from input 3730fca6ea1SDimitry Andric uint32_t align = 0; 3740fca6ea1SDimitry Andric Section *inputSection; 3750fca6ea1SDimitry Andric Reloc relocTemplate; 3760fca6ea1SDimitry Andric OutputSection *outputSection; 3770fca6ea1SDimitry Andric }; 3780fca6ea1SDimitry Andric 3790fca6ea1SDimitry Andric struct InfoCategoryWriter { 3800fca6ea1SDimitry Andric InfoWriteSection catListInfo; 3810fca6ea1SDimitry Andric InfoWriteSection catBodyInfo; 3820fca6ea1SDimitry Andric InfoWriteSection catNameInfo; 3830fca6ea1SDimitry Andric InfoWriteSection catPtrListInfo; 3840fca6ea1SDimitry Andric }; 3850fca6ea1SDimitry Andric 3860fca6ea1SDimitry Andric // Information about a pointer list in the original categories or class(method 3870fca6ea1SDimitry Andric // lists, protocol lists, etc) 3880fca6ea1SDimitry Andric struct PointerListInfo { 3890fca6ea1SDimitry Andric PointerListInfo() = default; 3900fca6ea1SDimitry Andric PointerListInfo(const PointerListInfo &) = default; 3910fca6ea1SDimitry Andric PointerListInfo(const char *_categoryPrefix, uint32_t _pointersPerStruct) 3920fca6ea1SDimitry Andric : categoryPrefix(_categoryPrefix), 3930fca6ea1SDimitry Andric pointersPerStruct(_pointersPerStruct) {} 3940fca6ea1SDimitry Andric 3950fca6ea1SDimitry Andric inline bool operator==(const PointerListInfo &cmp) const { 3960fca6ea1SDimitry Andric return pointersPerStruct == cmp.pointersPerStruct && 3970fca6ea1SDimitry Andric structSize == cmp.structSize && structCount == cmp.structCount && 3980fca6ea1SDimitry Andric allPtrs == cmp.allPtrs; 3990fca6ea1SDimitry Andric } 4000fca6ea1SDimitry Andric 4010fca6ea1SDimitry Andric const char *categoryPrefix; 4020fca6ea1SDimitry Andric 4030fca6ea1SDimitry Andric uint32_t pointersPerStruct = 0; 4040fca6ea1SDimitry Andric 4050fca6ea1SDimitry Andric uint32_t structSize = 0; 4060fca6ea1SDimitry Andric uint32_t structCount = 0; 4070fca6ea1SDimitry Andric 4080fca6ea1SDimitry Andric std::vector<Symbol *> allPtrs; 4090fca6ea1SDimitry Andric }; 4100fca6ea1SDimitry Andric 4110fca6ea1SDimitry Andric // Full information describing an ObjC class . This will include all the 4120fca6ea1SDimitry Andric // additional methods, protocols, and properties that are contained in the 4130fca6ea1SDimitry Andric // class and all the categories that extend a particular class. 4140fca6ea1SDimitry Andric struct ClassExtensionInfo { 4150fca6ea1SDimitry Andric ClassExtensionInfo(CategoryLayout &_catLayout) : catLayout(_catLayout){}; 4160fca6ea1SDimitry Andric 4170fca6ea1SDimitry Andric // Merged names of containers. Ex: base|firstCategory|secondCategory|... 4180fca6ea1SDimitry Andric std::string mergedContainerName; 4190fca6ea1SDimitry Andric std::string baseClassName; 4200fca6ea1SDimitry Andric const Symbol *baseClass = nullptr; 4210fca6ea1SDimitry Andric SourceLanguage baseClassSourceLanguage = SourceLanguage::Unknown; 4220fca6ea1SDimitry Andric 4230fca6ea1SDimitry Andric CategoryLayout &catLayout; 4240fca6ea1SDimitry Andric 4250fca6ea1SDimitry Andric // In case we generate new data, mark the new data as belonging to this file 4260fca6ea1SDimitry Andric ObjFile *objFileForMergeData = nullptr; 4270fca6ea1SDimitry Andric 4280fca6ea1SDimitry Andric PointerListInfo instanceMethods = {objc::symbol_names::instanceMethods, 4290fca6ea1SDimitry Andric /*pointersPerStruct=*/3}; 4300fca6ea1SDimitry Andric PointerListInfo classMethods = {objc::symbol_names::categoryClassMethods, 4310fca6ea1SDimitry Andric /*pointersPerStruct=*/3}; 4320fca6ea1SDimitry Andric PointerListInfo protocols = {objc::symbol_names::categoryProtocols, 4330fca6ea1SDimitry Andric /*pointersPerStruct=*/0}; 4340fca6ea1SDimitry Andric PointerListInfo instanceProps = {objc::symbol_names::listProprieties, 4350fca6ea1SDimitry Andric /*pointersPerStruct=*/2}; 4360fca6ea1SDimitry Andric PointerListInfo classProps = {objc::symbol_names::klassPropList, 4370fca6ea1SDimitry Andric /*pointersPerStruct=*/2}; 4380fca6ea1SDimitry Andric }; 4390fca6ea1SDimitry Andric 4400fca6ea1SDimitry Andric public: 4410fca6ea1SDimitry Andric ObjcCategoryMerger(std::vector<ConcatInputSection *> &_allInputSections); 4420fca6ea1SDimitry Andric void doMerge(); 4430fca6ea1SDimitry Andric static void doCleanup(); 4440fca6ea1SDimitry Andric 4450fca6ea1SDimitry Andric private: 4460fca6ea1SDimitry Andric DenseSet<const Symbol *> collectNlCategories(); 4470fca6ea1SDimitry Andric void collectAndValidateCategoriesData(); 4480fca6ea1SDimitry Andric void 4490fca6ea1SDimitry Andric mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> &categories); 4500fca6ea1SDimitry Andric 4510fca6ea1SDimitry Andric void eraseISec(ConcatInputSection *isec); 4520fca6ea1SDimitry Andric void eraseMergedCategories(); 4530fca6ea1SDimitry Andric 4540fca6ea1SDimitry Andric void generateCatListForNonErasedCategories( 4550fca6ea1SDimitry Andric MapVector<ConcatInputSection *, std::set<uint64_t>> 4560fca6ea1SDimitry Andric catListToErasedOffsets); 4570fca6ea1SDimitry Andric void collectSectionWriteInfoFromIsec(const InputSection *isec, 4580fca6ea1SDimitry Andric InfoWriteSection &catWriteInfo); 4590fca6ea1SDimitry Andric void collectCategoryWriterInfoFromCategory(const InfoInputCategory &catInfo); 4600fca6ea1SDimitry Andric void parseCatInfoToExtInfo(const InfoInputCategory &catInfo, 4610fca6ea1SDimitry Andric ClassExtensionInfo &extInfo); 4620fca6ea1SDimitry Andric 4630fca6ea1SDimitry Andric void parseProtocolListInfo(const ConcatInputSection *isec, uint32_t secOffset, 4640fca6ea1SDimitry Andric PointerListInfo &ptrList, 4650fca6ea1SDimitry Andric SourceLanguage sourceLang); 4660fca6ea1SDimitry Andric 4670fca6ea1SDimitry Andric PointerListInfo parseProtocolListInfo(const ConcatInputSection *isec, 4680fca6ea1SDimitry Andric uint32_t secOffset, 4690fca6ea1SDimitry Andric SourceLanguage sourceLang); 4700fca6ea1SDimitry Andric 4710fca6ea1SDimitry Andric void parsePointerListInfo(const ConcatInputSection *isec, uint32_t secOffset, 4720fca6ea1SDimitry Andric PointerListInfo &ptrList); 4730fca6ea1SDimitry Andric 4740fca6ea1SDimitry Andric void emitAndLinkPointerList(Defined *parentSym, uint32_t linkAtOffset, 4750fca6ea1SDimitry Andric const ClassExtensionInfo &extInfo, 4760fca6ea1SDimitry Andric const PointerListInfo &ptrList); 4770fca6ea1SDimitry Andric 4780fca6ea1SDimitry Andric Defined *emitAndLinkProtocolList(Defined *parentSym, uint32_t linkAtOffset, 4790fca6ea1SDimitry Andric const ClassExtensionInfo &extInfo, 4800fca6ea1SDimitry Andric const PointerListInfo &ptrList); 4810fca6ea1SDimitry Andric 4820fca6ea1SDimitry Andric Defined *emitCategory(const ClassExtensionInfo &extInfo); 4830fca6ea1SDimitry Andric Defined *emitCatListEntrySec(const std::string &forCategoryName, 4840fca6ea1SDimitry Andric const std::string &forBaseClassName, 4850fca6ea1SDimitry Andric ObjFile *objFile); 4860fca6ea1SDimitry Andric Defined *emitCategoryBody(const std::string &name, const Defined *nameSym, 4870fca6ea1SDimitry Andric const Symbol *baseClassSym, 4880fca6ea1SDimitry Andric const std::string &baseClassName, ObjFile *objFile); 4890fca6ea1SDimitry Andric Defined *emitCategoryName(const std::string &name, ObjFile *objFile); 4900fca6ea1SDimitry Andric void createSymbolReference(Defined *refFrom, const Symbol *refTo, 4910fca6ea1SDimitry Andric uint32_t offset, const Reloc &relocTemplate); 4920fca6ea1SDimitry Andric Defined *tryFindDefinedOnIsec(const InputSection *isec, uint32_t offset); 4930fca6ea1SDimitry Andric Symbol *tryGetSymbolAtIsecOffset(const ConcatInputSection *isec, 4940fca6ea1SDimitry Andric uint32_t offset); 4950fca6ea1SDimitry Andric Defined *tryGetDefinedAtIsecOffset(const ConcatInputSection *isec, 4960fca6ea1SDimitry Andric uint32_t offset); 4970fca6ea1SDimitry Andric Defined *getClassRo(const Defined *classSym, bool getMetaRo); 4980fca6ea1SDimitry Andric SourceLanguage getClassSymSourceLang(const Defined *classSym); 4990fca6ea1SDimitry Andric void mergeCategoriesIntoBaseClass(const Defined *baseClass, 5000fca6ea1SDimitry Andric std::vector<InfoInputCategory> &categories); 5010fca6ea1SDimitry Andric void eraseSymbolAtIsecOffset(ConcatInputSection *isec, uint32_t offset); 5020fca6ea1SDimitry Andric void tryEraseDefinedAtIsecOffset(const ConcatInputSection *isec, 5030fca6ea1SDimitry Andric uint32_t offset); 5040fca6ea1SDimitry Andric 5050fca6ea1SDimitry Andric // Allocate a null-terminated StringRef backed by generatedSectionData 5060fca6ea1SDimitry Andric StringRef newStringData(const char *str); 5070fca6ea1SDimitry Andric // Allocate section data, backed by generatedSectionData 5080fca6ea1SDimitry Andric SmallVector<uint8_t> &newSectionData(uint32_t size); 5090fca6ea1SDimitry Andric 5100fca6ea1SDimitry Andric CategoryLayout catLayout; 5110fca6ea1SDimitry Andric ClassLayout classLayout; 5120fca6ea1SDimitry Andric ROClassLayout roClassLayout; 5130fca6ea1SDimitry Andric ListHeaderLayout listHeaderLayout; 5140fca6ea1SDimitry Andric MethodLayout methodLayout; 5150fca6ea1SDimitry Andric ProtocolListHeaderLayout protocolListHeaderLayout; 5160fca6ea1SDimitry Andric 5170fca6ea1SDimitry Andric InfoCategoryWriter infoCategoryWriter; 5180fca6ea1SDimitry Andric std::vector<ConcatInputSection *> &allInputSections; 5190fca6ea1SDimitry Andric // Map of base class Symbol to list of InfoInputCategory's for it 5200fca6ea1SDimitry Andric MapVector<const Symbol *, std::vector<InfoInputCategory>> categoryMap; 5210fca6ea1SDimitry Andric 5220fca6ea1SDimitry Andric // Normally, the binary data comes from the input files, but since we're 5230fca6ea1SDimitry Andric // generating binary data ourselves, we use the below array to store it in. 5240fca6ea1SDimitry Andric // Need this to be 'static' so the data survives past the ObjcCategoryMerger 5250fca6ea1SDimitry Andric // object, as the data will be read by the Writer when the final binary is 5260fca6ea1SDimitry Andric // generated. 5270fca6ea1SDimitry Andric static SmallVector<std::unique_ptr<SmallVector<uint8_t>>> 5280fca6ea1SDimitry Andric generatedSectionData; 5290fca6ea1SDimitry Andric }; 5300fca6ea1SDimitry Andric 5310fca6ea1SDimitry Andric SmallVector<std::unique_ptr<SmallVector<uint8_t>>> 5320fca6ea1SDimitry Andric ObjcCategoryMerger::generatedSectionData; 5330fca6ea1SDimitry Andric 5340fca6ea1SDimitry Andric ObjcCategoryMerger::ObjcCategoryMerger( 5350fca6ea1SDimitry Andric std::vector<ConcatInputSection *> &_allInputSections) 5360fca6ea1SDimitry Andric : catLayout(target->wordSize), classLayout(target->wordSize), 5370fca6ea1SDimitry Andric roClassLayout(target->wordSize), listHeaderLayout(target->wordSize), 5380fca6ea1SDimitry Andric methodLayout(target->wordSize), 5390fca6ea1SDimitry Andric protocolListHeaderLayout(target->wordSize), 5400fca6ea1SDimitry Andric allInputSections(_allInputSections) {} 5410fca6ea1SDimitry Andric 5420fca6ea1SDimitry Andric void ObjcCategoryMerger::collectSectionWriteInfoFromIsec( 5430fca6ea1SDimitry Andric const InputSection *isec, InfoWriteSection &catWriteInfo) { 5440fca6ea1SDimitry Andric 5450fca6ea1SDimitry Andric catWriteInfo.inputSection = const_cast<Section *>(&isec->section); 5460fca6ea1SDimitry Andric catWriteInfo.align = isec->align; 5470fca6ea1SDimitry Andric catWriteInfo.outputSection = isec->parent; 5480fca6ea1SDimitry Andric 5490fca6ea1SDimitry Andric assert(catWriteInfo.outputSection && 5500fca6ea1SDimitry Andric "outputSection may not be null in collectSectionWriteInfoFromIsec."); 5510fca6ea1SDimitry Andric 5520fca6ea1SDimitry Andric if (isec->relocs.size()) 5530fca6ea1SDimitry Andric catWriteInfo.relocTemplate = isec->relocs[0]; 5540fca6ea1SDimitry Andric 5550fca6ea1SDimitry Andric catWriteInfo.valid = true; 5560fca6ea1SDimitry Andric } 5570fca6ea1SDimitry Andric 5580fca6ea1SDimitry Andric Symbol * 5590fca6ea1SDimitry Andric ObjcCategoryMerger::tryGetSymbolAtIsecOffset(const ConcatInputSection *isec, 5600fca6ea1SDimitry Andric uint32_t offset) { 5610fca6ea1SDimitry Andric if (!isec) 5620fca6ea1SDimitry Andric return nullptr; 5630fca6ea1SDimitry Andric const Reloc *reloc = isec->getRelocAt(offset); 5640fca6ea1SDimitry Andric 5650fca6ea1SDimitry Andric if (!reloc) 5660fca6ea1SDimitry Andric return nullptr; 5670fca6ea1SDimitry Andric 5680fca6ea1SDimitry Andric Symbol *sym = reloc->referent.get<Symbol *>(); 5690fca6ea1SDimitry Andric 5700fca6ea1SDimitry Andric if (reloc->addend) { 5710fca6ea1SDimitry Andric assert(isa<Defined>(sym) && "Expected defined for non-zero addend"); 5720fca6ea1SDimitry Andric Defined *definedSym = cast<Defined>(sym); 5730fca6ea1SDimitry Andric sym = tryFindDefinedOnIsec(definedSym->isec(), 5740fca6ea1SDimitry Andric definedSym->value + reloc->addend); 5750fca6ea1SDimitry Andric } 5760fca6ea1SDimitry Andric 5770fca6ea1SDimitry Andric return sym; 5780fca6ea1SDimitry Andric } 5790fca6ea1SDimitry Andric 5800fca6ea1SDimitry Andric Defined *ObjcCategoryMerger::tryFindDefinedOnIsec(const InputSection *isec, 5810fca6ea1SDimitry Andric uint32_t offset) { 5820fca6ea1SDimitry Andric for (Defined *sym : isec->symbols) 5830fca6ea1SDimitry Andric if ((sym->value <= offset) && (sym->value + sym->size > offset)) 5840fca6ea1SDimitry Andric return sym; 5850fca6ea1SDimitry Andric 5860fca6ea1SDimitry Andric return nullptr; 5870fca6ea1SDimitry Andric } 5880fca6ea1SDimitry Andric 5890fca6ea1SDimitry Andric Defined * 5900fca6ea1SDimitry Andric ObjcCategoryMerger::tryGetDefinedAtIsecOffset(const ConcatInputSection *isec, 5910fca6ea1SDimitry Andric uint32_t offset) { 5920fca6ea1SDimitry Andric Symbol *sym = tryGetSymbolAtIsecOffset(isec, offset); 5930fca6ea1SDimitry Andric return dyn_cast_or_null<Defined>(sym); 5940fca6ea1SDimitry Andric } 5950fca6ea1SDimitry Andric 5960fca6ea1SDimitry Andric // Get the class's ro_data symbol. If getMetaRo is true, then we will return 5970fca6ea1SDimitry Andric // the meta-class's ro_data symbol. Otherwise, we will return the class 5980fca6ea1SDimitry Andric // (instance) ro_data symbol. 5990fca6ea1SDimitry Andric Defined *ObjcCategoryMerger::getClassRo(const Defined *classSym, 6000fca6ea1SDimitry Andric bool getMetaRo) { 6010fca6ea1SDimitry Andric ConcatInputSection *isec = dyn_cast<ConcatInputSection>(classSym->isec()); 6020fca6ea1SDimitry Andric if (!isec) 6030fca6ea1SDimitry Andric return nullptr; 6040fca6ea1SDimitry Andric 6050fca6ea1SDimitry Andric if (!getMetaRo) 6060fca6ea1SDimitry Andric return tryGetDefinedAtIsecOffset(isec, classLayout.roDataOffset + 6070fca6ea1SDimitry Andric classSym->value); 6080fca6ea1SDimitry Andric 6090fca6ea1SDimitry Andric Defined *metaClass = tryGetDefinedAtIsecOffset( 6100fca6ea1SDimitry Andric isec, classLayout.metaClassOffset + classSym->value); 6110fca6ea1SDimitry Andric if (!metaClass) 6120fca6ea1SDimitry Andric return nullptr; 6130fca6ea1SDimitry Andric 6140fca6ea1SDimitry Andric return tryGetDefinedAtIsecOffset( 6150fca6ea1SDimitry Andric dyn_cast<ConcatInputSection>(metaClass->isec()), 6160fca6ea1SDimitry Andric classLayout.roDataOffset); 6170fca6ea1SDimitry Andric } 6180fca6ea1SDimitry Andric 6190fca6ea1SDimitry Andric // Given an ConcatInputSection or CStringInputSection and an offset, if there is 6200fca6ea1SDimitry Andric // a symbol(Defined) at that offset, then erase the symbol (mark it not live) 6210fca6ea1SDimitry Andric void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset( 6220fca6ea1SDimitry Andric const ConcatInputSection *isec, uint32_t offset) { 6230fca6ea1SDimitry Andric const Reloc *reloc = isec->getRelocAt(offset); 6240fca6ea1SDimitry Andric 6250fca6ea1SDimitry Andric if (!reloc) 6260fca6ea1SDimitry Andric return; 6270fca6ea1SDimitry Andric 6280fca6ea1SDimitry Andric Defined *sym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>()); 6290fca6ea1SDimitry Andric if (!sym) 6300fca6ea1SDimitry Andric return; 6310fca6ea1SDimitry Andric 6320fca6ea1SDimitry Andric if (auto *cisec = dyn_cast_or_null<ConcatInputSection>(sym->isec())) 6330fca6ea1SDimitry Andric eraseISec(cisec); 6340fca6ea1SDimitry Andric else if (auto *csisec = dyn_cast_or_null<CStringInputSection>(sym->isec())) { 6350fca6ea1SDimitry Andric uint32_t totalOffset = sym->value + reloc->addend; 6360fca6ea1SDimitry Andric StringPiece &piece = csisec->getStringPiece(totalOffset); 6370fca6ea1SDimitry Andric piece.live = false; 6380fca6ea1SDimitry Andric } else { 6390fca6ea1SDimitry Andric llvm_unreachable("erased symbol has to be Defined or CStringInputSection"); 6400fca6ea1SDimitry Andric } 6410fca6ea1SDimitry Andric } 6420fca6ea1SDimitry Andric 6430fca6ea1SDimitry Andric void ObjcCategoryMerger::collectCategoryWriterInfoFromCategory( 6440fca6ea1SDimitry Andric const InfoInputCategory &catInfo) { 6450fca6ea1SDimitry Andric 6460fca6ea1SDimitry Andric if (!infoCategoryWriter.catListInfo.valid) 6470fca6ea1SDimitry Andric collectSectionWriteInfoFromIsec(catInfo.catListIsec, 6480fca6ea1SDimitry Andric infoCategoryWriter.catListInfo); 6490fca6ea1SDimitry Andric if (!infoCategoryWriter.catBodyInfo.valid) 6500fca6ea1SDimitry Andric collectSectionWriteInfoFromIsec(catInfo.catBodyIsec, 6510fca6ea1SDimitry Andric infoCategoryWriter.catBodyInfo); 6520fca6ea1SDimitry Andric 6530fca6ea1SDimitry Andric if (!infoCategoryWriter.catNameInfo.valid) { 6540fca6ea1SDimitry Andric lld::macho::Defined *catNameSym = 6550fca6ea1SDimitry Andric tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, catLayout.nameOffset); 6560fca6ea1SDimitry Andric assert(catNameSym && "Category does not have a valid name Symbol"); 6570fca6ea1SDimitry Andric 6580fca6ea1SDimitry Andric collectSectionWriteInfoFromIsec(catNameSym->isec(), 6590fca6ea1SDimitry Andric infoCategoryWriter.catNameInfo); 6600fca6ea1SDimitry Andric } 6610fca6ea1SDimitry Andric 6620fca6ea1SDimitry Andric // Collect writer info from all the category lists (we're assuming they all 6630fca6ea1SDimitry Andric // would provide the same info) 6640fca6ea1SDimitry Andric if (!infoCategoryWriter.catPtrListInfo.valid) { 6650fca6ea1SDimitry Andric for (uint32_t off = catLayout.instanceMethodsOffset; 6660fca6ea1SDimitry Andric off <= catLayout.classPropsOffset; off += target->wordSize) { 6670fca6ea1SDimitry Andric if (Defined *ptrList = 6680fca6ea1SDimitry Andric tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, off)) { 6690fca6ea1SDimitry Andric collectSectionWriteInfoFromIsec(ptrList->isec(), 6700fca6ea1SDimitry Andric infoCategoryWriter.catPtrListInfo); 6710fca6ea1SDimitry Andric // we've successfully collected data, so we can break 6720fca6ea1SDimitry Andric break; 6730fca6ea1SDimitry Andric } 6740fca6ea1SDimitry Andric } 6750fca6ea1SDimitry Andric } 6760fca6ea1SDimitry Andric } 6770fca6ea1SDimitry Andric 6780fca6ea1SDimitry Andric // Parse a protocol list that might be linked to ConcatInputSection at a given 6790fca6ea1SDimitry Andric // offset. The format of the protocol list is different than other lists (prop 6800fca6ea1SDimitry Andric // lists, method lists) so we need to parse it differently 6810fca6ea1SDimitry Andric void ObjcCategoryMerger::parseProtocolListInfo( 6820fca6ea1SDimitry Andric const ConcatInputSection *isec, uint32_t secOffset, 6830fca6ea1SDimitry Andric PointerListInfo &ptrList, [[maybe_unused]] SourceLanguage sourceLang) { 6840fca6ea1SDimitry Andric assert((isec && (secOffset + target->wordSize <= isec->data.size())) && 6850fca6ea1SDimitry Andric "Tried to read pointer list beyond protocol section end"); 6860fca6ea1SDimitry Andric 6870fca6ea1SDimitry Andric const Reloc *reloc = isec->getRelocAt(secOffset); 6880fca6ea1SDimitry Andric if (!reloc) 6890fca6ea1SDimitry Andric return; 6900fca6ea1SDimitry Andric 6910fca6ea1SDimitry Andric auto *ptrListSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>()); 6920fca6ea1SDimitry Andric assert(ptrListSym && "Protocol list reloc does not have a valid Defined"); 6930fca6ea1SDimitry Andric 6940fca6ea1SDimitry Andric // Theoretically protocol count can be either 32b or 64b, depending on 6950fca6ea1SDimitry Andric // platform pointer size, but to simplify implementation we always just read 6960fca6ea1SDimitry Andric // the lower 32b which should be good enough. 6970fca6ea1SDimitry Andric uint32_t protocolCount = *reinterpret_cast<const uint32_t *>( 6980fca6ea1SDimitry Andric ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset); 6990fca6ea1SDimitry Andric 7000fca6ea1SDimitry Andric ptrList.structCount += protocolCount; 7010fca6ea1SDimitry Andric ptrList.structSize = target->wordSize; 7020fca6ea1SDimitry Andric 7030fca6ea1SDimitry Andric [[maybe_unused]] uint32_t expectedListSize = 7040fca6ea1SDimitry Andric (protocolCount * target->wordSize) + 7050fca6ea1SDimitry Andric /*header(count)*/ protocolListHeaderLayout.totalSize + 7060fca6ea1SDimitry Andric /*extra null value*/ target->wordSize; 7070fca6ea1SDimitry Andric 7080fca6ea1SDimitry Andric // On Swift, the protocol list does not have the extra (unnecessary) null 7090fca6ea1SDimitry Andric [[maybe_unused]] uint32_t expectedListSizeSwift = 7100fca6ea1SDimitry Andric expectedListSize - target->wordSize; 7110fca6ea1SDimitry Andric 7120fca6ea1SDimitry Andric assert(((expectedListSize == ptrListSym->isec()->data.size() && 7130fca6ea1SDimitry Andric sourceLang == SourceLanguage::ObjC) || 7140fca6ea1SDimitry Andric (expectedListSizeSwift == ptrListSym->isec()->data.size() && 7150fca6ea1SDimitry Andric sourceLang == SourceLanguage::Swift)) && 7160fca6ea1SDimitry Andric "Protocol list does not match expected size"); 7170fca6ea1SDimitry Andric 7180fca6ea1SDimitry Andric uint32_t off = protocolListHeaderLayout.totalSize; 7190fca6ea1SDimitry Andric for (uint32_t inx = 0; inx < protocolCount; ++inx) { 7200fca6ea1SDimitry Andric const Reloc *reloc = ptrListSym->isec()->getRelocAt(off); 7210fca6ea1SDimitry Andric assert(reloc && "No reloc found at protocol list offset"); 7220fca6ea1SDimitry Andric 7230fca6ea1SDimitry Andric auto *listSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>()); 7240fca6ea1SDimitry Andric assert(listSym && "Protocol list reloc does not have a valid Defined"); 7250fca6ea1SDimitry Andric 7260fca6ea1SDimitry Andric ptrList.allPtrs.push_back(listSym); 7270fca6ea1SDimitry Andric off += target->wordSize; 7280fca6ea1SDimitry Andric } 7290fca6ea1SDimitry Andric assert((ptrListSym->isec()->getRelocAt(off) == nullptr) && 7300fca6ea1SDimitry Andric "expected null terminating protocol"); 7310fca6ea1SDimitry Andric assert(off + /*extra null value*/ target->wordSize == expectedListSize && 7320fca6ea1SDimitry Andric "Protocol list end offset does not match expected size"); 7330fca6ea1SDimitry Andric } 7340fca6ea1SDimitry Andric 7350fca6ea1SDimitry Andric // Parse a protocol list and return the PointerListInfo for it 7360fca6ea1SDimitry Andric ObjcCategoryMerger::PointerListInfo 7370fca6ea1SDimitry Andric ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec, 7380fca6ea1SDimitry Andric uint32_t secOffset, 7390fca6ea1SDimitry Andric SourceLanguage sourceLang) { 7400fca6ea1SDimitry Andric PointerListInfo ptrList; 7410fca6ea1SDimitry Andric parseProtocolListInfo(isec, secOffset, ptrList, sourceLang); 7420fca6ea1SDimitry Andric return ptrList; 7430fca6ea1SDimitry Andric } 7440fca6ea1SDimitry Andric 7450fca6ea1SDimitry Andric // Parse a pointer list that might be linked to ConcatInputSection at a given 7460fca6ea1SDimitry Andric // offset. This can be used for instance methods, class methods, instance props 7470fca6ea1SDimitry Andric // and class props since they have the same format. 7480fca6ea1SDimitry Andric void ObjcCategoryMerger::parsePointerListInfo(const ConcatInputSection *isec, 7490fca6ea1SDimitry Andric uint32_t secOffset, 7500fca6ea1SDimitry Andric PointerListInfo &ptrList) { 7510fca6ea1SDimitry Andric assert(ptrList.pointersPerStruct == 2 || ptrList.pointersPerStruct == 3); 7520fca6ea1SDimitry Andric assert(isec && "Trying to parse pointer list from null isec"); 7530fca6ea1SDimitry Andric assert(secOffset + target->wordSize <= isec->data.size() && 7540fca6ea1SDimitry Andric "Trying to read pointer list beyond section end"); 7550fca6ea1SDimitry Andric 7560fca6ea1SDimitry Andric const Reloc *reloc = isec->getRelocAt(secOffset); 7570fca6ea1SDimitry Andric if (!reloc) 7580fca6ea1SDimitry Andric return; 7590fca6ea1SDimitry Andric 7600fca6ea1SDimitry Andric auto *ptrListSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>()); 7610fca6ea1SDimitry Andric assert(ptrListSym && "Reloc does not have a valid Defined"); 7620fca6ea1SDimitry Andric 7630fca6ea1SDimitry Andric uint32_t thisStructSize = *reinterpret_cast<const uint32_t *>( 7640fca6ea1SDimitry Andric ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset); 7650fca6ea1SDimitry Andric uint32_t thisStructCount = *reinterpret_cast<const uint32_t *>( 7660fca6ea1SDimitry Andric ptrListSym->isec()->data.data() + listHeaderLayout.structCountOffset); 7670fca6ea1SDimitry Andric assert(thisStructSize == ptrList.pointersPerStruct * target->wordSize); 7680fca6ea1SDimitry Andric 7690fca6ea1SDimitry Andric assert(!ptrList.structSize || (thisStructSize == ptrList.structSize)); 7700fca6ea1SDimitry Andric 7710fca6ea1SDimitry Andric ptrList.structCount += thisStructCount; 7720fca6ea1SDimitry Andric ptrList.structSize = thisStructSize; 7730fca6ea1SDimitry Andric 7740fca6ea1SDimitry Andric uint32_t expectedListSize = 7750fca6ea1SDimitry Andric listHeaderLayout.totalSize + (thisStructSize * thisStructCount); 7760fca6ea1SDimitry Andric assert(expectedListSize == ptrListSym->isec()->data.size() && 7770fca6ea1SDimitry Andric "Pointer list does not match expected size"); 7780fca6ea1SDimitry Andric 7790fca6ea1SDimitry Andric for (uint32_t off = listHeaderLayout.totalSize; off < expectedListSize; 7800fca6ea1SDimitry Andric off += target->wordSize) { 7810fca6ea1SDimitry Andric const Reloc *reloc = ptrListSym->isec()->getRelocAt(off); 7820fca6ea1SDimitry Andric assert(reloc && "No reloc found at pointer list offset"); 7830fca6ea1SDimitry Andric 7840fca6ea1SDimitry Andric auto *listSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>()); 7850fca6ea1SDimitry Andric assert(listSym && "Reloc does not have a valid Defined"); 7860fca6ea1SDimitry Andric 7870fca6ea1SDimitry Andric ptrList.allPtrs.push_back(listSym); 7880fca6ea1SDimitry Andric } 7890fca6ea1SDimitry Andric } 7900fca6ea1SDimitry Andric 7910fca6ea1SDimitry Andric // Here we parse all the information of an input category (catInfo) and 7920fca6ea1SDimitry Andric // append the parsed info into the structure which will contain all the 7930fca6ea1SDimitry Andric // information about how a class is extended (extInfo) 7940fca6ea1SDimitry Andric void ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo, 7950fca6ea1SDimitry Andric ClassExtensionInfo &extInfo) { 7960fca6ea1SDimitry Andric const Reloc *catNameReloc = 7970fca6ea1SDimitry Andric catInfo.catBodyIsec->getRelocAt(catLayout.nameOffset); 7980fca6ea1SDimitry Andric 7990fca6ea1SDimitry Andric // Parse name 8000fca6ea1SDimitry Andric assert(catNameReloc && "Category does not have a reloc at 'nameOffset'"); 8010fca6ea1SDimitry Andric 8020fca6ea1SDimitry Andric // is this the first category we are parsing? 8030fca6ea1SDimitry Andric if (extInfo.mergedContainerName.empty()) 8040fca6ea1SDimitry Andric extInfo.objFileForMergeData = 8050fca6ea1SDimitry Andric dyn_cast_or_null<ObjFile>(catInfo.catBodyIsec->getFile()); 8060fca6ea1SDimitry Andric else 8070fca6ea1SDimitry Andric extInfo.mergedContainerName += "|"; 8080fca6ea1SDimitry Andric 8090fca6ea1SDimitry Andric assert(extInfo.objFileForMergeData && 8100fca6ea1SDimitry Andric "Expected to already have valid objextInfo.objFileForMergeData"); 8110fca6ea1SDimitry Andric 8120fca6ea1SDimitry Andric StringRef catName = getReferentString(*catNameReloc); 8130fca6ea1SDimitry Andric extInfo.mergedContainerName += catName.str(); 8140fca6ea1SDimitry Andric 8150fca6ea1SDimitry Andric // Parse base class 8160fca6ea1SDimitry Andric if (!extInfo.baseClass) { 8170fca6ea1SDimitry Andric Symbol *classSym = 8180fca6ea1SDimitry Andric tryGetSymbolAtIsecOffset(catInfo.catBodyIsec, catLayout.klassOffset); 8190fca6ea1SDimitry Andric assert(extInfo.baseClassName.empty()); 8200fca6ea1SDimitry Andric extInfo.baseClass = classSym; 8210fca6ea1SDimitry Andric llvm::StringRef classPrefix(objc::symbol_names::klass); 8220fca6ea1SDimitry Andric assert(classSym->getName().starts_with(classPrefix) && 8230fca6ea1SDimitry Andric "Base class symbol does not start with expected prefix"); 8240fca6ea1SDimitry Andric extInfo.baseClassName = classSym->getName().substr(classPrefix.size()); 8250fca6ea1SDimitry Andric } else { 8260fca6ea1SDimitry Andric assert((extInfo.baseClass == 8270fca6ea1SDimitry Andric tryGetSymbolAtIsecOffset(catInfo.catBodyIsec, 8280fca6ea1SDimitry Andric catLayout.klassOffset)) && 8290fca6ea1SDimitry Andric "Trying to parse category info into container with different base " 8300fca6ea1SDimitry Andric "class"); 8310fca6ea1SDimitry Andric } 8320fca6ea1SDimitry Andric 8330fca6ea1SDimitry Andric parsePointerListInfo(catInfo.catBodyIsec, catLayout.instanceMethodsOffset, 8340fca6ea1SDimitry Andric extInfo.instanceMethods); 8350fca6ea1SDimitry Andric 8360fca6ea1SDimitry Andric parsePointerListInfo(catInfo.catBodyIsec, catLayout.classMethodsOffset, 8370fca6ea1SDimitry Andric extInfo.classMethods); 8380fca6ea1SDimitry Andric 8390fca6ea1SDimitry Andric parseProtocolListInfo(catInfo.catBodyIsec, catLayout.protocolsOffset, 8400fca6ea1SDimitry Andric extInfo.protocols, catInfo.sourceLanguage); 8410fca6ea1SDimitry Andric 8420fca6ea1SDimitry Andric parsePointerListInfo(catInfo.catBodyIsec, catLayout.instancePropsOffset, 8430fca6ea1SDimitry Andric extInfo.instanceProps); 8440fca6ea1SDimitry Andric 8450fca6ea1SDimitry Andric parsePointerListInfo(catInfo.catBodyIsec, catLayout.classPropsOffset, 8460fca6ea1SDimitry Andric extInfo.classProps); 8470fca6ea1SDimitry Andric } 8480fca6ea1SDimitry Andric 8490fca6ea1SDimitry Andric // Generate a protocol list (including header) and link it into the parent at 8500fca6ea1SDimitry Andric // the specified offset. 8510fca6ea1SDimitry Andric Defined *ObjcCategoryMerger::emitAndLinkProtocolList( 8520fca6ea1SDimitry Andric Defined *parentSym, uint32_t linkAtOffset, 8530fca6ea1SDimitry Andric const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) { 8540fca6ea1SDimitry Andric if (ptrList.allPtrs.empty()) 8550fca6ea1SDimitry Andric return nullptr; 8560fca6ea1SDimitry Andric 8570fca6ea1SDimitry Andric assert(ptrList.allPtrs.size() == ptrList.structCount); 8580fca6ea1SDimitry Andric 8590fca6ea1SDimitry Andric uint32_t bodySize = (ptrList.structCount * target->wordSize) + 8600fca6ea1SDimitry Andric /*header(count)*/ protocolListHeaderLayout.totalSize + 8610fca6ea1SDimitry Andric /*extra null value*/ target->wordSize; 8620fca6ea1SDimitry Andric llvm::ArrayRef<uint8_t> bodyData = newSectionData(bodySize); 8630fca6ea1SDimitry Andric 8640fca6ea1SDimitry Andric // This theoretically can be either 32b or 64b, but writing just the first 32b 8650fca6ea1SDimitry Andric // is good enough 8660fca6ea1SDimitry Andric const uint32_t *ptrProtoCount = reinterpret_cast<const uint32_t *>( 8670fca6ea1SDimitry Andric bodyData.data() + protocolListHeaderLayout.protocolCountOffset); 8680fca6ea1SDimitry Andric 8690fca6ea1SDimitry Andric *const_cast<uint32_t *>(ptrProtoCount) = ptrList.allPtrs.size(); 8700fca6ea1SDimitry Andric 8710fca6ea1SDimitry Andric ConcatInputSection *listSec = make<ConcatInputSection>( 8720fca6ea1SDimitry Andric *infoCategoryWriter.catPtrListInfo.inputSection, bodyData, 8730fca6ea1SDimitry Andric infoCategoryWriter.catPtrListInfo.align); 8740fca6ea1SDimitry Andric listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection; 8750fca6ea1SDimitry Andric listSec->live = true; 8760fca6ea1SDimitry Andric 8770fca6ea1SDimitry Andric listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection; 8780fca6ea1SDimitry Andric 8790fca6ea1SDimitry Andric std::string symName = ptrList.categoryPrefix; 8800fca6ea1SDimitry Andric symName += extInfo.baseClassName + "(" + extInfo.mergedContainerName + ")"; 8810fca6ea1SDimitry Andric 8820fca6ea1SDimitry Andric Defined *ptrListSym = make<Defined>( 8830fca6ea1SDimitry Andric newStringData(symName.c_str()), /*file=*/parentSym->getObjectFile(), 8840fca6ea1SDimitry Andric listSec, /*value=*/0, bodyData.size(), /*isWeakDef=*/false, 8850fca6ea1SDimitry Andric /*isExternal=*/false, /*isPrivateExtern=*/false, /*includeInSymtab=*/true, 8860fca6ea1SDimitry Andric /*isReferencedDynamically=*/false, /*noDeadStrip=*/false, 8870fca6ea1SDimitry Andric /*isWeakDefCanBeHidden=*/false); 8880fca6ea1SDimitry Andric 8890fca6ea1SDimitry Andric ptrListSym->used = true; 8900fca6ea1SDimitry Andric parentSym->getObjectFile()->symbols.push_back(ptrListSym); 891*6c4b055cSDimitry Andric addInputSection(listSec); 8920fca6ea1SDimitry Andric 8930fca6ea1SDimitry Andric createSymbolReference(parentSym, ptrListSym, linkAtOffset, 8940fca6ea1SDimitry Andric infoCategoryWriter.catBodyInfo.relocTemplate); 8950fca6ea1SDimitry Andric 8960fca6ea1SDimitry Andric uint32_t offset = protocolListHeaderLayout.totalSize; 8970fca6ea1SDimitry Andric for (Symbol *symbol : ptrList.allPtrs) { 8980fca6ea1SDimitry Andric createSymbolReference(ptrListSym, symbol, offset, 8990fca6ea1SDimitry Andric infoCategoryWriter.catPtrListInfo.relocTemplate); 9000fca6ea1SDimitry Andric offset += target->wordSize; 9010fca6ea1SDimitry Andric } 9020fca6ea1SDimitry Andric 9030fca6ea1SDimitry Andric return ptrListSym; 9040fca6ea1SDimitry Andric } 9050fca6ea1SDimitry Andric 9060fca6ea1SDimitry Andric // Generate a pointer list (including header) and link it into the parent at the 9070fca6ea1SDimitry Andric // specified offset. This is used for instance and class methods and 9080fca6ea1SDimitry Andric // proprieties. 9090fca6ea1SDimitry Andric void ObjcCategoryMerger::emitAndLinkPointerList( 9100fca6ea1SDimitry Andric Defined *parentSym, uint32_t linkAtOffset, 9110fca6ea1SDimitry Andric const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) { 9120fca6ea1SDimitry Andric if (ptrList.allPtrs.empty()) 9130fca6ea1SDimitry Andric return; 9140fca6ea1SDimitry Andric 9150fca6ea1SDimitry Andric assert(ptrList.allPtrs.size() * target->wordSize == 9160fca6ea1SDimitry Andric ptrList.structCount * ptrList.structSize); 9170fca6ea1SDimitry Andric 9180fca6ea1SDimitry Andric // Generate body 9190fca6ea1SDimitry Andric uint32_t bodySize = 9200fca6ea1SDimitry Andric listHeaderLayout.totalSize + (ptrList.structSize * ptrList.structCount); 9210fca6ea1SDimitry Andric llvm::ArrayRef<uint8_t> bodyData = newSectionData(bodySize); 9220fca6ea1SDimitry Andric 9230fca6ea1SDimitry Andric const uint32_t *ptrStructSize = reinterpret_cast<const uint32_t *>( 9240fca6ea1SDimitry Andric bodyData.data() + listHeaderLayout.structSizeOffset); 9250fca6ea1SDimitry Andric const uint32_t *ptrStructCount = reinterpret_cast<const uint32_t *>( 9260fca6ea1SDimitry Andric bodyData.data() + listHeaderLayout.structCountOffset); 9270fca6ea1SDimitry Andric 9280fca6ea1SDimitry Andric *const_cast<uint32_t *>(ptrStructSize) = ptrList.structSize; 9290fca6ea1SDimitry Andric *const_cast<uint32_t *>(ptrStructCount) = ptrList.structCount; 9300fca6ea1SDimitry Andric 9310fca6ea1SDimitry Andric ConcatInputSection *listSec = make<ConcatInputSection>( 9320fca6ea1SDimitry Andric *infoCategoryWriter.catPtrListInfo.inputSection, bodyData, 9330fca6ea1SDimitry Andric infoCategoryWriter.catPtrListInfo.align); 9340fca6ea1SDimitry Andric listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection; 9350fca6ea1SDimitry Andric listSec->live = true; 9360fca6ea1SDimitry Andric 9370fca6ea1SDimitry Andric listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection; 9380fca6ea1SDimitry Andric 9390fca6ea1SDimitry Andric std::string symName = ptrList.categoryPrefix; 9400fca6ea1SDimitry Andric symName += extInfo.baseClassName + "(" + extInfo.mergedContainerName + ")"; 9410fca6ea1SDimitry Andric 9420fca6ea1SDimitry Andric Defined *ptrListSym = make<Defined>( 9430fca6ea1SDimitry Andric newStringData(symName.c_str()), /*file=*/parentSym->getObjectFile(), 9440fca6ea1SDimitry Andric listSec, /*value=*/0, bodyData.size(), /*isWeakDef=*/false, 9450fca6ea1SDimitry Andric /*isExternal=*/false, /*isPrivateExtern=*/false, /*includeInSymtab=*/true, 9460fca6ea1SDimitry Andric /*isReferencedDynamically=*/false, /*noDeadStrip=*/false, 9470fca6ea1SDimitry Andric /*isWeakDefCanBeHidden=*/false); 9480fca6ea1SDimitry Andric 9490fca6ea1SDimitry Andric ptrListSym->used = true; 9500fca6ea1SDimitry Andric parentSym->getObjectFile()->symbols.push_back(ptrListSym); 951*6c4b055cSDimitry Andric addInputSection(listSec); 9520fca6ea1SDimitry Andric 9530fca6ea1SDimitry Andric createSymbolReference(parentSym, ptrListSym, linkAtOffset, 9540fca6ea1SDimitry Andric infoCategoryWriter.catBodyInfo.relocTemplate); 9550fca6ea1SDimitry Andric 9560fca6ea1SDimitry Andric uint32_t offset = listHeaderLayout.totalSize; 9570fca6ea1SDimitry Andric for (Symbol *symbol : ptrList.allPtrs) { 9580fca6ea1SDimitry Andric createSymbolReference(ptrListSym, symbol, offset, 9590fca6ea1SDimitry Andric infoCategoryWriter.catPtrListInfo.relocTemplate); 9600fca6ea1SDimitry Andric offset += target->wordSize; 9610fca6ea1SDimitry Andric } 9620fca6ea1SDimitry Andric } 9630fca6ea1SDimitry Andric 9640fca6ea1SDimitry Andric // This method creates an __objc_catlist ConcatInputSection with a single slot 9650fca6ea1SDimitry Andric Defined * 9660fca6ea1SDimitry Andric ObjcCategoryMerger::emitCatListEntrySec(const std::string &forCategoryName, 9670fca6ea1SDimitry Andric const std::string &forBaseClassName, 9680fca6ea1SDimitry Andric ObjFile *objFile) { 9690fca6ea1SDimitry Andric uint32_t sectionSize = target->wordSize; 9700fca6ea1SDimitry Andric llvm::ArrayRef<uint8_t> bodyData = newSectionData(sectionSize); 9710fca6ea1SDimitry Andric 9720fca6ea1SDimitry Andric ConcatInputSection *newCatList = 9730fca6ea1SDimitry Andric make<ConcatInputSection>(*infoCategoryWriter.catListInfo.inputSection, 9740fca6ea1SDimitry Andric bodyData, infoCategoryWriter.catListInfo.align); 9750fca6ea1SDimitry Andric newCatList->parent = infoCategoryWriter.catListInfo.outputSection; 9760fca6ea1SDimitry Andric newCatList->live = true; 9770fca6ea1SDimitry Andric 9780fca6ea1SDimitry Andric newCatList->parent = infoCategoryWriter.catListInfo.outputSection; 9790fca6ea1SDimitry Andric 9800fca6ea1SDimitry Andric std::string catSymName = "<__objc_catlist slot for merged category "; 9810fca6ea1SDimitry Andric catSymName += forBaseClassName + "(" + forCategoryName + ")>"; 9820fca6ea1SDimitry Andric 9830fca6ea1SDimitry Andric Defined *catListSym = make<Defined>( 9840fca6ea1SDimitry Andric newStringData(catSymName.c_str()), /*file=*/objFile, newCatList, 9850fca6ea1SDimitry Andric /*value=*/0, bodyData.size(), /*isWeakDef=*/false, /*isExternal=*/false, 9860fca6ea1SDimitry Andric /*isPrivateExtern=*/false, /*includeInSymtab=*/false, 9870fca6ea1SDimitry Andric /*isReferencedDynamically=*/false, /*noDeadStrip=*/false, 9880fca6ea1SDimitry Andric /*isWeakDefCanBeHidden=*/false); 9890fca6ea1SDimitry Andric 9900fca6ea1SDimitry Andric catListSym->used = true; 9910fca6ea1SDimitry Andric objFile->symbols.push_back(catListSym); 992*6c4b055cSDimitry Andric addInputSection(newCatList); 9930fca6ea1SDimitry Andric return catListSym; 9940fca6ea1SDimitry Andric } 9950fca6ea1SDimitry Andric 9960fca6ea1SDimitry Andric // Here we generate the main category body and link the name and base class into 9970fca6ea1SDimitry Andric // it. We don't link any other info yet like the protocol and class/instance 9980fca6ea1SDimitry Andric // methods/props. 9990fca6ea1SDimitry Andric Defined *ObjcCategoryMerger::emitCategoryBody(const std::string &name, 10000fca6ea1SDimitry Andric const Defined *nameSym, 10010fca6ea1SDimitry Andric const Symbol *baseClassSym, 10020fca6ea1SDimitry Andric const std::string &baseClassName, 10030fca6ea1SDimitry Andric ObjFile *objFile) { 10040fca6ea1SDimitry Andric llvm::ArrayRef<uint8_t> bodyData = newSectionData(catLayout.totalSize); 10050fca6ea1SDimitry Andric 10060fca6ea1SDimitry Andric uint32_t *ptrSize = (uint32_t *)(const_cast<uint8_t *>(bodyData.data()) + 10070fca6ea1SDimitry Andric catLayout.sizeOffset); 10080fca6ea1SDimitry Andric *ptrSize = catLayout.totalSize; 10090fca6ea1SDimitry Andric 10100fca6ea1SDimitry Andric ConcatInputSection *newBodySec = 10110fca6ea1SDimitry Andric make<ConcatInputSection>(*infoCategoryWriter.catBodyInfo.inputSection, 10120fca6ea1SDimitry Andric bodyData, infoCategoryWriter.catBodyInfo.align); 10130fca6ea1SDimitry Andric newBodySec->parent = infoCategoryWriter.catBodyInfo.outputSection; 10140fca6ea1SDimitry Andric newBodySec->live = true; 10150fca6ea1SDimitry Andric 10160fca6ea1SDimitry Andric std::string symName = 10170fca6ea1SDimitry Andric objc::symbol_names::category + baseClassName + "(" + name + ")"; 10180fca6ea1SDimitry Andric Defined *catBodySym = make<Defined>( 10190fca6ea1SDimitry Andric newStringData(symName.c_str()), /*file=*/objFile, newBodySec, 10200fca6ea1SDimitry Andric /*value=*/0, bodyData.size(), /*isWeakDef=*/false, /*isExternal=*/false, 10210fca6ea1SDimitry Andric /*isPrivateExtern=*/false, /*includeInSymtab=*/true, 10220fca6ea1SDimitry Andric /*isReferencedDynamically=*/false, /*noDeadStrip=*/false, 10230fca6ea1SDimitry Andric /*isWeakDefCanBeHidden=*/false); 10240fca6ea1SDimitry Andric 10250fca6ea1SDimitry Andric catBodySym->used = true; 10260fca6ea1SDimitry Andric objFile->symbols.push_back(catBodySym); 1027*6c4b055cSDimitry Andric addInputSection(newBodySec); 10280fca6ea1SDimitry Andric 10290fca6ea1SDimitry Andric createSymbolReference(catBodySym, nameSym, catLayout.nameOffset, 10300fca6ea1SDimitry Andric infoCategoryWriter.catBodyInfo.relocTemplate); 10310fca6ea1SDimitry Andric 10320fca6ea1SDimitry Andric // Create a reloc to the base class (either external or internal) 10330fca6ea1SDimitry Andric createSymbolReference(catBodySym, baseClassSym, catLayout.klassOffset, 10340fca6ea1SDimitry Andric infoCategoryWriter.catBodyInfo.relocTemplate); 10350fca6ea1SDimitry Andric 10360fca6ea1SDimitry Andric return catBodySym; 10370fca6ea1SDimitry Andric } 10380fca6ea1SDimitry Andric 10390fca6ea1SDimitry Andric // This writes the new category name (for the merged category) into the binary 10400fca6ea1SDimitry Andric // and returns the sybmol for it. 10410fca6ea1SDimitry Andric Defined *ObjcCategoryMerger::emitCategoryName(const std::string &name, 10420fca6ea1SDimitry Andric ObjFile *objFile) { 10430fca6ea1SDimitry Andric StringRef nameStrData = newStringData(name.c_str()); 10440fca6ea1SDimitry Andric // We use +1 below to include the null terminator 10450fca6ea1SDimitry Andric llvm::ArrayRef<uint8_t> nameData( 10460fca6ea1SDimitry Andric reinterpret_cast<const uint8_t *>(nameStrData.data()), 10470fca6ea1SDimitry Andric nameStrData.size() + 1); 10480fca6ea1SDimitry Andric 10490fca6ea1SDimitry Andric auto *parentSection = infoCategoryWriter.catNameInfo.inputSection; 10500fca6ea1SDimitry Andric CStringInputSection *newStringSec = make<CStringInputSection>( 10510fca6ea1SDimitry Andric *infoCategoryWriter.catNameInfo.inputSection, nameData, 10520fca6ea1SDimitry Andric infoCategoryWriter.catNameInfo.align, /*dedupLiterals=*/true); 10530fca6ea1SDimitry Andric 10540fca6ea1SDimitry Andric parentSection->subsections.push_back({0, newStringSec}); 10550fca6ea1SDimitry Andric 10560fca6ea1SDimitry Andric newStringSec->splitIntoPieces(); 10570fca6ea1SDimitry Andric newStringSec->pieces[0].live = true; 10580fca6ea1SDimitry Andric newStringSec->parent = infoCategoryWriter.catNameInfo.outputSection; 10590fca6ea1SDimitry Andric in.cStringSection->addInput(newStringSec); 10600fca6ea1SDimitry Andric assert(newStringSec->pieces.size() == 1); 10610fca6ea1SDimitry Andric 10620fca6ea1SDimitry Andric Defined *catNameSym = make<Defined>( 10630fca6ea1SDimitry Andric "<merged category name>", /*file=*/objFile, newStringSec, 10640fca6ea1SDimitry Andric /*value=*/0, nameData.size(), 10650fca6ea1SDimitry Andric /*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false, 10660fca6ea1SDimitry Andric /*includeInSymtab=*/false, /*isReferencedDynamically=*/false, 10670fca6ea1SDimitry Andric /*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false); 10680fca6ea1SDimitry Andric 10690fca6ea1SDimitry Andric catNameSym->used = true; 10700fca6ea1SDimitry Andric objFile->symbols.push_back(catNameSym); 10710fca6ea1SDimitry Andric return catNameSym; 10720fca6ea1SDimitry Andric } 10730fca6ea1SDimitry Andric 10740fca6ea1SDimitry Andric // This method fully creates a new category from the given ClassExtensionInfo. 10750fca6ea1SDimitry Andric // It creates the category name, body and method/protocol/prop lists and links 10760fca6ea1SDimitry Andric // them all together. Then it creates a new __objc_catlist entry and adds the 10770fca6ea1SDimitry Andric // category to it. Calling this method will fully generate a category which will 10780fca6ea1SDimitry Andric // be available in the final binary. 10790fca6ea1SDimitry Andric Defined *ObjcCategoryMerger::emitCategory(const ClassExtensionInfo &extInfo) { 10800fca6ea1SDimitry Andric Defined *catNameSym = emitCategoryName(extInfo.mergedContainerName, 10810fca6ea1SDimitry Andric extInfo.objFileForMergeData); 10820fca6ea1SDimitry Andric 10830fca6ea1SDimitry Andric Defined *catBodySym = emitCategoryBody( 10840fca6ea1SDimitry Andric extInfo.mergedContainerName, catNameSym, extInfo.baseClass, 10850fca6ea1SDimitry Andric extInfo.baseClassName, extInfo.objFileForMergeData); 10860fca6ea1SDimitry Andric 10870fca6ea1SDimitry Andric Defined *catListSym = 10880fca6ea1SDimitry Andric emitCatListEntrySec(extInfo.mergedContainerName, extInfo.baseClassName, 10890fca6ea1SDimitry Andric extInfo.objFileForMergeData); 10900fca6ea1SDimitry Andric 10910fca6ea1SDimitry Andric // Add the single category body to the category list at the offset 0. 10920fca6ea1SDimitry Andric createSymbolReference(catListSym, catBodySym, /*offset=*/0, 10930fca6ea1SDimitry Andric infoCategoryWriter.catListInfo.relocTemplate); 10940fca6ea1SDimitry Andric 10950fca6ea1SDimitry Andric emitAndLinkPointerList(catBodySym, catLayout.instanceMethodsOffset, extInfo, 10960fca6ea1SDimitry Andric extInfo.instanceMethods); 10970fca6ea1SDimitry Andric 10980fca6ea1SDimitry Andric emitAndLinkPointerList(catBodySym, catLayout.classMethodsOffset, extInfo, 10990fca6ea1SDimitry Andric extInfo.classMethods); 11000fca6ea1SDimitry Andric 11010fca6ea1SDimitry Andric emitAndLinkProtocolList(catBodySym, catLayout.protocolsOffset, extInfo, 11020fca6ea1SDimitry Andric extInfo.protocols); 11030fca6ea1SDimitry Andric 11040fca6ea1SDimitry Andric emitAndLinkPointerList(catBodySym, catLayout.instancePropsOffset, extInfo, 11050fca6ea1SDimitry Andric extInfo.instanceProps); 11060fca6ea1SDimitry Andric 11070fca6ea1SDimitry Andric emitAndLinkPointerList(catBodySym, catLayout.classPropsOffset, extInfo, 11080fca6ea1SDimitry Andric extInfo.classProps); 11090fca6ea1SDimitry Andric 11100fca6ea1SDimitry Andric return catBodySym; 11110fca6ea1SDimitry Andric } 11120fca6ea1SDimitry Andric 11130fca6ea1SDimitry Andric // This method merges all the categories (sharing a base class) into a single 11140fca6ea1SDimitry Andric // category. 11150fca6ea1SDimitry Andric void ObjcCategoryMerger::mergeCategoriesIntoSingleCategory( 11160fca6ea1SDimitry Andric std::vector<InfoInputCategory> &categories) { 11170fca6ea1SDimitry Andric assert(categories.size() > 1 && "Expected at least 2 categories"); 11180fca6ea1SDimitry Andric 11190fca6ea1SDimitry Andric ClassExtensionInfo extInfo(catLayout); 11200fca6ea1SDimitry Andric 11210fca6ea1SDimitry Andric for (auto &catInfo : categories) 11220fca6ea1SDimitry Andric parseCatInfoToExtInfo(catInfo, extInfo); 11230fca6ea1SDimitry Andric 11240fca6ea1SDimitry Andric Defined *newCatDef = emitCategory(extInfo); 11250fca6ea1SDimitry Andric assert(newCatDef && "Failed to create a new category"); 11260fca6ea1SDimitry Andric 11270fca6ea1SDimitry Andric // Suppress unsuded var warning 11280fca6ea1SDimitry Andric (void)newCatDef; 11290fca6ea1SDimitry Andric 11300fca6ea1SDimitry Andric for (auto &catInfo : categories) 11310fca6ea1SDimitry Andric catInfo.wasMerged = true; 11320fca6ea1SDimitry Andric } 11330fca6ea1SDimitry Andric 11340fca6ea1SDimitry Andric void ObjcCategoryMerger::createSymbolReference(Defined *refFrom, 11350fca6ea1SDimitry Andric const Symbol *refTo, 11360fca6ea1SDimitry Andric uint32_t offset, 11370fca6ea1SDimitry Andric const Reloc &relocTemplate) { 11380fca6ea1SDimitry Andric Reloc r = relocTemplate; 11390fca6ea1SDimitry Andric r.offset = offset; 11400fca6ea1SDimitry Andric r.addend = 0; 11410fca6ea1SDimitry Andric r.referent = const_cast<Symbol *>(refTo); 11420fca6ea1SDimitry Andric refFrom->isec()->relocs.push_back(r); 11430fca6ea1SDimitry Andric } 11440fca6ea1SDimitry Andric 11450fca6ea1SDimitry Andric // Get the list of categories in the '__objc_nlcatlist' section. We can't 11460fca6ea1SDimitry Andric // optimize these as they have a '+load' method that has to be called at 11470fca6ea1SDimitry Andric // runtime. 11480fca6ea1SDimitry Andric DenseSet<const Symbol *> ObjcCategoryMerger::collectNlCategories() { 11490fca6ea1SDimitry Andric DenseSet<const Symbol *> nlCategories; 11500fca6ea1SDimitry Andric 11510fca6ea1SDimitry Andric for (InputSection *sec : allInputSections) { 11520fca6ea1SDimitry Andric if (sec->getName() != section_names::objcNonLazyCatList) 11530fca6ea1SDimitry Andric continue; 11540fca6ea1SDimitry Andric 11550fca6ea1SDimitry Andric for (auto &r : sec->relocs) { 11560fca6ea1SDimitry Andric const Symbol *sym = r.referent.dyn_cast<Symbol *>(); 11570fca6ea1SDimitry Andric nlCategories.insert(sym); 11580fca6ea1SDimitry Andric } 11590fca6ea1SDimitry Andric } 11600fca6ea1SDimitry Andric return nlCategories; 11610fca6ea1SDimitry Andric } 11620fca6ea1SDimitry Andric 11630fca6ea1SDimitry Andric void ObjcCategoryMerger::collectAndValidateCategoriesData() { 11640fca6ea1SDimitry Andric auto nlCategories = collectNlCategories(); 11650fca6ea1SDimitry Andric 11660fca6ea1SDimitry Andric for (InputSection *sec : allInputSections) { 11670fca6ea1SDimitry Andric if (sec->getName() != section_names::objcCatList) 11680fca6ea1SDimitry Andric continue; 11690fca6ea1SDimitry Andric ConcatInputSection *catListCisec = dyn_cast<ConcatInputSection>(sec); 11700fca6ea1SDimitry Andric assert(catListCisec && 11710fca6ea1SDimitry Andric "__objc_catList InputSection is not a ConcatInputSection"); 11720fca6ea1SDimitry Andric 11730fca6ea1SDimitry Andric for (uint32_t off = 0; off < catListCisec->getSize(); 11740fca6ea1SDimitry Andric off += target->wordSize) { 11750fca6ea1SDimitry Andric Defined *categorySym = tryGetDefinedAtIsecOffset(catListCisec, off); 11760fca6ea1SDimitry Andric assert(categorySym && 11770fca6ea1SDimitry Andric "Failed to get a valid category at __objc_catlit offset"); 11780fca6ea1SDimitry Andric 11790fca6ea1SDimitry Andric if (nlCategories.count(categorySym)) 11800fca6ea1SDimitry Andric continue; 11810fca6ea1SDimitry Andric 11820fca6ea1SDimitry Andric auto *catBodyIsec = dyn_cast<ConcatInputSection>(categorySym->isec()); 11830fca6ea1SDimitry Andric assert(catBodyIsec && 11840fca6ea1SDimitry Andric "Category data section is not an ConcatInputSection"); 11850fca6ea1SDimitry Andric 11860fca6ea1SDimitry Andric SourceLanguage eLang = SourceLanguage::Unknown; 11870fca6ea1SDimitry Andric if (categorySym->getName().starts_with(objc::symbol_names::category)) 11880fca6ea1SDimitry Andric eLang = SourceLanguage::ObjC; 11890fca6ea1SDimitry Andric else if (categorySym->getName().starts_with( 11900fca6ea1SDimitry Andric objc::symbol_names::swift_objc_category)) 11910fca6ea1SDimitry Andric eLang = SourceLanguage::Swift; 11920fca6ea1SDimitry Andric else 11930fca6ea1SDimitry Andric llvm_unreachable("Unexpected category symbol name"); 11940fca6ea1SDimitry Andric 11950fca6ea1SDimitry Andric InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off, eLang}; 11960fca6ea1SDimitry Andric 11970fca6ea1SDimitry Andric // Check that the category has a reloc at 'klassOffset' (which is 11980fca6ea1SDimitry Andric // a pointer to the class symbol) 11990fca6ea1SDimitry Andric 12000fca6ea1SDimitry Andric Symbol *classSym = 12010fca6ea1SDimitry Andric tryGetSymbolAtIsecOffset(catBodyIsec, catLayout.klassOffset); 12020fca6ea1SDimitry Andric assert(classSym && "Category does not have a valid base class"); 12030fca6ea1SDimitry Andric 12040fca6ea1SDimitry Andric categoryMap[classSym].push_back(catInputInfo); 12050fca6ea1SDimitry Andric 12060fca6ea1SDimitry Andric collectCategoryWriterInfoFromCategory(catInputInfo); 12070fca6ea1SDimitry Andric } 12080fca6ea1SDimitry Andric } 12090fca6ea1SDimitry Andric } 12100fca6ea1SDimitry Andric 12110fca6ea1SDimitry Andric // In the input we have multiple __objc_catlist InputSection, each of which may 12120fca6ea1SDimitry Andric // contain links to multiple categories. Of these categories, we will merge (and 12130fca6ea1SDimitry Andric // erase) only some. There will be some categories that will remain untouched 12140fca6ea1SDimitry Andric // (not erased). For these not erased categories, we generate new __objc_catlist 12150fca6ea1SDimitry Andric // entries since the parent __objc_catlist entry will be erased 12160fca6ea1SDimitry Andric void ObjcCategoryMerger::generateCatListForNonErasedCategories( 12170fca6ea1SDimitry Andric const MapVector<ConcatInputSection *, std::set<uint64_t>> 12180fca6ea1SDimitry Andric catListToErasedOffsets) { 12190fca6ea1SDimitry Andric 12200fca6ea1SDimitry Andric // Go through all offsets of all __objc_catlist's that we process and if there 12210fca6ea1SDimitry Andric // are categories that we didn't process - generate a new __objc_catlist for 12220fca6ea1SDimitry Andric // each. 12230fca6ea1SDimitry Andric for (auto &mapEntry : catListToErasedOffsets) { 12240fca6ea1SDimitry Andric ConcatInputSection *catListIsec = mapEntry.first; 12250fca6ea1SDimitry Andric for (uint32_t catListIsecOffset = 0; 12260fca6ea1SDimitry Andric catListIsecOffset < catListIsec->data.size(); 12270fca6ea1SDimitry Andric catListIsecOffset += target->wordSize) { 12280fca6ea1SDimitry Andric // This slot was erased, we can just skip it 12290fca6ea1SDimitry Andric if (mapEntry.second.count(catListIsecOffset)) 12300fca6ea1SDimitry Andric continue; 12310fca6ea1SDimitry Andric 12320fca6ea1SDimitry Andric Defined *nonErasedCatBody = 12330fca6ea1SDimitry Andric tryGetDefinedAtIsecOffset(catListIsec, catListIsecOffset); 12340fca6ea1SDimitry Andric assert(nonErasedCatBody && "Failed to relocate non-deleted category"); 12350fca6ea1SDimitry Andric 12360fca6ea1SDimitry Andric // Allocate data for the new __objc_catlist slot 12370fca6ea1SDimitry Andric llvm::ArrayRef<uint8_t> bodyData = newSectionData(target->wordSize); 12380fca6ea1SDimitry Andric 12390fca6ea1SDimitry Andric // We mark the __objc_catlist slot as belonging to the same file as the 12400fca6ea1SDimitry Andric // category 12410fca6ea1SDimitry Andric ObjFile *objFile = dyn_cast<ObjFile>(nonErasedCatBody->getFile()); 12420fca6ea1SDimitry Andric 12430fca6ea1SDimitry Andric ConcatInputSection *listSec = make<ConcatInputSection>( 12440fca6ea1SDimitry Andric *infoCategoryWriter.catListInfo.inputSection, bodyData, 12450fca6ea1SDimitry Andric infoCategoryWriter.catListInfo.align); 12460fca6ea1SDimitry Andric listSec->parent = infoCategoryWriter.catListInfo.outputSection; 12470fca6ea1SDimitry Andric listSec->live = true; 12480fca6ea1SDimitry Andric 12490fca6ea1SDimitry Andric std::string slotSymName = "<__objc_catlist slot for category "; 12500fca6ea1SDimitry Andric slotSymName += nonErasedCatBody->getName(); 12510fca6ea1SDimitry Andric slotSymName += ">"; 12520fca6ea1SDimitry Andric 12530fca6ea1SDimitry Andric Defined *catListSlotSym = make<Defined>( 12540fca6ea1SDimitry Andric newStringData(slotSymName.c_str()), /*file=*/objFile, listSec, 12550fca6ea1SDimitry Andric /*value=*/0, bodyData.size(), 12560fca6ea1SDimitry Andric /*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false, 12570fca6ea1SDimitry Andric /*includeInSymtab=*/false, /*isReferencedDynamically=*/false, 12580fca6ea1SDimitry Andric /*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false); 12590fca6ea1SDimitry Andric 12600fca6ea1SDimitry Andric catListSlotSym->used = true; 12610fca6ea1SDimitry Andric objFile->symbols.push_back(catListSlotSym); 1262*6c4b055cSDimitry Andric addInputSection(listSec); 12630fca6ea1SDimitry Andric 12640fca6ea1SDimitry Andric // Now link the category body into the newly created slot 12650fca6ea1SDimitry Andric createSymbolReference(catListSlotSym, nonErasedCatBody, 0, 12660fca6ea1SDimitry Andric infoCategoryWriter.catListInfo.relocTemplate); 12670fca6ea1SDimitry Andric } 12680fca6ea1SDimitry Andric } 12690fca6ea1SDimitry Andric } 12700fca6ea1SDimitry Andric 12710fca6ea1SDimitry Andric void ObjcCategoryMerger::eraseISec(ConcatInputSection *isec) { 12720fca6ea1SDimitry Andric isec->live = false; 12730fca6ea1SDimitry Andric for (auto &sym : isec->symbols) 12740fca6ea1SDimitry Andric sym->used = false; 12750fca6ea1SDimitry Andric } 12760fca6ea1SDimitry Andric 12770fca6ea1SDimitry Andric // This fully erases the merged categories, including their body, their names, 12780fca6ea1SDimitry Andric // their method/protocol/prop lists and the __objc_catlist entries that link to 12790fca6ea1SDimitry Andric // them. 12800fca6ea1SDimitry Andric void ObjcCategoryMerger::eraseMergedCategories() { 12810fca6ea1SDimitry Andric // Map of InputSection to a set of offsets of the categories that were merged 12820fca6ea1SDimitry Andric MapVector<ConcatInputSection *, std::set<uint64_t>> catListToErasedOffsets; 12830fca6ea1SDimitry Andric 12840fca6ea1SDimitry Andric for (auto &mapEntry : categoryMap) { 12850fca6ea1SDimitry Andric for (InfoInputCategory &catInfo : mapEntry.second) { 12860fca6ea1SDimitry Andric if (catInfo.wasMerged) { 12870fca6ea1SDimitry Andric eraseISec(catInfo.catListIsec); 12880fca6ea1SDimitry Andric catListToErasedOffsets[catInfo.catListIsec].insert( 12890fca6ea1SDimitry Andric catInfo.offCatListIsec); 12900fca6ea1SDimitry Andric } 12910fca6ea1SDimitry Andric } 12920fca6ea1SDimitry Andric } 12930fca6ea1SDimitry Andric 12940fca6ea1SDimitry Andric // If there were categories that we did not erase, we need to generate a new 12950fca6ea1SDimitry Andric // __objc_catList that contains only the un-merged categories, and get rid of 12960fca6ea1SDimitry Andric // the references to the ones we merged. 12970fca6ea1SDimitry Andric generateCatListForNonErasedCategories(catListToErasedOffsets); 12980fca6ea1SDimitry Andric 12990fca6ea1SDimitry Andric // Erase the old method lists & names of the categories that were merged 13000fca6ea1SDimitry Andric for (auto &mapEntry : categoryMap) { 13010fca6ea1SDimitry Andric for (InfoInputCategory &catInfo : mapEntry.second) { 13020fca6ea1SDimitry Andric if (!catInfo.wasMerged) 13030fca6ea1SDimitry Andric continue; 13040fca6ea1SDimitry Andric 13050fca6ea1SDimitry Andric eraseISec(catInfo.catBodyIsec); 13060fca6ea1SDimitry Andric 13070fca6ea1SDimitry Andric // We can't erase 'catLayout.nameOffset' for either Swift or ObjC 13080fca6ea1SDimitry Andric // categories because the name will sometimes also be used for other 13090fca6ea1SDimitry Andric // purposes. 13100fca6ea1SDimitry Andric // For Swift, see usages of 'l_.str.11.SimpleClass' in 13110fca6ea1SDimitry Andric // objc-category-merging-swift.s 13120fca6ea1SDimitry Andric // For ObjC, see usages of 'l_OBJC_CLASS_NAME_.1' in 13130fca6ea1SDimitry Andric // objc-category-merging-erase-objc-name-test.s 13140fca6ea1SDimitry Andric // TODO: handle the above in a smarter way 13150fca6ea1SDimitry Andric 13160fca6ea1SDimitry Andric tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, 13170fca6ea1SDimitry Andric catLayout.instanceMethodsOffset); 13180fca6ea1SDimitry Andric tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, 13190fca6ea1SDimitry Andric catLayout.classMethodsOffset); 13200fca6ea1SDimitry Andric tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, 13210fca6ea1SDimitry Andric catLayout.protocolsOffset); 13220fca6ea1SDimitry Andric tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, 13230fca6ea1SDimitry Andric catLayout.classPropsOffset); 13240fca6ea1SDimitry Andric tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, 13250fca6ea1SDimitry Andric catLayout.instancePropsOffset); 13260fca6ea1SDimitry Andric } 13270fca6ea1SDimitry Andric } 13280fca6ea1SDimitry Andric } 13290fca6ea1SDimitry Andric 13300fca6ea1SDimitry Andric void ObjcCategoryMerger::doMerge() { 13310fca6ea1SDimitry Andric collectAndValidateCategoriesData(); 13320fca6ea1SDimitry Andric 13330fca6ea1SDimitry Andric for (auto &[baseClass, catInfos] : categoryMap) { 13340fca6ea1SDimitry Andric if (auto *baseClassDef = dyn_cast<Defined>(baseClass)) { 13350fca6ea1SDimitry Andric // Merge all categories into the base class 13360fca6ea1SDimitry Andric mergeCategoriesIntoBaseClass(baseClassDef, catInfos); 13370fca6ea1SDimitry Andric } else if (catInfos.size() > 1) { 13380fca6ea1SDimitry Andric // Merge all categories into a new, single category 13390fca6ea1SDimitry Andric mergeCategoriesIntoSingleCategory(catInfos); 13400fca6ea1SDimitry Andric } 13410fca6ea1SDimitry Andric } 13420fca6ea1SDimitry Andric 13430fca6ea1SDimitry Andric // Erase all categories that were merged 13440fca6ea1SDimitry Andric eraseMergedCategories(); 13450fca6ea1SDimitry Andric } 13460fca6ea1SDimitry Andric 13470fca6ea1SDimitry Andric void ObjcCategoryMerger::doCleanup() { generatedSectionData.clear(); } 13480fca6ea1SDimitry Andric 13490fca6ea1SDimitry Andric StringRef ObjcCategoryMerger::newStringData(const char *str) { 13500fca6ea1SDimitry Andric uint32_t len = strlen(str); 13510fca6ea1SDimitry Andric uint32_t bufSize = len + 1; 13520fca6ea1SDimitry Andric SmallVector<uint8_t> &data = newSectionData(bufSize); 13530fca6ea1SDimitry Andric char *strData = reinterpret_cast<char *>(data.data()); 13540fca6ea1SDimitry Andric // Copy the string chars and null-terminator 13550fca6ea1SDimitry Andric memcpy(strData, str, bufSize); 13560fca6ea1SDimitry Andric return StringRef(strData, len); 13570fca6ea1SDimitry Andric } 13580fca6ea1SDimitry Andric 13590fca6ea1SDimitry Andric SmallVector<uint8_t> &ObjcCategoryMerger::newSectionData(uint32_t size) { 13600fca6ea1SDimitry Andric generatedSectionData.push_back( 13610fca6ea1SDimitry Andric std::make_unique<SmallVector<uint8_t>>(size, 0)); 13620fca6ea1SDimitry Andric return *generatedSectionData.back(); 13630fca6ea1SDimitry Andric } 13640fca6ea1SDimitry Andric 13650fca6ea1SDimitry Andric } // namespace 13660fca6ea1SDimitry Andric 13670fca6ea1SDimitry Andric void objc::mergeCategories() { 13680fca6ea1SDimitry Andric TimeTraceScope timeScope("ObjcCategoryMerger"); 13690fca6ea1SDimitry Andric 13700fca6ea1SDimitry Andric ObjcCategoryMerger merger(inputSections); 13710fca6ea1SDimitry Andric merger.doMerge(); 13720fca6ea1SDimitry Andric } 13730fca6ea1SDimitry Andric 13740fca6ea1SDimitry Andric void objc::doCleanup() { ObjcCategoryMerger::doCleanup(); } 13750fca6ea1SDimitry Andric 13760fca6ea1SDimitry Andric ObjcCategoryMerger::SourceLanguage 13770fca6ea1SDimitry Andric ObjcCategoryMerger::getClassSymSourceLang(const Defined *classSym) { 13780fca6ea1SDimitry Andric if (classSym->getName().starts_with(objc::symbol_names::swift_objc_klass)) 13790fca6ea1SDimitry Andric return SourceLanguage::Swift; 13800fca6ea1SDimitry Andric 13810fca6ea1SDimitry Andric // If the symbol name matches the ObjC prefix, we don't necessarely know this 13820fca6ea1SDimitry Andric // comes from ObjC, since Swift creates ObjC-like alias symbols for some Swift 13830fca6ea1SDimitry Andric // classes. Ex: 13840fca6ea1SDimitry Andric // .globl _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass 13850fca6ea1SDimitry Andric // .private_extern _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass 13860fca6ea1SDimitry Andric // .set _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass, _$s11MyTestClassAACN 13870fca6ea1SDimitry Andric // 13880fca6ea1SDimitry Andric // So we scan for symbols with the same address and check for the Swift class 13890fca6ea1SDimitry Andric if (classSym->getName().starts_with(objc::symbol_names::klass)) { 13900fca6ea1SDimitry Andric for (auto &sym : classSym->originalIsec->symbols) 13910fca6ea1SDimitry Andric if (sym->value == classSym->value) 13920fca6ea1SDimitry Andric if (sym->getName().starts_with(objc::symbol_names::swift_objc_klass)) 13930fca6ea1SDimitry Andric return SourceLanguage::Swift; 13940fca6ea1SDimitry Andric return SourceLanguage::ObjC; 13950fca6ea1SDimitry Andric } 13960fca6ea1SDimitry Andric 13970fca6ea1SDimitry Andric llvm_unreachable("Unexpected class symbol name during category merging"); 13980fca6ea1SDimitry Andric } 13990fca6ea1SDimitry Andric void ObjcCategoryMerger::mergeCategoriesIntoBaseClass( 14000fca6ea1SDimitry Andric const Defined *baseClass, std::vector<InfoInputCategory> &categories) { 14010fca6ea1SDimitry Andric assert(categories.size() >= 1 && "Expected at least one category to merge"); 14020fca6ea1SDimitry Andric 14030fca6ea1SDimitry Andric // Collect all the info from the categories 14040fca6ea1SDimitry Andric ClassExtensionInfo extInfo(catLayout); 14050fca6ea1SDimitry Andric extInfo.baseClass = baseClass; 14060fca6ea1SDimitry Andric extInfo.baseClassSourceLanguage = getClassSymSourceLang(baseClass); 14070fca6ea1SDimitry Andric 14080fca6ea1SDimitry Andric for (auto &catInfo : categories) { 14090fca6ea1SDimitry Andric parseCatInfoToExtInfo(catInfo, extInfo); 14100fca6ea1SDimitry Andric } 14110fca6ea1SDimitry Andric 14120fca6ea1SDimitry Andric // Get metadata for the base class 14130fca6ea1SDimitry Andric Defined *metaRo = getClassRo(baseClass, /*getMetaRo=*/true); 14140fca6ea1SDimitry Andric ConcatInputSection *metaIsec = dyn_cast<ConcatInputSection>(metaRo->isec()); 14150fca6ea1SDimitry Andric Defined *classRo = getClassRo(baseClass, /*getMetaRo=*/false); 14160fca6ea1SDimitry Andric ConcatInputSection *classIsec = dyn_cast<ConcatInputSection>(classRo->isec()); 14170fca6ea1SDimitry Andric 14180fca6ea1SDimitry Andric // Now collect the info from the base class from the various lists in the 14190fca6ea1SDimitry Andric // class metadata 14200fca6ea1SDimitry Andric 14210fca6ea1SDimitry Andric // Protocol lists are a special case - the same protocol list is in classRo 14220fca6ea1SDimitry Andric // and metaRo, so we only need to parse it once 14230fca6ea1SDimitry Andric parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset, 14240fca6ea1SDimitry Andric extInfo.protocols, extInfo.baseClassSourceLanguage); 14250fca6ea1SDimitry Andric 14260fca6ea1SDimitry Andric // Check that the classRo and metaRo protocol lists are identical 14270fca6ea1SDimitry Andric assert(parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset, 14280fca6ea1SDimitry Andric extInfo.baseClassSourceLanguage) == 14290fca6ea1SDimitry Andric parseProtocolListInfo(metaIsec, roClassLayout.baseProtocolsOffset, 14300fca6ea1SDimitry Andric extInfo.baseClassSourceLanguage) && 14310fca6ea1SDimitry Andric "Category merger expects classRo and metaRo to have the same protocol " 14320fca6ea1SDimitry Andric "list"); 14330fca6ea1SDimitry Andric 14340fca6ea1SDimitry Andric parsePointerListInfo(metaIsec, roClassLayout.baseMethodsOffset, 14350fca6ea1SDimitry Andric extInfo.classMethods); 14360fca6ea1SDimitry Andric parsePointerListInfo(classIsec, roClassLayout.baseMethodsOffset, 14370fca6ea1SDimitry Andric extInfo.instanceMethods); 14380fca6ea1SDimitry Andric 14390fca6ea1SDimitry Andric parsePointerListInfo(metaIsec, roClassLayout.basePropertiesOffset, 14400fca6ea1SDimitry Andric extInfo.classProps); 14410fca6ea1SDimitry Andric parsePointerListInfo(classIsec, roClassLayout.basePropertiesOffset, 14420fca6ea1SDimitry Andric extInfo.instanceProps); 14430fca6ea1SDimitry Andric 14440fca6ea1SDimitry Andric // Erase the old lists - these will be generated and replaced 14450fca6ea1SDimitry Andric eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseMethodsOffset); 14460fca6ea1SDimitry Andric eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseProtocolsOffset); 14470fca6ea1SDimitry Andric eraseSymbolAtIsecOffset(metaIsec, roClassLayout.basePropertiesOffset); 14480fca6ea1SDimitry Andric eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseMethodsOffset); 14490fca6ea1SDimitry Andric eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseProtocolsOffset); 14500fca6ea1SDimitry Andric eraseSymbolAtIsecOffset(classIsec, roClassLayout.basePropertiesOffset); 14510fca6ea1SDimitry Andric 14520fca6ea1SDimitry Andric // Emit the newly merged lists - first into the meta RO then into the class RO 14530fca6ea1SDimitry Andric // First we emit and link the protocol list into the meta RO. Then we link it 14540fca6ea1SDimitry Andric // in the classRo as well (they're supposed to be identical) 14550fca6ea1SDimitry Andric if (Defined *protoListSym = 14560fca6ea1SDimitry Andric emitAndLinkProtocolList(metaRo, roClassLayout.baseProtocolsOffset, 14570fca6ea1SDimitry Andric extInfo, extInfo.protocols)) { 14580fca6ea1SDimitry Andric createSymbolReference(classRo, protoListSym, 14590fca6ea1SDimitry Andric roClassLayout.baseProtocolsOffset, 14600fca6ea1SDimitry Andric infoCategoryWriter.catBodyInfo.relocTemplate); 14610fca6ea1SDimitry Andric } 14620fca6ea1SDimitry Andric 14630fca6ea1SDimitry Andric emitAndLinkPointerList(metaRo, roClassLayout.baseMethodsOffset, extInfo, 14640fca6ea1SDimitry Andric extInfo.classMethods); 14650fca6ea1SDimitry Andric emitAndLinkPointerList(classRo, roClassLayout.baseMethodsOffset, extInfo, 14660fca6ea1SDimitry Andric extInfo.instanceMethods); 14670fca6ea1SDimitry Andric 14680fca6ea1SDimitry Andric emitAndLinkPointerList(metaRo, roClassLayout.basePropertiesOffset, extInfo, 14690fca6ea1SDimitry Andric extInfo.classProps); 14700fca6ea1SDimitry Andric 14710fca6ea1SDimitry Andric emitAndLinkPointerList(classRo, roClassLayout.basePropertiesOffset, extInfo, 14720fca6ea1SDimitry Andric extInfo.instanceProps); 14730fca6ea1SDimitry Andric 14740fca6ea1SDimitry Andric // Mark all the categories as merged - this will be used to erase them later 14750fca6ea1SDimitry Andric for (auto &catInfo : categories) 14760fca6ea1SDimitry Andric catInfo.wasMerged = true; 14770fca6ea1SDimitry Andric } 14780fca6ea1SDimitry Andric 14790fca6ea1SDimitry Andric // Erase the symbol at a given offset in an InputSection 14800fca6ea1SDimitry Andric void ObjcCategoryMerger::eraseSymbolAtIsecOffset(ConcatInputSection *isec, 14810fca6ea1SDimitry Andric uint32_t offset) { 14820fca6ea1SDimitry Andric Defined *sym = tryGetDefinedAtIsecOffset(isec, offset); 14830fca6ea1SDimitry Andric if (!sym) 14840fca6ea1SDimitry Andric return; 14850fca6ea1SDimitry Andric 14860fca6ea1SDimitry Andric // Remove the symbol from isec->symbols 14870fca6ea1SDimitry Andric assert(isa<Defined>(sym) && "Can only erase a Defined"); 14880fca6ea1SDimitry Andric llvm::erase(isec->symbols, sym); 14890fca6ea1SDimitry Andric 14900fca6ea1SDimitry Andric // Remove the relocs that refer to this symbol 14910fca6ea1SDimitry Andric auto removeAtOff = [offset](Reloc const &r) { return r.offset == offset; }; 14920fca6ea1SDimitry Andric llvm::erase_if(isec->relocs, removeAtOff); 14930fca6ea1SDimitry Andric 14940fca6ea1SDimitry Andric // Now, if the symbol fully occupies a ConcatInputSection, we can also erase 14950fca6ea1SDimitry Andric // the whole ConcatInputSection 14960fca6ea1SDimitry Andric if (ConcatInputSection *cisec = dyn_cast<ConcatInputSection>(sym->isec())) 14970fca6ea1SDimitry Andric if (cisec->data.size() == sym->size) 14980fca6ea1SDimitry Andric eraseISec(cisec); 14990fca6ea1SDimitry Andric } 1500