xref: /llvm-project/lld/MachO/ObjC.cpp (revision 5d2434166787e36312f037538119d3820c5af5e6)
1 //===- ObjC.cpp -----------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ObjC.h"
10 #include "ConcatOutputSection.h"
11 #include "InputFiles.h"
12 #include "InputSection.h"
13 #include "Layout.h"
14 #include "OutputSegment.h"
15 #include "SyntheticSections.h"
16 #include "Target.h"
17 
18 #include "lld/Common/ErrorHandler.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/BinaryFormat/MachO.h"
21 #include "llvm/Bitcode/BitcodeReader.h"
22 #include "llvm/Support/TimeProfiler.h"
23 
24 using namespace llvm;
25 using namespace llvm::MachO;
26 using namespace lld;
27 using namespace lld::macho;
28 
29 template <class LP> static bool objectHasObjCSection(MemoryBufferRef mb) {
30   using SectionHeader = typename LP::section;
31 
32   auto *hdr =
33       reinterpret_cast<const typename LP::mach_header *>(mb.getBufferStart());
34   if (hdr->magic != LP::magic)
35     return false;
36 
37   if (const auto *c =
38           findCommand<typename LP::segment_command>(hdr, LP::segmentLCType)) {
39     auto sectionHeaders = ArrayRef<SectionHeader>{
40         reinterpret_cast<const SectionHeader *>(c + 1), c->nsects};
41     for (const SectionHeader &secHead : sectionHeaders) {
42       StringRef sectname(secHead.sectname,
43                          strnlen(secHead.sectname, sizeof(secHead.sectname)));
44       StringRef segname(secHead.segname,
45                         strnlen(secHead.segname, sizeof(secHead.segname)));
46       if ((segname == segment_names::data &&
47            sectname == section_names::objcCatList) ||
48           (segname == segment_names::text &&
49            sectname.starts_with(section_names::swift))) {
50         return true;
51       }
52     }
53   }
54   return false;
55 }
56 
57 static bool objectHasObjCSection(MemoryBufferRef mb) {
58   if (target->wordSize == 8)
59     return ::objectHasObjCSection<LP64>(mb);
60   else
61     return ::objectHasObjCSection<ILP32>(mb);
62 }
63 
64 bool macho::hasObjCSection(MemoryBufferRef mb) {
65   switch (identify_magic(mb.getBuffer())) {
66   case file_magic::macho_object:
67     return objectHasObjCSection(mb);
68   case file_magic::bitcode:
69     return check(isBitcodeContainingObjCCategory(mb));
70   default:
71     return false;
72   }
73 }
74 
75 namespace {
76 
77 #define FOR_EACH_CATEGORY_FIELD(DO)                                            \
78   DO(Ptr, name)                                                                \
79   DO(Ptr, klass)                                                               \
80   DO(Ptr, instanceMethods)                                                     \
81   DO(Ptr, classMethods)                                                        \
82   DO(Ptr, protocols)                                                           \
83   DO(Ptr, instanceProps)                                                       \
84   DO(Ptr, classProps)                                                          \
85   DO(uint32_t, size)
86 
87 CREATE_LAYOUT_CLASS(Category, FOR_EACH_CATEGORY_FIELD);
88 
89 #undef FOR_EACH_CATEGORY_FIELD
90 
91 #define FOR_EACH_CLASS_FIELD(DO)                                               \
92   DO(Ptr, metaClass)                                                           \
93   DO(Ptr, superClass)                                                          \
94   DO(Ptr, methodCache)                                                         \
95   DO(Ptr, vtable)                                                              \
96   DO(Ptr, roData)
97 
98 CREATE_LAYOUT_CLASS(Class, FOR_EACH_CLASS_FIELD);
99 
100 #undef FOR_EACH_CLASS_FIELD
101 
102 #define FOR_EACH_RO_CLASS_FIELD(DO)                                            \
103   DO(uint32_t, flags)                                                          \
104   DO(uint32_t, instanceStart)                                                  \
105   DO(Ptr, instanceSize)                                                        \
106   DO(Ptr, ivarLayout)                                                          \
107   DO(Ptr, name)                                                                \
108   DO(Ptr, baseMethods)                                                         \
109   DO(Ptr, baseProtocols)                                                       \
110   DO(Ptr, ivars)                                                               \
111   DO(Ptr, weakIvarLayout)                                                      \
112   DO(Ptr, baseProperties)
113 
114 CREATE_LAYOUT_CLASS(ROClass, FOR_EACH_RO_CLASS_FIELD);
115 
116 #undef FOR_EACH_RO_CLASS_FIELD
117 
118 #define FOR_EACH_LIST_HEADER(DO)                                               \
119   DO(uint32_t, structSize)                                                     \
120   DO(uint32_t, structCount)
121 
122 CREATE_LAYOUT_CLASS(ListHeader, FOR_EACH_LIST_HEADER);
123 
124 #undef FOR_EACH_LIST_HEADER
125 
126 #define FOR_EACH_PROTOCOL_LIST_HEADER(DO) DO(Ptr, protocolCount)
127 
128 CREATE_LAYOUT_CLASS(ProtocolListHeader, FOR_EACH_PROTOCOL_LIST_HEADER);
129 
130 #undef FOR_EACH_PROTOCOL_LIST_HEADER
131 
132 #define FOR_EACH_METHOD(DO)                                                    \
133   DO(Ptr, name)                                                                \
134   DO(Ptr, type)                                                                \
135   DO(Ptr, impl)
136 
137 CREATE_LAYOUT_CLASS(Method, FOR_EACH_METHOD);
138 
139 #undef FOR_EACH_METHOD
140 
141 enum MethodContainerKind {
142   MCK_Class,
143   MCK_Category,
144 };
145 
146 struct MethodContainer {
147   MethodContainerKind kind;
148   const ConcatInputSection *isec;
149 };
150 
151 enum MethodKind {
152   MK_Instance,
153   MK_Static,
154 };
155 
156 struct ObjcClass {
157   DenseMap<CachedHashStringRef, MethodContainer> instanceMethods;
158   DenseMap<CachedHashStringRef, MethodContainer> classMethods;
159 };
160 
161 } // namespace
162 
163 class ObjcCategoryChecker {
164 public:
165   ObjcCategoryChecker();
166   void parseCategory(const ConcatInputSection *catListIsec);
167 
168 private:
169   void parseClass(const Defined *classSym);
170   void parseMethods(const ConcatInputSection *methodsIsec,
171                     const Symbol *methodContainer,
172                     const ConcatInputSection *containerIsec,
173                     MethodContainerKind, MethodKind);
174 
175   CategoryLayout catLayout;
176   ClassLayout classLayout;
177   ROClassLayout roClassLayout;
178   ListHeaderLayout listHeaderLayout;
179   MethodLayout methodLayout;
180 
181   DenseMap<const Symbol *, ObjcClass> classMap;
182 };
183 
184 ObjcCategoryChecker::ObjcCategoryChecker()
185     : catLayout(target->wordSize), classLayout(target->wordSize),
186       roClassLayout(target->wordSize), listHeaderLayout(target->wordSize),
187       methodLayout(target->wordSize) {}
188 
189 void ObjcCategoryChecker::parseMethods(const ConcatInputSection *methodsIsec,
190                                        const Symbol *methodContainerSym,
191                                        const ConcatInputSection *containerIsec,
192                                        MethodContainerKind mcKind,
193                                        MethodKind mKind) {
194   ObjcClass &klass = classMap[methodContainerSym];
195   for (const Reloc &r : methodsIsec->relocs) {
196     if ((r.offset - listHeaderLayout.totalSize) % methodLayout.totalSize !=
197         methodLayout.nameOffset)
198       continue;
199 
200     CachedHashStringRef methodName(r.getReferentString());
201     // +load methods are special: all implementations are called by the runtime
202     // even if they are part of the same class. Thus there is no need to check
203     // for duplicates.
204     // NOTE: Instead of specifically checking for this method name, ld64 simply
205     // checks whether a class / category is present in __objc_nlclslist /
206     // __objc_nlcatlist respectively. This will be the case if the class /
207     // category has a +load method. It skips optimizing the categories if there
208     // are multiple +load methods. Since it does dupe checking as part of the
209     // optimization process, this avoids spurious dupe messages around +load,
210     // but it also means that legit dupe issues for other methods are ignored.
211     if (mKind == MK_Static && methodName.val() == "load")
212       continue;
213 
214     auto &methodMap =
215         mKind == MK_Instance ? klass.instanceMethods : klass.classMethods;
216     if (methodMap
217             .try_emplace(methodName, MethodContainer{mcKind, containerIsec})
218             .second)
219       continue;
220 
221     // We have a duplicate; generate a warning message.
222     const auto &mc = methodMap.lookup(methodName);
223     const Reloc *nameReloc = nullptr;
224     if (mc.kind == MCK_Category) {
225       nameReloc = mc.isec->getRelocAt(catLayout.nameOffset);
226     } else {
227       assert(mc.kind == MCK_Class);
228       const auto *roIsec = mc.isec->getRelocAt(classLayout.roDataOffset)
229                          ->getReferentInputSection();
230       nameReloc = roIsec->getRelocAt(roClassLayout.nameOffset);
231     }
232     StringRef containerName = nameReloc->getReferentString();
233     StringRef methPrefix = mKind == MK_Instance ? "-" : "+";
234 
235     // We should only ever encounter collisions when parsing category methods
236     // (since the Class struct is parsed before any of its categories).
237     assert(mcKind == MCK_Category);
238     StringRef newCatName =
239         containerIsec->getRelocAt(catLayout.nameOffset)->getReferentString();
240 
241     auto formatObjAndSrcFileName = [](const InputSection *section) {
242       lld::macho::InputFile *inputFile = section->getFile();
243       std::string result = toString(inputFile);
244 
245       auto objFile = dyn_cast_or_null<ObjFile>(inputFile);
246       if (objFile && objFile->compileUnit)
247         result += " (" + objFile->sourceFile() + ")";
248 
249       return result;
250     };
251 
252     StringRef containerType = mc.kind == MCK_Category ? "category" : "class";
253     warn("method '" + methPrefix + methodName.val() +
254          "' has conflicting definitions:\n>>> defined in category " +
255          newCatName + " from " + formatObjAndSrcFileName(containerIsec) +
256          "\n>>> defined in " + containerType + " " + containerName + " from " +
257          formatObjAndSrcFileName(mc.isec));
258   }
259 }
260 
261 void ObjcCategoryChecker::parseCategory(const ConcatInputSection *catIsec) {
262   auto *classReloc = catIsec->getRelocAt(catLayout.klassOffset);
263   if (!classReloc)
264     return;
265 
266   auto *classSym = cast<Symbol *>(classReloc->referent);
267   if (auto *d = dyn_cast<Defined>(classSym))
268     if (!classMap.count(d))
269       parseClass(d);
270 
271   if (const auto *r = catIsec->getRelocAt(catLayout.classMethodsOffset)) {
272     parseMethods(cast<ConcatInputSection>(r->getReferentInputSection()),
273                  classSym, catIsec, MCK_Category, MK_Static);
274   }
275 
276   if (const auto *r = catIsec->getRelocAt(catLayout.instanceMethodsOffset)) {
277     parseMethods(cast<ConcatInputSection>(r->getReferentInputSection()),
278                  classSym, catIsec, MCK_Category, MK_Instance);
279   }
280 }
281 
282 void ObjcCategoryChecker::parseClass(const Defined *classSym) {
283   // Given a Class struct, get its corresponding Methods struct
284   auto getMethodsIsec =
285       [&](const InputSection *classIsec) -> ConcatInputSection * {
286     if (const auto *r = classIsec->getRelocAt(classLayout.roDataOffset)) {
287       if (const auto *roIsec =
288               cast_or_null<ConcatInputSection>(r->getReferentInputSection())) {
289         if (const auto *r =
290                 roIsec->getRelocAt(roClassLayout.baseMethodsOffset)) {
291           if (auto *methodsIsec = cast_or_null<ConcatInputSection>(
292                   r->getReferentInputSection()))
293             return methodsIsec;
294         }
295       }
296     }
297     return nullptr;
298   };
299 
300   const auto *classIsec = cast<ConcatInputSection>(classSym->isec());
301 
302   // Parse instance methods.
303   if (const auto *instanceMethodsIsec = getMethodsIsec(classIsec))
304     parseMethods(instanceMethodsIsec, classSym, classIsec, MCK_Class,
305                  MK_Instance);
306 
307   // Class methods are contained in the metaclass.
308   if (const auto *r = classSym->isec()->getRelocAt(classLayout.metaClassOffset))
309     if (const auto *classMethodsIsec = getMethodsIsec(
310             cast<ConcatInputSection>(r->getReferentInputSection())))
311       parseMethods(classMethodsIsec, classSym, classIsec, MCK_Class, MK_Static);
312 }
313 
314 void objc::checkCategories() {
315   TimeTraceScope timeScope("ObjcCategoryChecker");
316 
317   ObjcCategoryChecker checker;
318   for (const InputSection *isec : inputSections) {
319     if (isec->getName() == section_names::objcCatList)
320       for (const Reloc &r : isec->relocs) {
321         auto *catIsec = cast<ConcatInputSection>(r.getReferentInputSection());
322         checker.parseCategory(catIsec);
323       }
324   }
325 }
326 
327 namespace {
328 
329 class ObjcCategoryMerger {
330   // In which language was a particular construct originally defined
331   enum SourceLanguage { Unknown, ObjC, Swift };
332 
333   // Information about an input category
334   struct InfoInputCategory {
335     ConcatInputSection *catListIsec;
336     ConcatInputSection *catBodyIsec;
337     uint32_t offCatListIsec = 0;
338     SourceLanguage sourceLanguage = SourceLanguage::Unknown;
339 
340     bool wasMerged = false;
341   };
342 
343   // To write new (merged) categories or classes, we will try make limited
344   // assumptions about the alignment and the sections the various class/category
345   // info are stored in and . So we'll just reuse the same sections and
346   // alignment as already used in existing (input) categories. To do this we
347   // have InfoCategoryWriter which contains the various sections that the
348   // generated categories will be written to.
349   struct InfoWriteSection {
350     bool valid = false; // Data has been successfully collected from input
351     uint32_t align = 0;
352     Section *inputSection;
353     Reloc relocTemplate;
354     OutputSection *outputSection;
355   };
356 
357   struct InfoCategoryWriter {
358     InfoWriteSection catListInfo;
359     InfoWriteSection catBodyInfo;
360     InfoWriteSection catNameInfo;
361     InfoWriteSection catPtrListInfo;
362   };
363 
364   // Information about a pointer list in the original categories or class(method
365   // lists, protocol lists, etc)
366   struct PointerListInfo {
367     PointerListInfo() = default;
368     PointerListInfo(const PointerListInfo &) = default;
369     PointerListInfo(const char *_categoryPrefix, uint32_t _pointersPerStruct)
370         : categoryPrefix(_categoryPrefix),
371           pointersPerStruct(_pointersPerStruct) {}
372 
373     inline bool operator==(const PointerListInfo &cmp) const {
374       return pointersPerStruct == cmp.pointersPerStruct &&
375              structSize == cmp.structSize && structCount == cmp.structCount &&
376              allPtrs == cmp.allPtrs;
377     }
378 
379     const char *categoryPrefix;
380 
381     uint32_t pointersPerStruct = 0;
382 
383     uint32_t structSize = 0;
384     uint32_t structCount = 0;
385 
386     std::vector<Symbol *> allPtrs;
387   };
388 
389   // Full information describing an ObjC class . This will include all the
390   // additional methods, protocols, and properties that are contained in the
391   // class and all the categories that extend a particular class.
392   struct ClassExtensionInfo {
393     ClassExtensionInfo(CategoryLayout &_catLayout) : catLayout(_catLayout){};
394 
395     // Merged names of containers. Ex: base|firstCategory|secondCategory|...
396     std::string mergedContainerName;
397     std::string baseClassName;
398     const Symbol *baseClass = nullptr;
399     SourceLanguage baseClassSourceLanguage = SourceLanguage::Unknown;
400 
401     CategoryLayout &catLayout;
402 
403     // In case we generate new data, mark the new data as belonging to this file
404     ObjFile *objFileForMergeData = nullptr;
405 
406     PointerListInfo instanceMethods = {objc::symbol_names::instanceMethods,
407                                        /*pointersPerStruct=*/3};
408     PointerListInfo classMethods = {objc::symbol_names::categoryClassMethods,
409                                     /*pointersPerStruct=*/3};
410     PointerListInfo protocols = {objc::symbol_names::categoryProtocols,
411                                  /*pointersPerStruct=*/0};
412     PointerListInfo instanceProps = {objc::symbol_names::listProprieties,
413                                      /*pointersPerStruct=*/2};
414     PointerListInfo classProps = {objc::symbol_names::klassPropList,
415                                   /*pointersPerStruct=*/2};
416   };
417 
418 public:
419   ObjcCategoryMerger(std::vector<ConcatInputSection *> &_allInputSections);
420   void doMerge();
421   static void doCleanup();
422 
423 private:
424   DenseSet<const Symbol *> collectNlCategories();
425   void collectAndValidateCategoriesData();
426   bool
427   mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> &categories);
428 
429   void eraseISec(ConcatInputSection *isec);
430   void eraseMergedCategories();
431 
432   void generateCatListForNonErasedCategories(
433       MapVector<ConcatInputSection *, std::set<uint64_t>>
434           catListToErasedOffsets);
435   void collectSectionWriteInfoFromIsec(const InputSection *isec,
436                                        InfoWriteSection &catWriteInfo);
437   bool collectCategoryWriterInfoFromCategory(const InfoInputCategory &catInfo);
438   bool parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
439                              ClassExtensionInfo &extInfo);
440 
441   void parseProtocolListInfo(const ConcatInputSection *isec, uint32_t secOffset,
442                              PointerListInfo &ptrList,
443                              SourceLanguage sourceLang);
444 
445   PointerListInfo parseProtocolListInfo(const ConcatInputSection *isec,
446                                         uint32_t secOffset,
447                                         SourceLanguage sourceLang);
448 
449   bool parsePointerListInfo(const ConcatInputSection *isec, uint32_t secOffset,
450                             PointerListInfo &ptrList);
451 
452   void emitAndLinkPointerList(Defined *parentSym, uint32_t linkAtOffset,
453                               const ClassExtensionInfo &extInfo,
454                               const PointerListInfo &ptrList);
455 
456   Defined *emitAndLinkProtocolList(Defined *parentSym, uint32_t linkAtOffset,
457                                    const ClassExtensionInfo &extInfo,
458                                    const PointerListInfo &ptrList);
459 
460   Defined *emitCategory(const ClassExtensionInfo &extInfo);
461   Defined *emitCatListEntrySec(const std::string &forCategoryName,
462                                const std::string &forBaseClassName,
463                                ObjFile *objFile);
464   Defined *emitCategoryBody(const std::string &name, const Defined *nameSym,
465                             const Symbol *baseClassSym,
466                             const std::string &baseClassName, ObjFile *objFile);
467   Defined *emitCategoryName(const std::string &name, ObjFile *objFile);
468   void createSymbolReference(Defined *refFrom, const Symbol *refTo,
469                              uint32_t offset, const Reloc &relocTemplate);
470   Defined *tryFindDefinedOnIsec(const InputSection *isec, uint32_t offset);
471   Symbol *tryGetSymbolAtIsecOffset(const ConcatInputSection *isec,
472                                    uint32_t offset);
473   Defined *tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,
474                                      uint32_t offset);
475   Defined *getClassRo(const Defined *classSym, bool getMetaRo);
476   SourceLanguage getClassSymSourceLang(const Defined *classSym);
477   bool mergeCategoriesIntoBaseClass(const Defined *baseClass,
478                                     std::vector<InfoInputCategory> &categories);
479   void eraseSymbolAtIsecOffset(ConcatInputSection *isec, uint32_t offset);
480   void tryEraseDefinedAtIsecOffset(const ConcatInputSection *isec,
481                                    uint32_t offset);
482 
483   // Allocate a null-terminated StringRef backed by generatedSectionData
484   StringRef newStringData(const char *str);
485   // Allocate section data, backed by generatedSectionData
486   SmallVector<uint8_t> &newSectionData(uint32_t size);
487 
488   CategoryLayout catLayout;
489   ClassLayout classLayout;
490   ROClassLayout roClassLayout;
491   ListHeaderLayout listHeaderLayout;
492   MethodLayout methodLayout;
493   ProtocolListHeaderLayout protocolListHeaderLayout;
494 
495   InfoCategoryWriter infoCategoryWriter;
496   std::vector<ConcatInputSection *> &allInputSections;
497   // Map of base class Symbol to list of InfoInputCategory's for it
498   MapVector<const Symbol *, std::vector<InfoInputCategory>> categoryMap;
499 
500   // Normally, the binary data comes from the input files, but since we're
501   // generating binary data ourselves, we use the below array to store it in.
502   // Need this to be 'static' so the data survives past the ObjcCategoryMerger
503   // object, as the data will be read by the Writer when the final binary is
504   // generated.
505   static SmallVector<std::unique_ptr<SmallVector<uint8_t>>>
506       generatedSectionData;
507 };
508 
509 SmallVector<std::unique_ptr<SmallVector<uint8_t>>>
510     ObjcCategoryMerger::generatedSectionData;
511 
512 ObjcCategoryMerger::ObjcCategoryMerger(
513     std::vector<ConcatInputSection *> &_allInputSections)
514     : catLayout(target->wordSize), classLayout(target->wordSize),
515       roClassLayout(target->wordSize), listHeaderLayout(target->wordSize),
516       methodLayout(target->wordSize),
517       protocolListHeaderLayout(target->wordSize),
518       allInputSections(_allInputSections) {}
519 
520 void ObjcCategoryMerger::collectSectionWriteInfoFromIsec(
521     const InputSection *isec, InfoWriteSection &catWriteInfo) {
522 
523   catWriteInfo.inputSection = const_cast<Section *>(&isec->section);
524   catWriteInfo.align = isec->align;
525   catWriteInfo.outputSection = isec->parent;
526 
527   assert(catWriteInfo.outputSection &&
528          "outputSection may not be null in collectSectionWriteInfoFromIsec.");
529 
530   if (isec->relocs.size())
531     catWriteInfo.relocTemplate = isec->relocs[0];
532 
533   catWriteInfo.valid = true;
534 }
535 
536 Symbol *
537 ObjcCategoryMerger::tryGetSymbolAtIsecOffset(const ConcatInputSection *isec,
538                                              uint32_t offset) {
539   if (!isec)
540     return nullptr;
541   const Reloc *reloc = isec->getRelocAt(offset);
542 
543   if (!reloc)
544     return nullptr;
545 
546   Symbol *sym = dyn_cast_if_present<Symbol *>(reloc->referent);
547 
548   if (reloc->addend && sym) {
549     assert(isa<Defined>(sym) && "Expected defined for non-zero addend");
550     Defined *definedSym = cast<Defined>(sym);
551     sym = tryFindDefinedOnIsec(definedSym->isec(),
552                                definedSym->value + reloc->addend);
553   }
554 
555   return sym;
556 }
557 
558 Defined *ObjcCategoryMerger::tryFindDefinedOnIsec(const InputSection *isec,
559                                                   uint32_t offset) {
560   for (Defined *sym : isec->symbols)
561     if ((sym->value <= offset) && (sym->value + sym->size > offset))
562       return sym;
563 
564   return nullptr;
565 }
566 
567 Defined *
568 ObjcCategoryMerger::tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,
569                                               uint32_t offset) {
570   Symbol *sym = tryGetSymbolAtIsecOffset(isec, offset);
571   return dyn_cast_or_null<Defined>(sym);
572 }
573 
574 // Get the class's ro_data symbol. If getMetaRo is true, then we will return
575 // the meta-class's ro_data symbol. Otherwise, we will return the class
576 // (instance) ro_data symbol.
577 Defined *ObjcCategoryMerger::getClassRo(const Defined *classSym,
578                                         bool getMetaRo) {
579   ConcatInputSection *isec = dyn_cast<ConcatInputSection>(classSym->isec());
580   if (!isec)
581     return nullptr;
582 
583   if (!getMetaRo)
584     return tryGetDefinedAtIsecOffset(isec, classLayout.roDataOffset +
585                                                classSym->value);
586 
587   Defined *metaClass = tryGetDefinedAtIsecOffset(
588       isec, classLayout.metaClassOffset + classSym->value);
589   if (!metaClass)
590     return nullptr;
591 
592   return tryGetDefinedAtIsecOffset(
593       dyn_cast<ConcatInputSection>(metaClass->isec()),
594       classLayout.roDataOffset);
595 }
596 
597 // Given an ConcatInputSection or CStringInputSection and an offset, if there is
598 // a symbol(Defined) at that offset, then erase the symbol (mark it not live)
599 void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset(
600     const ConcatInputSection *isec, uint32_t offset) {
601   const Reloc *reloc = isec->getRelocAt(offset);
602 
603   if (!reloc)
604     return;
605 
606   Defined *sym = dyn_cast_or_null<Defined>(cast<Symbol *>(reloc->referent));
607   if (!sym)
608     return;
609 
610   if (auto *cisec = dyn_cast_or_null<ConcatInputSection>(sym->isec()))
611     eraseISec(cisec);
612   else if (auto *csisec = dyn_cast_or_null<CStringInputSection>(sym->isec())) {
613     uint32_t totalOffset = sym->value + reloc->addend;
614     StringPiece &piece = csisec->getStringPiece(totalOffset);
615     piece.live = false;
616   } else {
617     llvm_unreachable("erased symbol has to be Defined or CStringInputSection");
618   }
619 }
620 
621 bool ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(
622     const InfoInputCategory &catInfo) {
623 
624   if (!infoCategoryWriter.catListInfo.valid)
625     collectSectionWriteInfoFromIsec(catInfo.catListIsec,
626                                     infoCategoryWriter.catListInfo);
627   if (!infoCategoryWriter.catBodyInfo.valid)
628     collectSectionWriteInfoFromIsec(catInfo.catBodyIsec,
629                                     infoCategoryWriter.catBodyInfo);
630 
631   if (!infoCategoryWriter.catNameInfo.valid) {
632     lld::macho::Defined *catNameSym =
633         tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, catLayout.nameOffset);
634 
635     if (!catNameSym) {
636       // This is an unhandeled case where the category name is not a symbol but
637       // instead points to an CStringInputSection (that doesn't have any symbol)
638       // TODO: Find a small repro and either fix or add a test case for this
639       // scenario
640       return false;
641     }
642 
643     collectSectionWriteInfoFromIsec(catNameSym->isec(),
644                                     infoCategoryWriter.catNameInfo);
645   }
646 
647   // Collect writer info from all the category lists (we're assuming they all
648   // would provide the same info)
649   if (!infoCategoryWriter.catPtrListInfo.valid) {
650     for (uint32_t off = catLayout.instanceMethodsOffset;
651          off <= catLayout.classPropsOffset; off += target->wordSize) {
652       if (Defined *ptrList =
653               tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, off)) {
654         collectSectionWriteInfoFromIsec(ptrList->isec(),
655                                         infoCategoryWriter.catPtrListInfo);
656         // we've successfully collected data, so we can break
657         break;
658       }
659     }
660   }
661 
662   return true;
663 }
664 
665 // Parse a protocol list that might be linked to ConcatInputSection at a given
666 // offset. The format of the protocol list is different than other lists (prop
667 // lists, method lists) so we need to parse it differently
668 void ObjcCategoryMerger::parseProtocolListInfo(
669     const ConcatInputSection *isec, uint32_t secOffset,
670     PointerListInfo &ptrList, [[maybe_unused]] SourceLanguage sourceLang) {
671   assert((isec && (secOffset + target->wordSize <= isec->data.size())) &&
672          "Tried to read pointer list beyond protocol section end");
673 
674   const Reloc *reloc = isec->getRelocAt(secOffset);
675   if (!reloc)
676     return;
677 
678   auto *ptrListSym = dyn_cast_or_null<Defined>(cast<Symbol *>(reloc->referent));
679   assert(ptrListSym && "Protocol list reloc does not have a valid Defined");
680 
681   // Theoretically protocol count can be either 32b or 64b, depending on
682   // platform pointer size, but to simplify implementation we always just read
683   // the lower 32b which should be good enough.
684   uint32_t protocolCount = *reinterpret_cast<const uint32_t *>(
685       ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset);
686 
687   ptrList.structCount += protocolCount;
688   ptrList.structSize = target->wordSize;
689 
690   [[maybe_unused]] uint32_t expectedListSize =
691       (protocolCount * target->wordSize) +
692       /*header(count)*/ protocolListHeaderLayout.totalSize +
693       /*extra null value*/ target->wordSize;
694 
695   // On Swift, the protocol list does not have the extra (unnecessary) null
696   [[maybe_unused]] uint32_t expectedListSizeSwift =
697       expectedListSize - target->wordSize;
698 
699   assert(((expectedListSize == ptrListSym->isec()->data.size() &&
700            sourceLang == SourceLanguage::ObjC) ||
701           (expectedListSizeSwift == ptrListSym->isec()->data.size() &&
702            sourceLang == SourceLanguage::Swift)) &&
703          "Protocol list does not match expected size");
704 
705   uint32_t off = protocolListHeaderLayout.totalSize;
706   for (uint32_t inx = 0; inx < protocolCount; ++inx) {
707     const Reloc *reloc = ptrListSym->isec()->getRelocAt(off);
708     assert(reloc && "No reloc found at protocol list offset");
709 
710     auto *listSym = dyn_cast_or_null<Defined>(cast<Symbol *>(reloc->referent));
711     assert(listSym && "Protocol list reloc does not have a valid Defined");
712 
713     ptrList.allPtrs.push_back(listSym);
714     off += target->wordSize;
715   }
716   assert((ptrListSym->isec()->getRelocAt(off) == nullptr) &&
717          "expected null terminating protocol");
718   assert(off + /*extra null value*/ target->wordSize == expectedListSize &&
719          "Protocol list end offset does not match expected size");
720 }
721 
722 // Parse a protocol list and return the PointerListInfo for it
723 ObjcCategoryMerger::PointerListInfo
724 ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
725                                           uint32_t secOffset,
726                                           SourceLanguage sourceLang) {
727   PointerListInfo ptrList;
728   parseProtocolListInfo(isec, secOffset, ptrList, sourceLang);
729   return ptrList;
730 }
731 
732 // Parse a pointer list that might be linked to ConcatInputSection at a given
733 // offset. This can be used for instance methods, class methods, instance props
734 // and class props since they have the same format.
735 bool ObjcCategoryMerger::parsePointerListInfo(const ConcatInputSection *isec,
736                                               uint32_t secOffset,
737                                               PointerListInfo &ptrList) {
738   assert(ptrList.pointersPerStruct == 2 || ptrList.pointersPerStruct == 3);
739   assert(isec && "Trying to parse pointer list from null isec");
740   assert(secOffset + target->wordSize <= isec->data.size() &&
741          "Trying to read pointer list beyond section end");
742 
743   const Reloc *reloc = isec->getRelocAt(secOffset);
744   // Empty list is a valid case, return true.
745   if (!reloc)
746     return true;
747 
748   auto *ptrListSym = dyn_cast_or_null<Defined>(cast<Symbol *>(reloc->referent));
749   assert(ptrListSym && "Reloc does not have a valid Defined");
750 
751   uint32_t thisStructSize = *reinterpret_cast<const uint32_t *>(
752       ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset);
753   uint32_t thisStructCount = *reinterpret_cast<const uint32_t *>(
754       ptrListSym->isec()->data.data() + listHeaderLayout.structCountOffset);
755   assert(thisStructSize == ptrList.pointersPerStruct * target->wordSize);
756 
757   assert(!ptrList.structSize || (thisStructSize == ptrList.structSize));
758 
759   ptrList.structCount += thisStructCount;
760   ptrList.structSize = thisStructSize;
761 
762   uint32_t expectedListSize =
763       listHeaderLayout.totalSize + (thisStructSize * thisStructCount);
764   assert(expectedListSize == ptrListSym->isec()->data.size() &&
765          "Pointer list does not match expected size");
766 
767   for (uint32_t off = listHeaderLayout.totalSize; off < expectedListSize;
768        off += target->wordSize) {
769     const Reloc *reloc = ptrListSym->isec()->getRelocAt(off);
770     assert(reloc && "No reloc found at pointer list offset");
771 
772     auto *listSym =
773         dyn_cast_or_null<Defined>(reloc->referent.dyn_cast<Symbol *>());
774     // Sometimes, the reloc points to a StringPiece (InputSection + addend)
775     // instead of a symbol.
776     // TODO: Skip these cases for now, but we should fix this.
777     if (!listSym)
778       return false;
779 
780     ptrList.allPtrs.push_back(listSym);
781   }
782 
783   return true;
784 }
785 
786 // Here we parse all the information of an input category (catInfo) and
787 // append the parsed info into the structure which will contain all the
788 // information about how a class is extended (extInfo)
789 bool ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
790                                                ClassExtensionInfo &extInfo) {
791   const Reloc *catNameReloc =
792       catInfo.catBodyIsec->getRelocAt(catLayout.nameOffset);
793 
794   // Parse name
795   assert(catNameReloc && "Category does not have a reloc at 'nameOffset'");
796 
797   // is this the first category we are parsing?
798   if (extInfo.mergedContainerName.empty())
799     extInfo.objFileForMergeData =
800         dyn_cast_or_null<ObjFile>(catInfo.catBodyIsec->getFile());
801   else
802     extInfo.mergedContainerName += "|";
803 
804   assert(extInfo.objFileForMergeData &&
805          "Expected to already have valid objextInfo.objFileForMergeData");
806 
807   StringRef catName = catNameReloc->getReferentString();
808   extInfo.mergedContainerName += catName.str();
809 
810   // Parse base class
811   if (!extInfo.baseClass) {
812     Symbol *classSym =
813         tryGetSymbolAtIsecOffset(catInfo.catBodyIsec, catLayout.klassOffset);
814     assert(extInfo.baseClassName.empty());
815     extInfo.baseClass = classSym;
816     llvm::StringRef classPrefix(objc::symbol_names::klass);
817     assert(classSym->getName().starts_with(classPrefix) &&
818            "Base class symbol does not start with expected prefix");
819     extInfo.baseClassName = classSym->getName().substr(classPrefix.size());
820   } else {
821     assert((extInfo.baseClass ==
822             tryGetSymbolAtIsecOffset(catInfo.catBodyIsec,
823                                      catLayout.klassOffset)) &&
824            "Trying to parse category info into container with different base "
825            "class");
826   }
827 
828   if (!parsePointerListInfo(catInfo.catBodyIsec,
829                             catLayout.instanceMethodsOffset,
830                             extInfo.instanceMethods))
831     return false;
832 
833   if (!parsePointerListInfo(catInfo.catBodyIsec, catLayout.classMethodsOffset,
834                             extInfo.classMethods))
835     return false;
836 
837   parseProtocolListInfo(catInfo.catBodyIsec, catLayout.protocolsOffset,
838                         extInfo.protocols, catInfo.sourceLanguage);
839 
840   if (!parsePointerListInfo(catInfo.catBodyIsec, catLayout.instancePropsOffset,
841                             extInfo.instanceProps))
842     return false;
843 
844   if (!parsePointerListInfo(catInfo.catBodyIsec, catLayout.classPropsOffset,
845                             extInfo.classProps))
846     return false;
847 
848   return true;
849 }
850 
851 // Generate a protocol list (including header) and link it into the parent at
852 // the specified offset.
853 Defined *ObjcCategoryMerger::emitAndLinkProtocolList(
854     Defined *parentSym, uint32_t linkAtOffset,
855     const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) {
856   if (ptrList.allPtrs.empty())
857     return nullptr;
858 
859   assert(ptrList.allPtrs.size() == ptrList.structCount);
860 
861   uint32_t bodySize = (ptrList.structCount * target->wordSize) +
862                       /*header(count)*/ protocolListHeaderLayout.totalSize +
863                       /*extra null value*/ target->wordSize;
864   llvm::ArrayRef<uint8_t> bodyData = newSectionData(bodySize);
865 
866   // This theoretically can be either 32b or 64b, but writing just the first 32b
867   // is good enough
868   const uint32_t *ptrProtoCount = reinterpret_cast<const uint32_t *>(
869       bodyData.data() + protocolListHeaderLayout.protocolCountOffset);
870 
871   *const_cast<uint32_t *>(ptrProtoCount) = ptrList.allPtrs.size();
872 
873   ConcatInputSection *listSec = make<ConcatInputSection>(
874       *infoCategoryWriter.catPtrListInfo.inputSection, bodyData,
875       infoCategoryWriter.catPtrListInfo.align);
876   listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
877   listSec->live = true;
878 
879   listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
880 
881   std::string symName = ptrList.categoryPrefix;
882   symName += extInfo.baseClassName + "(" + extInfo.mergedContainerName + ")";
883 
884   Defined *ptrListSym = make<Defined>(
885       newStringData(symName.c_str()), /*file=*/parentSym->getObjectFile(),
886       listSec, /*value=*/0, bodyData.size(), /*isWeakDef=*/false,
887       /*isExternal=*/false, /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
888       /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
889       /*isWeakDefCanBeHidden=*/false);
890 
891   ptrListSym->used = true;
892   parentSym->getObjectFile()->symbols.push_back(ptrListSym);
893   addInputSection(listSec);
894 
895   createSymbolReference(parentSym, ptrListSym, linkAtOffset,
896                         infoCategoryWriter.catBodyInfo.relocTemplate);
897 
898   uint32_t offset = protocolListHeaderLayout.totalSize;
899   for (Symbol *symbol : ptrList.allPtrs) {
900     createSymbolReference(ptrListSym, symbol, offset,
901                           infoCategoryWriter.catPtrListInfo.relocTemplate);
902     offset += target->wordSize;
903   }
904 
905   return ptrListSym;
906 }
907 
908 // Generate a pointer list (including header) and link it into the parent at the
909 // specified offset. This is used for instance and class methods and
910 // proprieties.
911 void ObjcCategoryMerger::emitAndLinkPointerList(
912     Defined *parentSym, uint32_t linkAtOffset,
913     const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) {
914   if (ptrList.allPtrs.empty())
915     return;
916 
917   assert(ptrList.allPtrs.size() * target->wordSize ==
918          ptrList.structCount * ptrList.structSize);
919 
920   // Generate body
921   uint32_t bodySize =
922       listHeaderLayout.totalSize + (ptrList.structSize * ptrList.structCount);
923   llvm::ArrayRef<uint8_t> bodyData = newSectionData(bodySize);
924 
925   const uint32_t *ptrStructSize = reinterpret_cast<const uint32_t *>(
926       bodyData.data() + listHeaderLayout.structSizeOffset);
927   const uint32_t *ptrStructCount = reinterpret_cast<const uint32_t *>(
928       bodyData.data() + listHeaderLayout.structCountOffset);
929 
930   *const_cast<uint32_t *>(ptrStructSize) = ptrList.structSize;
931   *const_cast<uint32_t *>(ptrStructCount) = ptrList.structCount;
932 
933   ConcatInputSection *listSec = make<ConcatInputSection>(
934       *infoCategoryWriter.catPtrListInfo.inputSection, bodyData,
935       infoCategoryWriter.catPtrListInfo.align);
936   listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
937   listSec->live = true;
938 
939   listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
940 
941   std::string symName = ptrList.categoryPrefix;
942   symName += extInfo.baseClassName + "(" + extInfo.mergedContainerName + ")";
943 
944   Defined *ptrListSym = make<Defined>(
945       newStringData(symName.c_str()), /*file=*/parentSym->getObjectFile(),
946       listSec, /*value=*/0, bodyData.size(), /*isWeakDef=*/false,
947       /*isExternal=*/false, /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
948       /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
949       /*isWeakDefCanBeHidden=*/false);
950 
951   ptrListSym->used = true;
952   parentSym->getObjectFile()->symbols.push_back(ptrListSym);
953   addInputSection(listSec);
954 
955   createSymbolReference(parentSym, ptrListSym, linkAtOffset,
956                         infoCategoryWriter.catBodyInfo.relocTemplate);
957 
958   uint32_t offset = listHeaderLayout.totalSize;
959   for (Symbol *symbol : ptrList.allPtrs) {
960     createSymbolReference(ptrListSym, symbol, offset,
961                           infoCategoryWriter.catPtrListInfo.relocTemplate);
962     offset += target->wordSize;
963   }
964 }
965 
966 // This method creates an __objc_catlist ConcatInputSection with a single slot
967 Defined *
968 ObjcCategoryMerger::emitCatListEntrySec(const std::string &forCategoryName,
969                                         const std::string &forBaseClassName,
970                                         ObjFile *objFile) {
971   uint32_t sectionSize = target->wordSize;
972   llvm::ArrayRef<uint8_t> bodyData = newSectionData(sectionSize);
973 
974   ConcatInputSection *newCatList =
975       make<ConcatInputSection>(*infoCategoryWriter.catListInfo.inputSection,
976                                bodyData, infoCategoryWriter.catListInfo.align);
977   newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
978   newCatList->live = true;
979 
980   newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
981 
982   std::string catSymName = "<__objc_catlist slot for merged category ";
983   catSymName += forBaseClassName + "(" + forCategoryName + ")>";
984 
985   Defined *catListSym = make<Defined>(
986       newStringData(catSymName.c_str()), /*file=*/objFile, newCatList,
987       /*value=*/0, bodyData.size(), /*isWeakDef=*/false, /*isExternal=*/false,
988       /*isPrivateExtern=*/false, /*includeInSymtab=*/false,
989       /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
990       /*isWeakDefCanBeHidden=*/false);
991 
992   catListSym->used = true;
993   objFile->symbols.push_back(catListSym);
994   addInputSection(newCatList);
995   return catListSym;
996 }
997 
998 // Here we generate the main category body and link the name and base class into
999 // it. We don't link any other info yet like the protocol and class/instance
1000 // methods/props.
1001 Defined *ObjcCategoryMerger::emitCategoryBody(const std::string &name,
1002                                               const Defined *nameSym,
1003                                               const Symbol *baseClassSym,
1004                                               const std::string &baseClassName,
1005                                               ObjFile *objFile) {
1006   llvm::ArrayRef<uint8_t> bodyData = newSectionData(catLayout.totalSize);
1007 
1008   uint32_t *ptrSize = (uint32_t *)(const_cast<uint8_t *>(bodyData.data()) +
1009                                    catLayout.sizeOffset);
1010   *ptrSize = catLayout.totalSize;
1011 
1012   ConcatInputSection *newBodySec =
1013       make<ConcatInputSection>(*infoCategoryWriter.catBodyInfo.inputSection,
1014                                bodyData, infoCategoryWriter.catBodyInfo.align);
1015   newBodySec->parent = infoCategoryWriter.catBodyInfo.outputSection;
1016   newBodySec->live = true;
1017 
1018   std::string symName =
1019       objc::symbol_names::category + baseClassName + "(" + name + ")";
1020   Defined *catBodySym = make<Defined>(
1021       newStringData(symName.c_str()), /*file=*/objFile, newBodySec,
1022       /*value=*/0, bodyData.size(), /*isWeakDef=*/false, /*isExternal=*/false,
1023       /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
1024       /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
1025       /*isWeakDefCanBeHidden=*/false);
1026 
1027   catBodySym->used = true;
1028   objFile->symbols.push_back(catBodySym);
1029   addInputSection(newBodySec);
1030 
1031   createSymbolReference(catBodySym, nameSym, catLayout.nameOffset,
1032                         infoCategoryWriter.catBodyInfo.relocTemplate);
1033 
1034   // Create a reloc to the base class (either external or internal)
1035   createSymbolReference(catBodySym, baseClassSym, catLayout.klassOffset,
1036                         infoCategoryWriter.catBodyInfo.relocTemplate);
1037 
1038   return catBodySym;
1039 }
1040 
1041 // This writes the new category name (for the merged category) into the binary
1042 // and returns the sybmol for it.
1043 Defined *ObjcCategoryMerger::emitCategoryName(const std::string &name,
1044                                               ObjFile *objFile) {
1045   StringRef nameStrData = newStringData(name.c_str());
1046   // We use +1 below to include the null terminator
1047   llvm::ArrayRef<uint8_t> nameData(
1048       reinterpret_cast<const uint8_t *>(nameStrData.data()),
1049       nameStrData.size() + 1);
1050 
1051   auto *parentSection = infoCategoryWriter.catNameInfo.inputSection;
1052   CStringInputSection *newStringSec = make<CStringInputSection>(
1053       *infoCategoryWriter.catNameInfo.inputSection, nameData,
1054       infoCategoryWriter.catNameInfo.align, /*dedupLiterals=*/true);
1055 
1056   parentSection->subsections.push_back({0, newStringSec});
1057 
1058   newStringSec->splitIntoPieces();
1059   newStringSec->pieces[0].live = true;
1060   newStringSec->parent = infoCategoryWriter.catNameInfo.outputSection;
1061   in.cStringSection->addInput(newStringSec);
1062   assert(newStringSec->pieces.size() == 1);
1063 
1064   Defined *catNameSym = make<Defined>(
1065       "<merged category name>", /*file=*/objFile, newStringSec,
1066       /*value=*/0, nameData.size(),
1067       /*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false,
1068       /*includeInSymtab=*/false, /*isReferencedDynamically=*/false,
1069       /*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false);
1070 
1071   catNameSym->used = true;
1072   objFile->symbols.push_back(catNameSym);
1073   return catNameSym;
1074 }
1075 
1076 // This method fully creates a new category from the given ClassExtensionInfo.
1077 // It creates the category name, body and method/protocol/prop lists and links
1078 // them all together. Then it creates a new __objc_catlist entry and adds the
1079 // category to it. Calling this method will fully generate a category which will
1080 // be available in the final binary.
1081 Defined *ObjcCategoryMerger::emitCategory(const ClassExtensionInfo &extInfo) {
1082   Defined *catNameSym = emitCategoryName(extInfo.mergedContainerName,
1083                                          extInfo.objFileForMergeData);
1084 
1085   Defined *catBodySym = emitCategoryBody(
1086       extInfo.mergedContainerName, catNameSym, extInfo.baseClass,
1087       extInfo.baseClassName, extInfo.objFileForMergeData);
1088 
1089   Defined *catListSym =
1090       emitCatListEntrySec(extInfo.mergedContainerName, extInfo.baseClassName,
1091                           extInfo.objFileForMergeData);
1092 
1093   // Add the single category body to the category list at the offset 0.
1094   createSymbolReference(catListSym, catBodySym, /*offset=*/0,
1095                         infoCategoryWriter.catListInfo.relocTemplate);
1096 
1097   emitAndLinkPointerList(catBodySym, catLayout.instanceMethodsOffset, extInfo,
1098                          extInfo.instanceMethods);
1099 
1100   emitAndLinkPointerList(catBodySym, catLayout.classMethodsOffset, extInfo,
1101                          extInfo.classMethods);
1102 
1103   emitAndLinkProtocolList(catBodySym, catLayout.protocolsOffset, extInfo,
1104                           extInfo.protocols);
1105 
1106   emitAndLinkPointerList(catBodySym, catLayout.instancePropsOffset, extInfo,
1107                          extInfo.instanceProps);
1108 
1109   emitAndLinkPointerList(catBodySym, catLayout.classPropsOffset, extInfo,
1110                          extInfo.classProps);
1111 
1112   return catBodySym;
1113 }
1114 
1115 // This method merges all the categories (sharing a base class) into a single
1116 // category.
1117 bool ObjcCategoryMerger::mergeCategoriesIntoSingleCategory(
1118     std::vector<InfoInputCategory> &categories) {
1119   assert(categories.size() > 1 && "Expected at least 2 categories");
1120 
1121   ClassExtensionInfo extInfo(catLayout);
1122 
1123   for (auto &catInfo : categories)
1124     if (!parseCatInfoToExtInfo(catInfo, extInfo))
1125       return false;
1126 
1127   Defined *newCatDef = emitCategory(extInfo);
1128   assert(newCatDef && "Failed to create a new category");
1129 
1130   // Suppress unsuded var warning
1131   (void)newCatDef;
1132 
1133   for (auto &catInfo : categories)
1134     catInfo.wasMerged = true;
1135 
1136   return true;
1137 }
1138 
1139 void ObjcCategoryMerger::createSymbolReference(Defined *refFrom,
1140                                                const Symbol *refTo,
1141                                                uint32_t offset,
1142                                                const Reloc &relocTemplate) {
1143   Reloc r = relocTemplate;
1144   r.offset = offset;
1145   r.addend = 0;
1146   r.referent = const_cast<Symbol *>(refTo);
1147   refFrom->isec()->relocs.push_back(r);
1148 }
1149 
1150 // Get the list of categories in the '__objc_nlcatlist' section. We can't
1151 // optimize these as they have a '+load' method that has to be called at
1152 // runtime.
1153 DenseSet<const Symbol *> ObjcCategoryMerger::collectNlCategories() {
1154   DenseSet<const Symbol *> nlCategories;
1155 
1156   for (InputSection *sec : allInputSections) {
1157     if (sec->getName() != section_names::objcNonLazyCatList)
1158       continue;
1159 
1160     for (auto &r : sec->relocs) {
1161       const Symbol *sym = r.referent.dyn_cast<Symbol *>();
1162       nlCategories.insert(sym);
1163     }
1164   }
1165   return nlCategories;
1166 }
1167 
1168 void ObjcCategoryMerger::collectAndValidateCategoriesData() {
1169   auto nlCategories = collectNlCategories();
1170 
1171   for (InputSection *sec : allInputSections) {
1172     if (sec->getName() != section_names::objcCatList)
1173       continue;
1174     ConcatInputSection *catListCisec = dyn_cast<ConcatInputSection>(sec);
1175     assert(catListCisec &&
1176            "__objc_catList InputSection is not a ConcatInputSection");
1177 
1178     for (uint32_t off = 0; off < catListCisec->getSize();
1179          off += target->wordSize) {
1180       Defined *categorySym = tryGetDefinedAtIsecOffset(catListCisec, off);
1181       assert(categorySym &&
1182              "Failed to get a valid category at __objc_catlit offset");
1183 
1184       if (nlCategories.count(categorySym))
1185         continue;
1186 
1187       auto *catBodyIsec = dyn_cast<ConcatInputSection>(categorySym->isec());
1188       assert(catBodyIsec &&
1189              "Category data section is not an ConcatInputSection");
1190 
1191       SourceLanguage eLang = SourceLanguage::Unknown;
1192       if (categorySym->getName().starts_with(objc::symbol_names::category))
1193         eLang = SourceLanguage::ObjC;
1194       else if (categorySym->getName().starts_with(
1195                    objc::symbol_names::swift_objc_category))
1196         eLang = SourceLanguage::Swift;
1197       else
1198         llvm_unreachable("Unexpected category symbol name");
1199 
1200       InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off, eLang};
1201 
1202       // Check that the category has a reloc at 'klassOffset' (which is
1203       // a pointer to the class symbol)
1204 
1205       Symbol *classSym =
1206           tryGetSymbolAtIsecOffset(catBodyIsec, catLayout.klassOffset);
1207       assert(classSym && "Category does not have a valid base class");
1208 
1209       if (!collectCategoryWriterInfoFromCategory(catInputInfo))
1210         continue;
1211 
1212       categoryMap[classSym].push_back(catInputInfo);
1213     }
1214   }
1215 }
1216 
1217 // In the input we have multiple __objc_catlist InputSection, each of which may
1218 // contain links to multiple categories. Of these categories, we will merge (and
1219 // erase) only some. There will be some categories that will remain untouched
1220 // (not erased). For these not erased categories, we generate new __objc_catlist
1221 // entries since the parent __objc_catlist entry will be erased
1222 void ObjcCategoryMerger::generateCatListForNonErasedCategories(
1223     const MapVector<ConcatInputSection *, std::set<uint64_t>>
1224         catListToErasedOffsets) {
1225 
1226   // Go through all offsets of all __objc_catlist's that we process and if there
1227   // are categories that we didn't process - generate a new __objc_catlist for
1228   // each.
1229   for (auto &mapEntry : catListToErasedOffsets) {
1230     ConcatInputSection *catListIsec = mapEntry.first;
1231     for (uint32_t catListIsecOffset = 0;
1232          catListIsecOffset < catListIsec->data.size();
1233          catListIsecOffset += target->wordSize) {
1234       // This slot was erased, we can just skip it
1235       if (mapEntry.second.count(catListIsecOffset))
1236         continue;
1237 
1238       Defined *nonErasedCatBody =
1239           tryGetDefinedAtIsecOffset(catListIsec, catListIsecOffset);
1240       assert(nonErasedCatBody && "Failed to relocate non-deleted category");
1241 
1242       // Allocate data for the new __objc_catlist slot
1243       llvm::ArrayRef<uint8_t> bodyData = newSectionData(target->wordSize);
1244 
1245       // We mark the __objc_catlist slot as belonging to the same file as the
1246       // category
1247       ObjFile *objFile = dyn_cast<ObjFile>(nonErasedCatBody->getFile());
1248 
1249       ConcatInputSection *listSec = make<ConcatInputSection>(
1250           *infoCategoryWriter.catListInfo.inputSection, bodyData,
1251           infoCategoryWriter.catListInfo.align);
1252       listSec->parent = infoCategoryWriter.catListInfo.outputSection;
1253       listSec->live = true;
1254 
1255       std::string slotSymName = "<__objc_catlist slot for category ";
1256       slotSymName += nonErasedCatBody->getName();
1257       slotSymName += ">";
1258 
1259       Defined *catListSlotSym = make<Defined>(
1260           newStringData(slotSymName.c_str()), /*file=*/objFile, listSec,
1261           /*value=*/0, bodyData.size(),
1262           /*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false,
1263           /*includeInSymtab=*/false, /*isReferencedDynamically=*/false,
1264           /*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false);
1265 
1266       catListSlotSym->used = true;
1267       objFile->symbols.push_back(catListSlotSym);
1268       addInputSection(listSec);
1269 
1270       // Now link the category body into the newly created slot
1271       createSymbolReference(catListSlotSym, nonErasedCatBody, 0,
1272                             infoCategoryWriter.catListInfo.relocTemplate);
1273     }
1274   }
1275 }
1276 
1277 void ObjcCategoryMerger::eraseISec(ConcatInputSection *isec) {
1278   isec->live = false;
1279   for (auto &sym : isec->symbols)
1280     sym->used = false;
1281 }
1282 
1283 // This fully erases the merged categories, including their body, their names,
1284 // their method/protocol/prop lists and the __objc_catlist entries that link to
1285 // them.
1286 void ObjcCategoryMerger::eraseMergedCategories() {
1287   // Map of InputSection to a set of offsets of the categories that were merged
1288   MapVector<ConcatInputSection *, std::set<uint64_t>> catListToErasedOffsets;
1289 
1290   for (auto &mapEntry : categoryMap) {
1291     for (InfoInputCategory &catInfo : mapEntry.second) {
1292       if (catInfo.wasMerged) {
1293         eraseISec(catInfo.catListIsec);
1294         catListToErasedOffsets[catInfo.catListIsec].insert(
1295             catInfo.offCatListIsec);
1296       }
1297     }
1298   }
1299 
1300   // If there were categories that we did not erase, we need to generate a new
1301   // __objc_catList that contains only the un-merged categories, and get rid of
1302   // the references to the ones we merged.
1303   generateCatListForNonErasedCategories(catListToErasedOffsets);
1304 
1305   // Erase the old method lists & names of the categories that were merged
1306   for (auto &mapEntry : categoryMap) {
1307     for (InfoInputCategory &catInfo : mapEntry.second) {
1308       if (!catInfo.wasMerged)
1309         continue;
1310 
1311       eraseISec(catInfo.catBodyIsec);
1312 
1313       // We can't erase 'catLayout.nameOffset' for either Swift or ObjC
1314       //   categories because the name will sometimes also be used for other
1315       //   purposes.
1316       // For Swift, see usages of 'l_.str.11.SimpleClass' in
1317       //   objc-category-merging-swift.s
1318       // For ObjC, see usages of 'l_OBJC_CLASS_NAME_.1' in
1319       //   objc-category-merging-erase-objc-name-test.s
1320       // TODO: handle the above in a smarter way
1321 
1322       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
1323                                   catLayout.instanceMethodsOffset);
1324       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
1325                                   catLayout.classMethodsOffset);
1326       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
1327                                   catLayout.protocolsOffset);
1328       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
1329                                   catLayout.classPropsOffset);
1330       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
1331                                   catLayout.instancePropsOffset);
1332     }
1333   }
1334 }
1335 
1336 void ObjcCategoryMerger::doMerge() {
1337   collectAndValidateCategoriesData();
1338 
1339   for (auto &[baseClass, catInfos] : categoryMap) {
1340     bool merged = false;
1341     if (auto *baseClassDef = dyn_cast<Defined>(baseClass)) {
1342       // Merge all categories into the base class
1343       merged = mergeCategoriesIntoBaseClass(baseClassDef, catInfos);
1344     } else if (catInfos.size() > 1) {
1345       // Merge all categories into a new, single category
1346       merged = mergeCategoriesIntoSingleCategory(catInfos);
1347     }
1348     if (!merged)
1349       warn("ObjC category merging skipped for class symbol' " +
1350            baseClass->getName().str() + "'\n");
1351   }
1352 
1353   // Erase all categories that were merged
1354   eraseMergedCategories();
1355 }
1356 
1357 void ObjcCategoryMerger::doCleanup() { generatedSectionData.clear(); }
1358 
1359 StringRef ObjcCategoryMerger::newStringData(const char *str) {
1360   uint32_t len = strlen(str);
1361   uint32_t bufSize = len + 1;
1362   SmallVector<uint8_t> &data = newSectionData(bufSize);
1363   char *strData = reinterpret_cast<char *>(data.data());
1364   // Copy the string chars and null-terminator
1365   memcpy(strData, str, bufSize);
1366   return StringRef(strData, len);
1367 }
1368 
1369 SmallVector<uint8_t> &ObjcCategoryMerger::newSectionData(uint32_t size) {
1370   generatedSectionData.push_back(
1371       std::make_unique<SmallVector<uint8_t>>(size, 0));
1372   return *generatedSectionData.back();
1373 }
1374 
1375 } // namespace
1376 
1377 void objc::mergeCategories() {
1378   TimeTraceScope timeScope("ObjcCategoryMerger");
1379 
1380   ObjcCategoryMerger merger(inputSections);
1381   merger.doMerge();
1382 }
1383 
1384 void objc::doCleanup() { ObjcCategoryMerger::doCleanup(); }
1385 
1386 ObjcCategoryMerger::SourceLanguage
1387 ObjcCategoryMerger::getClassSymSourceLang(const Defined *classSym) {
1388   if (classSym->getName().starts_with(objc::symbol_names::swift_objc_klass))
1389     return SourceLanguage::Swift;
1390 
1391   // If the symbol name matches the ObjC prefix, we don't necessarely know this
1392   // comes from ObjC, since Swift creates ObjC-like alias symbols for some Swift
1393   // classes. Ex:
1394   //  .globl	_OBJC_CLASS_$__TtC11MyTestClass11MyTestClass
1395   //  .private_extern _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass
1396   //  .set _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass, _$s11MyTestClassAACN
1397   //
1398   // So we scan for symbols with the same address and check for the Swift class
1399   if (classSym->getName().starts_with(objc::symbol_names::klass)) {
1400     for (auto &sym : classSym->originalIsec->symbols)
1401       if (sym->value == classSym->value)
1402         if (sym->getName().starts_with(objc::symbol_names::swift_objc_klass))
1403           return SourceLanguage::Swift;
1404     return SourceLanguage::ObjC;
1405   }
1406 
1407   llvm_unreachable("Unexpected class symbol name during category merging");
1408 }
1409 
1410 bool ObjcCategoryMerger::mergeCategoriesIntoBaseClass(
1411     const Defined *baseClass, std::vector<InfoInputCategory> &categories) {
1412   assert(categories.size() >= 1 && "Expected at least one category to merge");
1413 
1414   // Collect all the info from the categories
1415   ClassExtensionInfo extInfo(catLayout);
1416   extInfo.baseClass = baseClass;
1417   extInfo.baseClassSourceLanguage = getClassSymSourceLang(baseClass);
1418 
1419   for (auto &catInfo : categories)
1420     if (!parseCatInfoToExtInfo(catInfo, extInfo))
1421       return false;
1422 
1423   // Get metadata for the base class
1424   Defined *metaRo = getClassRo(baseClass, /*getMetaRo=*/true);
1425   ConcatInputSection *metaIsec = dyn_cast<ConcatInputSection>(metaRo->isec());
1426   Defined *classRo = getClassRo(baseClass, /*getMetaRo=*/false);
1427   ConcatInputSection *classIsec = dyn_cast<ConcatInputSection>(classRo->isec());
1428 
1429   // Now collect the info from the base class from the various lists in the
1430   // class metadata
1431 
1432   // Protocol lists are a special case - the same protocol list is in classRo
1433   // and metaRo, so we only need to parse it once
1434   parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,
1435                         extInfo.protocols, extInfo.baseClassSourceLanguage);
1436 
1437   // Check that the classRo and metaRo protocol lists are identical
1438   assert(parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,
1439                                extInfo.baseClassSourceLanguage) ==
1440              parseProtocolListInfo(metaIsec, roClassLayout.baseProtocolsOffset,
1441                                    extInfo.baseClassSourceLanguage) &&
1442          "Category merger expects classRo and metaRo to have the same protocol "
1443          "list");
1444 
1445   parsePointerListInfo(metaIsec, roClassLayout.baseMethodsOffset,
1446                        extInfo.classMethods);
1447   parsePointerListInfo(classIsec, roClassLayout.baseMethodsOffset,
1448                        extInfo.instanceMethods);
1449 
1450   parsePointerListInfo(metaIsec, roClassLayout.basePropertiesOffset,
1451                        extInfo.classProps);
1452   parsePointerListInfo(classIsec, roClassLayout.basePropertiesOffset,
1453                        extInfo.instanceProps);
1454 
1455   // Erase the old lists - these will be generated and replaced
1456   eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseMethodsOffset);
1457   eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseProtocolsOffset);
1458   eraseSymbolAtIsecOffset(metaIsec, roClassLayout.basePropertiesOffset);
1459   eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseMethodsOffset);
1460   eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseProtocolsOffset);
1461   eraseSymbolAtIsecOffset(classIsec, roClassLayout.basePropertiesOffset);
1462 
1463   // Emit the newly merged lists - first into the meta RO then into the class RO
1464   // First we emit and link the protocol list into the meta RO. Then we link it
1465   // in the classRo as well (they're supposed to be identical)
1466   if (Defined *protoListSym =
1467           emitAndLinkProtocolList(metaRo, roClassLayout.baseProtocolsOffset,
1468                                   extInfo, extInfo.protocols)) {
1469     createSymbolReference(classRo, protoListSym,
1470                           roClassLayout.baseProtocolsOffset,
1471                           infoCategoryWriter.catBodyInfo.relocTemplate);
1472   }
1473 
1474   emitAndLinkPointerList(metaRo, roClassLayout.baseMethodsOffset, extInfo,
1475                          extInfo.classMethods);
1476   emitAndLinkPointerList(classRo, roClassLayout.baseMethodsOffset, extInfo,
1477                          extInfo.instanceMethods);
1478 
1479   emitAndLinkPointerList(metaRo, roClassLayout.basePropertiesOffset, extInfo,
1480                          extInfo.classProps);
1481 
1482   emitAndLinkPointerList(classRo, roClassLayout.basePropertiesOffset, extInfo,
1483                          extInfo.instanceProps);
1484 
1485   // Mark all the categories as merged - this will be used to erase them later
1486   for (auto &catInfo : categories)
1487     catInfo.wasMerged = true;
1488 
1489   return true;
1490 }
1491 
1492 // Erase the symbol at a given offset in an InputSection
1493 void ObjcCategoryMerger::eraseSymbolAtIsecOffset(ConcatInputSection *isec,
1494                                                  uint32_t offset) {
1495   Defined *sym = tryGetDefinedAtIsecOffset(isec, offset);
1496   if (!sym)
1497     return;
1498 
1499   // Remove the symbol from isec->symbols
1500   assert(isa<Defined>(sym) && "Can only erase a Defined");
1501   llvm::erase(isec->symbols, sym);
1502 
1503   // Remove the relocs that refer to this symbol
1504   auto removeAtOff = [offset](Reloc const &r) { return r.offset == offset; };
1505   llvm::erase_if(isec->relocs, removeAtOff);
1506 
1507   // Now, if the symbol fully occupies a ConcatInputSection, we can also erase
1508   // the whole ConcatInputSection
1509   if (ConcatInputSection *cisec = dyn_cast<ConcatInputSection>(sym->isec()))
1510     if (cisec->data.size() == sym->size)
1511       eraseISec(cisec);
1512 }
1513