1 //===- ObjC.cpp -----------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "ObjC.h" 10 #include "ConcatOutputSection.h" 11 #include "InputFiles.h" 12 #include "InputSection.h" 13 #include "Layout.h" 14 #include "OutputSegment.h" 15 #include "SyntheticSections.h" 16 #include "Target.h" 17 18 #include "lld/Common/ErrorHandler.h" 19 #include "llvm/ADT/DenseMap.h" 20 #include "llvm/BinaryFormat/MachO.h" 21 #include "llvm/Bitcode/BitcodeReader.h" 22 #include "llvm/Support/TimeProfiler.h" 23 24 using namespace llvm; 25 using namespace llvm::MachO; 26 using namespace lld; 27 using namespace lld::macho; 28 29 template <class LP> static bool objectHasObjCSection(MemoryBufferRef mb) { 30 using SectionHeader = typename LP::section; 31 32 auto *hdr = 33 reinterpret_cast<const typename LP::mach_header *>(mb.getBufferStart()); 34 if (hdr->magic != LP::magic) 35 return false; 36 37 if (const auto *c = 38 findCommand<typename LP::segment_command>(hdr, LP::segmentLCType)) { 39 auto sectionHeaders = ArrayRef<SectionHeader>{ 40 reinterpret_cast<const SectionHeader *>(c + 1), c->nsects}; 41 for (const SectionHeader &secHead : sectionHeaders) { 42 StringRef sectname(secHead.sectname, 43 strnlen(secHead.sectname, sizeof(secHead.sectname))); 44 StringRef segname(secHead.segname, 45 strnlen(secHead.segname, sizeof(secHead.segname))); 46 if ((segname == segment_names::data && 47 sectname == section_names::objcCatList) || 48 (segname == segment_names::text && 49 sectname.starts_with(section_names::swift))) { 50 return true; 51 } 52 } 53 } 54 return false; 55 } 56 57 static bool objectHasObjCSection(MemoryBufferRef mb) { 58 if (target->wordSize == 8) 59 return ::objectHasObjCSection<LP64>(mb); 60 else 61 return ::objectHasObjCSection<ILP32>(mb); 62 } 63 64 bool macho::hasObjCSection(MemoryBufferRef mb) { 65 switch (identify_magic(mb.getBuffer())) { 66 case file_magic::macho_object: 67 return objectHasObjCSection(mb); 68 case file_magic::bitcode: 69 return check(isBitcodeContainingObjCCategory(mb)); 70 default: 71 return false; 72 } 73 } 74 75 namespace { 76 77 #define FOR_EACH_CATEGORY_FIELD(DO) \ 78 DO(Ptr, name) \ 79 DO(Ptr, klass) \ 80 DO(Ptr, instanceMethods) \ 81 DO(Ptr, classMethods) \ 82 DO(Ptr, protocols) \ 83 DO(Ptr, instanceProps) \ 84 DO(Ptr, classProps) \ 85 DO(uint32_t, size) 86 87 CREATE_LAYOUT_CLASS(Category, FOR_EACH_CATEGORY_FIELD); 88 89 #undef FOR_EACH_CATEGORY_FIELD 90 91 #define FOR_EACH_CLASS_FIELD(DO) \ 92 DO(Ptr, metaClass) \ 93 DO(Ptr, superClass) \ 94 DO(Ptr, methodCache) \ 95 DO(Ptr, vtable) \ 96 DO(Ptr, roData) 97 98 CREATE_LAYOUT_CLASS(Class, FOR_EACH_CLASS_FIELD); 99 100 #undef FOR_EACH_CLASS_FIELD 101 102 #define FOR_EACH_RO_CLASS_FIELD(DO) \ 103 DO(uint32_t, flags) \ 104 DO(uint32_t, instanceStart) \ 105 DO(Ptr, instanceSize) \ 106 DO(Ptr, ivarLayout) \ 107 DO(Ptr, name) \ 108 DO(Ptr, baseMethods) \ 109 DO(Ptr, baseProtocols) \ 110 DO(Ptr, ivars) \ 111 DO(Ptr, weakIvarLayout) \ 112 DO(Ptr, baseProperties) 113 114 CREATE_LAYOUT_CLASS(ROClass, FOR_EACH_RO_CLASS_FIELD); 115 116 #undef FOR_EACH_RO_CLASS_FIELD 117 118 #define FOR_EACH_LIST_HEADER(DO) \ 119 DO(uint32_t, structSize) \ 120 DO(uint32_t, structCount) 121 122 CREATE_LAYOUT_CLASS(ListHeader, FOR_EACH_LIST_HEADER); 123 124 #undef FOR_EACH_LIST_HEADER 125 126 #define FOR_EACH_PROTOCOL_LIST_HEADER(DO) DO(Ptr, protocolCount) 127 128 CREATE_LAYOUT_CLASS(ProtocolListHeader, FOR_EACH_PROTOCOL_LIST_HEADER); 129 130 #undef FOR_EACH_PROTOCOL_LIST_HEADER 131 132 #define FOR_EACH_METHOD(DO) \ 133 DO(Ptr, name) \ 134 DO(Ptr, type) \ 135 DO(Ptr, impl) 136 137 CREATE_LAYOUT_CLASS(Method, FOR_EACH_METHOD); 138 139 #undef FOR_EACH_METHOD 140 141 enum MethodContainerKind { 142 MCK_Class, 143 MCK_Category, 144 }; 145 146 struct MethodContainer { 147 MethodContainerKind kind; 148 const ConcatInputSection *isec; 149 }; 150 151 enum MethodKind { 152 MK_Instance, 153 MK_Static, 154 }; 155 156 struct ObjcClass { 157 DenseMap<CachedHashStringRef, MethodContainer> instanceMethods; 158 DenseMap<CachedHashStringRef, MethodContainer> classMethods; 159 }; 160 161 } // namespace 162 163 class ObjcCategoryChecker { 164 public: 165 ObjcCategoryChecker(); 166 void parseCategory(const ConcatInputSection *catListIsec); 167 168 private: 169 void parseClass(const Defined *classSym); 170 void parseMethods(const ConcatInputSection *methodsIsec, 171 const Symbol *methodContainer, 172 const ConcatInputSection *containerIsec, 173 MethodContainerKind, MethodKind); 174 175 CategoryLayout catLayout; 176 ClassLayout classLayout; 177 ROClassLayout roClassLayout; 178 ListHeaderLayout listHeaderLayout; 179 MethodLayout methodLayout; 180 181 DenseMap<const Symbol *, ObjcClass> classMap; 182 }; 183 184 ObjcCategoryChecker::ObjcCategoryChecker() 185 : catLayout(target->wordSize), classLayout(target->wordSize), 186 roClassLayout(target->wordSize), listHeaderLayout(target->wordSize), 187 methodLayout(target->wordSize) {} 188 189 void ObjcCategoryChecker::parseMethods(const ConcatInputSection *methodsIsec, 190 const Symbol *methodContainerSym, 191 const ConcatInputSection *containerIsec, 192 MethodContainerKind mcKind, 193 MethodKind mKind) { 194 ObjcClass &klass = classMap[methodContainerSym]; 195 for (const Reloc &r : methodsIsec->relocs) { 196 if ((r.offset - listHeaderLayout.totalSize) % methodLayout.totalSize != 197 methodLayout.nameOffset) 198 continue; 199 200 CachedHashStringRef methodName(r.getReferentString()); 201 // +load methods are special: all implementations are called by the runtime 202 // even if they are part of the same class. Thus there is no need to check 203 // for duplicates. 204 // NOTE: Instead of specifically checking for this method name, ld64 simply 205 // checks whether a class / category is present in __objc_nlclslist / 206 // __objc_nlcatlist respectively. This will be the case if the class / 207 // category has a +load method. It skips optimizing the categories if there 208 // are multiple +load methods. Since it does dupe checking as part of the 209 // optimization process, this avoids spurious dupe messages around +load, 210 // but it also means that legit dupe issues for other methods are ignored. 211 if (mKind == MK_Static && methodName.val() == "load") 212 continue; 213 214 auto &methodMap = 215 mKind == MK_Instance ? klass.instanceMethods : klass.classMethods; 216 if (methodMap 217 .try_emplace(methodName, MethodContainer{mcKind, containerIsec}) 218 .second) 219 continue; 220 221 // We have a duplicate; generate a warning message. 222 const auto &mc = methodMap.lookup(methodName); 223 const Reloc *nameReloc = nullptr; 224 if (mc.kind == MCK_Category) { 225 nameReloc = mc.isec->getRelocAt(catLayout.nameOffset); 226 } else { 227 assert(mc.kind == MCK_Class); 228 const auto *roIsec = mc.isec->getRelocAt(classLayout.roDataOffset) 229 ->getReferentInputSection(); 230 nameReloc = roIsec->getRelocAt(roClassLayout.nameOffset); 231 } 232 StringRef containerName = nameReloc->getReferentString(); 233 StringRef methPrefix = mKind == MK_Instance ? "-" : "+"; 234 235 // We should only ever encounter collisions when parsing category methods 236 // (since the Class struct is parsed before any of its categories). 237 assert(mcKind == MCK_Category); 238 StringRef newCatName = 239 containerIsec->getRelocAt(catLayout.nameOffset)->getReferentString(); 240 241 auto formatObjAndSrcFileName = [](const InputSection *section) { 242 lld::macho::InputFile *inputFile = section->getFile(); 243 std::string result = toString(inputFile); 244 245 auto objFile = dyn_cast_or_null<ObjFile>(inputFile); 246 if (objFile && objFile->compileUnit) 247 result += " (" + objFile->sourceFile() + ")"; 248 249 return result; 250 }; 251 252 StringRef containerType = mc.kind == MCK_Category ? "category" : "class"; 253 warn("method '" + methPrefix + methodName.val() + 254 "' has conflicting definitions:\n>>> defined in category " + 255 newCatName + " from " + formatObjAndSrcFileName(containerIsec) + 256 "\n>>> defined in " + containerType + " " + containerName + " from " + 257 formatObjAndSrcFileName(mc.isec)); 258 } 259 } 260 261 void ObjcCategoryChecker::parseCategory(const ConcatInputSection *catIsec) { 262 auto *classReloc = catIsec->getRelocAt(catLayout.klassOffset); 263 if (!classReloc) 264 return; 265 266 auto *classSym = cast<Symbol *>(classReloc->referent); 267 if (auto *d = dyn_cast<Defined>(classSym)) 268 if (!classMap.count(d)) 269 parseClass(d); 270 271 if (const auto *r = catIsec->getRelocAt(catLayout.classMethodsOffset)) { 272 parseMethods(cast<ConcatInputSection>(r->getReferentInputSection()), 273 classSym, catIsec, MCK_Category, MK_Static); 274 } 275 276 if (const auto *r = catIsec->getRelocAt(catLayout.instanceMethodsOffset)) { 277 parseMethods(cast<ConcatInputSection>(r->getReferentInputSection()), 278 classSym, catIsec, MCK_Category, MK_Instance); 279 } 280 } 281 282 void ObjcCategoryChecker::parseClass(const Defined *classSym) { 283 // Given a Class struct, get its corresponding Methods struct 284 auto getMethodsIsec = 285 [&](const InputSection *classIsec) -> ConcatInputSection * { 286 if (const auto *r = classIsec->getRelocAt(classLayout.roDataOffset)) { 287 if (const auto *roIsec = 288 cast_or_null<ConcatInputSection>(r->getReferentInputSection())) { 289 if (const auto *r = 290 roIsec->getRelocAt(roClassLayout.baseMethodsOffset)) { 291 if (auto *methodsIsec = cast_or_null<ConcatInputSection>( 292 r->getReferentInputSection())) 293 return methodsIsec; 294 } 295 } 296 } 297 return nullptr; 298 }; 299 300 const auto *classIsec = cast<ConcatInputSection>(classSym->isec()); 301 302 // Parse instance methods. 303 if (const auto *instanceMethodsIsec = getMethodsIsec(classIsec)) 304 parseMethods(instanceMethodsIsec, classSym, classIsec, MCK_Class, 305 MK_Instance); 306 307 // Class methods are contained in the metaclass. 308 if (const auto *r = classSym->isec()->getRelocAt(classLayout.metaClassOffset)) 309 if (const auto *classMethodsIsec = getMethodsIsec( 310 cast<ConcatInputSection>(r->getReferentInputSection()))) 311 parseMethods(classMethodsIsec, classSym, classIsec, MCK_Class, MK_Static); 312 } 313 314 void objc::checkCategories() { 315 TimeTraceScope timeScope("ObjcCategoryChecker"); 316 317 ObjcCategoryChecker checker; 318 for (const InputSection *isec : inputSections) { 319 if (isec->getName() == section_names::objcCatList) 320 for (const Reloc &r : isec->relocs) { 321 auto *catIsec = cast<ConcatInputSection>(r.getReferentInputSection()); 322 checker.parseCategory(catIsec); 323 } 324 } 325 } 326 327 namespace { 328 329 class ObjcCategoryMerger { 330 // In which language was a particular construct originally defined 331 enum SourceLanguage { Unknown, ObjC, Swift }; 332 333 // Information about an input category 334 struct InfoInputCategory { 335 ConcatInputSection *catListIsec; 336 ConcatInputSection *catBodyIsec; 337 uint32_t offCatListIsec = 0; 338 SourceLanguage sourceLanguage = SourceLanguage::Unknown; 339 340 bool wasMerged = false; 341 }; 342 343 // To write new (merged) categories or classes, we will try make limited 344 // assumptions about the alignment and the sections the various class/category 345 // info are stored in and . So we'll just reuse the same sections and 346 // alignment as already used in existing (input) categories. To do this we 347 // have InfoCategoryWriter which contains the various sections that the 348 // generated categories will be written to. 349 struct InfoWriteSection { 350 bool valid = false; // Data has been successfully collected from input 351 uint32_t align = 0; 352 Section *inputSection; 353 Reloc relocTemplate; 354 OutputSection *outputSection; 355 }; 356 357 struct InfoCategoryWriter { 358 InfoWriteSection catListInfo; 359 InfoWriteSection catBodyInfo; 360 InfoWriteSection catNameInfo; 361 InfoWriteSection catPtrListInfo; 362 }; 363 364 // Information about a pointer list in the original categories or class(method 365 // lists, protocol lists, etc) 366 struct PointerListInfo { 367 PointerListInfo() = default; 368 PointerListInfo(const PointerListInfo &) = default; 369 PointerListInfo(const char *_categoryPrefix, uint32_t _pointersPerStruct) 370 : categoryPrefix(_categoryPrefix), 371 pointersPerStruct(_pointersPerStruct) {} 372 373 inline bool operator==(const PointerListInfo &cmp) const { 374 return pointersPerStruct == cmp.pointersPerStruct && 375 structSize == cmp.structSize && structCount == cmp.structCount && 376 allPtrs == cmp.allPtrs; 377 } 378 379 const char *categoryPrefix; 380 381 uint32_t pointersPerStruct = 0; 382 383 uint32_t structSize = 0; 384 uint32_t structCount = 0; 385 386 std::vector<Symbol *> allPtrs; 387 }; 388 389 // Full information describing an ObjC class . This will include all the 390 // additional methods, protocols, and properties that are contained in the 391 // class and all the categories that extend a particular class. 392 struct ClassExtensionInfo { 393 ClassExtensionInfo(CategoryLayout &_catLayout) : catLayout(_catLayout){}; 394 395 // Merged names of containers. Ex: base|firstCategory|secondCategory|... 396 std::string mergedContainerName; 397 std::string baseClassName; 398 const Symbol *baseClass = nullptr; 399 SourceLanguage baseClassSourceLanguage = SourceLanguage::Unknown; 400 401 CategoryLayout &catLayout; 402 403 // In case we generate new data, mark the new data as belonging to this file 404 ObjFile *objFileForMergeData = nullptr; 405 406 PointerListInfo instanceMethods = {objc::symbol_names::instanceMethods, 407 /*pointersPerStruct=*/3}; 408 PointerListInfo classMethods = {objc::symbol_names::categoryClassMethods, 409 /*pointersPerStruct=*/3}; 410 PointerListInfo protocols = {objc::symbol_names::categoryProtocols, 411 /*pointersPerStruct=*/0}; 412 PointerListInfo instanceProps = {objc::symbol_names::listProprieties, 413 /*pointersPerStruct=*/2}; 414 PointerListInfo classProps = {objc::symbol_names::klassPropList, 415 /*pointersPerStruct=*/2}; 416 }; 417 418 public: 419 ObjcCategoryMerger(std::vector<ConcatInputSection *> &_allInputSections); 420 void doMerge(); 421 static void doCleanup(); 422 423 private: 424 DenseSet<const Symbol *> collectNlCategories(); 425 void collectAndValidateCategoriesData(); 426 bool 427 mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> &categories); 428 429 void eraseISec(ConcatInputSection *isec); 430 void eraseMergedCategories(); 431 432 void generateCatListForNonErasedCategories( 433 MapVector<ConcatInputSection *, std::set<uint64_t>> 434 catListToErasedOffsets); 435 void collectSectionWriteInfoFromIsec(const InputSection *isec, 436 InfoWriteSection &catWriteInfo); 437 bool collectCategoryWriterInfoFromCategory(const InfoInputCategory &catInfo); 438 bool parseCatInfoToExtInfo(const InfoInputCategory &catInfo, 439 ClassExtensionInfo &extInfo); 440 441 void parseProtocolListInfo(const ConcatInputSection *isec, uint32_t secOffset, 442 PointerListInfo &ptrList, 443 SourceLanguage sourceLang); 444 445 PointerListInfo parseProtocolListInfo(const ConcatInputSection *isec, 446 uint32_t secOffset, 447 SourceLanguage sourceLang); 448 449 bool parsePointerListInfo(const ConcatInputSection *isec, uint32_t secOffset, 450 PointerListInfo &ptrList); 451 452 void emitAndLinkPointerList(Defined *parentSym, uint32_t linkAtOffset, 453 const ClassExtensionInfo &extInfo, 454 const PointerListInfo &ptrList); 455 456 Defined *emitAndLinkProtocolList(Defined *parentSym, uint32_t linkAtOffset, 457 const ClassExtensionInfo &extInfo, 458 const PointerListInfo &ptrList); 459 460 Defined *emitCategory(const ClassExtensionInfo &extInfo); 461 Defined *emitCatListEntrySec(const std::string &forCategoryName, 462 const std::string &forBaseClassName, 463 ObjFile *objFile); 464 Defined *emitCategoryBody(const std::string &name, const Defined *nameSym, 465 const Symbol *baseClassSym, 466 const std::string &baseClassName, ObjFile *objFile); 467 Defined *emitCategoryName(const std::string &name, ObjFile *objFile); 468 void createSymbolReference(Defined *refFrom, const Symbol *refTo, 469 uint32_t offset, const Reloc &relocTemplate); 470 Defined *tryFindDefinedOnIsec(const InputSection *isec, uint32_t offset); 471 Symbol *tryGetSymbolAtIsecOffset(const ConcatInputSection *isec, 472 uint32_t offset); 473 Defined *tryGetDefinedAtIsecOffset(const ConcatInputSection *isec, 474 uint32_t offset); 475 Defined *getClassRo(const Defined *classSym, bool getMetaRo); 476 SourceLanguage getClassSymSourceLang(const Defined *classSym); 477 bool mergeCategoriesIntoBaseClass(const Defined *baseClass, 478 std::vector<InfoInputCategory> &categories); 479 void eraseSymbolAtIsecOffset(ConcatInputSection *isec, uint32_t offset); 480 void tryEraseDefinedAtIsecOffset(const ConcatInputSection *isec, 481 uint32_t offset); 482 483 // Allocate a null-terminated StringRef backed by generatedSectionData 484 StringRef newStringData(const char *str); 485 // Allocate section data, backed by generatedSectionData 486 SmallVector<uint8_t> &newSectionData(uint32_t size); 487 488 CategoryLayout catLayout; 489 ClassLayout classLayout; 490 ROClassLayout roClassLayout; 491 ListHeaderLayout listHeaderLayout; 492 MethodLayout methodLayout; 493 ProtocolListHeaderLayout protocolListHeaderLayout; 494 495 InfoCategoryWriter infoCategoryWriter; 496 std::vector<ConcatInputSection *> &allInputSections; 497 // Map of base class Symbol to list of InfoInputCategory's for it 498 MapVector<const Symbol *, std::vector<InfoInputCategory>> categoryMap; 499 500 // Normally, the binary data comes from the input files, but since we're 501 // generating binary data ourselves, we use the below array to store it in. 502 // Need this to be 'static' so the data survives past the ObjcCategoryMerger 503 // object, as the data will be read by the Writer when the final binary is 504 // generated. 505 static SmallVector<std::unique_ptr<SmallVector<uint8_t>>> 506 generatedSectionData; 507 }; 508 509 SmallVector<std::unique_ptr<SmallVector<uint8_t>>> 510 ObjcCategoryMerger::generatedSectionData; 511 512 ObjcCategoryMerger::ObjcCategoryMerger( 513 std::vector<ConcatInputSection *> &_allInputSections) 514 : catLayout(target->wordSize), classLayout(target->wordSize), 515 roClassLayout(target->wordSize), listHeaderLayout(target->wordSize), 516 methodLayout(target->wordSize), 517 protocolListHeaderLayout(target->wordSize), 518 allInputSections(_allInputSections) {} 519 520 void ObjcCategoryMerger::collectSectionWriteInfoFromIsec( 521 const InputSection *isec, InfoWriteSection &catWriteInfo) { 522 523 catWriteInfo.inputSection = const_cast<Section *>(&isec->section); 524 catWriteInfo.align = isec->align; 525 catWriteInfo.outputSection = isec->parent; 526 527 assert(catWriteInfo.outputSection && 528 "outputSection may not be null in collectSectionWriteInfoFromIsec."); 529 530 if (isec->relocs.size()) 531 catWriteInfo.relocTemplate = isec->relocs[0]; 532 533 catWriteInfo.valid = true; 534 } 535 536 Symbol * 537 ObjcCategoryMerger::tryGetSymbolAtIsecOffset(const ConcatInputSection *isec, 538 uint32_t offset) { 539 if (!isec) 540 return nullptr; 541 const Reloc *reloc = isec->getRelocAt(offset); 542 543 if (!reloc) 544 return nullptr; 545 546 Symbol *sym = dyn_cast_if_present<Symbol *>(reloc->referent); 547 548 if (reloc->addend && sym) { 549 assert(isa<Defined>(sym) && "Expected defined for non-zero addend"); 550 Defined *definedSym = cast<Defined>(sym); 551 sym = tryFindDefinedOnIsec(definedSym->isec(), 552 definedSym->value + reloc->addend); 553 } 554 555 return sym; 556 } 557 558 Defined *ObjcCategoryMerger::tryFindDefinedOnIsec(const InputSection *isec, 559 uint32_t offset) { 560 for (Defined *sym : isec->symbols) 561 if ((sym->value <= offset) && (sym->value + sym->size > offset)) 562 return sym; 563 564 return nullptr; 565 } 566 567 Defined * 568 ObjcCategoryMerger::tryGetDefinedAtIsecOffset(const ConcatInputSection *isec, 569 uint32_t offset) { 570 Symbol *sym = tryGetSymbolAtIsecOffset(isec, offset); 571 return dyn_cast_or_null<Defined>(sym); 572 } 573 574 // Get the class's ro_data symbol. If getMetaRo is true, then we will return 575 // the meta-class's ro_data symbol. Otherwise, we will return the class 576 // (instance) ro_data symbol. 577 Defined *ObjcCategoryMerger::getClassRo(const Defined *classSym, 578 bool getMetaRo) { 579 ConcatInputSection *isec = dyn_cast<ConcatInputSection>(classSym->isec()); 580 if (!isec) 581 return nullptr; 582 583 if (!getMetaRo) 584 return tryGetDefinedAtIsecOffset(isec, classLayout.roDataOffset + 585 classSym->value); 586 587 Defined *metaClass = tryGetDefinedAtIsecOffset( 588 isec, classLayout.metaClassOffset + classSym->value); 589 if (!metaClass) 590 return nullptr; 591 592 return tryGetDefinedAtIsecOffset( 593 dyn_cast<ConcatInputSection>(metaClass->isec()), 594 classLayout.roDataOffset); 595 } 596 597 // Given an ConcatInputSection or CStringInputSection and an offset, if there is 598 // a symbol(Defined) at that offset, then erase the symbol (mark it not live) 599 void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset( 600 const ConcatInputSection *isec, uint32_t offset) { 601 const Reloc *reloc = isec->getRelocAt(offset); 602 603 if (!reloc) 604 return; 605 606 Defined *sym = dyn_cast_or_null<Defined>(cast<Symbol *>(reloc->referent)); 607 if (!sym) 608 return; 609 610 if (auto *cisec = dyn_cast_or_null<ConcatInputSection>(sym->isec())) 611 eraseISec(cisec); 612 else if (auto *csisec = dyn_cast_or_null<CStringInputSection>(sym->isec())) { 613 uint32_t totalOffset = sym->value + reloc->addend; 614 StringPiece &piece = csisec->getStringPiece(totalOffset); 615 piece.live = false; 616 } else { 617 llvm_unreachable("erased symbol has to be Defined or CStringInputSection"); 618 } 619 } 620 621 bool ObjcCategoryMerger::collectCategoryWriterInfoFromCategory( 622 const InfoInputCategory &catInfo) { 623 624 if (!infoCategoryWriter.catListInfo.valid) 625 collectSectionWriteInfoFromIsec(catInfo.catListIsec, 626 infoCategoryWriter.catListInfo); 627 if (!infoCategoryWriter.catBodyInfo.valid) 628 collectSectionWriteInfoFromIsec(catInfo.catBodyIsec, 629 infoCategoryWriter.catBodyInfo); 630 631 if (!infoCategoryWriter.catNameInfo.valid) { 632 lld::macho::Defined *catNameSym = 633 tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, catLayout.nameOffset); 634 635 if (!catNameSym) { 636 // This is an unhandeled case where the category name is not a symbol but 637 // instead points to an CStringInputSection (that doesn't have any symbol) 638 // TODO: Find a small repro and either fix or add a test case for this 639 // scenario 640 return false; 641 } 642 643 collectSectionWriteInfoFromIsec(catNameSym->isec(), 644 infoCategoryWriter.catNameInfo); 645 } 646 647 // Collect writer info from all the category lists (we're assuming they all 648 // would provide the same info) 649 if (!infoCategoryWriter.catPtrListInfo.valid) { 650 for (uint32_t off = catLayout.instanceMethodsOffset; 651 off <= catLayout.classPropsOffset; off += target->wordSize) { 652 if (Defined *ptrList = 653 tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, off)) { 654 collectSectionWriteInfoFromIsec(ptrList->isec(), 655 infoCategoryWriter.catPtrListInfo); 656 // we've successfully collected data, so we can break 657 break; 658 } 659 } 660 } 661 662 return true; 663 } 664 665 // Parse a protocol list that might be linked to ConcatInputSection at a given 666 // offset. The format of the protocol list is different than other lists (prop 667 // lists, method lists) so we need to parse it differently 668 void ObjcCategoryMerger::parseProtocolListInfo( 669 const ConcatInputSection *isec, uint32_t secOffset, 670 PointerListInfo &ptrList, [[maybe_unused]] SourceLanguage sourceLang) { 671 assert((isec && (secOffset + target->wordSize <= isec->data.size())) && 672 "Tried to read pointer list beyond protocol section end"); 673 674 const Reloc *reloc = isec->getRelocAt(secOffset); 675 if (!reloc) 676 return; 677 678 auto *ptrListSym = dyn_cast_or_null<Defined>(cast<Symbol *>(reloc->referent)); 679 assert(ptrListSym && "Protocol list reloc does not have a valid Defined"); 680 681 // Theoretically protocol count can be either 32b or 64b, depending on 682 // platform pointer size, but to simplify implementation we always just read 683 // the lower 32b which should be good enough. 684 uint32_t protocolCount = *reinterpret_cast<const uint32_t *>( 685 ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset); 686 687 ptrList.structCount += protocolCount; 688 ptrList.structSize = target->wordSize; 689 690 [[maybe_unused]] uint32_t expectedListSize = 691 (protocolCount * target->wordSize) + 692 /*header(count)*/ protocolListHeaderLayout.totalSize + 693 /*extra null value*/ target->wordSize; 694 695 // On Swift, the protocol list does not have the extra (unnecessary) null 696 [[maybe_unused]] uint32_t expectedListSizeSwift = 697 expectedListSize - target->wordSize; 698 699 assert(((expectedListSize == ptrListSym->isec()->data.size() && 700 sourceLang == SourceLanguage::ObjC) || 701 (expectedListSizeSwift == ptrListSym->isec()->data.size() && 702 sourceLang == SourceLanguage::Swift)) && 703 "Protocol list does not match expected size"); 704 705 uint32_t off = protocolListHeaderLayout.totalSize; 706 for (uint32_t inx = 0; inx < protocolCount; ++inx) { 707 const Reloc *reloc = ptrListSym->isec()->getRelocAt(off); 708 assert(reloc && "No reloc found at protocol list offset"); 709 710 auto *listSym = dyn_cast_or_null<Defined>(cast<Symbol *>(reloc->referent)); 711 assert(listSym && "Protocol list reloc does not have a valid Defined"); 712 713 ptrList.allPtrs.push_back(listSym); 714 off += target->wordSize; 715 } 716 assert((ptrListSym->isec()->getRelocAt(off) == nullptr) && 717 "expected null terminating protocol"); 718 assert(off + /*extra null value*/ target->wordSize == expectedListSize && 719 "Protocol list end offset does not match expected size"); 720 } 721 722 // Parse a protocol list and return the PointerListInfo for it 723 ObjcCategoryMerger::PointerListInfo 724 ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec, 725 uint32_t secOffset, 726 SourceLanguage sourceLang) { 727 PointerListInfo ptrList; 728 parseProtocolListInfo(isec, secOffset, ptrList, sourceLang); 729 return ptrList; 730 } 731 732 // Parse a pointer list that might be linked to ConcatInputSection at a given 733 // offset. This can be used for instance methods, class methods, instance props 734 // and class props since they have the same format. 735 bool ObjcCategoryMerger::parsePointerListInfo(const ConcatInputSection *isec, 736 uint32_t secOffset, 737 PointerListInfo &ptrList) { 738 assert(ptrList.pointersPerStruct == 2 || ptrList.pointersPerStruct == 3); 739 assert(isec && "Trying to parse pointer list from null isec"); 740 assert(secOffset + target->wordSize <= isec->data.size() && 741 "Trying to read pointer list beyond section end"); 742 743 const Reloc *reloc = isec->getRelocAt(secOffset); 744 // Empty list is a valid case, return true. 745 if (!reloc) 746 return true; 747 748 auto *ptrListSym = dyn_cast_or_null<Defined>(cast<Symbol *>(reloc->referent)); 749 assert(ptrListSym && "Reloc does not have a valid Defined"); 750 751 uint32_t thisStructSize = *reinterpret_cast<const uint32_t *>( 752 ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset); 753 uint32_t thisStructCount = *reinterpret_cast<const uint32_t *>( 754 ptrListSym->isec()->data.data() + listHeaderLayout.structCountOffset); 755 assert(thisStructSize == ptrList.pointersPerStruct * target->wordSize); 756 757 assert(!ptrList.structSize || (thisStructSize == ptrList.structSize)); 758 759 ptrList.structCount += thisStructCount; 760 ptrList.structSize = thisStructSize; 761 762 uint32_t expectedListSize = 763 listHeaderLayout.totalSize + (thisStructSize * thisStructCount); 764 assert(expectedListSize == ptrListSym->isec()->data.size() && 765 "Pointer list does not match expected size"); 766 767 for (uint32_t off = listHeaderLayout.totalSize; off < expectedListSize; 768 off += target->wordSize) { 769 const Reloc *reloc = ptrListSym->isec()->getRelocAt(off); 770 assert(reloc && "No reloc found at pointer list offset"); 771 772 auto *listSym = 773 dyn_cast_or_null<Defined>(reloc->referent.dyn_cast<Symbol *>()); 774 // Sometimes, the reloc points to a StringPiece (InputSection + addend) 775 // instead of a symbol. 776 // TODO: Skip these cases for now, but we should fix this. 777 if (!listSym) 778 return false; 779 780 ptrList.allPtrs.push_back(listSym); 781 } 782 783 return true; 784 } 785 786 // Here we parse all the information of an input category (catInfo) and 787 // append the parsed info into the structure which will contain all the 788 // information about how a class is extended (extInfo) 789 bool ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo, 790 ClassExtensionInfo &extInfo) { 791 const Reloc *catNameReloc = 792 catInfo.catBodyIsec->getRelocAt(catLayout.nameOffset); 793 794 // Parse name 795 assert(catNameReloc && "Category does not have a reloc at 'nameOffset'"); 796 797 // is this the first category we are parsing? 798 if (extInfo.mergedContainerName.empty()) 799 extInfo.objFileForMergeData = 800 dyn_cast_or_null<ObjFile>(catInfo.catBodyIsec->getFile()); 801 else 802 extInfo.mergedContainerName += "|"; 803 804 assert(extInfo.objFileForMergeData && 805 "Expected to already have valid objextInfo.objFileForMergeData"); 806 807 StringRef catName = catNameReloc->getReferentString(); 808 extInfo.mergedContainerName += catName.str(); 809 810 // Parse base class 811 if (!extInfo.baseClass) { 812 Symbol *classSym = 813 tryGetSymbolAtIsecOffset(catInfo.catBodyIsec, catLayout.klassOffset); 814 assert(extInfo.baseClassName.empty()); 815 extInfo.baseClass = classSym; 816 llvm::StringRef classPrefix(objc::symbol_names::klass); 817 assert(classSym->getName().starts_with(classPrefix) && 818 "Base class symbol does not start with expected prefix"); 819 extInfo.baseClassName = classSym->getName().substr(classPrefix.size()); 820 } else { 821 assert((extInfo.baseClass == 822 tryGetSymbolAtIsecOffset(catInfo.catBodyIsec, 823 catLayout.klassOffset)) && 824 "Trying to parse category info into container with different base " 825 "class"); 826 } 827 828 if (!parsePointerListInfo(catInfo.catBodyIsec, 829 catLayout.instanceMethodsOffset, 830 extInfo.instanceMethods)) 831 return false; 832 833 if (!parsePointerListInfo(catInfo.catBodyIsec, catLayout.classMethodsOffset, 834 extInfo.classMethods)) 835 return false; 836 837 parseProtocolListInfo(catInfo.catBodyIsec, catLayout.protocolsOffset, 838 extInfo.protocols, catInfo.sourceLanguage); 839 840 if (!parsePointerListInfo(catInfo.catBodyIsec, catLayout.instancePropsOffset, 841 extInfo.instanceProps)) 842 return false; 843 844 if (!parsePointerListInfo(catInfo.catBodyIsec, catLayout.classPropsOffset, 845 extInfo.classProps)) 846 return false; 847 848 return true; 849 } 850 851 // Generate a protocol list (including header) and link it into the parent at 852 // the specified offset. 853 Defined *ObjcCategoryMerger::emitAndLinkProtocolList( 854 Defined *parentSym, uint32_t linkAtOffset, 855 const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) { 856 if (ptrList.allPtrs.empty()) 857 return nullptr; 858 859 assert(ptrList.allPtrs.size() == ptrList.structCount); 860 861 uint32_t bodySize = (ptrList.structCount * target->wordSize) + 862 /*header(count)*/ protocolListHeaderLayout.totalSize + 863 /*extra null value*/ target->wordSize; 864 llvm::ArrayRef<uint8_t> bodyData = newSectionData(bodySize); 865 866 // This theoretically can be either 32b or 64b, but writing just the first 32b 867 // is good enough 868 const uint32_t *ptrProtoCount = reinterpret_cast<const uint32_t *>( 869 bodyData.data() + protocolListHeaderLayout.protocolCountOffset); 870 871 *const_cast<uint32_t *>(ptrProtoCount) = ptrList.allPtrs.size(); 872 873 ConcatInputSection *listSec = make<ConcatInputSection>( 874 *infoCategoryWriter.catPtrListInfo.inputSection, bodyData, 875 infoCategoryWriter.catPtrListInfo.align); 876 listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection; 877 listSec->live = true; 878 879 listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection; 880 881 std::string symName = ptrList.categoryPrefix; 882 symName += extInfo.baseClassName + "(" + extInfo.mergedContainerName + ")"; 883 884 Defined *ptrListSym = make<Defined>( 885 newStringData(symName.c_str()), /*file=*/parentSym->getObjectFile(), 886 listSec, /*value=*/0, bodyData.size(), /*isWeakDef=*/false, 887 /*isExternal=*/false, /*isPrivateExtern=*/false, /*includeInSymtab=*/true, 888 /*isReferencedDynamically=*/false, /*noDeadStrip=*/false, 889 /*isWeakDefCanBeHidden=*/false); 890 891 ptrListSym->used = true; 892 parentSym->getObjectFile()->symbols.push_back(ptrListSym); 893 addInputSection(listSec); 894 895 createSymbolReference(parentSym, ptrListSym, linkAtOffset, 896 infoCategoryWriter.catBodyInfo.relocTemplate); 897 898 uint32_t offset = protocolListHeaderLayout.totalSize; 899 for (Symbol *symbol : ptrList.allPtrs) { 900 createSymbolReference(ptrListSym, symbol, offset, 901 infoCategoryWriter.catPtrListInfo.relocTemplate); 902 offset += target->wordSize; 903 } 904 905 return ptrListSym; 906 } 907 908 // Generate a pointer list (including header) and link it into the parent at the 909 // specified offset. This is used for instance and class methods and 910 // proprieties. 911 void ObjcCategoryMerger::emitAndLinkPointerList( 912 Defined *parentSym, uint32_t linkAtOffset, 913 const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) { 914 if (ptrList.allPtrs.empty()) 915 return; 916 917 assert(ptrList.allPtrs.size() * target->wordSize == 918 ptrList.structCount * ptrList.structSize); 919 920 // Generate body 921 uint32_t bodySize = 922 listHeaderLayout.totalSize + (ptrList.structSize * ptrList.structCount); 923 llvm::ArrayRef<uint8_t> bodyData = newSectionData(bodySize); 924 925 const uint32_t *ptrStructSize = reinterpret_cast<const uint32_t *>( 926 bodyData.data() + listHeaderLayout.structSizeOffset); 927 const uint32_t *ptrStructCount = reinterpret_cast<const uint32_t *>( 928 bodyData.data() + listHeaderLayout.structCountOffset); 929 930 *const_cast<uint32_t *>(ptrStructSize) = ptrList.structSize; 931 *const_cast<uint32_t *>(ptrStructCount) = ptrList.structCount; 932 933 ConcatInputSection *listSec = make<ConcatInputSection>( 934 *infoCategoryWriter.catPtrListInfo.inputSection, bodyData, 935 infoCategoryWriter.catPtrListInfo.align); 936 listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection; 937 listSec->live = true; 938 939 listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection; 940 941 std::string symName = ptrList.categoryPrefix; 942 symName += extInfo.baseClassName + "(" + extInfo.mergedContainerName + ")"; 943 944 Defined *ptrListSym = make<Defined>( 945 newStringData(symName.c_str()), /*file=*/parentSym->getObjectFile(), 946 listSec, /*value=*/0, bodyData.size(), /*isWeakDef=*/false, 947 /*isExternal=*/false, /*isPrivateExtern=*/false, /*includeInSymtab=*/true, 948 /*isReferencedDynamically=*/false, /*noDeadStrip=*/false, 949 /*isWeakDefCanBeHidden=*/false); 950 951 ptrListSym->used = true; 952 parentSym->getObjectFile()->symbols.push_back(ptrListSym); 953 addInputSection(listSec); 954 955 createSymbolReference(parentSym, ptrListSym, linkAtOffset, 956 infoCategoryWriter.catBodyInfo.relocTemplate); 957 958 uint32_t offset = listHeaderLayout.totalSize; 959 for (Symbol *symbol : ptrList.allPtrs) { 960 createSymbolReference(ptrListSym, symbol, offset, 961 infoCategoryWriter.catPtrListInfo.relocTemplate); 962 offset += target->wordSize; 963 } 964 } 965 966 // This method creates an __objc_catlist ConcatInputSection with a single slot 967 Defined * 968 ObjcCategoryMerger::emitCatListEntrySec(const std::string &forCategoryName, 969 const std::string &forBaseClassName, 970 ObjFile *objFile) { 971 uint32_t sectionSize = target->wordSize; 972 llvm::ArrayRef<uint8_t> bodyData = newSectionData(sectionSize); 973 974 ConcatInputSection *newCatList = 975 make<ConcatInputSection>(*infoCategoryWriter.catListInfo.inputSection, 976 bodyData, infoCategoryWriter.catListInfo.align); 977 newCatList->parent = infoCategoryWriter.catListInfo.outputSection; 978 newCatList->live = true; 979 980 newCatList->parent = infoCategoryWriter.catListInfo.outputSection; 981 982 std::string catSymName = "<__objc_catlist slot for merged category "; 983 catSymName += forBaseClassName + "(" + forCategoryName + ")>"; 984 985 Defined *catListSym = make<Defined>( 986 newStringData(catSymName.c_str()), /*file=*/objFile, newCatList, 987 /*value=*/0, bodyData.size(), /*isWeakDef=*/false, /*isExternal=*/false, 988 /*isPrivateExtern=*/false, /*includeInSymtab=*/false, 989 /*isReferencedDynamically=*/false, /*noDeadStrip=*/false, 990 /*isWeakDefCanBeHidden=*/false); 991 992 catListSym->used = true; 993 objFile->symbols.push_back(catListSym); 994 addInputSection(newCatList); 995 return catListSym; 996 } 997 998 // Here we generate the main category body and link the name and base class into 999 // it. We don't link any other info yet like the protocol and class/instance 1000 // methods/props. 1001 Defined *ObjcCategoryMerger::emitCategoryBody(const std::string &name, 1002 const Defined *nameSym, 1003 const Symbol *baseClassSym, 1004 const std::string &baseClassName, 1005 ObjFile *objFile) { 1006 llvm::ArrayRef<uint8_t> bodyData = newSectionData(catLayout.totalSize); 1007 1008 uint32_t *ptrSize = (uint32_t *)(const_cast<uint8_t *>(bodyData.data()) + 1009 catLayout.sizeOffset); 1010 *ptrSize = catLayout.totalSize; 1011 1012 ConcatInputSection *newBodySec = 1013 make<ConcatInputSection>(*infoCategoryWriter.catBodyInfo.inputSection, 1014 bodyData, infoCategoryWriter.catBodyInfo.align); 1015 newBodySec->parent = infoCategoryWriter.catBodyInfo.outputSection; 1016 newBodySec->live = true; 1017 1018 std::string symName = 1019 objc::symbol_names::category + baseClassName + "(" + name + ")"; 1020 Defined *catBodySym = make<Defined>( 1021 newStringData(symName.c_str()), /*file=*/objFile, newBodySec, 1022 /*value=*/0, bodyData.size(), /*isWeakDef=*/false, /*isExternal=*/false, 1023 /*isPrivateExtern=*/false, /*includeInSymtab=*/true, 1024 /*isReferencedDynamically=*/false, /*noDeadStrip=*/false, 1025 /*isWeakDefCanBeHidden=*/false); 1026 1027 catBodySym->used = true; 1028 objFile->symbols.push_back(catBodySym); 1029 addInputSection(newBodySec); 1030 1031 createSymbolReference(catBodySym, nameSym, catLayout.nameOffset, 1032 infoCategoryWriter.catBodyInfo.relocTemplate); 1033 1034 // Create a reloc to the base class (either external or internal) 1035 createSymbolReference(catBodySym, baseClassSym, catLayout.klassOffset, 1036 infoCategoryWriter.catBodyInfo.relocTemplate); 1037 1038 return catBodySym; 1039 } 1040 1041 // This writes the new category name (for the merged category) into the binary 1042 // and returns the sybmol for it. 1043 Defined *ObjcCategoryMerger::emitCategoryName(const std::string &name, 1044 ObjFile *objFile) { 1045 StringRef nameStrData = newStringData(name.c_str()); 1046 // We use +1 below to include the null terminator 1047 llvm::ArrayRef<uint8_t> nameData( 1048 reinterpret_cast<const uint8_t *>(nameStrData.data()), 1049 nameStrData.size() + 1); 1050 1051 auto *parentSection = infoCategoryWriter.catNameInfo.inputSection; 1052 CStringInputSection *newStringSec = make<CStringInputSection>( 1053 *infoCategoryWriter.catNameInfo.inputSection, nameData, 1054 infoCategoryWriter.catNameInfo.align, /*dedupLiterals=*/true); 1055 1056 parentSection->subsections.push_back({0, newStringSec}); 1057 1058 newStringSec->splitIntoPieces(); 1059 newStringSec->pieces[0].live = true; 1060 newStringSec->parent = infoCategoryWriter.catNameInfo.outputSection; 1061 in.cStringSection->addInput(newStringSec); 1062 assert(newStringSec->pieces.size() == 1); 1063 1064 Defined *catNameSym = make<Defined>( 1065 "<merged category name>", /*file=*/objFile, newStringSec, 1066 /*value=*/0, nameData.size(), 1067 /*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false, 1068 /*includeInSymtab=*/false, /*isReferencedDynamically=*/false, 1069 /*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false); 1070 1071 catNameSym->used = true; 1072 objFile->symbols.push_back(catNameSym); 1073 return catNameSym; 1074 } 1075 1076 // This method fully creates a new category from the given ClassExtensionInfo. 1077 // It creates the category name, body and method/protocol/prop lists and links 1078 // them all together. Then it creates a new __objc_catlist entry and adds the 1079 // category to it. Calling this method will fully generate a category which will 1080 // be available in the final binary. 1081 Defined *ObjcCategoryMerger::emitCategory(const ClassExtensionInfo &extInfo) { 1082 Defined *catNameSym = emitCategoryName(extInfo.mergedContainerName, 1083 extInfo.objFileForMergeData); 1084 1085 Defined *catBodySym = emitCategoryBody( 1086 extInfo.mergedContainerName, catNameSym, extInfo.baseClass, 1087 extInfo.baseClassName, extInfo.objFileForMergeData); 1088 1089 Defined *catListSym = 1090 emitCatListEntrySec(extInfo.mergedContainerName, extInfo.baseClassName, 1091 extInfo.objFileForMergeData); 1092 1093 // Add the single category body to the category list at the offset 0. 1094 createSymbolReference(catListSym, catBodySym, /*offset=*/0, 1095 infoCategoryWriter.catListInfo.relocTemplate); 1096 1097 emitAndLinkPointerList(catBodySym, catLayout.instanceMethodsOffset, extInfo, 1098 extInfo.instanceMethods); 1099 1100 emitAndLinkPointerList(catBodySym, catLayout.classMethodsOffset, extInfo, 1101 extInfo.classMethods); 1102 1103 emitAndLinkProtocolList(catBodySym, catLayout.protocolsOffset, extInfo, 1104 extInfo.protocols); 1105 1106 emitAndLinkPointerList(catBodySym, catLayout.instancePropsOffset, extInfo, 1107 extInfo.instanceProps); 1108 1109 emitAndLinkPointerList(catBodySym, catLayout.classPropsOffset, extInfo, 1110 extInfo.classProps); 1111 1112 return catBodySym; 1113 } 1114 1115 // This method merges all the categories (sharing a base class) into a single 1116 // category. 1117 bool ObjcCategoryMerger::mergeCategoriesIntoSingleCategory( 1118 std::vector<InfoInputCategory> &categories) { 1119 assert(categories.size() > 1 && "Expected at least 2 categories"); 1120 1121 ClassExtensionInfo extInfo(catLayout); 1122 1123 for (auto &catInfo : categories) 1124 if (!parseCatInfoToExtInfo(catInfo, extInfo)) 1125 return false; 1126 1127 Defined *newCatDef = emitCategory(extInfo); 1128 assert(newCatDef && "Failed to create a new category"); 1129 1130 // Suppress unsuded var warning 1131 (void)newCatDef; 1132 1133 for (auto &catInfo : categories) 1134 catInfo.wasMerged = true; 1135 1136 return true; 1137 } 1138 1139 void ObjcCategoryMerger::createSymbolReference(Defined *refFrom, 1140 const Symbol *refTo, 1141 uint32_t offset, 1142 const Reloc &relocTemplate) { 1143 Reloc r = relocTemplate; 1144 r.offset = offset; 1145 r.addend = 0; 1146 r.referent = const_cast<Symbol *>(refTo); 1147 refFrom->isec()->relocs.push_back(r); 1148 } 1149 1150 // Get the list of categories in the '__objc_nlcatlist' section. We can't 1151 // optimize these as they have a '+load' method that has to be called at 1152 // runtime. 1153 DenseSet<const Symbol *> ObjcCategoryMerger::collectNlCategories() { 1154 DenseSet<const Symbol *> nlCategories; 1155 1156 for (InputSection *sec : allInputSections) { 1157 if (sec->getName() != section_names::objcNonLazyCatList) 1158 continue; 1159 1160 for (auto &r : sec->relocs) { 1161 const Symbol *sym = r.referent.dyn_cast<Symbol *>(); 1162 nlCategories.insert(sym); 1163 } 1164 } 1165 return nlCategories; 1166 } 1167 1168 void ObjcCategoryMerger::collectAndValidateCategoriesData() { 1169 auto nlCategories = collectNlCategories(); 1170 1171 for (InputSection *sec : allInputSections) { 1172 if (sec->getName() != section_names::objcCatList) 1173 continue; 1174 ConcatInputSection *catListCisec = dyn_cast<ConcatInputSection>(sec); 1175 assert(catListCisec && 1176 "__objc_catList InputSection is not a ConcatInputSection"); 1177 1178 for (uint32_t off = 0; off < catListCisec->getSize(); 1179 off += target->wordSize) { 1180 Defined *categorySym = tryGetDefinedAtIsecOffset(catListCisec, off); 1181 assert(categorySym && 1182 "Failed to get a valid category at __objc_catlit offset"); 1183 1184 if (nlCategories.count(categorySym)) 1185 continue; 1186 1187 auto *catBodyIsec = dyn_cast<ConcatInputSection>(categorySym->isec()); 1188 assert(catBodyIsec && 1189 "Category data section is not an ConcatInputSection"); 1190 1191 SourceLanguage eLang = SourceLanguage::Unknown; 1192 if (categorySym->getName().starts_with(objc::symbol_names::category)) 1193 eLang = SourceLanguage::ObjC; 1194 else if (categorySym->getName().starts_with( 1195 objc::symbol_names::swift_objc_category)) 1196 eLang = SourceLanguage::Swift; 1197 else 1198 llvm_unreachable("Unexpected category symbol name"); 1199 1200 InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off, eLang}; 1201 1202 // Check that the category has a reloc at 'klassOffset' (which is 1203 // a pointer to the class symbol) 1204 1205 Symbol *classSym = 1206 tryGetSymbolAtIsecOffset(catBodyIsec, catLayout.klassOffset); 1207 assert(classSym && "Category does not have a valid base class"); 1208 1209 if (!collectCategoryWriterInfoFromCategory(catInputInfo)) 1210 continue; 1211 1212 categoryMap[classSym].push_back(catInputInfo); 1213 } 1214 } 1215 } 1216 1217 // In the input we have multiple __objc_catlist InputSection, each of which may 1218 // contain links to multiple categories. Of these categories, we will merge (and 1219 // erase) only some. There will be some categories that will remain untouched 1220 // (not erased). For these not erased categories, we generate new __objc_catlist 1221 // entries since the parent __objc_catlist entry will be erased 1222 void ObjcCategoryMerger::generateCatListForNonErasedCategories( 1223 const MapVector<ConcatInputSection *, std::set<uint64_t>> 1224 catListToErasedOffsets) { 1225 1226 // Go through all offsets of all __objc_catlist's that we process and if there 1227 // are categories that we didn't process - generate a new __objc_catlist for 1228 // each. 1229 for (auto &mapEntry : catListToErasedOffsets) { 1230 ConcatInputSection *catListIsec = mapEntry.first; 1231 for (uint32_t catListIsecOffset = 0; 1232 catListIsecOffset < catListIsec->data.size(); 1233 catListIsecOffset += target->wordSize) { 1234 // This slot was erased, we can just skip it 1235 if (mapEntry.second.count(catListIsecOffset)) 1236 continue; 1237 1238 Defined *nonErasedCatBody = 1239 tryGetDefinedAtIsecOffset(catListIsec, catListIsecOffset); 1240 assert(nonErasedCatBody && "Failed to relocate non-deleted category"); 1241 1242 // Allocate data for the new __objc_catlist slot 1243 llvm::ArrayRef<uint8_t> bodyData = newSectionData(target->wordSize); 1244 1245 // We mark the __objc_catlist slot as belonging to the same file as the 1246 // category 1247 ObjFile *objFile = dyn_cast<ObjFile>(nonErasedCatBody->getFile()); 1248 1249 ConcatInputSection *listSec = make<ConcatInputSection>( 1250 *infoCategoryWriter.catListInfo.inputSection, bodyData, 1251 infoCategoryWriter.catListInfo.align); 1252 listSec->parent = infoCategoryWriter.catListInfo.outputSection; 1253 listSec->live = true; 1254 1255 std::string slotSymName = "<__objc_catlist slot for category "; 1256 slotSymName += nonErasedCatBody->getName(); 1257 slotSymName += ">"; 1258 1259 Defined *catListSlotSym = make<Defined>( 1260 newStringData(slotSymName.c_str()), /*file=*/objFile, listSec, 1261 /*value=*/0, bodyData.size(), 1262 /*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false, 1263 /*includeInSymtab=*/false, /*isReferencedDynamically=*/false, 1264 /*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false); 1265 1266 catListSlotSym->used = true; 1267 objFile->symbols.push_back(catListSlotSym); 1268 addInputSection(listSec); 1269 1270 // Now link the category body into the newly created slot 1271 createSymbolReference(catListSlotSym, nonErasedCatBody, 0, 1272 infoCategoryWriter.catListInfo.relocTemplate); 1273 } 1274 } 1275 } 1276 1277 void ObjcCategoryMerger::eraseISec(ConcatInputSection *isec) { 1278 isec->live = false; 1279 for (auto &sym : isec->symbols) 1280 sym->used = false; 1281 } 1282 1283 // This fully erases the merged categories, including their body, their names, 1284 // their method/protocol/prop lists and the __objc_catlist entries that link to 1285 // them. 1286 void ObjcCategoryMerger::eraseMergedCategories() { 1287 // Map of InputSection to a set of offsets of the categories that were merged 1288 MapVector<ConcatInputSection *, std::set<uint64_t>> catListToErasedOffsets; 1289 1290 for (auto &mapEntry : categoryMap) { 1291 for (InfoInputCategory &catInfo : mapEntry.second) { 1292 if (catInfo.wasMerged) { 1293 eraseISec(catInfo.catListIsec); 1294 catListToErasedOffsets[catInfo.catListIsec].insert( 1295 catInfo.offCatListIsec); 1296 } 1297 } 1298 } 1299 1300 // If there were categories that we did not erase, we need to generate a new 1301 // __objc_catList that contains only the un-merged categories, and get rid of 1302 // the references to the ones we merged. 1303 generateCatListForNonErasedCategories(catListToErasedOffsets); 1304 1305 // Erase the old method lists & names of the categories that were merged 1306 for (auto &mapEntry : categoryMap) { 1307 for (InfoInputCategory &catInfo : mapEntry.second) { 1308 if (!catInfo.wasMerged) 1309 continue; 1310 1311 eraseISec(catInfo.catBodyIsec); 1312 1313 // We can't erase 'catLayout.nameOffset' for either Swift or ObjC 1314 // categories because the name will sometimes also be used for other 1315 // purposes. 1316 // For Swift, see usages of 'l_.str.11.SimpleClass' in 1317 // objc-category-merging-swift.s 1318 // For ObjC, see usages of 'l_OBJC_CLASS_NAME_.1' in 1319 // objc-category-merging-erase-objc-name-test.s 1320 // TODO: handle the above in a smarter way 1321 1322 tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, 1323 catLayout.instanceMethodsOffset); 1324 tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, 1325 catLayout.classMethodsOffset); 1326 tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, 1327 catLayout.protocolsOffset); 1328 tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, 1329 catLayout.classPropsOffset); 1330 tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, 1331 catLayout.instancePropsOffset); 1332 } 1333 } 1334 } 1335 1336 void ObjcCategoryMerger::doMerge() { 1337 collectAndValidateCategoriesData(); 1338 1339 for (auto &[baseClass, catInfos] : categoryMap) { 1340 bool merged = false; 1341 if (auto *baseClassDef = dyn_cast<Defined>(baseClass)) { 1342 // Merge all categories into the base class 1343 merged = mergeCategoriesIntoBaseClass(baseClassDef, catInfos); 1344 } else if (catInfos.size() > 1) { 1345 // Merge all categories into a new, single category 1346 merged = mergeCategoriesIntoSingleCategory(catInfos); 1347 } 1348 if (!merged) 1349 warn("ObjC category merging skipped for class symbol' " + 1350 baseClass->getName().str() + "'\n"); 1351 } 1352 1353 // Erase all categories that were merged 1354 eraseMergedCategories(); 1355 } 1356 1357 void ObjcCategoryMerger::doCleanup() { generatedSectionData.clear(); } 1358 1359 StringRef ObjcCategoryMerger::newStringData(const char *str) { 1360 uint32_t len = strlen(str); 1361 uint32_t bufSize = len + 1; 1362 SmallVector<uint8_t> &data = newSectionData(bufSize); 1363 char *strData = reinterpret_cast<char *>(data.data()); 1364 // Copy the string chars and null-terminator 1365 memcpy(strData, str, bufSize); 1366 return StringRef(strData, len); 1367 } 1368 1369 SmallVector<uint8_t> &ObjcCategoryMerger::newSectionData(uint32_t size) { 1370 generatedSectionData.push_back( 1371 std::make_unique<SmallVector<uint8_t>>(size, 0)); 1372 return *generatedSectionData.back(); 1373 } 1374 1375 } // namespace 1376 1377 void objc::mergeCategories() { 1378 TimeTraceScope timeScope("ObjcCategoryMerger"); 1379 1380 ObjcCategoryMerger merger(inputSections); 1381 merger.doMerge(); 1382 } 1383 1384 void objc::doCleanup() { ObjcCategoryMerger::doCleanup(); } 1385 1386 ObjcCategoryMerger::SourceLanguage 1387 ObjcCategoryMerger::getClassSymSourceLang(const Defined *classSym) { 1388 if (classSym->getName().starts_with(objc::symbol_names::swift_objc_klass)) 1389 return SourceLanguage::Swift; 1390 1391 // If the symbol name matches the ObjC prefix, we don't necessarely know this 1392 // comes from ObjC, since Swift creates ObjC-like alias symbols for some Swift 1393 // classes. Ex: 1394 // .globl _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass 1395 // .private_extern _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass 1396 // .set _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass, _$s11MyTestClassAACN 1397 // 1398 // So we scan for symbols with the same address and check for the Swift class 1399 if (classSym->getName().starts_with(objc::symbol_names::klass)) { 1400 for (auto &sym : classSym->originalIsec->symbols) 1401 if (sym->value == classSym->value) 1402 if (sym->getName().starts_with(objc::symbol_names::swift_objc_klass)) 1403 return SourceLanguage::Swift; 1404 return SourceLanguage::ObjC; 1405 } 1406 1407 llvm_unreachable("Unexpected class symbol name during category merging"); 1408 } 1409 1410 bool ObjcCategoryMerger::mergeCategoriesIntoBaseClass( 1411 const Defined *baseClass, std::vector<InfoInputCategory> &categories) { 1412 assert(categories.size() >= 1 && "Expected at least one category to merge"); 1413 1414 // Collect all the info from the categories 1415 ClassExtensionInfo extInfo(catLayout); 1416 extInfo.baseClass = baseClass; 1417 extInfo.baseClassSourceLanguage = getClassSymSourceLang(baseClass); 1418 1419 for (auto &catInfo : categories) 1420 if (!parseCatInfoToExtInfo(catInfo, extInfo)) 1421 return false; 1422 1423 // Get metadata for the base class 1424 Defined *metaRo = getClassRo(baseClass, /*getMetaRo=*/true); 1425 ConcatInputSection *metaIsec = dyn_cast<ConcatInputSection>(metaRo->isec()); 1426 Defined *classRo = getClassRo(baseClass, /*getMetaRo=*/false); 1427 ConcatInputSection *classIsec = dyn_cast<ConcatInputSection>(classRo->isec()); 1428 1429 // Now collect the info from the base class from the various lists in the 1430 // class metadata 1431 1432 // Protocol lists are a special case - the same protocol list is in classRo 1433 // and metaRo, so we only need to parse it once 1434 parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset, 1435 extInfo.protocols, extInfo.baseClassSourceLanguage); 1436 1437 // Check that the classRo and metaRo protocol lists are identical 1438 assert(parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset, 1439 extInfo.baseClassSourceLanguage) == 1440 parseProtocolListInfo(metaIsec, roClassLayout.baseProtocolsOffset, 1441 extInfo.baseClassSourceLanguage) && 1442 "Category merger expects classRo and metaRo to have the same protocol " 1443 "list"); 1444 1445 parsePointerListInfo(metaIsec, roClassLayout.baseMethodsOffset, 1446 extInfo.classMethods); 1447 parsePointerListInfo(classIsec, roClassLayout.baseMethodsOffset, 1448 extInfo.instanceMethods); 1449 1450 parsePointerListInfo(metaIsec, roClassLayout.basePropertiesOffset, 1451 extInfo.classProps); 1452 parsePointerListInfo(classIsec, roClassLayout.basePropertiesOffset, 1453 extInfo.instanceProps); 1454 1455 // Erase the old lists - these will be generated and replaced 1456 eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseMethodsOffset); 1457 eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseProtocolsOffset); 1458 eraseSymbolAtIsecOffset(metaIsec, roClassLayout.basePropertiesOffset); 1459 eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseMethodsOffset); 1460 eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseProtocolsOffset); 1461 eraseSymbolAtIsecOffset(classIsec, roClassLayout.basePropertiesOffset); 1462 1463 // Emit the newly merged lists - first into the meta RO then into the class RO 1464 // First we emit and link the protocol list into the meta RO. Then we link it 1465 // in the classRo as well (they're supposed to be identical) 1466 if (Defined *protoListSym = 1467 emitAndLinkProtocolList(metaRo, roClassLayout.baseProtocolsOffset, 1468 extInfo, extInfo.protocols)) { 1469 createSymbolReference(classRo, protoListSym, 1470 roClassLayout.baseProtocolsOffset, 1471 infoCategoryWriter.catBodyInfo.relocTemplate); 1472 } 1473 1474 emitAndLinkPointerList(metaRo, roClassLayout.baseMethodsOffset, extInfo, 1475 extInfo.classMethods); 1476 emitAndLinkPointerList(classRo, roClassLayout.baseMethodsOffset, extInfo, 1477 extInfo.instanceMethods); 1478 1479 emitAndLinkPointerList(metaRo, roClassLayout.basePropertiesOffset, extInfo, 1480 extInfo.classProps); 1481 1482 emitAndLinkPointerList(classRo, roClassLayout.basePropertiesOffset, extInfo, 1483 extInfo.instanceProps); 1484 1485 // Mark all the categories as merged - this will be used to erase them later 1486 for (auto &catInfo : categories) 1487 catInfo.wasMerged = true; 1488 1489 return true; 1490 } 1491 1492 // Erase the symbol at a given offset in an InputSection 1493 void ObjcCategoryMerger::eraseSymbolAtIsecOffset(ConcatInputSection *isec, 1494 uint32_t offset) { 1495 Defined *sym = tryGetDefinedAtIsecOffset(isec, offset); 1496 if (!sym) 1497 return; 1498 1499 // Remove the symbol from isec->symbols 1500 assert(isa<Defined>(sym) && "Can only erase a Defined"); 1501 llvm::erase(isec->symbols, sym); 1502 1503 // Remove the relocs that refer to this symbol 1504 auto removeAtOff = [offset](Reloc const &r) { return r.offset == offset; }; 1505 llvm::erase_if(isec->relocs, removeAtOff); 1506 1507 // Now, if the symbol fully occupies a ConcatInputSection, we can also erase 1508 // the whole ConcatInputSection 1509 if (ConcatInputSection *cisec = dyn_cast<ConcatInputSection>(sym->isec())) 1510 if (cisec->data.size() == sym->size) 1511 eraseISec(cisec); 1512 } 1513