1 //===- ArchiveWriter.cpp - ar File Format implementation --------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the writeArchive function. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Object/ArchiveWriter.h" 14 #include "llvm/ADT/ArrayRef.h" 15 #include "llvm/ADT/StringMap.h" 16 #include "llvm/ADT/StringRef.h" 17 #include "llvm/BinaryFormat/Magic.h" 18 #include "llvm/IR/LLVMContext.h" 19 #include "llvm/Object/Archive.h" 20 #include "llvm/Object/COFF.h" 21 #include "llvm/Object/Error.h" 22 #include "llvm/Object/IRObjectFile.h" 23 #include "llvm/Object/MachO.h" 24 #include "llvm/Object/ObjectFile.h" 25 #include "llvm/Object/SymbolicFile.h" 26 #include "llvm/Object/XCOFFObjectFile.h" 27 #include "llvm/Support/Alignment.h" 28 #include "llvm/Support/EndianStream.h" 29 #include "llvm/Support/Errc.h" 30 #include "llvm/Support/ErrorHandling.h" 31 #include "llvm/Support/Format.h" 32 #include "llvm/Support/MathExtras.h" 33 #include "llvm/Support/Path.h" 34 #include "llvm/Support/SmallVectorMemoryBuffer.h" 35 #include "llvm/Support/raw_ostream.h" 36 37 #include <cerrno> 38 #include <map> 39 40 #if !defined(_MSC_VER) && !defined(__MINGW32__) 41 #include <unistd.h> 42 #else 43 #include <io.h> 44 #endif 45 46 using namespace llvm; 47 48 struct SymMap { 49 bool UseECMap; 50 std::map<std::string, uint16_t> Map; 51 std::map<std::string, uint16_t> ECMap; 52 }; 53 54 NewArchiveMember::NewArchiveMember(MemoryBufferRef BufRef) 55 : Buf(MemoryBuffer::getMemBuffer(BufRef, false)), 56 MemberName(BufRef.getBufferIdentifier()) {} 57 58 object::Archive::Kind NewArchiveMember::detectKindFromObject() const { 59 auto MemBufferRef = this->Buf->getMemBufferRef(); 60 Expected<std::unique_ptr<object::ObjectFile>> OptionalObject = 61 object::ObjectFile::createObjectFile(MemBufferRef); 62 63 if (OptionalObject) 64 return isa<object::MachOObjectFile>(**OptionalObject) 65 ? object::Archive::K_DARWIN 66 : (isa<object::XCOFFObjectFile>(**OptionalObject) 67 ? object::Archive::K_AIXBIG 68 : object::Archive::K_GNU); 69 70 // Squelch the error in case we had a non-object file. 71 consumeError(OptionalObject.takeError()); 72 73 // If we're adding a bitcode file to the archive, detect the Archive kind 74 // based on the target triple. 75 LLVMContext Context; 76 if (identify_magic(MemBufferRef.getBuffer()) == file_magic::bitcode) { 77 if (auto ObjOrErr = object::SymbolicFile::createSymbolicFile( 78 MemBufferRef, file_magic::bitcode, &Context)) { 79 auto &IRObject = cast<object::IRObjectFile>(**ObjOrErr); 80 auto TargetTriple = Triple(IRObject.getTargetTriple()); 81 return TargetTriple.isOSDarwin() 82 ? object::Archive::K_DARWIN 83 : (TargetTriple.isOSAIX() ? object::Archive::K_AIXBIG 84 : object::Archive::K_GNU); 85 } else { 86 // Squelch the error in case this was not a SymbolicFile. 87 consumeError(ObjOrErr.takeError()); 88 } 89 } 90 91 return object::Archive::getDefaultKindForHost(); 92 } 93 94 Expected<NewArchiveMember> 95 NewArchiveMember::getOldMember(const object::Archive::Child &OldMember, 96 bool Deterministic) { 97 Expected<llvm::MemoryBufferRef> BufOrErr = OldMember.getMemoryBufferRef(); 98 if (!BufOrErr) 99 return BufOrErr.takeError(); 100 101 NewArchiveMember M; 102 M.Buf = MemoryBuffer::getMemBuffer(*BufOrErr, false); 103 M.MemberName = M.Buf->getBufferIdentifier(); 104 if (!Deterministic) { 105 auto ModTimeOrErr = OldMember.getLastModified(); 106 if (!ModTimeOrErr) 107 return ModTimeOrErr.takeError(); 108 M.ModTime = ModTimeOrErr.get(); 109 Expected<unsigned> UIDOrErr = OldMember.getUID(); 110 if (!UIDOrErr) 111 return UIDOrErr.takeError(); 112 M.UID = UIDOrErr.get(); 113 Expected<unsigned> GIDOrErr = OldMember.getGID(); 114 if (!GIDOrErr) 115 return GIDOrErr.takeError(); 116 M.GID = GIDOrErr.get(); 117 Expected<sys::fs::perms> AccessModeOrErr = OldMember.getAccessMode(); 118 if (!AccessModeOrErr) 119 return AccessModeOrErr.takeError(); 120 M.Perms = AccessModeOrErr.get(); 121 } 122 return std::move(M); 123 } 124 125 Expected<NewArchiveMember> NewArchiveMember::getFile(StringRef FileName, 126 bool Deterministic) { 127 sys::fs::file_status Status; 128 auto FDOrErr = sys::fs::openNativeFileForRead(FileName); 129 if (!FDOrErr) 130 return FDOrErr.takeError(); 131 sys::fs::file_t FD = *FDOrErr; 132 assert(FD != sys::fs::kInvalidFile); 133 134 if (auto EC = sys::fs::status(FD, Status)) 135 return errorCodeToError(EC); 136 137 // Opening a directory doesn't make sense. Let it fail. 138 // Linux cannot open directories with open(2), although 139 // cygwin and *bsd can. 140 if (Status.type() == sys::fs::file_type::directory_file) 141 return errorCodeToError(make_error_code(errc::is_a_directory)); 142 143 ErrorOr<std::unique_ptr<MemoryBuffer>> MemberBufferOrErr = 144 MemoryBuffer::getOpenFile(FD, FileName, Status.getSize(), false); 145 if (!MemberBufferOrErr) 146 return errorCodeToError(MemberBufferOrErr.getError()); 147 148 if (auto EC = sys::fs::closeFile(FD)) 149 return errorCodeToError(EC); 150 151 NewArchiveMember M; 152 M.Buf = std::move(*MemberBufferOrErr); 153 M.MemberName = M.Buf->getBufferIdentifier(); 154 if (!Deterministic) { 155 M.ModTime = std::chrono::time_point_cast<std::chrono::seconds>( 156 Status.getLastModificationTime()); 157 M.UID = Status.getUser(); 158 M.GID = Status.getGroup(); 159 M.Perms = Status.permissions(); 160 } 161 return std::move(M); 162 } 163 164 template <typename T> 165 static void printWithSpacePadding(raw_ostream &OS, T Data, unsigned Size) { 166 uint64_t OldPos = OS.tell(); 167 OS << Data; 168 unsigned SizeSoFar = OS.tell() - OldPos; 169 assert(SizeSoFar <= Size && "Data doesn't fit in Size"); 170 OS.indent(Size - SizeSoFar); 171 } 172 173 static bool isDarwin(object::Archive::Kind Kind) { 174 return Kind == object::Archive::K_DARWIN || 175 Kind == object::Archive::K_DARWIN64; 176 } 177 178 static bool isAIXBigArchive(object::Archive::Kind Kind) { 179 return Kind == object::Archive::K_AIXBIG; 180 } 181 182 static bool isCOFFArchive(object::Archive::Kind Kind) { 183 return Kind == object::Archive::K_COFF; 184 } 185 186 static bool isBSDLike(object::Archive::Kind Kind) { 187 switch (Kind) { 188 case object::Archive::K_GNU: 189 case object::Archive::K_GNU64: 190 case object::Archive::K_AIXBIG: 191 case object::Archive::K_COFF: 192 return false; 193 case object::Archive::K_BSD: 194 case object::Archive::K_DARWIN: 195 case object::Archive::K_DARWIN64: 196 return true; 197 } 198 llvm_unreachable("not supported for writting"); 199 } 200 201 template <class T> 202 static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val) { 203 support::endian::write(Out, Val, 204 isBSDLike(Kind) ? support::little : support::big); 205 } 206 207 template <class T> static void printLE(raw_ostream &Out, T Val) { 208 support::endian::write(Out, Val, support::little); 209 } 210 211 static void printRestOfMemberHeader( 212 raw_ostream &Out, const sys::TimePoint<std::chrono::seconds> &ModTime, 213 unsigned UID, unsigned GID, unsigned Perms, uint64_t Size) { 214 printWithSpacePadding(Out, sys::toTimeT(ModTime), 12); 215 216 // The format has only 6 chars for uid and gid. Truncate if the provided 217 // values don't fit. 218 printWithSpacePadding(Out, UID % 1000000, 6); 219 printWithSpacePadding(Out, GID % 1000000, 6); 220 221 printWithSpacePadding(Out, format("%o", Perms), 8); 222 printWithSpacePadding(Out, Size, 10); 223 Out << "`\n"; 224 } 225 226 static void 227 printGNUSmallMemberHeader(raw_ostream &Out, StringRef Name, 228 const sys::TimePoint<std::chrono::seconds> &ModTime, 229 unsigned UID, unsigned GID, unsigned Perms, 230 uint64_t Size) { 231 printWithSpacePadding(Out, Twine(Name) + "/", 16); 232 printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, Size); 233 } 234 235 static void 236 printBSDMemberHeader(raw_ostream &Out, uint64_t Pos, StringRef Name, 237 const sys::TimePoint<std::chrono::seconds> &ModTime, 238 unsigned UID, unsigned GID, unsigned Perms, uint64_t Size) { 239 uint64_t PosAfterHeader = Pos + 60 + Name.size(); 240 // Pad so that even 64 bit object files are aligned. 241 unsigned Pad = offsetToAlignment(PosAfterHeader, Align(8)); 242 unsigned NameWithPadding = Name.size() + Pad; 243 printWithSpacePadding(Out, Twine("#1/") + Twine(NameWithPadding), 16); 244 printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, 245 NameWithPadding + Size); 246 Out << Name; 247 while (Pad--) 248 Out.write(uint8_t(0)); 249 } 250 251 static void 252 printBigArchiveMemberHeader(raw_ostream &Out, StringRef Name, 253 const sys::TimePoint<std::chrono::seconds> &ModTime, 254 unsigned UID, unsigned GID, unsigned Perms, 255 uint64_t Size, unsigned PrevOffset, 256 unsigned NextOffset) { 257 unsigned NameLen = Name.size(); 258 259 printWithSpacePadding(Out, Size, 20); // File member size 260 printWithSpacePadding(Out, NextOffset, 20); // Next member header offset 261 printWithSpacePadding(Out, PrevOffset, 20); // Previous member header offset 262 printWithSpacePadding(Out, sys::toTimeT(ModTime), 12); // File member date 263 // The big archive format has 12 chars for uid and gid. 264 printWithSpacePadding(Out, UID % 1000000000000, 12); // UID 265 printWithSpacePadding(Out, GID % 1000000000000, 12); // GID 266 printWithSpacePadding(Out, format("%o", Perms), 12); // Permission 267 printWithSpacePadding(Out, NameLen, 4); // Name length 268 if (NameLen) { 269 printWithSpacePadding(Out, Name, NameLen); // Name 270 if (NameLen % 2) 271 Out.write(uint8_t(0)); // Null byte padding 272 } 273 Out << "`\n"; // Terminator 274 } 275 276 static bool useStringTable(bool Thin, StringRef Name) { 277 return Thin || Name.size() >= 16 || Name.contains('/'); 278 } 279 280 static bool is64BitKind(object::Archive::Kind Kind) { 281 switch (Kind) { 282 case object::Archive::K_GNU: 283 case object::Archive::K_BSD: 284 case object::Archive::K_DARWIN: 285 case object::Archive::K_COFF: 286 return false; 287 case object::Archive::K_AIXBIG: 288 case object::Archive::K_DARWIN64: 289 case object::Archive::K_GNU64: 290 return true; 291 } 292 llvm_unreachable("not supported for writting"); 293 } 294 295 static void 296 printMemberHeader(raw_ostream &Out, uint64_t Pos, raw_ostream &StringTable, 297 StringMap<uint64_t> &MemberNames, object::Archive::Kind Kind, 298 bool Thin, const NewArchiveMember &M, 299 sys::TimePoint<std::chrono::seconds> ModTime, uint64_t Size) { 300 if (isBSDLike(Kind)) 301 return printBSDMemberHeader(Out, Pos, M.MemberName, ModTime, M.UID, M.GID, 302 M.Perms, Size); 303 if (!useStringTable(Thin, M.MemberName)) 304 return printGNUSmallMemberHeader(Out, M.MemberName, ModTime, M.UID, M.GID, 305 M.Perms, Size); 306 Out << '/'; 307 uint64_t NamePos; 308 if (Thin) { 309 NamePos = StringTable.tell(); 310 StringTable << M.MemberName << "/\n"; 311 } else { 312 auto Insertion = MemberNames.insert({M.MemberName, uint64_t(0)}); 313 if (Insertion.second) { 314 Insertion.first->second = StringTable.tell(); 315 StringTable << M.MemberName; 316 if (isCOFFArchive(Kind)) 317 StringTable << '\0'; 318 else 319 StringTable << "/\n"; 320 } 321 NamePos = Insertion.first->second; 322 } 323 printWithSpacePadding(Out, NamePos, 15); 324 printRestOfMemberHeader(Out, ModTime, M.UID, M.GID, M.Perms, Size); 325 } 326 327 namespace { 328 struct MemberData { 329 std::vector<unsigned> Symbols; 330 std::string Header; 331 StringRef Data; 332 StringRef Padding; 333 }; 334 } // namespace 335 336 static MemberData computeStringTable(StringRef Names) { 337 unsigned Size = Names.size(); 338 unsigned Pad = offsetToAlignment(Size, Align(2)); 339 std::string Header; 340 raw_string_ostream Out(Header); 341 printWithSpacePadding(Out, "//", 48); 342 printWithSpacePadding(Out, Size + Pad, 10); 343 Out << "`\n"; 344 Out.flush(); 345 return {{}, std::move(Header), Names, Pad ? "\n" : ""}; 346 } 347 348 static sys::TimePoint<std::chrono::seconds> now(bool Deterministic) { 349 using namespace std::chrono; 350 351 if (!Deterministic) 352 return time_point_cast<seconds>(system_clock::now()); 353 return sys::TimePoint<seconds>(); 354 } 355 356 static bool isArchiveSymbol(const object::BasicSymbolRef &S) { 357 Expected<uint32_t> SymFlagsOrErr = S.getFlags(); 358 if (!SymFlagsOrErr) 359 // TODO: Actually report errors helpfully. 360 report_fatal_error(SymFlagsOrErr.takeError()); 361 if (*SymFlagsOrErr & object::SymbolRef::SF_FormatSpecific) 362 return false; 363 if (!(*SymFlagsOrErr & object::SymbolRef::SF_Global)) 364 return false; 365 if (*SymFlagsOrErr & object::SymbolRef::SF_Undefined) 366 return false; 367 return true; 368 } 369 370 static void printNBits(raw_ostream &Out, object::Archive::Kind Kind, 371 uint64_t Val) { 372 if (is64BitKind(Kind)) 373 print<uint64_t>(Out, Kind, Val); 374 else 375 print<uint32_t>(Out, Kind, Val); 376 } 377 378 static uint64_t computeSymbolTableSize(object::Archive::Kind Kind, 379 uint64_t NumSyms, uint64_t OffsetSize, 380 uint64_t StringTableSize, 381 uint32_t *Padding = nullptr) { 382 assert((OffsetSize == 4 || OffsetSize == 8) && "Unsupported OffsetSize"); 383 uint64_t Size = OffsetSize; // Number of entries 384 if (isBSDLike(Kind)) 385 Size += NumSyms * OffsetSize * 2; // Table 386 else 387 Size += NumSyms * OffsetSize; // Table 388 if (isBSDLike(Kind)) 389 Size += OffsetSize; // byte count 390 Size += StringTableSize; 391 // ld64 expects the members to be 8-byte aligned for 64-bit content and at 392 // least 4-byte aligned for 32-bit content. Opt for the larger encoding 393 // uniformly. 394 // We do this for all bsd formats because it simplifies aligning members. 395 // For the big archive format, the symbol table is the last member, so there 396 // is no need to align. 397 uint32_t Pad = isAIXBigArchive(Kind) 398 ? 0 399 : offsetToAlignment(Size, Align(isBSDLike(Kind) ? 8 : 2)); 400 401 Size += Pad; 402 if (Padding) 403 *Padding = Pad; 404 return Size; 405 } 406 407 static uint64_t computeSymbolMapSize(uint64_t NumObj, SymMap &SymMap, 408 uint32_t *Padding = nullptr) { 409 uint64_t Size = sizeof(uint32_t) * 2; // Number of symbols and objects entries 410 Size += NumObj * sizeof(uint32_t); // Offset table 411 412 for (auto S : SymMap.Map) 413 Size += sizeof(uint16_t) + S.first.length() + 1; 414 415 uint32_t Pad = offsetToAlignment(Size, Align(2)); 416 Size += Pad; 417 if (Padding) 418 *Padding = Pad; 419 return Size; 420 } 421 422 static uint64_t computeECSymbolsSize(SymMap &SymMap, 423 uint32_t *Padding = nullptr) { 424 uint64_t Size = sizeof(uint32_t); // Number of symbols 425 426 for (auto S : SymMap.ECMap) 427 Size += sizeof(uint16_t) + S.first.length() + 1; 428 429 uint32_t Pad = offsetToAlignment(Size, Align(2)); 430 Size += Pad; 431 if (Padding) 432 *Padding = Pad; 433 return Size; 434 } 435 436 static void writeSymbolTableHeader(raw_ostream &Out, object::Archive::Kind Kind, 437 bool Deterministic, uint64_t Size, 438 uint64_t PrevMemberOffset = 0) { 439 if (isBSDLike(Kind)) { 440 const char *Name = is64BitKind(Kind) ? "__.SYMDEF_64" : "__.SYMDEF"; 441 printBSDMemberHeader(Out, Out.tell(), Name, now(Deterministic), 0, 0, 0, 442 Size); 443 } else if (isAIXBigArchive(Kind)) { 444 printBigArchiveMemberHeader(Out, "", now(Deterministic), 0, 0, 445 0, Size, PrevMemberOffset, 0); 446 } else { 447 const char *Name = is64BitKind(Kind) ? "/SYM64" : ""; 448 printGNUSmallMemberHeader(Out, Name, now(Deterministic), 0, 0, 0, Size); 449 } 450 } 451 452 static uint64_t computeHeadersSize(object::Archive::Kind Kind, 453 uint64_t NumMembers, 454 uint64_t StringMemberSize, uint64_t NumSyms, 455 uint64_t SymNamesSize, SymMap *SymMap) { 456 uint32_t OffsetSize = is64BitKind(Kind) ? 8 : 4; 457 uint64_t SymtabSize = 458 computeSymbolTableSize(Kind, NumSyms, OffsetSize, SymNamesSize); 459 auto computeSymbolTableHeaderSize = [=] { 460 SmallString<0> TmpBuf; 461 raw_svector_ostream Tmp(TmpBuf); 462 writeSymbolTableHeader(Tmp, Kind, true, SymtabSize); 463 return TmpBuf.size(); 464 }; 465 uint32_t HeaderSize = computeSymbolTableHeaderSize(); 466 uint64_t Size = strlen("!<arch>\n") + HeaderSize + SymtabSize; 467 468 if (SymMap) { 469 Size += HeaderSize + computeSymbolMapSize(NumMembers, *SymMap); 470 if (SymMap->ECMap.size()) 471 Size += HeaderSize + computeECSymbolsSize(*SymMap); 472 } 473 474 return Size + StringMemberSize; 475 } 476 477 static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, 478 bool Deterministic, ArrayRef<MemberData> Members, 479 StringRef StringTable, uint64_t MembersOffset, 480 uint64_t PrevMemberOffset = 0) { 481 // We don't write a symbol table on an archive with no members -- except on 482 // Darwin, where the linker will abort unless the archive has a symbol table. 483 if (StringTable.empty() && !isDarwin(Kind) && !isCOFFArchive(Kind)) 484 return; 485 486 unsigned NumSyms = 0; 487 for (const MemberData &M : Members) 488 NumSyms += M.Symbols.size(); 489 490 uint64_t OffsetSize = is64BitKind(Kind) ? 8 : 4; 491 uint32_t Pad; 492 uint64_t Size = computeSymbolTableSize(Kind, NumSyms, OffsetSize, 493 StringTable.size(), &Pad); 494 writeSymbolTableHeader(Out, Kind, Deterministic, Size, PrevMemberOffset); 495 496 if (isBSDLike(Kind)) 497 printNBits(Out, Kind, NumSyms * 2 * OffsetSize); 498 else 499 printNBits(Out, Kind, NumSyms); 500 501 uint64_t Pos = MembersOffset; 502 for (const MemberData &M : Members) { 503 for (unsigned StringOffset : M.Symbols) { 504 if (isBSDLike(Kind)) 505 printNBits(Out, Kind, StringOffset); 506 printNBits(Out, Kind, Pos); // member offset 507 } 508 Pos += M.Header.size() + M.Data.size() + M.Padding.size(); 509 } 510 511 if (isBSDLike(Kind)) 512 // byte count of the string table 513 printNBits(Out, Kind, StringTable.size()); 514 Out << StringTable; 515 516 while (Pad--) 517 Out.write(uint8_t(0)); 518 } 519 520 static void writeSymbolMap(raw_ostream &Out, object::Archive::Kind Kind, 521 bool Deterministic, ArrayRef<MemberData> Members, 522 SymMap &SymMap, uint64_t MembersOffset) { 523 uint32_t Pad; 524 uint64_t Size = computeSymbolMapSize(Members.size(), SymMap, &Pad); 525 writeSymbolTableHeader(Out, Kind, Deterministic, Size, 0); 526 527 uint32_t Pos = MembersOffset; 528 529 printLE<uint32_t>(Out, Members.size()); 530 for (const MemberData &M : Members) { 531 printLE(Out, Pos); // member offset 532 Pos += M.Header.size() + M.Data.size() + M.Padding.size(); 533 } 534 535 printLE<uint32_t>(Out, SymMap.Map.size()); 536 537 for (auto S : SymMap.Map) 538 printLE(Out, S.second); 539 for (auto S : SymMap.Map) 540 Out << S.first << '\0'; 541 542 while (Pad--) 543 Out.write(uint8_t(0)); 544 } 545 546 static void writeECSymbols(raw_ostream &Out, object::Archive::Kind Kind, 547 bool Deterministic, ArrayRef<MemberData> Members, 548 SymMap &SymMap) { 549 uint32_t Pad; 550 uint64_t Size = computeECSymbolsSize(SymMap, &Pad); 551 printGNUSmallMemberHeader(Out, "/<ECSYMBOLS>", now(Deterministic), 0, 0, 0, 552 Size); 553 554 printLE<uint32_t>(Out, SymMap.ECMap.size()); 555 556 for (auto S : SymMap.ECMap) 557 printLE(Out, S.second); 558 for (auto S : SymMap.ECMap) 559 Out << S.first << '\0'; 560 while (Pad--) 561 Out.write(uint8_t(0)); 562 } 563 564 static bool isECObject(object::SymbolicFile &Obj) { 565 if (Obj.isCOFF()) 566 return cast<llvm::object::COFFObjectFile>(&Obj)->getMachine() != 567 COFF::IMAGE_FILE_MACHINE_ARM64; 568 569 if (Obj.isIR()) { 570 Expected<std::string> TripleStr = 571 getBitcodeTargetTriple(Obj.getMemoryBufferRef()); 572 if (!TripleStr) 573 return false; 574 Triple T(*TripleStr); 575 return T.isWindowsArm64EC() || T.getArch() == Triple::x86_64; 576 } 577 578 return false; 579 } 580 581 static Expected<std::vector<unsigned>> 582 getSymbols(MemoryBufferRef Buf, uint16_t Index, raw_ostream &SymNames, 583 SymMap *SymMap, bool &HasObject) { 584 std::vector<unsigned> Ret; 585 586 // In the scenario when LLVMContext is populated SymbolicFile will contain a 587 // reference to it, thus SymbolicFile should be destroyed first. 588 LLVMContext Context; 589 std::unique_ptr<object::SymbolicFile> Obj; 590 591 const file_magic Type = identify_magic(Buf.getBuffer()); 592 // Treat unsupported file types as having no symbols. 593 if (!object::SymbolicFile::isSymbolicFile(Type, &Context)) 594 return Ret; 595 if (Type == file_magic::bitcode) { 596 auto ObjOrErr = object::SymbolicFile::createSymbolicFile( 597 Buf, file_magic::bitcode, &Context); 598 if (!ObjOrErr) 599 return ObjOrErr.takeError(); 600 Obj = std::move(*ObjOrErr); 601 } else { 602 auto ObjOrErr = object::SymbolicFile::createSymbolicFile(Buf); 603 if (!ObjOrErr) 604 return ObjOrErr.takeError(); 605 Obj = std::move(*ObjOrErr); 606 } 607 608 std::map<std::string, uint16_t> *Map = nullptr; 609 if (SymMap) 610 Map = SymMap->UseECMap && isECObject(*Obj) ? &SymMap->ECMap : &SymMap->Map; 611 HasObject = true; 612 for (const object::BasicSymbolRef &S : Obj->symbols()) { 613 if (!isArchiveSymbol(S)) 614 continue; 615 if (Map) { 616 std::string Name; 617 raw_string_ostream NameStream(Name); 618 if (Error E = S.printName(NameStream)) 619 return std::move(E); 620 if (Map->find(Name) != Map->end()) 621 continue; // ignore duplicated symbol 622 (*Map)[Name] = Index; 623 if (Map == &SymMap->Map) { 624 Ret.push_back(SymNames.tell()); 625 SymNames << Name << '\0'; 626 } 627 } else { 628 Ret.push_back(SymNames.tell()); 629 if (Error E = S.printName(SymNames)) 630 return std::move(E); 631 SymNames << '\0'; 632 } 633 } 634 return Ret; 635 } 636 637 static Expected<std::vector<MemberData>> 638 computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, 639 object::Archive::Kind Kind, bool Thin, bool Deterministic, 640 bool NeedSymbols, SymMap *SymMap, 641 ArrayRef<NewArchiveMember> NewMembers) { 642 static char PaddingData[8] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'}; 643 644 uint64_t Pos = 645 isAIXBigArchive(Kind) ? sizeof(object::BigArchive::FixLenHdr) : 0; 646 647 std::vector<MemberData> Ret; 648 bool HasObject = false; 649 650 // Deduplicate long member names in the string table and reuse earlier name 651 // offsets. This especially saves space for COFF Import libraries where all 652 // members have the same name. 653 StringMap<uint64_t> MemberNames; 654 655 // UniqueTimestamps is a special case to improve debugging on Darwin: 656 // 657 // The Darwin linker does not link debug info into the final 658 // binary. Instead, it emits entries of type N_OSO in in the output 659 // binary's symbol table, containing references to the linked-in 660 // object files. Using that reference, the debugger can read the 661 // debug data directly from the object files. Alternatively, an 662 // invocation of 'dsymutil' will link the debug data from the object 663 // files into a dSYM bundle, which can be loaded by the debugger, 664 // instead of the object files. 665 // 666 // For an object file, the N_OSO entries contain the absolute path 667 // path to the file, and the file's timestamp. For an object 668 // included in an archive, the path is formatted like 669 // "/absolute/path/to/archive.a(member.o)", and the timestamp is the 670 // archive member's timestamp, rather than the archive's timestamp. 671 // 672 // However, this doesn't always uniquely identify an object within 673 // an archive -- an archive file can have multiple entries with the 674 // same filename. (This will happen commonly if the original object 675 // files started in different directories.) The only way they get 676 // distinguished, then, is via the timestamp. But this process is 677 // unable to find the correct object file in the archive when there 678 // are two files of the same name and timestamp. 679 // 680 // Additionally, timestamp==0 is treated specially, and causes the 681 // timestamp to be ignored as a match criteria. 682 // 683 // That will "usually" work out okay when creating an archive not in 684 // deterministic timestamp mode, because the objects will probably 685 // have been created at different timestamps. 686 // 687 // To ameliorate this problem, in deterministic archive mode (which 688 // is the default), on Darwin we will emit a unique non-zero 689 // timestamp for each entry with a duplicated name. This is still 690 // deterministic: the only thing affecting that timestamp is the 691 // order of the files in the resultant archive. 692 // 693 // See also the functions that handle the lookup: 694 // in lldb: ObjectContainerBSDArchive::Archive::FindObject() 695 // in llvm/tools/dsymutil: BinaryHolder::GetArchiveMemberBuffers(). 696 bool UniqueTimestamps = Deterministic && isDarwin(Kind); 697 std::map<StringRef, unsigned> FilenameCount; 698 if (UniqueTimestamps) { 699 for (const NewArchiveMember &M : NewMembers) 700 FilenameCount[M.MemberName]++; 701 for (auto &Entry : FilenameCount) 702 Entry.second = Entry.second > 1 ? 1 : 0; 703 } 704 705 // The big archive format needs to know the offset of the previous member 706 // header. 707 unsigned PrevOffset = 0, Index = 0; 708 for (const NewArchiveMember &M : NewMembers) { 709 std::string Header; 710 raw_string_ostream Out(Header); 711 712 MemoryBufferRef Buf = M.Buf->getMemBufferRef(); 713 StringRef Data = Thin ? "" : Buf.getBuffer(); 714 715 Index++; 716 717 // ld64 expects the members to be 8-byte aligned for 64-bit content and at 718 // least 4-byte aligned for 32-bit content. Opt for the larger encoding 719 // uniformly. This matches the behaviour with cctools and ensures that ld64 720 // is happy with archives that we generate. 721 unsigned MemberPadding = 722 isDarwin(Kind) ? offsetToAlignment(Data.size(), Align(8)) : 0; 723 unsigned TailPadding = 724 offsetToAlignment(Data.size() + MemberPadding, Align(2)); 725 StringRef Padding = StringRef(PaddingData, MemberPadding + TailPadding); 726 727 sys::TimePoint<std::chrono::seconds> ModTime; 728 if (UniqueTimestamps) 729 // Increment timestamp for each file of a given name. 730 ModTime = sys::toTimePoint(FilenameCount[M.MemberName]++); 731 else 732 ModTime = M.ModTime; 733 734 uint64_t Size = Buf.getBufferSize() + MemberPadding; 735 if (Size > object::Archive::MaxMemberSize) { 736 std::string StringMsg = 737 "File " + M.MemberName.str() + " exceeds size limit"; 738 return make_error<object::GenericBinaryError>( 739 std::move(StringMsg), object::object_error::parse_failed); 740 } 741 742 if (isAIXBigArchive(Kind)) { 743 unsigned NextOffset = Pos + sizeof(object::BigArMemHdrType) + 744 alignTo(M.MemberName.size(), 2) + alignTo(Size, 2); 745 printBigArchiveMemberHeader(Out, M.MemberName, ModTime, M.UID, M.GID, 746 M.Perms, Size, PrevOffset, NextOffset); 747 PrevOffset = Pos; 748 } else { 749 printMemberHeader(Out, Pos, StringTable, MemberNames, Kind, Thin, M, 750 ModTime, Size); 751 } 752 Out.flush(); 753 754 std::vector<unsigned> Symbols; 755 if (NeedSymbols) { 756 Expected<std::vector<unsigned>> SymbolsOrErr = 757 getSymbols(Buf, Index, SymNames, SymMap, HasObject); 758 if (!SymbolsOrErr) 759 return createFileError(M.MemberName, SymbolsOrErr.takeError()); 760 Symbols = std::move(*SymbolsOrErr); 761 } 762 763 Pos += Header.size() + Data.size() + Padding.size(); 764 Ret.push_back({std::move(Symbols), std::move(Header), Data, Padding}); 765 } 766 // If there are no symbols, emit an empty symbol table, to satisfy Solaris 767 // tools, older versions of which expect a symbol table in a non-empty 768 // archive, regardless of whether there are any symbols in it. 769 if (HasObject && SymNames.tell() == 0 && !isCOFFArchive(Kind)) 770 SymNames << '\0' << '\0' << '\0'; 771 return Ret; 772 } 773 774 namespace llvm { 775 776 static ErrorOr<SmallString<128>> canonicalizePath(StringRef P) { 777 SmallString<128> Ret = P; 778 std::error_code Err = sys::fs::make_absolute(Ret); 779 if (Err) 780 return Err; 781 sys::path::remove_dots(Ret, /*removedotdot*/ true); 782 return Ret; 783 } 784 785 // Compute the relative path from From to To. 786 Expected<std::string> computeArchiveRelativePath(StringRef From, StringRef To) { 787 ErrorOr<SmallString<128>> PathToOrErr = canonicalizePath(To); 788 ErrorOr<SmallString<128>> DirFromOrErr = canonicalizePath(From); 789 if (!PathToOrErr || !DirFromOrErr) 790 return errorCodeToError(std::error_code(errno, std::generic_category())); 791 792 const SmallString<128> &PathTo = *PathToOrErr; 793 const SmallString<128> &DirFrom = sys::path::parent_path(*DirFromOrErr); 794 795 // Can't construct a relative path between different roots 796 if (sys::path::root_name(PathTo) != sys::path::root_name(DirFrom)) 797 return sys::path::convert_to_slash(PathTo); 798 799 // Skip common prefixes 800 auto FromTo = 801 std::mismatch(sys::path::begin(DirFrom), sys::path::end(DirFrom), 802 sys::path::begin(PathTo)); 803 auto FromI = FromTo.first; 804 auto ToI = FromTo.second; 805 806 // Construct relative path 807 SmallString<128> Relative; 808 for (auto FromE = sys::path::end(DirFrom); FromI != FromE; ++FromI) 809 sys::path::append(Relative, sys::path::Style::posix, ".."); 810 811 for (auto ToE = sys::path::end(PathTo); ToI != ToE; ++ToI) 812 sys::path::append(Relative, sys::path::Style::posix, *ToI); 813 814 return std::string(Relative.str()); 815 } 816 817 static Error writeArchiveToStream(raw_ostream &Out, 818 ArrayRef<NewArchiveMember> NewMembers, 819 bool WriteSymtab, object::Archive::Kind Kind, 820 bool Deterministic, bool Thin, bool IsEC) { 821 assert((!Thin || !isBSDLike(Kind)) && "Only the gnu format has a thin mode"); 822 823 SmallString<0> SymNamesBuf; 824 raw_svector_ostream SymNames(SymNamesBuf); 825 SmallString<0> StringTableBuf; 826 raw_svector_ostream StringTable(StringTableBuf); 827 SymMap SymMap; 828 829 // COFF symbol map uses 16-bit indexes, so we can't use it if there are too 830 // many members. 831 if (isCOFFArchive(Kind) && NewMembers.size() > 0xfffe) 832 Kind = object::Archive::K_GNU; 833 834 SymMap.UseECMap = IsEC; 835 Expected<std::vector<MemberData>> DataOrErr = computeMemberData( 836 StringTable, SymNames, Kind, Thin, Deterministic, WriteSymtab, 837 isCOFFArchive(Kind) ? &SymMap : nullptr, NewMembers); 838 if (Error E = DataOrErr.takeError()) 839 return E; 840 std::vector<MemberData> &Data = *DataOrErr; 841 842 uint64_t StringTableSize = 0; 843 MemberData StringTableMember; 844 if (!StringTableBuf.empty() && !isAIXBigArchive(Kind)) { 845 StringTableMember = computeStringTable(StringTableBuf); 846 StringTableSize = StringTableMember.Header.size() + 847 StringTableMember.Data.size() + 848 StringTableMember.Padding.size(); 849 } 850 851 // We would like to detect if we need to switch to a 64-bit symbol table. 852 uint64_t LastMemberEndOffset = 0; 853 uint64_t LastMemberHeaderOffset = 0; 854 uint64_t NumSyms = 0; 855 for (const auto &M : Data) { 856 // Record the start of the member's offset 857 LastMemberHeaderOffset = LastMemberEndOffset; 858 // Account for the size of each part associated with the member. 859 LastMemberEndOffset += M.Header.size() + M.Data.size() + M.Padding.size(); 860 NumSyms += M.Symbols.size(); 861 } 862 863 std::optional<uint64_t> HeadersSize; 864 865 // The symbol table is put at the end of the big archive file. The symbol 866 // table is at the start of the archive file for other archive formats. 867 if (WriteSymtab && !is64BitKind(Kind)) { 868 // We assume 32-bit offsets to see if 32-bit symbols are possible or not. 869 HeadersSize = computeHeadersSize(Kind, Data.size(), StringTableSize, 870 NumSyms, SymNamesBuf.size(), 871 isCOFFArchive(Kind) ? &SymMap : nullptr); 872 873 // The SYM64 format is used when an archive's member offsets are larger than 874 // 32-bits can hold. The need for this shift in format is detected by 875 // writeArchive. To test this we need to generate a file with a member that 876 // has an offset larger than 32-bits but this demands a very slow test. To 877 // speed the test up we use this environment variable to pretend like the 878 // cutoff happens before 32-bits and instead happens at some much smaller 879 // value. 880 uint64_t Sym64Threshold = 1ULL << 32; 881 const char *Sym64Env = std::getenv("SYM64_THRESHOLD"); 882 if (Sym64Env) 883 StringRef(Sym64Env).getAsInteger(10, Sym64Threshold); 884 885 // If LastMemberHeaderOffset isn't going to fit in a 32-bit varible we need 886 // to switch to 64-bit. Note that the file can be larger than 4GB as long as 887 // the last member starts before the 4GB offset. 888 if (*HeadersSize + LastMemberHeaderOffset >= Sym64Threshold) { 889 if (Kind == object::Archive::K_DARWIN) 890 Kind = object::Archive::K_DARWIN64; 891 else 892 Kind = object::Archive::K_GNU64; 893 HeadersSize.reset(); 894 } 895 } 896 897 if (Thin) 898 Out << "!<thin>\n"; 899 else if (isAIXBigArchive(Kind)) 900 Out << "<bigaf>\n"; 901 else 902 Out << "!<arch>\n"; 903 904 if (!isAIXBigArchive(Kind)) { 905 if (WriteSymtab) { 906 if (!HeadersSize) 907 HeadersSize = computeHeadersSize( 908 Kind, Data.size(), StringTableSize, NumSyms, SymNamesBuf.size(), 909 isCOFFArchive(Kind) ? &SymMap : nullptr); 910 writeSymbolTable(Out, Kind, Deterministic, Data, SymNamesBuf, 911 *HeadersSize); 912 913 if (isCOFFArchive(Kind)) 914 writeSymbolMap(Out, Kind, Deterministic, Data, SymMap, *HeadersSize); 915 } 916 917 if (StringTableSize) 918 Out << StringTableMember.Header << StringTableMember.Data 919 << StringTableMember.Padding; 920 921 if (WriteSymtab && SymMap.ECMap.size()) 922 writeECSymbols(Out, Kind, Deterministic, Data, SymMap); 923 924 for (const MemberData &M : Data) 925 Out << M.Header << M.Data << M.Padding; 926 } else { 927 HeadersSize = sizeof(object::BigArchive::FixLenHdr); 928 LastMemberEndOffset += *HeadersSize; 929 LastMemberHeaderOffset += *HeadersSize; 930 931 // For the big archive (AIX) format, compute a table of member names and 932 // offsets, used in the member table. 933 uint64_t MemberTableNameStrTblSize = 0; 934 std::vector<size_t> MemberOffsets; 935 std::vector<StringRef> MemberNames; 936 // Loop across object to find offset and names. 937 uint64_t MemberEndOffset = sizeof(object::BigArchive::FixLenHdr); 938 for (size_t I = 0, Size = NewMembers.size(); I != Size; ++I) { 939 const NewArchiveMember &Member = NewMembers[I]; 940 MemberTableNameStrTblSize += Member.MemberName.size() + 1; 941 MemberOffsets.push_back(MemberEndOffset); 942 MemberNames.push_back(Member.MemberName); 943 // File member name ended with "`\n". The length is included in 944 // BigArMemHdrType. 945 MemberEndOffset += sizeof(object::BigArMemHdrType) + 946 alignTo(Data[I].Data.size(), 2) + 947 alignTo(Member.MemberName.size(), 2); 948 } 949 950 // AIX member table size. 951 unsigned MemberTableSize = 20 + // Number of members field 952 20 * MemberOffsets.size() + 953 MemberTableNameStrTblSize; 954 955 unsigned GlobalSymbolOffset = 956 (WriteSymtab && NumSyms > 0) 957 ? LastMemberEndOffset + 958 alignTo(sizeof(object::BigArMemHdrType) + MemberTableSize, 2) 959 : 0; 960 961 // Fixed Sized Header. 962 printWithSpacePadding(Out, NewMembers.size() ? LastMemberEndOffset : 0, 963 20); // Offset to member table 964 // If there are no file members in the archive, there will be no global 965 // symbol table. 966 printWithSpacePadding(Out, NewMembers.size() ? GlobalSymbolOffset : 0, 20); 967 printWithSpacePadding( 968 Out, 0, 969 20); // Offset to 64 bits global symbol table - Not supported yet 970 printWithSpacePadding( 971 Out, NewMembers.size() ? sizeof(object::BigArchive::FixLenHdr) : 0, 972 20); // Offset to first archive member 973 printWithSpacePadding(Out, NewMembers.size() ? LastMemberHeaderOffset : 0, 974 20); // Offset to last archive member 975 printWithSpacePadding( 976 Out, 0, 977 20); // Offset to first member of free list - Not supported yet 978 979 for (const MemberData &M : Data) { 980 Out << M.Header << M.Data; 981 if (M.Data.size() % 2) 982 Out << '\0'; 983 } 984 985 if (NewMembers.size()) { 986 // Member table. 987 printBigArchiveMemberHeader(Out, "", sys::toTimePoint(0), 0, 0, 0, 988 MemberTableSize, LastMemberHeaderOffset, 989 GlobalSymbolOffset); 990 printWithSpacePadding(Out, MemberOffsets.size(), 20); // Number of members 991 for (uint64_t MemberOffset : MemberOffsets) 992 printWithSpacePadding(Out, MemberOffset, 993 20); // Offset to member file header. 994 for (StringRef MemberName : MemberNames) 995 Out << MemberName << '\0'; // Member file name, null byte padding. 996 997 if (MemberTableNameStrTblSize % 2) 998 Out << '\0'; // Name table must be tail padded to an even number of 999 // bytes. 1000 1001 if (WriteSymtab && NumSyms > 0) 1002 writeSymbolTable(Out, Kind, Deterministic, Data, SymNamesBuf, 1003 *HeadersSize, LastMemberEndOffset); 1004 } 1005 } 1006 Out.flush(); 1007 return Error::success(); 1008 } 1009 1010 Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers, 1011 bool WriteSymtab, object::Archive::Kind Kind, 1012 bool Deterministic, bool Thin, 1013 std::unique_ptr<MemoryBuffer> OldArchiveBuf, bool IsEC) { 1014 Expected<sys::fs::TempFile> Temp = 1015 sys::fs::TempFile::create(ArcName + ".temp-archive-%%%%%%%.a"); 1016 if (!Temp) 1017 return Temp.takeError(); 1018 raw_fd_ostream Out(Temp->FD, false); 1019 1020 if (Error E = writeArchiveToStream(Out, NewMembers, WriteSymtab, Kind, 1021 Deterministic, Thin, IsEC)) { 1022 if (Error DiscardError = Temp->discard()) 1023 return joinErrors(std::move(E), std::move(DiscardError)); 1024 return E; 1025 } 1026 1027 // At this point, we no longer need whatever backing memory 1028 // was used to generate the NewMembers. On Windows, this buffer 1029 // could be a mapped view of the file we want to replace (if 1030 // we're updating an existing archive, say). In that case, the 1031 // rename would still succeed, but it would leave behind a 1032 // temporary file (actually the original file renamed) because 1033 // a file cannot be deleted while there's a handle open on it, 1034 // only renamed. So by freeing this buffer, this ensures that 1035 // the last open handle on the destination file, if any, is 1036 // closed before we attempt to rename. 1037 OldArchiveBuf.reset(); 1038 1039 return Temp->keep(ArcName); 1040 } 1041 1042 Expected<std::unique_ptr<MemoryBuffer>> 1043 writeArchiveToBuffer(ArrayRef<NewArchiveMember> NewMembers, bool WriteSymtab, 1044 object::Archive::Kind Kind, bool Deterministic, 1045 bool Thin) { 1046 SmallVector<char, 0> ArchiveBufferVector; 1047 raw_svector_ostream ArchiveStream(ArchiveBufferVector); 1048 1049 if (Error E = writeArchiveToStream(ArchiveStream, NewMembers, WriteSymtab, 1050 Kind, Deterministic, Thin, false)) 1051 return std::move(E); 1052 1053 return std::make_unique<SmallVectorMemoryBuffer>( 1054 std::move(ArchiveBufferVector), /*RequiresNullTerminator=*/false); 1055 } 1056 1057 } // namespace llvm 1058