1 //===- ArchiveWriter.cpp - ar File Format implementation --------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the writeArchive function. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Object/ArchiveWriter.h" 14 #include "llvm/ADT/ArrayRef.h" 15 #include "llvm/ADT/StringMap.h" 16 #include "llvm/ADT/StringRef.h" 17 #include "llvm/BinaryFormat/Magic.h" 18 #include "llvm/IR/LLVMContext.h" 19 #include "llvm/Object/Archive.h" 20 #include "llvm/Object/COFF.h" 21 #include "llvm/Object/Error.h" 22 #include "llvm/Object/IRObjectFile.h" 23 #include "llvm/Object/MachO.h" 24 #include "llvm/Object/ObjectFile.h" 25 #include "llvm/Object/SymbolicFile.h" 26 #include "llvm/Object/XCOFFObjectFile.h" 27 #include "llvm/Support/Alignment.h" 28 #include "llvm/Support/EndianStream.h" 29 #include "llvm/Support/Errc.h" 30 #include "llvm/Support/ErrorHandling.h" 31 #include "llvm/Support/Format.h" 32 #include "llvm/Support/MathExtras.h" 33 #include "llvm/Support/Path.h" 34 #include "llvm/Support/SmallVectorMemoryBuffer.h" 35 #include "llvm/Support/raw_ostream.h" 36 37 #include <map> 38 39 #if !defined(_MSC_VER) && !defined(__MINGW32__) 40 #include <unistd.h> 41 #else 42 #include <io.h> 43 #endif 44 45 using namespace llvm; 46 47 struct SymMap { 48 std::map<std::string, uint16_t> Map; 49 }; 50 51 NewArchiveMember::NewArchiveMember(MemoryBufferRef BufRef) 52 : Buf(MemoryBuffer::getMemBuffer(BufRef, false)), 53 MemberName(BufRef.getBufferIdentifier()) {} 54 55 object::Archive::Kind NewArchiveMember::detectKindFromObject() const { 56 auto MemBufferRef = this->Buf->getMemBufferRef(); 57 Expected<std::unique_ptr<object::ObjectFile>> OptionalObject = 58 object::ObjectFile::createObjectFile(MemBufferRef); 59 60 if (OptionalObject) 61 return isa<object::MachOObjectFile>(**OptionalObject) 62 ? object::Archive::K_DARWIN 63 : (isa<object::XCOFFObjectFile>(**OptionalObject) 64 ? object::Archive::K_AIXBIG 65 : object::Archive::K_GNU); 66 67 // Squelch the error in case we had a non-object file. 68 consumeError(OptionalObject.takeError()); 69 70 // If we're adding a bitcode file to the archive, detect the Archive kind 71 // based on the target triple. 72 LLVMContext Context; 73 if (identify_magic(MemBufferRef.getBuffer()) == file_magic::bitcode) { 74 if (auto ObjOrErr = object::SymbolicFile::createSymbolicFile( 75 MemBufferRef, file_magic::bitcode, &Context)) { 76 auto &IRObject = cast<object::IRObjectFile>(**ObjOrErr); 77 return Triple(IRObject.getTargetTriple()).isOSDarwin() 78 ? object::Archive::K_DARWIN 79 : object::Archive::K_GNU; 80 } else { 81 // Squelch the error in case this was not a SymbolicFile. 82 consumeError(ObjOrErr.takeError()); 83 } 84 } 85 86 return object::Archive::getDefaultKindForHost(); 87 } 88 89 Expected<NewArchiveMember> 90 NewArchiveMember::getOldMember(const object::Archive::Child &OldMember, 91 bool Deterministic) { 92 Expected<llvm::MemoryBufferRef> BufOrErr = OldMember.getMemoryBufferRef(); 93 if (!BufOrErr) 94 return BufOrErr.takeError(); 95 96 NewArchiveMember M; 97 M.Buf = MemoryBuffer::getMemBuffer(*BufOrErr, false); 98 M.MemberName = M.Buf->getBufferIdentifier(); 99 if (!Deterministic) { 100 auto ModTimeOrErr = OldMember.getLastModified(); 101 if (!ModTimeOrErr) 102 return ModTimeOrErr.takeError(); 103 M.ModTime = ModTimeOrErr.get(); 104 Expected<unsigned> UIDOrErr = OldMember.getUID(); 105 if (!UIDOrErr) 106 return UIDOrErr.takeError(); 107 M.UID = UIDOrErr.get(); 108 Expected<unsigned> GIDOrErr = OldMember.getGID(); 109 if (!GIDOrErr) 110 return GIDOrErr.takeError(); 111 M.GID = GIDOrErr.get(); 112 Expected<sys::fs::perms> AccessModeOrErr = OldMember.getAccessMode(); 113 if (!AccessModeOrErr) 114 return AccessModeOrErr.takeError(); 115 M.Perms = AccessModeOrErr.get(); 116 } 117 return std::move(M); 118 } 119 120 Expected<NewArchiveMember> NewArchiveMember::getFile(StringRef FileName, 121 bool Deterministic) { 122 sys::fs::file_status Status; 123 auto FDOrErr = sys::fs::openNativeFileForRead(FileName); 124 if (!FDOrErr) 125 return FDOrErr.takeError(); 126 sys::fs::file_t FD = *FDOrErr; 127 assert(FD != sys::fs::kInvalidFile); 128 129 if (auto EC = sys::fs::status(FD, Status)) 130 return errorCodeToError(EC); 131 132 // Opening a directory doesn't make sense. Let it fail. 133 // Linux cannot open directories with open(2), although 134 // cygwin and *bsd can. 135 if (Status.type() == sys::fs::file_type::directory_file) 136 return errorCodeToError(make_error_code(errc::is_a_directory)); 137 138 ErrorOr<std::unique_ptr<MemoryBuffer>> MemberBufferOrErr = 139 MemoryBuffer::getOpenFile(FD, FileName, Status.getSize(), false); 140 if (!MemberBufferOrErr) 141 return errorCodeToError(MemberBufferOrErr.getError()); 142 143 if (auto EC = sys::fs::closeFile(FD)) 144 return errorCodeToError(EC); 145 146 NewArchiveMember M; 147 M.Buf = std::move(*MemberBufferOrErr); 148 M.MemberName = M.Buf->getBufferIdentifier(); 149 if (!Deterministic) { 150 M.ModTime = std::chrono::time_point_cast<std::chrono::seconds>( 151 Status.getLastModificationTime()); 152 M.UID = Status.getUser(); 153 M.GID = Status.getGroup(); 154 M.Perms = Status.permissions(); 155 } 156 return std::move(M); 157 } 158 159 template <typename T> 160 static void printWithSpacePadding(raw_ostream &OS, T Data, unsigned Size) { 161 uint64_t OldPos = OS.tell(); 162 OS << Data; 163 unsigned SizeSoFar = OS.tell() - OldPos; 164 assert(SizeSoFar <= Size && "Data doesn't fit in Size"); 165 OS.indent(Size - SizeSoFar); 166 } 167 168 static bool isDarwin(object::Archive::Kind Kind) { 169 return Kind == object::Archive::K_DARWIN || 170 Kind == object::Archive::K_DARWIN64; 171 } 172 173 static bool isAIXBigArchive(object::Archive::Kind Kind) { 174 return Kind == object::Archive::K_AIXBIG; 175 } 176 177 static bool isCOFFArchive(object::Archive::Kind Kind) { 178 return Kind == object::Archive::K_COFF; 179 } 180 181 static bool isBSDLike(object::Archive::Kind Kind) { 182 switch (Kind) { 183 case object::Archive::K_GNU: 184 case object::Archive::K_GNU64: 185 case object::Archive::K_AIXBIG: 186 case object::Archive::K_COFF: 187 return false; 188 case object::Archive::K_BSD: 189 case object::Archive::K_DARWIN: 190 case object::Archive::K_DARWIN64: 191 return true; 192 } 193 llvm_unreachable("not supported for writting"); 194 } 195 196 template <class T> 197 static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val) { 198 support::endian::write(Out, Val, 199 isBSDLike(Kind) ? support::little : support::big); 200 } 201 202 template <class T> static void printLE(raw_ostream &Out, T Val) { 203 support::endian::write(Out, Val, support::little); 204 } 205 206 static void printRestOfMemberHeader( 207 raw_ostream &Out, const sys::TimePoint<std::chrono::seconds> &ModTime, 208 unsigned UID, unsigned GID, unsigned Perms, uint64_t Size) { 209 printWithSpacePadding(Out, sys::toTimeT(ModTime), 12); 210 211 // The format has only 6 chars for uid and gid. Truncate if the provided 212 // values don't fit. 213 printWithSpacePadding(Out, UID % 1000000, 6); 214 printWithSpacePadding(Out, GID % 1000000, 6); 215 216 printWithSpacePadding(Out, format("%o", Perms), 8); 217 printWithSpacePadding(Out, Size, 10); 218 Out << "`\n"; 219 } 220 221 static void 222 printGNUSmallMemberHeader(raw_ostream &Out, StringRef Name, 223 const sys::TimePoint<std::chrono::seconds> &ModTime, 224 unsigned UID, unsigned GID, unsigned Perms, 225 uint64_t Size) { 226 printWithSpacePadding(Out, Twine(Name) + "/", 16); 227 printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, Size); 228 } 229 230 static void 231 printBSDMemberHeader(raw_ostream &Out, uint64_t Pos, StringRef Name, 232 const sys::TimePoint<std::chrono::seconds> &ModTime, 233 unsigned UID, unsigned GID, unsigned Perms, uint64_t Size) { 234 uint64_t PosAfterHeader = Pos + 60 + Name.size(); 235 // Pad so that even 64 bit object files are aligned. 236 unsigned Pad = offsetToAlignment(PosAfterHeader, Align(8)); 237 unsigned NameWithPadding = Name.size() + Pad; 238 printWithSpacePadding(Out, Twine("#1/") + Twine(NameWithPadding), 16); 239 printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, 240 NameWithPadding + Size); 241 Out << Name; 242 while (Pad--) 243 Out.write(uint8_t(0)); 244 } 245 246 static void 247 printBigArchiveMemberHeader(raw_ostream &Out, StringRef Name, 248 const sys::TimePoint<std::chrono::seconds> &ModTime, 249 unsigned UID, unsigned GID, unsigned Perms, 250 uint64_t Size, unsigned PrevOffset, 251 unsigned NextOffset) { 252 unsigned NameLen = Name.size(); 253 254 printWithSpacePadding(Out, Size, 20); // File member size 255 printWithSpacePadding(Out, NextOffset, 20); // Next member header offset 256 printWithSpacePadding(Out, PrevOffset, 20); // Previous member header offset 257 printWithSpacePadding(Out, sys::toTimeT(ModTime), 12); // File member date 258 // The big archive format has 12 chars for uid and gid. 259 printWithSpacePadding(Out, UID % 1000000000000, 12); // UID 260 printWithSpacePadding(Out, GID % 1000000000000, 12); // GID 261 printWithSpacePadding(Out, format("%o", Perms), 12); // Permission 262 printWithSpacePadding(Out, NameLen, 4); // Name length 263 if (NameLen) { 264 printWithSpacePadding(Out, Name, NameLen); // Name 265 if (NameLen % 2) 266 Out.write(uint8_t(0)); // Null byte padding 267 } 268 Out << "`\n"; // Terminator 269 } 270 271 static bool useStringTable(bool Thin, StringRef Name) { 272 return Thin || Name.size() >= 16 || Name.contains('/'); 273 } 274 275 static bool is64BitKind(object::Archive::Kind Kind) { 276 switch (Kind) { 277 case object::Archive::K_GNU: 278 case object::Archive::K_BSD: 279 case object::Archive::K_DARWIN: 280 case object::Archive::K_COFF: 281 return false; 282 case object::Archive::K_AIXBIG: 283 case object::Archive::K_DARWIN64: 284 case object::Archive::K_GNU64: 285 return true; 286 } 287 llvm_unreachable("not supported for writting"); 288 } 289 290 static void 291 printMemberHeader(raw_ostream &Out, uint64_t Pos, raw_ostream &StringTable, 292 StringMap<uint64_t> &MemberNames, object::Archive::Kind Kind, 293 bool Thin, const NewArchiveMember &M, 294 sys::TimePoint<std::chrono::seconds> ModTime, uint64_t Size) { 295 if (isBSDLike(Kind)) 296 return printBSDMemberHeader(Out, Pos, M.MemberName, ModTime, M.UID, M.GID, 297 M.Perms, Size); 298 if (!useStringTable(Thin, M.MemberName)) 299 return printGNUSmallMemberHeader(Out, M.MemberName, ModTime, M.UID, M.GID, 300 M.Perms, Size); 301 Out << '/'; 302 uint64_t NamePos; 303 if (Thin) { 304 NamePos = StringTable.tell(); 305 StringTable << M.MemberName << "/\n"; 306 } else { 307 auto Insertion = MemberNames.insert({M.MemberName, uint64_t(0)}); 308 if (Insertion.second) { 309 Insertion.first->second = StringTable.tell(); 310 StringTable << M.MemberName; 311 if (isCOFFArchive(Kind)) 312 StringTable << '\0'; 313 else 314 StringTable << "/\n"; 315 } 316 NamePos = Insertion.first->second; 317 } 318 printWithSpacePadding(Out, NamePos, 15); 319 printRestOfMemberHeader(Out, ModTime, M.UID, M.GID, M.Perms, Size); 320 } 321 322 namespace { 323 struct MemberData { 324 std::vector<unsigned> Symbols; 325 std::string Header; 326 StringRef Data; 327 StringRef Padding; 328 }; 329 } // namespace 330 331 static MemberData computeStringTable(StringRef Names) { 332 unsigned Size = Names.size(); 333 unsigned Pad = offsetToAlignment(Size, Align(2)); 334 std::string Header; 335 raw_string_ostream Out(Header); 336 printWithSpacePadding(Out, "//", 48); 337 printWithSpacePadding(Out, Size + Pad, 10); 338 Out << "`\n"; 339 Out.flush(); 340 return {{}, std::move(Header), Names, Pad ? "\n" : ""}; 341 } 342 343 static sys::TimePoint<std::chrono::seconds> now(bool Deterministic) { 344 using namespace std::chrono; 345 346 if (!Deterministic) 347 return time_point_cast<seconds>(system_clock::now()); 348 return sys::TimePoint<seconds>(); 349 } 350 351 static bool isArchiveSymbol(const object::BasicSymbolRef &S) { 352 Expected<uint32_t> SymFlagsOrErr = S.getFlags(); 353 if (!SymFlagsOrErr) 354 // TODO: Actually report errors helpfully. 355 report_fatal_error(SymFlagsOrErr.takeError()); 356 if (*SymFlagsOrErr & object::SymbolRef::SF_FormatSpecific) 357 return false; 358 if (!(*SymFlagsOrErr & object::SymbolRef::SF_Global)) 359 return false; 360 if (*SymFlagsOrErr & object::SymbolRef::SF_Undefined) 361 return false; 362 return true; 363 } 364 365 static void printNBits(raw_ostream &Out, object::Archive::Kind Kind, 366 uint64_t Val) { 367 if (is64BitKind(Kind)) 368 print<uint64_t>(Out, Kind, Val); 369 else 370 print<uint32_t>(Out, Kind, Val); 371 } 372 373 static uint64_t computeSymbolTableSize(object::Archive::Kind Kind, 374 uint64_t NumSyms, uint64_t OffsetSize, 375 uint64_t StringTableSize, 376 uint32_t *Padding = nullptr) { 377 assert((OffsetSize == 4 || OffsetSize == 8) && "Unsupported OffsetSize"); 378 uint64_t Size = OffsetSize; // Number of entries 379 if (isBSDLike(Kind)) 380 Size += NumSyms * OffsetSize * 2; // Table 381 else 382 Size += NumSyms * OffsetSize; // Table 383 if (isBSDLike(Kind)) 384 Size += OffsetSize; // byte count 385 Size += StringTableSize; 386 // ld64 expects the members to be 8-byte aligned for 64-bit content and at 387 // least 4-byte aligned for 32-bit content. Opt for the larger encoding 388 // uniformly. 389 // We do this for all bsd formats because it simplifies aligning members. 390 // For the big archive format, the symbol table is the last member, so there 391 // is no need to align. 392 uint32_t Pad = isAIXBigArchive(Kind) 393 ? 0 394 : offsetToAlignment(Size, Align(isBSDLike(Kind) ? 8 : 2)); 395 396 Size += Pad; 397 if (Padding) 398 *Padding = Pad; 399 return Size; 400 } 401 402 static uint64_t computeSymbolMapSize(uint64_t NumObj, SymMap &SymMap, 403 uint32_t *Padding = nullptr) { 404 uint64_t Size = sizeof(uint32_t) * 2; // Number of symbols and objects entries 405 Size += NumObj * sizeof(uint32_t); // Offset table 406 407 for (auto S : SymMap.Map) 408 Size += sizeof(uint16_t) + S.first.length() + 1; 409 410 uint32_t Pad = offsetToAlignment(Size, Align(2)); 411 Size += Pad; 412 if (Padding) 413 *Padding = Pad; 414 return Size; 415 } 416 417 static void writeSymbolTableHeader(raw_ostream &Out, object::Archive::Kind Kind, 418 bool Deterministic, uint64_t Size, 419 uint64_t PrevMemberOffset = 0) { 420 if (isBSDLike(Kind)) { 421 const char *Name = is64BitKind(Kind) ? "__.SYMDEF_64" : "__.SYMDEF"; 422 printBSDMemberHeader(Out, Out.tell(), Name, now(Deterministic), 0, 0, 0, 423 Size); 424 } else if (isAIXBigArchive(Kind)) { 425 printBigArchiveMemberHeader(Out, "", now(Deterministic), 0, 0, 426 0, Size, PrevMemberOffset, 0); 427 } else { 428 const char *Name = is64BitKind(Kind) ? "/SYM64" : ""; 429 printGNUSmallMemberHeader(Out, Name, now(Deterministic), 0, 0, 0, Size); 430 } 431 } 432 433 static uint64_t computeHeadersSize(object::Archive::Kind Kind, 434 uint64_t NumMembers, 435 uint64_t StringMemberSize, uint64_t NumSyms, 436 uint64_t SymNamesSize, SymMap *SymMap) { 437 uint32_t OffsetSize = is64BitKind(Kind) ? 8 : 4; 438 uint64_t SymtabSize = 439 computeSymbolTableSize(Kind, NumSyms, OffsetSize, SymNamesSize); 440 auto computeSymbolTableHeaderSize = [=] { 441 SmallString<0> TmpBuf; 442 raw_svector_ostream Tmp(TmpBuf); 443 writeSymbolTableHeader(Tmp, Kind, true, SymtabSize); 444 return TmpBuf.size(); 445 }; 446 uint32_t HeaderSize = computeSymbolTableHeaderSize(); 447 uint64_t Size = strlen("!<arch>\n") + HeaderSize + SymtabSize; 448 449 if (SymMap) 450 Size += HeaderSize + computeSymbolMapSize(NumMembers, *SymMap); 451 452 return Size + StringMemberSize; 453 } 454 455 static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, 456 bool Deterministic, ArrayRef<MemberData> Members, 457 StringRef StringTable, uint64_t MembersOffset, 458 uint64_t PrevMemberOffset = 0) { 459 // We don't write a symbol table on an archive with no members -- except on 460 // Darwin, where the linker will abort unless the archive has a symbol table. 461 if (StringTable.empty() && !isDarwin(Kind) && !isCOFFArchive(Kind)) 462 return; 463 464 unsigned NumSyms = 0; 465 for (const MemberData &M : Members) 466 NumSyms += M.Symbols.size(); 467 468 uint64_t OffsetSize = is64BitKind(Kind) ? 8 : 4; 469 uint32_t Pad; 470 uint64_t Size = computeSymbolTableSize(Kind, NumSyms, OffsetSize, 471 StringTable.size(), &Pad); 472 writeSymbolTableHeader(Out, Kind, Deterministic, Size, PrevMemberOffset); 473 474 if (isBSDLike(Kind)) 475 printNBits(Out, Kind, NumSyms * 2 * OffsetSize); 476 else 477 printNBits(Out, Kind, NumSyms); 478 479 uint64_t Pos = MembersOffset; 480 for (const MemberData &M : Members) { 481 for (unsigned StringOffset : M.Symbols) { 482 if (isBSDLike(Kind)) 483 printNBits(Out, Kind, StringOffset); 484 printNBits(Out, Kind, Pos); // member offset 485 } 486 Pos += M.Header.size() + M.Data.size() + M.Padding.size(); 487 } 488 489 if (isBSDLike(Kind)) 490 // byte count of the string table 491 printNBits(Out, Kind, StringTable.size()); 492 Out << StringTable; 493 494 while (Pad--) 495 Out.write(uint8_t(0)); 496 } 497 498 static void writeSymbolMap(raw_ostream &Out, object::Archive::Kind Kind, 499 bool Deterministic, ArrayRef<MemberData> Members, 500 SymMap &SymMap, uint64_t MembersOffset) { 501 uint32_t Pad; 502 uint64_t Size = computeSymbolMapSize(Members.size(), SymMap, &Pad); 503 writeSymbolTableHeader(Out, Kind, Deterministic, Size, 0); 504 505 uint32_t Pos = MembersOffset; 506 507 printLE<uint32_t>(Out, Members.size()); 508 for (const MemberData &M : Members) { 509 printLE(Out, Pos); // member offset 510 Pos += M.Header.size() + M.Data.size() + M.Padding.size(); 511 } 512 513 printLE<uint32_t>(Out, SymMap.Map.size()); 514 515 for (auto S : SymMap.Map) 516 printLE(Out, S.second); 517 for (auto S : SymMap.Map) 518 Out << S.first << '\0'; 519 520 while (Pad--) 521 Out.write(uint8_t(0)); 522 } 523 524 static Expected<std::vector<unsigned>> 525 getSymbols(MemoryBufferRef Buf, uint16_t Index, raw_ostream &SymNames, 526 SymMap *SymMap, bool &HasObject) { 527 std::vector<unsigned> Ret; 528 529 // In the scenario when LLVMContext is populated SymbolicFile will contain a 530 // reference to it, thus SymbolicFile should be destroyed first. 531 LLVMContext Context; 532 std::unique_ptr<object::SymbolicFile> Obj; 533 534 const file_magic Type = identify_magic(Buf.getBuffer()); 535 // Treat unsupported file types as having no symbols. 536 if (!object::SymbolicFile::isSymbolicFile(Type, &Context)) 537 return Ret; 538 if (Type == file_magic::bitcode) { 539 auto ObjOrErr = object::SymbolicFile::createSymbolicFile( 540 Buf, file_magic::bitcode, &Context); 541 if (!ObjOrErr) 542 return ObjOrErr.takeError(); 543 Obj = std::move(*ObjOrErr); 544 } else { 545 auto ObjOrErr = object::SymbolicFile::createSymbolicFile(Buf); 546 if (!ObjOrErr) 547 return ObjOrErr.takeError(); 548 Obj = std::move(*ObjOrErr); 549 } 550 551 HasObject = true; 552 for (const object::BasicSymbolRef &S : Obj->symbols()) { 553 if (!isArchiveSymbol(S)) 554 continue; 555 if (SymMap) { 556 std::string Name; 557 raw_string_ostream NameStream(Name); 558 if (Error E = S.printName(NameStream)) 559 return std::move(E); 560 if (SymMap->Map.find(Name) != SymMap->Map.end()) 561 continue; // ignore duplicated symbol 562 SymMap->Map[Name] = Index; 563 Ret.push_back(SymNames.tell()); 564 SymNames << Name << '\0'; 565 } else { 566 Ret.push_back(SymNames.tell()); 567 if (Error E = S.printName(SymNames)) 568 return std::move(E); 569 SymNames << '\0'; 570 } 571 } 572 return Ret; 573 } 574 575 static Expected<std::vector<MemberData>> 576 computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, 577 object::Archive::Kind Kind, bool Thin, bool Deterministic, 578 bool NeedSymbols, SymMap *SymMap, 579 ArrayRef<NewArchiveMember> NewMembers) { 580 static char PaddingData[8] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'}; 581 582 uint64_t Pos = 583 isAIXBigArchive(Kind) ? sizeof(object::BigArchive::FixLenHdr) : 0; 584 585 std::vector<MemberData> Ret; 586 bool HasObject = false; 587 588 // Deduplicate long member names in the string table and reuse earlier name 589 // offsets. This especially saves space for COFF Import libraries where all 590 // members have the same name. 591 StringMap<uint64_t> MemberNames; 592 593 // UniqueTimestamps is a special case to improve debugging on Darwin: 594 // 595 // The Darwin linker does not link debug info into the final 596 // binary. Instead, it emits entries of type N_OSO in in the output 597 // binary's symbol table, containing references to the linked-in 598 // object files. Using that reference, the debugger can read the 599 // debug data directly from the object files. Alternatively, an 600 // invocation of 'dsymutil' will link the debug data from the object 601 // files into a dSYM bundle, which can be loaded by the debugger, 602 // instead of the object files. 603 // 604 // For an object file, the N_OSO entries contain the absolute path 605 // path to the file, and the file's timestamp. For an object 606 // included in an archive, the path is formatted like 607 // "/absolute/path/to/archive.a(member.o)", and the timestamp is the 608 // archive member's timestamp, rather than the archive's timestamp. 609 // 610 // However, this doesn't always uniquely identify an object within 611 // an archive -- an archive file can have multiple entries with the 612 // same filename. (This will happen commonly if the original object 613 // files started in different directories.) The only way they get 614 // distinguished, then, is via the timestamp. But this process is 615 // unable to find the correct object file in the archive when there 616 // are two files of the same name and timestamp. 617 // 618 // Additionally, timestamp==0 is treated specially, and causes the 619 // timestamp to be ignored as a match criteria. 620 // 621 // That will "usually" work out okay when creating an archive not in 622 // deterministic timestamp mode, because the objects will probably 623 // have been created at different timestamps. 624 // 625 // To ameliorate this problem, in deterministic archive mode (which 626 // is the default), on Darwin we will emit a unique non-zero 627 // timestamp for each entry with a duplicated name. This is still 628 // deterministic: the only thing affecting that timestamp is the 629 // order of the files in the resultant archive. 630 // 631 // See also the functions that handle the lookup: 632 // in lldb: ObjectContainerBSDArchive::Archive::FindObject() 633 // in llvm/tools/dsymutil: BinaryHolder::GetArchiveMemberBuffers(). 634 bool UniqueTimestamps = Deterministic && isDarwin(Kind); 635 std::map<StringRef, unsigned> FilenameCount; 636 if (UniqueTimestamps) { 637 for (const NewArchiveMember &M : NewMembers) 638 FilenameCount[M.MemberName]++; 639 for (auto &Entry : FilenameCount) 640 Entry.second = Entry.second > 1 ? 1 : 0; 641 } 642 643 // The big archive format needs to know the offset of the previous member 644 // header. 645 unsigned PrevOffset = 0, Index = 0; 646 for (const NewArchiveMember &M : NewMembers) { 647 std::string Header; 648 raw_string_ostream Out(Header); 649 650 MemoryBufferRef Buf = M.Buf->getMemBufferRef(); 651 StringRef Data = Thin ? "" : Buf.getBuffer(); 652 653 Index++; 654 655 // ld64 expects the members to be 8-byte aligned for 64-bit content and at 656 // least 4-byte aligned for 32-bit content. Opt for the larger encoding 657 // uniformly. This matches the behaviour with cctools and ensures that ld64 658 // is happy with archives that we generate. 659 unsigned MemberPadding = 660 isDarwin(Kind) ? offsetToAlignment(Data.size(), Align(8)) : 0; 661 unsigned TailPadding = 662 offsetToAlignment(Data.size() + MemberPadding, Align(2)); 663 StringRef Padding = StringRef(PaddingData, MemberPadding + TailPadding); 664 665 sys::TimePoint<std::chrono::seconds> ModTime; 666 if (UniqueTimestamps) 667 // Increment timestamp for each file of a given name. 668 ModTime = sys::toTimePoint(FilenameCount[M.MemberName]++); 669 else 670 ModTime = M.ModTime; 671 672 uint64_t Size = Buf.getBufferSize() + MemberPadding; 673 if (Size > object::Archive::MaxMemberSize) { 674 std::string StringMsg = 675 "File " + M.MemberName.str() + " exceeds size limit"; 676 return make_error<object::GenericBinaryError>( 677 std::move(StringMsg), object::object_error::parse_failed); 678 } 679 680 if (isAIXBigArchive(Kind)) { 681 unsigned NextOffset = Pos + sizeof(object::BigArMemHdrType) + 682 alignTo(M.MemberName.size(), 2) + alignTo(Size, 2); 683 printBigArchiveMemberHeader(Out, M.MemberName, ModTime, M.UID, M.GID, 684 M.Perms, Size, PrevOffset, NextOffset); 685 PrevOffset = Pos; 686 } else { 687 printMemberHeader(Out, Pos, StringTable, MemberNames, Kind, Thin, M, 688 ModTime, Size); 689 } 690 Out.flush(); 691 692 std::vector<unsigned> Symbols; 693 if (NeedSymbols) { 694 Expected<std::vector<unsigned>> SymbolsOrErr = 695 getSymbols(Buf, Index, SymNames, SymMap, HasObject); 696 if (!SymbolsOrErr) 697 return createFileError(M.MemberName, SymbolsOrErr.takeError()); 698 Symbols = std::move(*SymbolsOrErr); 699 } 700 701 Pos += Header.size() + Data.size() + Padding.size(); 702 Ret.push_back({std::move(Symbols), std::move(Header), Data, Padding}); 703 } 704 // If there are no symbols, emit an empty symbol table, to satisfy Solaris 705 // tools, older versions of which expect a symbol table in a non-empty 706 // archive, regardless of whether there are any symbols in it. 707 if (HasObject && SymNames.tell() == 0 && !isCOFFArchive(Kind)) 708 SymNames << '\0' << '\0' << '\0'; 709 return Ret; 710 } 711 712 namespace llvm { 713 714 static ErrorOr<SmallString<128>> canonicalizePath(StringRef P) { 715 SmallString<128> Ret = P; 716 std::error_code Err = sys::fs::make_absolute(Ret); 717 if (Err) 718 return Err; 719 sys::path::remove_dots(Ret, /*removedotdot*/ true); 720 return Ret; 721 } 722 723 // Compute the relative path from From to To. 724 Expected<std::string> computeArchiveRelativePath(StringRef From, StringRef To) { 725 ErrorOr<SmallString<128>> PathToOrErr = canonicalizePath(To); 726 ErrorOr<SmallString<128>> DirFromOrErr = canonicalizePath(From); 727 if (!PathToOrErr || !DirFromOrErr) 728 return errorCodeToError(std::error_code(errno, std::generic_category())); 729 730 const SmallString<128> &PathTo = *PathToOrErr; 731 const SmallString<128> &DirFrom = sys::path::parent_path(*DirFromOrErr); 732 733 // Can't construct a relative path between different roots 734 if (sys::path::root_name(PathTo) != sys::path::root_name(DirFrom)) 735 return sys::path::convert_to_slash(PathTo); 736 737 // Skip common prefixes 738 auto FromTo = 739 std::mismatch(sys::path::begin(DirFrom), sys::path::end(DirFrom), 740 sys::path::begin(PathTo)); 741 auto FromI = FromTo.first; 742 auto ToI = FromTo.second; 743 744 // Construct relative path 745 SmallString<128> Relative; 746 for (auto FromE = sys::path::end(DirFrom); FromI != FromE; ++FromI) 747 sys::path::append(Relative, sys::path::Style::posix, ".."); 748 749 for (auto ToE = sys::path::end(PathTo); ToI != ToE; ++ToI) 750 sys::path::append(Relative, sys::path::Style::posix, *ToI); 751 752 return std::string(Relative.str()); 753 } 754 755 static Error writeArchiveToStream(raw_ostream &Out, 756 ArrayRef<NewArchiveMember> NewMembers, 757 bool WriteSymtab, object::Archive::Kind Kind, 758 bool Deterministic, bool Thin) { 759 assert((!Thin || !isBSDLike(Kind)) && "Only the gnu format has a thin mode"); 760 761 SmallString<0> SymNamesBuf; 762 raw_svector_ostream SymNames(SymNamesBuf); 763 SmallString<0> StringTableBuf; 764 raw_svector_ostream StringTable(StringTableBuf); 765 SymMap SymMap; 766 767 // COFF symbol map uses 16-bit indexes, so we can't use it if there are too 768 // many members. 769 if (isCOFFArchive(Kind) && NewMembers.size() > 0xfffe) 770 Kind = object::Archive::K_GNU; 771 772 Expected<std::vector<MemberData>> DataOrErr = computeMemberData( 773 StringTable, SymNames, Kind, Thin, Deterministic, WriteSymtab, 774 isCOFFArchive(Kind) ? &SymMap : nullptr, NewMembers); 775 if (Error E = DataOrErr.takeError()) 776 return E; 777 std::vector<MemberData> &Data = *DataOrErr; 778 779 uint64_t StringTableSize = 0; 780 MemberData StringTableMember; 781 if (!StringTableBuf.empty() && !isAIXBigArchive(Kind)) { 782 StringTableMember = computeStringTable(StringTableBuf); 783 StringTableSize = StringTableMember.Header.size() + 784 StringTableMember.Data.size() + 785 StringTableMember.Padding.size(); 786 } 787 788 // We would like to detect if we need to switch to a 64-bit symbol table. 789 uint64_t LastMemberEndOffset = 0; 790 uint64_t LastMemberHeaderOffset = 0; 791 uint64_t NumSyms = 0; 792 for (const auto &M : Data) { 793 // Record the start of the member's offset 794 LastMemberHeaderOffset = LastMemberEndOffset; 795 // Account for the size of each part associated with the member. 796 LastMemberEndOffset += M.Header.size() + M.Data.size() + M.Padding.size(); 797 NumSyms += M.Symbols.size(); 798 } 799 800 std::optional<uint64_t> HeadersSize; 801 802 // The symbol table is put at the end of the big archive file. The symbol 803 // table is at the start of the archive file for other archive formats. 804 if (WriteSymtab && !is64BitKind(Kind)) { 805 // We assume 32-bit offsets to see if 32-bit symbols are possible or not. 806 HeadersSize = computeHeadersSize(Kind, Data.size(), StringTableSize, 807 NumSyms, SymNamesBuf.size(), 808 isCOFFArchive(Kind) ? &SymMap : nullptr); 809 810 // The SYM64 format is used when an archive's member offsets are larger than 811 // 32-bits can hold. The need for this shift in format is detected by 812 // writeArchive. To test this we need to generate a file with a member that 813 // has an offset larger than 32-bits but this demands a very slow test. To 814 // speed the test up we use this environment variable to pretend like the 815 // cutoff happens before 32-bits and instead happens at some much smaller 816 // value. 817 uint64_t Sym64Threshold = 1ULL << 32; 818 const char *Sym64Env = std::getenv("SYM64_THRESHOLD"); 819 if (Sym64Env) 820 StringRef(Sym64Env).getAsInteger(10, Sym64Threshold); 821 822 // If LastMemberHeaderOffset isn't going to fit in a 32-bit varible we need 823 // to switch to 64-bit. Note that the file can be larger than 4GB as long as 824 // the last member starts before the 4GB offset. 825 if (*HeadersSize + LastMemberHeaderOffset >= Sym64Threshold) { 826 if (Kind == object::Archive::K_DARWIN) 827 Kind = object::Archive::K_DARWIN64; 828 else 829 Kind = object::Archive::K_GNU64; 830 HeadersSize.reset(); 831 } 832 } 833 834 if (Thin) 835 Out << "!<thin>\n"; 836 else if (isAIXBigArchive(Kind)) 837 Out << "<bigaf>\n"; 838 else 839 Out << "!<arch>\n"; 840 841 if (!isAIXBigArchive(Kind)) { 842 if (WriteSymtab) { 843 if (!HeadersSize) 844 HeadersSize = computeHeadersSize( 845 Kind, Data.size(), StringTableSize, NumSyms, SymNamesBuf.size(), 846 isCOFFArchive(Kind) ? &SymMap : nullptr); 847 writeSymbolTable(Out, Kind, Deterministic, Data, SymNamesBuf, 848 *HeadersSize); 849 850 if (isCOFFArchive(Kind)) 851 writeSymbolMap(Out, Kind, Deterministic, Data, SymMap, *HeadersSize); 852 } 853 854 if (StringTableSize) 855 Out << StringTableMember.Header << StringTableMember.Data 856 << StringTableMember.Padding; 857 858 for (const MemberData &M : Data) 859 Out << M.Header << M.Data << M.Padding; 860 } else { 861 HeadersSize = sizeof(object::BigArchive::FixLenHdr); 862 LastMemberEndOffset += *HeadersSize; 863 LastMemberHeaderOffset += *HeadersSize; 864 865 // For the big archive (AIX) format, compute a table of member names and 866 // offsets, used in the member table. 867 uint64_t MemberTableNameStrTblSize = 0; 868 std::vector<size_t> MemberOffsets; 869 std::vector<StringRef> MemberNames; 870 // Loop across object to find offset and names. 871 uint64_t MemberEndOffset = sizeof(object::BigArchive::FixLenHdr); 872 for (size_t I = 0, Size = NewMembers.size(); I != Size; ++I) { 873 const NewArchiveMember &Member = NewMembers[I]; 874 MemberTableNameStrTblSize += Member.MemberName.size() + 1; 875 MemberOffsets.push_back(MemberEndOffset); 876 MemberNames.push_back(Member.MemberName); 877 // File member name ended with "`\n". The length is included in 878 // BigArMemHdrType. 879 MemberEndOffset += sizeof(object::BigArMemHdrType) + 880 alignTo(Data[I].Data.size(), 2) + 881 alignTo(Member.MemberName.size(), 2); 882 } 883 884 // AIX member table size. 885 unsigned MemberTableSize = 20 + // Number of members field 886 20 * MemberOffsets.size() + 887 MemberTableNameStrTblSize; 888 889 unsigned GlobalSymbolOffset = 890 (WriteSymtab && NumSyms > 0) 891 ? LastMemberEndOffset + 892 alignTo(sizeof(object::BigArMemHdrType) + MemberTableSize, 2) 893 : 0; 894 895 // Fixed Sized Header. 896 printWithSpacePadding(Out, NewMembers.size() ? LastMemberEndOffset : 0, 897 20); // Offset to member table 898 // If there are no file members in the archive, there will be no global 899 // symbol table. 900 printWithSpacePadding(Out, NewMembers.size() ? GlobalSymbolOffset : 0, 20); 901 printWithSpacePadding( 902 Out, 0, 903 20); // Offset to 64 bits global symbol table - Not supported yet 904 printWithSpacePadding( 905 Out, NewMembers.size() ? sizeof(object::BigArchive::FixLenHdr) : 0, 906 20); // Offset to first archive member 907 printWithSpacePadding(Out, NewMembers.size() ? LastMemberHeaderOffset : 0, 908 20); // Offset to last archive member 909 printWithSpacePadding( 910 Out, 0, 911 20); // Offset to first member of free list - Not supported yet 912 913 for (const MemberData &M : Data) { 914 Out << M.Header << M.Data; 915 if (M.Data.size() % 2) 916 Out << '\0'; 917 } 918 919 if (NewMembers.size()) { 920 // Member table. 921 printBigArchiveMemberHeader(Out, "", sys::toTimePoint(0), 0, 0, 0, 922 MemberTableSize, LastMemberHeaderOffset, 923 GlobalSymbolOffset); 924 printWithSpacePadding(Out, MemberOffsets.size(), 20); // Number of members 925 for (uint64_t MemberOffset : MemberOffsets) 926 printWithSpacePadding(Out, MemberOffset, 927 20); // Offset to member file header. 928 for (StringRef MemberName : MemberNames) 929 Out << MemberName << '\0'; // Member file name, null byte padding. 930 931 if (MemberTableNameStrTblSize % 2) 932 Out << '\0'; // Name table must be tail padded to an even number of 933 // bytes. 934 935 if (WriteSymtab && NumSyms > 0) 936 writeSymbolTable(Out, Kind, Deterministic, Data, SymNamesBuf, 937 *HeadersSize, LastMemberEndOffset); 938 } 939 } 940 Out.flush(); 941 return Error::success(); 942 } 943 944 Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers, 945 bool WriteSymtab, object::Archive::Kind Kind, 946 bool Deterministic, bool Thin, 947 std::unique_ptr<MemoryBuffer> OldArchiveBuf) { 948 Expected<sys::fs::TempFile> Temp = 949 sys::fs::TempFile::create(ArcName + ".temp-archive-%%%%%%%.a"); 950 if (!Temp) 951 return Temp.takeError(); 952 raw_fd_ostream Out(Temp->FD, false); 953 954 if (Error E = writeArchiveToStream(Out, NewMembers, WriteSymtab, Kind, 955 Deterministic, Thin)) { 956 if (Error DiscardError = Temp->discard()) 957 return joinErrors(std::move(E), std::move(DiscardError)); 958 return E; 959 } 960 961 // At this point, we no longer need whatever backing memory 962 // was used to generate the NewMembers. On Windows, this buffer 963 // could be a mapped view of the file we want to replace (if 964 // we're updating an existing archive, say). In that case, the 965 // rename would still succeed, but it would leave behind a 966 // temporary file (actually the original file renamed) because 967 // a file cannot be deleted while there's a handle open on it, 968 // only renamed. So by freeing this buffer, this ensures that 969 // the last open handle on the destination file, if any, is 970 // closed before we attempt to rename. 971 OldArchiveBuf.reset(); 972 973 return Temp->keep(ArcName); 974 } 975 976 Expected<std::unique_ptr<MemoryBuffer>> 977 writeArchiveToBuffer(ArrayRef<NewArchiveMember> NewMembers, bool WriteSymtab, 978 object::Archive::Kind Kind, bool Deterministic, 979 bool Thin) { 980 SmallVector<char, 0> ArchiveBufferVector; 981 raw_svector_ostream ArchiveStream(ArchiveBufferVector); 982 983 if (Error E = writeArchiveToStream(ArchiveStream, NewMembers, WriteSymtab, 984 Kind, Deterministic, Thin)) 985 return std::move(E); 986 987 return std::make_unique<SmallVectorMemoryBuffer>( 988 std::move(ArchiveBufferVector), /*RequiresNullTerminator=*/false); 989 } 990 991 } // namespace llvm 992