xref: /llvm-project/clang/lib/Driver/OffloadBundler.cpp (revision e87b843811e147db8d1edd7fe2dd52bb90be6ebc)
1 //===- OffloadBundler.cpp - File Bundling and Unbundling ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements an offload bundling API that bundles different files
11 /// that relate with the same source code but different targets into a single
/// one. It also implements the opposite functionality, i.e. unbundling files
/// previously created by this API.
14 ///
15 //===----------------------------------------------------------------------===//
16 
17 #include "clang/Driver/OffloadBundler.h"
18 #include "clang/Basic/Cuda.h"
19 #include "clang/Basic/TargetID.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/SmallString.h"
22 #include "llvm/ADT/SmallVector.h"
23 #include "llvm/ADT/StringExtras.h"
24 #include "llvm/ADT/StringMap.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/BinaryFormat/Magic.h"
27 #include "llvm/Object/Archive.h"
28 #include "llvm/Object/ArchiveWriter.h"
29 #include "llvm/Object/Binary.h"
30 #include "llvm/Object/ObjectFile.h"
31 #include "llvm/Support/Casting.h"
32 #include "llvm/Support/Compression.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/EndianStream.h"
35 #include "llvm/Support/Errc.h"
36 #include "llvm/Support/Error.h"
37 #include "llvm/Support/ErrorOr.h"
38 #include "llvm/Support/FileSystem.h"
39 #include "llvm/Support/MD5.h"
40 #include "llvm/Support/ManagedStatic.h"
41 #include "llvm/Support/MemoryBuffer.h"
42 #include "llvm/Support/Path.h"
43 #include "llvm/Support/Program.h"
44 #include "llvm/Support/Signals.h"
45 #include "llvm/Support/StringSaver.h"
46 #include "llvm/Support/Timer.h"
47 #include "llvm/Support/WithColor.h"
48 #include "llvm/Support/raw_ostream.h"
49 #include "llvm/TargetParser/Host.h"
50 #include "llvm/TargetParser/Triple.h"
51 #include <algorithm>
52 #include <cassert>
53 #include <cstddef>
54 #include <cstdint>
55 #include <forward_list>
56 #include <llvm/Support/Process.h>
57 #include <memory>
58 #include <set>
59 #include <string>
60 #include <system_error>
61 #include <utility>
62 
63 using namespace llvm;
64 using namespace llvm::object;
65 using namespace clang;
66 
67 namespace {
68 struct CreateClangOffloadBundlerTimerGroup {
69   static void *call() {
70     return new TimerGroup("Clang Offload Bundler Timer Group",
71                           "Timer group for clang offload bundler");
72   }
73 };
74 } // namespace
75 static llvm::ManagedStatic<llvm::TimerGroup,
76                            CreateClangOffloadBundlerTimerGroup>
77     ClangOffloadBundlerTimerGroup;
78 
79 /// Magic string that marks the existence of offloading data.
80 #define OFFLOAD_BUNDLER_MAGIC_STR "__CLANG_OFFLOAD_BUNDLE__"
81 
82 OffloadTargetInfo::OffloadTargetInfo(const StringRef Target,
83                                      const OffloadBundlerConfig &BC)
84     : BundlerConfig(BC) {
85 
86   // TODO: Add error checking from ClangOffloadBundler.cpp
87   auto TargetFeatures = Target.split(':');
88   auto TripleOrGPU = TargetFeatures.first.rsplit('-');
89 
90   if (clang::StringToOffloadArch(TripleOrGPU.second) !=
91       clang::OffloadArch::UNKNOWN) {
92     auto KindTriple = TripleOrGPU.first.split('-');
93     this->OffloadKind = KindTriple.first;
94 
95     // Enforce optional env field to standardize bundles
96     llvm::Triple t = llvm::Triple(KindTriple.second);
97     this->Triple = llvm::Triple(t.getArchName(), t.getVendorName(),
98                                 t.getOSName(), t.getEnvironmentName());
99 
100     this->TargetID = Target.substr(Target.find(TripleOrGPU.second));
101   } else {
102     auto KindTriple = TargetFeatures.first.split('-');
103     this->OffloadKind = KindTriple.first;
104 
105     // Enforce optional env field to standardize bundles
106     llvm::Triple t = llvm::Triple(KindTriple.second);
107     this->Triple = llvm::Triple(t.getArchName(), t.getVendorName(),
108                                 t.getOSName(), t.getEnvironmentName());
109 
110     this->TargetID = "";
111   }
112 }
113 
114 bool OffloadTargetInfo::hasHostKind() const {
115   return this->OffloadKind == "host";
116 }
117 
118 bool OffloadTargetInfo::isOffloadKindValid() const {
119   return OffloadKind == "host" || OffloadKind == "openmp" ||
120          OffloadKind == "hip" || OffloadKind == "hipv4";
121 }
122 
123 bool OffloadTargetInfo::isOffloadKindCompatible(
124     const StringRef TargetOffloadKind) const {
125   if ((OffloadKind == TargetOffloadKind) ||
126       (OffloadKind == "hip" && TargetOffloadKind == "hipv4") ||
127       (OffloadKind == "hipv4" && TargetOffloadKind == "hip"))
128     return true;
129 
130   if (BundlerConfig.HipOpenmpCompatible) {
131     bool HIPCompatibleWithOpenMP = OffloadKind.starts_with_insensitive("hip") &&
132                                    TargetOffloadKind == "openmp";
133     bool OpenMPCompatibleWithHIP =
134         OffloadKind == "openmp" &&
135         TargetOffloadKind.starts_with_insensitive("hip");
136     return HIPCompatibleWithOpenMP || OpenMPCompatibleWithHIP;
137   }
138   return false;
139 }
140 
141 bool OffloadTargetInfo::isTripleValid() const {
142   return !Triple.str().empty() && Triple.getArch() != Triple::UnknownArch;
143 }
144 
145 bool OffloadTargetInfo::operator==(const OffloadTargetInfo &Target) const {
146   return OffloadKind == Target.OffloadKind &&
147          Triple.isCompatibleWith(Target.Triple) && TargetID == Target.TargetID;
148 }
149 
150 std::string OffloadTargetInfo::str() const {
151   return Twine(OffloadKind + "-" + Triple.str() + "-" + TargetID).str();
152 }
153 
154 static StringRef getDeviceFileExtension(StringRef Device,
155                                         StringRef BundleFileName) {
156   if (Device.contains("gfx"))
157     return ".bc";
158   if (Device.contains("sm_"))
159     return ".cubin";
160   return sys::path::extension(BundleFileName);
161 }
162 
163 static std::string getDeviceLibraryFileName(StringRef BundleFileName,
164                                             StringRef Device) {
165   StringRef LibName = sys::path::stem(BundleFileName);
166   StringRef Extension = getDeviceFileExtension(Device, BundleFileName);
167 
168   std::string Result;
169   Result += LibName;
170   Result += Extension;
171   return Result;
172 }
173 
174 namespace {
175 /// Generic file handler interface.
176 class FileHandler {
177 public:
178   struct BundleInfo {
179     StringRef BundleID;
180   };
181 
182   FileHandler() {}
183 
184   virtual ~FileHandler() {}
185 
186   /// Update the file handler with information from the header of the bundled
187   /// file.
188   virtual Error ReadHeader(MemoryBuffer &Input) = 0;
189 
190   /// Read the marker of the next bundled to be read in the file. The bundle
191   /// name is returned if there is one in the file, or `std::nullopt` if there
192   /// are no more bundles to be read.
193   virtual Expected<std::optional<StringRef>>
194   ReadBundleStart(MemoryBuffer &Input) = 0;
195 
196   /// Read the marker that closes the current bundle.
197   virtual Error ReadBundleEnd(MemoryBuffer &Input) = 0;
198 
199   /// Read the current bundle and write the result into the stream \a OS.
200   virtual Error ReadBundle(raw_ostream &OS, MemoryBuffer &Input) = 0;
201 
202   /// Write the header of the bundled file to \a OS based on the information
203   /// gathered from \a Inputs.
204   virtual Error WriteHeader(raw_ostream &OS,
205                             ArrayRef<std::unique_ptr<MemoryBuffer>> Inputs) = 0;
206 
207   /// Write the marker that initiates a bundle for the triple \a TargetTriple to
208   /// \a OS.
209   virtual Error WriteBundleStart(raw_ostream &OS, StringRef TargetTriple) = 0;
210 
211   /// Write the marker that closes a bundle for the triple \a TargetTriple to \a
212   /// OS.
213   virtual Error WriteBundleEnd(raw_ostream &OS, StringRef TargetTriple) = 0;
214 
215   /// Write the bundle from \a Input into \a OS.
216   virtual Error WriteBundle(raw_ostream &OS, MemoryBuffer &Input) = 0;
217 
218   /// Finalize output file.
219   virtual Error finalizeOutputFile() { return Error::success(); }
220 
221   /// List bundle IDs in \a Input.
222   virtual Error listBundleIDs(MemoryBuffer &Input) {
223     if (Error Err = ReadHeader(Input))
224       return Err;
225     return forEachBundle(Input, [&](const BundleInfo &Info) -> Error {
226       llvm::outs() << Info.BundleID << '\n';
227       Error Err = listBundleIDsCallback(Input, Info);
228       if (Err)
229         return Err;
230       return Error::success();
231     });
232   }
233 
234   /// Get bundle IDs in \a Input in \a BundleIds.
235   virtual Error getBundleIDs(MemoryBuffer &Input,
236                              std::set<StringRef> &BundleIds) {
237     if (Error Err = ReadHeader(Input))
238       return Err;
239     return forEachBundle(Input, [&](const BundleInfo &Info) -> Error {
240       BundleIds.insert(Info.BundleID);
241       Error Err = listBundleIDsCallback(Input, Info);
242       if (Err)
243         return Err;
244       return Error::success();
245     });
246   }
247 
248   /// For each bundle in \a Input, do \a Func.
249   Error forEachBundle(MemoryBuffer &Input,
250                       std::function<Error(const BundleInfo &)> Func) {
251     while (true) {
252       Expected<std::optional<StringRef>> CurTripleOrErr =
253           ReadBundleStart(Input);
254       if (!CurTripleOrErr)
255         return CurTripleOrErr.takeError();
256 
257       // No more bundles.
258       if (!*CurTripleOrErr)
259         break;
260 
261       StringRef CurTriple = **CurTripleOrErr;
262       assert(!CurTriple.empty());
263 
264       BundleInfo Info{CurTriple};
265       if (Error Err = Func(Info))
266         return Err;
267     }
268     return Error::success();
269   }
270 
271 protected:
272   virtual Error listBundleIDsCallback(MemoryBuffer &Input,
273                                       const BundleInfo &Info) {
274     return Error::success();
275   }
276 };
277 
278 /// Handler for binary files. The bundled file will have the following format
279 /// (all integers are stored in little-endian format):
280 ///
281 /// "OFFLOAD_BUNDLER_MAGIC_STR" (ASCII encoding of the string)
282 ///
283 /// NumberOfOffloadBundles (8-byte integer)
284 ///
285 /// OffsetOfBundle1 (8-byte integer)
286 /// SizeOfBundle1 (8-byte integer)
287 /// NumberOfBytesInTripleOfBundle1 (8-byte integer)
288 /// TripleOfBundle1 (byte length defined before)
289 ///
290 /// ...
291 ///
292 /// OffsetOfBundleN (8-byte integer)
293 /// SizeOfBundleN (8-byte integer)
294 /// NumberOfBytesInTripleOfBundleN (8-byte integer)
295 /// TripleOfBundleN (byte length defined before)
296 ///
297 /// Bundle1
298 /// ...
299 /// BundleN
300 
301 /// Read 8-byte integers from a buffer in little-endian format.
302 static uint64_t Read8byteIntegerFromBuffer(StringRef Buffer, size_t pos) {
303   return llvm::support::endian::read64le(Buffer.data() + pos);
304 }
305 
306 /// Write 8-byte integers to a buffer in little-endian format.
307 static void Write8byteIntegerToBuffer(raw_ostream &OS, uint64_t Val) {
308   llvm::support::endian::write(OS, Val, llvm::endianness::little);
309 }
310 
311 class BinaryFileHandler final : public FileHandler {
312   /// Information about the bundles extracted from the header.
313   struct BinaryBundleInfo final : public BundleInfo {
314     /// Size of the bundle.
315     uint64_t Size = 0u;
316     /// Offset at which the bundle starts in the bundled file.
317     uint64_t Offset = 0u;
318 
319     BinaryBundleInfo() {}
320     BinaryBundleInfo(uint64_t Size, uint64_t Offset)
321         : Size(Size), Offset(Offset) {}
322   };
323 
324   /// Map between a triple and the corresponding bundle information.
325   StringMap<BinaryBundleInfo> BundlesInfo;
326 
327   /// Iterator for the bundle information that is being read.
328   StringMap<BinaryBundleInfo>::iterator CurBundleInfo;
329   StringMap<BinaryBundleInfo>::iterator NextBundleInfo;
330 
331   /// Current bundle target to be written.
332   std::string CurWriteBundleTarget;
333 
334   /// Configuration options and arrays for this bundler job
335   const OffloadBundlerConfig &BundlerConfig;
336 
337 public:
338   // TODO: Add error checking from ClangOffloadBundler.cpp
339   BinaryFileHandler(const OffloadBundlerConfig &BC) : BundlerConfig(BC) {}
340 
341   ~BinaryFileHandler() final {}
342 
343   Error ReadHeader(MemoryBuffer &Input) final {
344     StringRef FC = Input.getBuffer();
345 
346     // Initialize the current bundle with the end of the container.
347     CurBundleInfo = BundlesInfo.end();
348 
349     // Check if buffer is smaller than magic string.
350     size_t ReadChars = sizeof(OFFLOAD_BUNDLER_MAGIC_STR) - 1;
351     if (ReadChars > FC.size())
352       return Error::success();
353 
354     // Check if no magic was found.
355     if (llvm::identify_magic(FC) != llvm::file_magic::offload_bundle)
356       return Error::success();
357 
358     // Read number of bundles.
359     if (ReadChars + 8 > FC.size())
360       return Error::success();
361 
362     uint64_t NumberOfBundles = Read8byteIntegerFromBuffer(FC, ReadChars);
363     ReadChars += 8;
364 
365     // Read bundle offsets, sizes and triples.
366     for (uint64_t i = 0; i < NumberOfBundles; ++i) {
367 
368       // Read offset.
369       if (ReadChars + 8 > FC.size())
370         return Error::success();
371 
372       uint64_t Offset = Read8byteIntegerFromBuffer(FC, ReadChars);
373       ReadChars += 8;
374 
375       // Read size.
376       if (ReadChars + 8 > FC.size())
377         return Error::success();
378 
379       uint64_t Size = Read8byteIntegerFromBuffer(FC, ReadChars);
380       ReadChars += 8;
381 
382       // Read triple size.
383       if (ReadChars + 8 > FC.size())
384         return Error::success();
385 
386       uint64_t TripleSize = Read8byteIntegerFromBuffer(FC, ReadChars);
387       ReadChars += 8;
388 
389       // Read triple.
390       if (ReadChars + TripleSize > FC.size())
391         return Error::success();
392 
393       StringRef Triple(&FC.data()[ReadChars], TripleSize);
394       ReadChars += TripleSize;
395 
396       // Check if the offset and size make sense.
397       if (!Offset || Offset + Size > FC.size())
398         return Error::success();
399 
400       assert(!BundlesInfo.contains(Triple) && "Triple is duplicated??");
401       BundlesInfo[Triple] = BinaryBundleInfo(Size, Offset);
402     }
403     // Set the iterator to where we will start to read.
404     CurBundleInfo = BundlesInfo.end();
405     NextBundleInfo = BundlesInfo.begin();
406     return Error::success();
407   }
408 
409   Expected<std::optional<StringRef>>
410   ReadBundleStart(MemoryBuffer &Input) final {
411     if (NextBundleInfo == BundlesInfo.end())
412       return std::nullopt;
413     CurBundleInfo = NextBundleInfo++;
414     return CurBundleInfo->first();
415   }
416 
417   Error ReadBundleEnd(MemoryBuffer &Input) final {
418     assert(CurBundleInfo != BundlesInfo.end() && "Invalid reader info!");
419     return Error::success();
420   }
421 
422   Error ReadBundle(raw_ostream &OS, MemoryBuffer &Input) final {
423     assert(CurBundleInfo != BundlesInfo.end() && "Invalid reader info!");
424     StringRef FC = Input.getBuffer();
425     OS.write(FC.data() + CurBundleInfo->second.Offset,
426              CurBundleInfo->second.Size);
427     return Error::success();
428   }
429 
430   Error WriteHeader(raw_ostream &OS,
431                     ArrayRef<std::unique_ptr<MemoryBuffer>> Inputs) final {
432 
433     // Compute size of the header.
434     uint64_t HeaderSize = 0;
435 
436     HeaderSize += sizeof(OFFLOAD_BUNDLER_MAGIC_STR) - 1;
437     HeaderSize += 8; // Number of Bundles
438 
439     for (auto &T : BundlerConfig.TargetNames) {
440       HeaderSize += 3 * 8; // Bundle offset, Size of bundle and size of triple.
441       HeaderSize += T.size(); // The triple.
442     }
443 
444     // Write to the buffer the header.
445     OS << OFFLOAD_BUNDLER_MAGIC_STR;
446 
447     Write8byteIntegerToBuffer(OS, BundlerConfig.TargetNames.size());
448 
449     unsigned Idx = 0;
450     for (auto &T : BundlerConfig.TargetNames) {
451       MemoryBuffer &MB = *Inputs[Idx++];
452       HeaderSize = alignTo(HeaderSize, BundlerConfig.BundleAlignment);
453       // Bundle offset.
454       Write8byteIntegerToBuffer(OS, HeaderSize);
455       // Size of the bundle (adds to the next bundle's offset)
456       Write8byteIntegerToBuffer(OS, MB.getBufferSize());
457       BundlesInfo[T] = BinaryBundleInfo(MB.getBufferSize(), HeaderSize);
458       HeaderSize += MB.getBufferSize();
459       // Size of the triple
460       Write8byteIntegerToBuffer(OS, T.size());
461       // Triple
462       OS << T;
463     }
464     return Error::success();
465   }
466 
467   Error WriteBundleStart(raw_ostream &OS, StringRef TargetTriple) final {
468     CurWriteBundleTarget = TargetTriple.str();
469     return Error::success();
470   }
471 
472   Error WriteBundleEnd(raw_ostream &OS, StringRef TargetTriple) final {
473     return Error::success();
474   }
475 
476   Error WriteBundle(raw_ostream &OS, MemoryBuffer &Input) final {
477     auto BI = BundlesInfo[CurWriteBundleTarget];
478 
479     // Pad with 0 to reach specified offset.
480     size_t CurrentPos = OS.tell();
481     size_t PaddingSize = BI.Offset > CurrentPos ? BI.Offset - CurrentPos : 0;
482     for (size_t I = 0; I < PaddingSize; ++I)
483       OS.write('\0');
484     assert(OS.tell() == BI.Offset);
485 
486     OS.write(Input.getBufferStart(), Input.getBufferSize());
487 
488     return Error::success();
489   }
490 };
491 
492 // This class implements a list of temporary files that are removed upon
493 // object destruction.
494 class TempFileHandlerRAII {
495 public:
496   ~TempFileHandlerRAII() {
497     for (const auto &File : Files)
498       sys::fs::remove(File);
499   }
500 
501   // Creates temporary file with given contents.
502   Expected<StringRef> Create(std::optional<ArrayRef<char>> Contents) {
503     SmallString<128u> File;
504     if (std::error_code EC =
505             sys::fs::createTemporaryFile("clang-offload-bundler", "tmp", File))
506       return createFileError(File, EC);
507     Files.push_front(File);
508 
509     if (Contents) {
510       std::error_code EC;
511       raw_fd_ostream OS(File, EC);
512       if (EC)
513         return createFileError(File, EC);
514       OS.write(Contents->data(), Contents->size());
515     }
516     return Files.front().str();
517   }
518 
519 private:
520   std::forward_list<SmallString<128u>> Files;
521 };
522 
523 /// Handler for object files. The bundles are organized by sections with a
524 /// designated name.
525 ///
526 /// To unbundle, we just copy the contents of the designated section.
527 class ObjectFileHandler final : public FileHandler {
528 
529   /// The object file we are currently dealing with.
530   std::unique_ptr<ObjectFile> Obj;
531 
532   /// Return the input file contents.
533   StringRef getInputFileContents() const { return Obj->getData(); }
534 
535   /// Return bundle name (<kind>-<triple>) if the provided section is an offload
536   /// section.
537   static Expected<std::optional<StringRef>>
538   IsOffloadSection(SectionRef CurSection) {
539     Expected<StringRef> NameOrErr = CurSection.getName();
540     if (!NameOrErr)
541       return NameOrErr.takeError();
542 
543     // If it does not start with the reserved suffix, just skip this section.
544     if (llvm::identify_magic(*NameOrErr) != llvm::file_magic::offload_bundle)
545       return std::nullopt;
546 
547     // Return the triple that is right after the reserved prefix.
548     return NameOrErr->substr(sizeof(OFFLOAD_BUNDLER_MAGIC_STR) - 1);
549   }
550 
551   /// Total number of inputs.
552   unsigned NumberOfInputs = 0;
553 
554   /// Total number of processed inputs, i.e, inputs that were already
555   /// read from the buffers.
556   unsigned NumberOfProcessedInputs = 0;
557 
558   /// Iterator of the current and next section.
559   section_iterator CurrentSection;
560   section_iterator NextSection;
561 
562   /// Configuration options and arrays for this bundler job
563   const OffloadBundlerConfig &BundlerConfig;
564 
565 public:
566   // TODO: Add error checking from ClangOffloadBundler.cpp
567   ObjectFileHandler(std::unique_ptr<ObjectFile> ObjIn,
568                     const OffloadBundlerConfig &BC)
569       : Obj(std::move(ObjIn)), CurrentSection(Obj->section_begin()),
570         NextSection(Obj->section_begin()), BundlerConfig(BC) {}
571 
572   ~ObjectFileHandler() final {}
573 
574   Error ReadHeader(MemoryBuffer &Input) final { return Error::success(); }
575 
576   Expected<std::optional<StringRef>>
577   ReadBundleStart(MemoryBuffer &Input) final {
578     while (NextSection != Obj->section_end()) {
579       CurrentSection = NextSection;
580       ++NextSection;
581 
582       // Check if the current section name starts with the reserved prefix. If
583       // so, return the triple.
584       Expected<std::optional<StringRef>> TripleOrErr =
585           IsOffloadSection(*CurrentSection);
586       if (!TripleOrErr)
587         return TripleOrErr.takeError();
588       if (*TripleOrErr)
589         return **TripleOrErr;
590     }
591     return std::nullopt;
592   }
593 
594   Error ReadBundleEnd(MemoryBuffer &Input) final { return Error::success(); }
595 
596   Error ReadBundle(raw_ostream &OS, MemoryBuffer &Input) final {
597     Expected<StringRef> ContentOrErr = CurrentSection->getContents();
598     if (!ContentOrErr)
599       return ContentOrErr.takeError();
600     StringRef Content = *ContentOrErr;
601 
602     // Copy fat object contents to the output when extracting host bundle.
603     std::string ModifiedContent;
604     if (Content.size() == 1u && Content.front() == 0) {
605       auto HostBundleOrErr = getHostBundle(
606           StringRef(Input.getBufferStart(), Input.getBufferSize()));
607       if (!HostBundleOrErr)
608         return HostBundleOrErr.takeError();
609 
610       ModifiedContent = std::move(*HostBundleOrErr);
611       Content = ModifiedContent;
612     }
613 
614     OS.write(Content.data(), Content.size());
615     return Error::success();
616   }
617 
618   Error WriteHeader(raw_ostream &OS,
619                     ArrayRef<std::unique_ptr<MemoryBuffer>> Inputs) final {
620     assert(BundlerConfig.HostInputIndex != ~0u &&
621            "Host input index not defined.");
622 
623     // Record number of inputs.
624     NumberOfInputs = Inputs.size();
625     return Error::success();
626   }
627 
628   Error WriteBundleStart(raw_ostream &OS, StringRef TargetTriple) final {
629     ++NumberOfProcessedInputs;
630     return Error::success();
631   }
632 
633   Error WriteBundleEnd(raw_ostream &OS, StringRef TargetTriple) final {
634     return Error::success();
635   }
636 
637   Error finalizeOutputFile() final {
638     assert(NumberOfProcessedInputs <= NumberOfInputs &&
639            "Processing more inputs that actually exist!");
640     assert(BundlerConfig.HostInputIndex != ~0u &&
641            "Host input index not defined.");
642 
643     // If this is not the last output, we don't have to do anything.
644     if (NumberOfProcessedInputs != NumberOfInputs)
645       return Error::success();
646 
647     // We will use llvm-objcopy to add target objects sections to the output
648     // fat object. These sections should have 'exclude' flag set which tells
649     // link editor to remove them from linker inputs when linking executable or
650     // shared library.
651 
652     assert(BundlerConfig.ObjcopyPath != "" &&
653            "llvm-objcopy path not specified");
654 
655     // Temporary files that need to be removed.
656     TempFileHandlerRAII TempFiles;
657 
658     // Compose llvm-objcopy command line for add target objects' sections with
659     // appropriate flags.
660     BumpPtrAllocator Alloc;
661     StringSaver SS{Alloc};
662     SmallVector<StringRef, 8u> ObjcopyArgs{"llvm-objcopy"};
663 
664     for (unsigned I = 0; I < NumberOfInputs; ++I) {
665       StringRef InputFile = BundlerConfig.InputFileNames[I];
666       if (I == BundlerConfig.HostInputIndex) {
667         // Special handling for the host bundle. We do not need to add a
668         // standard bundle for the host object since we are going to use fat
669         // object as a host object. Therefore use dummy contents (one zero byte)
670         // when creating section for the host bundle.
671         Expected<StringRef> TempFileOrErr = TempFiles.Create(ArrayRef<char>(0));
672         if (!TempFileOrErr)
673           return TempFileOrErr.takeError();
674         InputFile = *TempFileOrErr;
675       }
676 
677       ObjcopyArgs.push_back(
678           SS.save(Twine("--add-section=") + OFFLOAD_BUNDLER_MAGIC_STR +
679                   BundlerConfig.TargetNames[I] + "=" + InputFile));
680       ObjcopyArgs.push_back(
681           SS.save(Twine("--set-section-flags=") + OFFLOAD_BUNDLER_MAGIC_STR +
682                   BundlerConfig.TargetNames[I] + "=readonly,exclude"));
683     }
684     ObjcopyArgs.push_back("--");
685     ObjcopyArgs.push_back(
686         BundlerConfig.InputFileNames[BundlerConfig.HostInputIndex]);
687     ObjcopyArgs.push_back(BundlerConfig.OutputFileNames.front());
688 
689     if (Error Err = executeObjcopy(BundlerConfig.ObjcopyPath, ObjcopyArgs))
690       return Err;
691 
692     return Error::success();
693   }
694 
695   Error WriteBundle(raw_ostream &OS, MemoryBuffer &Input) final {
696     return Error::success();
697   }
698 
699 private:
700   Error executeObjcopy(StringRef Objcopy, ArrayRef<StringRef> Args) {
701     // If the user asked for the commands to be printed out, we do that
702     // instead of executing it.
703     if (BundlerConfig.PrintExternalCommands) {
704       errs() << "\"" << Objcopy << "\"";
705       for (StringRef Arg : drop_begin(Args, 1))
706         errs() << " \"" << Arg << "\"";
707       errs() << "\n";
708     } else {
709       if (sys::ExecuteAndWait(Objcopy, Args))
710         return createStringError(inconvertibleErrorCode(),
711                                  "'llvm-objcopy' tool failed");
712     }
713     return Error::success();
714   }
715 
716   Expected<std::string> getHostBundle(StringRef Input) {
717     TempFileHandlerRAII TempFiles;
718 
719     auto ModifiedObjPathOrErr = TempFiles.Create(std::nullopt);
720     if (!ModifiedObjPathOrErr)
721       return ModifiedObjPathOrErr.takeError();
722     StringRef ModifiedObjPath = *ModifiedObjPathOrErr;
723 
724     BumpPtrAllocator Alloc;
725     StringSaver SS{Alloc};
726     SmallVector<StringRef, 16> ObjcopyArgs{"llvm-objcopy"};
727 
728     ObjcopyArgs.push_back("--regex");
729     ObjcopyArgs.push_back("--remove-section=__CLANG_OFFLOAD_BUNDLE__.*");
730     ObjcopyArgs.push_back("--");
731 
732     StringRef ObjcopyInputFileName;
733     // When unbundling an archive, the content of each object file in the
734     // archive is passed to this function by parameter Input, which is different
735     // from the content of the original input archive file, therefore it needs
736     // to be saved to a temporary file before passed to llvm-objcopy. Otherwise,
737     // Input is the same as the content of the original input file, therefore
738     // temporary file is not needed.
739     if (StringRef(BundlerConfig.FilesType).starts_with("a")) {
740       auto InputFileOrErr =
741           TempFiles.Create(ArrayRef<char>(Input.data(), Input.size()));
742       if (!InputFileOrErr)
743         return InputFileOrErr.takeError();
744       ObjcopyInputFileName = *InputFileOrErr;
745     } else
746       ObjcopyInputFileName = BundlerConfig.InputFileNames.front();
747 
748     ObjcopyArgs.push_back(ObjcopyInputFileName);
749     ObjcopyArgs.push_back(ModifiedObjPath);
750 
751     if (Error Err = executeObjcopy(BundlerConfig.ObjcopyPath, ObjcopyArgs))
752       return std::move(Err);
753 
754     auto BufOrErr = MemoryBuffer::getFile(ModifiedObjPath);
755     if (!BufOrErr)
756       return createStringError(BufOrErr.getError(),
757                                "Failed to read back the modified object file");
758 
759     return BufOrErr->get()->getBuffer().str();
760   }
761 };
762 
763 /// Handler for text files. The bundled file will have the following format.
764 ///
765 /// "Comment OFFLOAD_BUNDLER_MAGIC_STR__START__ triple"
766 /// Bundle 1
767 /// "Comment OFFLOAD_BUNDLER_MAGIC_STR__END__ triple"
768 /// ...
769 /// "Comment OFFLOAD_BUNDLER_MAGIC_STR__START__ triple"
770 /// Bundle N
771 /// "Comment OFFLOAD_BUNDLER_MAGIC_STR__END__ triple"
772 class TextFileHandler final : public FileHandler {
773   /// String that begins a line comment.
774   StringRef Comment;
775 
776   /// String that initiates a bundle.
777   std::string BundleStartString;
778 
779   /// String that closes a bundle.
780   std::string BundleEndString;
781 
782   /// Number of chars read from input.
783   size_t ReadChars = 0u;
784 
785 protected:
786   Error ReadHeader(MemoryBuffer &Input) final { return Error::success(); }
787 
788   Expected<std::optional<StringRef>>
789   ReadBundleStart(MemoryBuffer &Input) final {
790     StringRef FC = Input.getBuffer();
791 
792     // Find start of the bundle.
793     ReadChars = FC.find(BundleStartString, ReadChars);
794     if (ReadChars == FC.npos)
795       return std::nullopt;
796 
797     // Get position of the triple.
798     size_t TripleStart = ReadChars = ReadChars + BundleStartString.size();
799 
800     // Get position that closes the triple.
801     size_t TripleEnd = ReadChars = FC.find("\n", ReadChars);
802     if (TripleEnd == FC.npos)
803       return std::nullopt;
804 
805     // Next time we read after the new line.
806     ++ReadChars;
807 
808     return StringRef(&FC.data()[TripleStart], TripleEnd - TripleStart);
809   }
810 
811   Error ReadBundleEnd(MemoryBuffer &Input) final {
812     StringRef FC = Input.getBuffer();
813 
814     // Read up to the next new line.
815     assert(FC[ReadChars] == '\n' && "The bundle should end with a new line.");
816 
817     size_t TripleEnd = ReadChars = FC.find("\n", ReadChars + 1);
818     if (TripleEnd != FC.npos)
819       // Next time we read after the new line.
820       ++ReadChars;
821 
822     return Error::success();
823   }
824 
825   Error ReadBundle(raw_ostream &OS, MemoryBuffer &Input) final {
826     StringRef FC = Input.getBuffer();
827     size_t BundleStart = ReadChars;
828 
829     // Find end of the bundle.
830     size_t BundleEnd = ReadChars = FC.find(BundleEndString, ReadChars);
831 
832     StringRef Bundle(&FC.data()[BundleStart], BundleEnd - BundleStart);
833     OS << Bundle;
834 
835     return Error::success();
836   }
837 
838   Error WriteHeader(raw_ostream &OS,
839                     ArrayRef<std::unique_ptr<MemoryBuffer>> Inputs) final {
840     return Error::success();
841   }
842 
843   Error WriteBundleStart(raw_ostream &OS, StringRef TargetTriple) final {
844     OS << BundleStartString << TargetTriple << "\n";
845     return Error::success();
846   }
847 
848   Error WriteBundleEnd(raw_ostream &OS, StringRef TargetTriple) final {
849     OS << BundleEndString << TargetTriple << "\n";
850     return Error::success();
851   }
852 
853   Error WriteBundle(raw_ostream &OS, MemoryBuffer &Input) final {
854     OS << Input.getBuffer();
855     return Error::success();
856   }
857 
858 public:
859   TextFileHandler(StringRef Comment) : Comment(Comment), ReadChars(0) {
860     BundleStartString =
861         "\n" + Comment.str() + " " OFFLOAD_BUNDLER_MAGIC_STR "__START__ ";
862     BundleEndString =
863         "\n" + Comment.str() + " " OFFLOAD_BUNDLER_MAGIC_STR "__END__ ";
864   }
865 
866   Error listBundleIDsCallback(MemoryBuffer &Input,
867                               const BundleInfo &Info) final {
868     // TODO: To list bundle IDs in a bundled text file we need to go through
869     // all bundles. The format of bundled text file may need to include a
870     // header if the performance of listing bundle IDs of bundled text file is
871     // important.
872     ReadChars = Input.getBuffer().find(BundleEndString, ReadChars);
873     if (Error Err = ReadBundleEnd(Input))
874       return Err;
875     return Error::success();
876   }
877 };
878 } // namespace
879 
880 /// Return an appropriate object file handler. We use the specific object
881 /// handler if we know how to deal with that format, otherwise we use a default
882 /// binary file handler.
883 static std::unique_ptr<FileHandler>
884 CreateObjectFileHandler(MemoryBuffer &FirstInput,
885                         const OffloadBundlerConfig &BundlerConfig) {
886   // Check if the input file format is one that we know how to deal with.
887   Expected<std::unique_ptr<Binary>> BinaryOrErr = createBinary(FirstInput);
888 
889   // We only support regular object files. If failed to open the input as a
890   // known binary or this is not an object file use the default binary handler.
891   if (errorToBool(BinaryOrErr.takeError()) || !isa<ObjectFile>(*BinaryOrErr))
892     return std::make_unique<BinaryFileHandler>(BundlerConfig);
893 
894   // Otherwise create an object file handler. The handler will be owned by the
895   // client of this function.
896   return std::make_unique<ObjectFileHandler>(
897       std::unique_ptr<ObjectFile>(cast<ObjectFile>(BinaryOrErr->release())),
898       BundlerConfig);
899 }
900 
901 /// Return an appropriate handler given the input files and options.
902 static Expected<std::unique_ptr<FileHandler>>
903 CreateFileHandler(MemoryBuffer &FirstInput,
904                   const OffloadBundlerConfig &BundlerConfig) {
905   std::string FilesType = BundlerConfig.FilesType;
906 
907   if (FilesType == "i")
908     return std::make_unique<TextFileHandler>(/*Comment=*/"//");
909   if (FilesType == "ii")
910     return std::make_unique<TextFileHandler>(/*Comment=*/"//");
911   if (FilesType == "cui")
912     return std::make_unique<TextFileHandler>(/*Comment=*/"//");
913   if (FilesType == "hipi")
914     return std::make_unique<TextFileHandler>(/*Comment=*/"//");
915   // TODO: `.d` should be eventually removed once `-M` and its variants are
916   // handled properly in offload compilation.
917   if (FilesType == "d")
918     return std::make_unique<TextFileHandler>(/*Comment=*/"#");
919   if (FilesType == "ll")
920     return std::make_unique<TextFileHandler>(/*Comment=*/";");
921   if (FilesType == "bc")
922     return std::make_unique<BinaryFileHandler>(BundlerConfig);
923   if (FilesType == "s")
924     return std::make_unique<TextFileHandler>(/*Comment=*/"#");
925   if (FilesType == "o")
926     return CreateObjectFileHandler(FirstInput, BundlerConfig);
927   if (FilesType == "a")
928     return CreateObjectFileHandler(FirstInput, BundlerConfig);
929   if (FilesType == "gch")
930     return std::make_unique<BinaryFileHandler>(BundlerConfig);
931   if (FilesType == "ast")
932     return std::make_unique<BinaryFileHandler>(BundlerConfig);
933 
934   return createStringError(errc::invalid_argument,
935                            "'" + FilesType + "': invalid file type specified");
936 }
937 
938 OffloadBundlerConfig::OffloadBundlerConfig()
939     : CompressedBundleVersion(CompressedOffloadBundle::DefaultVersion) {
940   if (llvm::compression::zstd::isAvailable()) {
941     CompressionFormat = llvm::compression::Format::Zstd;
942     // Compression level 3 is usually sufficient for zstd since long distance
943     // matching is enabled.
944     CompressionLevel = 3;
945   } else if (llvm::compression::zlib::isAvailable()) {
946     CompressionFormat = llvm::compression::Format::Zlib;
947     // Use default level for zlib since higher level does not have significant
948     // improvement.
949     CompressionLevel = llvm::compression::zlib::DefaultCompression;
950   }
951   auto IgnoreEnvVarOpt =
952       llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_IGNORE_ENV_VAR");
953   if (IgnoreEnvVarOpt.has_value() && IgnoreEnvVarOpt.value() == "1")
954     return;
955   auto VerboseEnvVarOpt = llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_VERBOSE");
956   if (VerboseEnvVarOpt.has_value())
957     Verbose = VerboseEnvVarOpt.value() == "1";
958   auto CompressEnvVarOpt =
959       llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_COMPRESS");
960   if (CompressEnvVarOpt.has_value())
961     Compress = CompressEnvVarOpt.value() == "1";
962   auto CompressionLevelEnvVarOpt =
963       llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_COMPRESSION_LEVEL");
964   if (CompressionLevelEnvVarOpt.has_value()) {
965     llvm::StringRef CompressionLevelStr = CompressionLevelEnvVarOpt.value();
966     int Level;
967     if (!CompressionLevelStr.getAsInteger(10, Level))
968       CompressionLevel = Level;
969     else
970       llvm::errs()
971           << "Warning: Invalid value for OFFLOAD_BUNDLER_COMPRESSION_LEVEL: "
972           << CompressionLevelStr.str() << ". Ignoring it.\n";
973   }
974   auto CompressedBundleFormatVersionOpt =
975       llvm::sys::Process::GetEnv("COMPRESSED_BUNDLE_FORMAT_VERSION");
976   if (CompressedBundleFormatVersionOpt.has_value()) {
977     llvm::StringRef VersionStr = CompressedBundleFormatVersionOpt.value();
978     uint16_t Version;
979     if (!VersionStr.getAsInteger(10, Version)) {
980       if (Version >= 2 && Version <= 3)
981         CompressedBundleVersion = Version;
982       else
983         llvm::errs()
984             << "Warning: Invalid value for COMPRESSED_BUNDLE_FORMAT_VERSION: "
985             << VersionStr.str()
986             << ". Valid values are 2 or 3. Using default version "
987             << CompressedBundleVersion << ".\n";
988     } else
989       llvm::errs()
990           << "Warning: Invalid value for COMPRESSED_BUNDLE_FORMAT_VERSION: "
991           << VersionStr.str() << ". Using default version "
992           << CompressedBundleVersion << ".\n";
993   }
994 }
995 
// Render Value in decimal with a ',' between each group of three digits,
// counted from the least significant digit (e.g. 1234567 -> "1,234,567").
static std::string formatWithCommas(unsigned long long Value) {
  std::string Digits = std::to_string(Value);
  // Walk leftwards in steps of three. Stopping before position 0 guarantees
  // that no comma is placed in front of the first digit.
  for (int Pos = static_cast<int>(Digits.size()) - 3; Pos > 0; Pos -= 3)
    Digits.insert(static_cast<std::string::size_type>(Pos), 1, ',');
  return Digits;
}
1006 
1007 llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
1008 CompressedOffloadBundle::compress(llvm::compression::Params P,
1009                                   const llvm::MemoryBuffer &Input,
1010                                   uint16_t Version, bool Verbose) {
1011   if (!llvm::compression::zstd::isAvailable() &&
1012       !llvm::compression::zlib::isAvailable())
1013     return createStringError(llvm::inconvertibleErrorCode(),
1014                              "Compression not supported");
1015   llvm::Timer HashTimer("Hash Calculation Timer", "Hash calculation time",
1016                         *ClangOffloadBundlerTimerGroup);
1017   if (Verbose)
1018     HashTimer.startTimer();
1019   llvm::MD5 Hash;
1020   llvm::MD5::MD5Result Result;
1021   Hash.update(Input.getBuffer());
1022   Hash.final(Result);
1023   uint64_t TruncatedHash = Result.low();
1024   if (Verbose)
1025     HashTimer.stopTimer();
1026 
1027   SmallVector<uint8_t, 0> CompressedBuffer;
1028   auto BufferUint8 = llvm::ArrayRef<uint8_t>(
1029       reinterpret_cast<const uint8_t *>(Input.getBuffer().data()),
1030       Input.getBuffer().size());
1031   llvm::Timer CompressTimer("Compression Timer", "Compression time",
1032                             *ClangOffloadBundlerTimerGroup);
1033   if (Verbose)
1034     CompressTimer.startTimer();
1035   llvm::compression::compress(P, BufferUint8, CompressedBuffer);
1036   if (Verbose)
1037     CompressTimer.stopTimer();
1038 
1039   uint16_t CompressionMethod = static_cast<uint16_t>(P.format);
1040 
1041   // Store sizes in 64-bit variables first
1042   uint64_t UncompressedSize64 = Input.getBuffer().size();
1043   uint64_t TotalFileSize64;
1044 
1045   // Calculate total file size based on version
1046   if (Version == 2) {
1047     // For V2, ensure the sizes don't exceed 32-bit limit
1048     if (UncompressedSize64 > std::numeric_limits<uint32_t>::max())
1049       return createStringError(llvm::inconvertibleErrorCode(),
1050                                "Uncompressed size exceeds version 2 limit");
1051     if ((MagicNumber.size() + sizeof(uint32_t) + sizeof(Version) +
1052          sizeof(CompressionMethod) + sizeof(uint32_t) + sizeof(TruncatedHash) +
1053          CompressedBuffer.size()) > std::numeric_limits<uint32_t>::max())
1054       return createStringError(llvm::inconvertibleErrorCode(),
1055                                "Total file size exceeds version 2 limit");
1056 
1057     TotalFileSize64 = MagicNumber.size() + sizeof(uint32_t) + sizeof(Version) +
1058                       sizeof(CompressionMethod) + sizeof(uint32_t) +
1059                       sizeof(TruncatedHash) + CompressedBuffer.size();
1060   } else { // Version 3
1061     TotalFileSize64 = MagicNumber.size() + sizeof(uint64_t) + sizeof(Version) +
1062                       sizeof(CompressionMethod) + sizeof(uint64_t) +
1063                       sizeof(TruncatedHash) + CompressedBuffer.size();
1064   }
1065 
1066   SmallVector<char, 0> FinalBuffer;
1067   llvm::raw_svector_ostream OS(FinalBuffer);
1068   OS << MagicNumber;
1069   OS.write(reinterpret_cast<const char *>(&Version), sizeof(Version));
1070   OS.write(reinterpret_cast<const char *>(&CompressionMethod),
1071            sizeof(CompressionMethod));
1072 
1073   // Write size fields according to version
1074   if (Version == 2) {
1075     uint32_t TotalFileSize32 = static_cast<uint32_t>(TotalFileSize64);
1076     uint32_t UncompressedSize32 = static_cast<uint32_t>(UncompressedSize64);
1077     OS.write(reinterpret_cast<const char *>(&TotalFileSize32),
1078              sizeof(TotalFileSize32));
1079     OS.write(reinterpret_cast<const char *>(&UncompressedSize32),
1080              sizeof(UncompressedSize32));
1081   } else { // Version 3
1082     OS.write(reinterpret_cast<const char *>(&TotalFileSize64),
1083              sizeof(TotalFileSize64));
1084     OS.write(reinterpret_cast<const char *>(&UncompressedSize64),
1085              sizeof(UncompressedSize64));
1086   }
1087 
1088   OS.write(reinterpret_cast<const char *>(&TruncatedHash),
1089            sizeof(TruncatedHash));
1090   OS.write(reinterpret_cast<const char *>(CompressedBuffer.data()),
1091            CompressedBuffer.size());
1092 
1093   if (Verbose) {
1094     auto MethodUsed =
1095         P.format == llvm::compression::Format::Zstd ? "zstd" : "zlib";
1096     double CompressionRate =
1097         static_cast<double>(UncompressedSize64) / CompressedBuffer.size();
1098     double CompressionTimeSeconds = CompressTimer.getTotalTime().getWallTime();
1099     double CompressionSpeedMBs =
1100         (UncompressedSize64 / (1024.0 * 1024.0)) / CompressionTimeSeconds;
1101     llvm::errs() << "Compressed bundle format version: " << Version << "\n"
1102                  << "Total file size (including headers): "
1103                  << formatWithCommas(TotalFileSize64) << " bytes\n"
1104                  << "Compression method used: " << MethodUsed << "\n"
1105                  << "Compression level: " << P.level << "\n"
1106                  << "Binary size before compression: "
1107                  << formatWithCommas(UncompressedSize64) << " bytes\n"
1108                  << "Binary size after compression: "
1109                  << formatWithCommas(CompressedBuffer.size()) << " bytes\n"
1110                  << "Compression rate: "
1111                  << llvm::format("%.2lf", CompressionRate) << "\n"
1112                  << "Compression ratio: "
1113                  << llvm::format("%.2lf%%", 100.0 / CompressionRate) << "\n"
1114                  << "Compression speed: "
1115                  << llvm::format("%.2lf MB/s", CompressionSpeedMBs) << "\n"
1116                  << "Truncated MD5 hash: "
1117                  << llvm::format_hex(TruncatedHash, 16) << "\n";
1118   }
1119 
1120   return llvm::MemoryBuffer::getMemBufferCopy(
1121       llvm::StringRef(FinalBuffer.data(), FinalBuffer.size()));
1122 }
1123 
1124 llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
1125 CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
1126                                     bool Verbose) {
1127   StringRef Blob = Input.getBuffer();
1128 
1129   // Check minimum header size (using V1 as it's the smallest)
1130   if (Blob.size() < V1HeaderSize)
1131     return llvm::MemoryBuffer::getMemBufferCopy(Blob);
1132 
1133   if (llvm::identify_magic(Blob) !=
1134       llvm::file_magic::offload_bundle_compressed) {
1135     if (Verbose)
1136       llvm::errs() << "Uncompressed bundle.\n";
1137     return llvm::MemoryBuffer::getMemBufferCopy(Blob);
1138   }
1139 
1140   size_t CurrentOffset = MagicSize;
1141 
1142   // Read version
1143   uint16_t ThisVersion;
1144   memcpy(&ThisVersion, Blob.data() + CurrentOffset, sizeof(uint16_t));
1145   CurrentOffset += VersionFieldSize;
1146 
1147   // Verify header size based on version
1148   if (ThisVersion >= 2 && ThisVersion <= 3) {
1149     size_t RequiredSize = (ThisVersion == 2) ? V2HeaderSize : V3HeaderSize;
1150     if (Blob.size() < RequiredSize)
1151       return createStringError(inconvertibleErrorCode(),
1152                                "Compressed bundle header size too small");
1153   }
1154 
1155   // Read compression method
1156   uint16_t CompressionMethod;
1157   memcpy(&CompressionMethod, Blob.data() + CurrentOffset, sizeof(uint16_t));
1158   CurrentOffset += MethodFieldSize;
1159 
1160   // Read total file size (version 2+)
1161   uint64_t TotalFileSize = 0;
1162   if (ThisVersion >= 2) {
1163     if (ThisVersion == 2) {
1164       uint32_t TotalFileSize32;
1165       memcpy(&TotalFileSize32, Blob.data() + CurrentOffset, sizeof(uint32_t));
1166       TotalFileSize = TotalFileSize32;
1167       CurrentOffset += FileSizeFieldSizeV2;
1168     } else { // Version 3
1169       memcpy(&TotalFileSize, Blob.data() + CurrentOffset, sizeof(uint64_t));
1170       CurrentOffset += FileSizeFieldSizeV3;
1171     }
1172   }
1173 
1174   // Read uncompressed size
1175   uint64_t UncompressedSize = 0;
1176   if (ThisVersion <= 2) {
1177     uint32_t UncompressedSize32;
1178     memcpy(&UncompressedSize32, Blob.data() + CurrentOffset, sizeof(uint32_t));
1179     UncompressedSize = UncompressedSize32;
1180     CurrentOffset += UncompressedSizeFieldSizeV2;
1181   } else { // Version 3
1182     memcpy(&UncompressedSize, Blob.data() + CurrentOffset, sizeof(uint64_t));
1183     CurrentOffset += UncompressedSizeFieldSizeV3;
1184   }
1185 
1186   // Read hash
1187   uint64_t StoredHash;
1188   memcpy(&StoredHash, Blob.data() + CurrentOffset, sizeof(uint64_t));
1189   CurrentOffset += HashFieldSize;
1190 
1191   // Determine compression format
1192   llvm::compression::Format CompressionFormat;
1193   if (CompressionMethod ==
1194       static_cast<uint16_t>(llvm::compression::Format::Zlib))
1195     CompressionFormat = llvm::compression::Format::Zlib;
1196   else if (CompressionMethod ==
1197            static_cast<uint16_t>(llvm::compression::Format::Zstd))
1198     CompressionFormat = llvm::compression::Format::Zstd;
1199   else
1200     return createStringError(inconvertibleErrorCode(),
1201                              "Unknown compressing method");
1202 
1203   llvm::Timer DecompressTimer("Decompression Timer", "Decompression time",
1204                               *ClangOffloadBundlerTimerGroup);
1205   if (Verbose)
1206     DecompressTimer.startTimer();
1207 
1208   SmallVector<uint8_t, 0> DecompressedData;
1209   StringRef CompressedData = Blob.substr(CurrentOffset);
1210   if (llvm::Error DecompressionError = llvm::compression::decompress(
1211           CompressionFormat, llvm::arrayRefFromStringRef(CompressedData),
1212           DecompressedData, UncompressedSize))
1213     return createStringError(inconvertibleErrorCode(),
1214                              "Could not decompress embedded file contents: " +
1215                                  llvm::toString(std::move(DecompressionError)));
1216 
1217   if (Verbose) {
1218     DecompressTimer.stopTimer();
1219 
1220     double DecompressionTimeSeconds =
1221         DecompressTimer.getTotalTime().getWallTime();
1222 
1223     // Recalculate MD5 hash for integrity check
1224     llvm::Timer HashRecalcTimer("Hash Recalculation Timer",
1225                                 "Hash recalculation time",
1226                                 *ClangOffloadBundlerTimerGroup);
1227     HashRecalcTimer.startTimer();
1228     llvm::MD5 Hash;
1229     llvm::MD5::MD5Result Result;
1230     Hash.update(llvm::ArrayRef<uint8_t>(DecompressedData.data(),
1231                                         DecompressedData.size()));
1232     Hash.final(Result);
1233     uint64_t RecalculatedHash = Result.low();
1234     HashRecalcTimer.stopTimer();
1235     bool HashMatch = (StoredHash == RecalculatedHash);
1236 
1237     double CompressionRate =
1238         static_cast<double>(UncompressedSize) / CompressedData.size();
1239     double DecompressionSpeedMBs =
1240         (UncompressedSize / (1024.0 * 1024.0)) / DecompressionTimeSeconds;
1241 
1242     llvm::errs() << "Compressed bundle format version: " << ThisVersion << "\n";
1243     if (ThisVersion >= 2)
1244       llvm::errs() << "Total file size (from header): "
1245                    << formatWithCommas(TotalFileSize) << " bytes\n";
1246     llvm::errs() << "Decompression method: "
1247                  << (CompressionFormat == llvm::compression::Format::Zlib
1248                          ? "zlib"
1249                          : "zstd")
1250                  << "\n"
1251                  << "Size before decompression: "
1252                  << formatWithCommas(CompressedData.size()) << " bytes\n"
1253                  << "Size after decompression: "
1254                  << formatWithCommas(UncompressedSize) << " bytes\n"
1255                  << "Compression rate: "
1256                  << llvm::format("%.2lf", CompressionRate) << "\n"
1257                  << "Compression ratio: "
1258                  << llvm::format("%.2lf%%", 100.0 / CompressionRate) << "\n"
1259                  << "Decompression speed: "
1260                  << llvm::format("%.2lf MB/s", DecompressionSpeedMBs) << "\n"
1261                  << "Stored hash: " << llvm::format_hex(StoredHash, 16) << "\n"
1262                  << "Recalculated hash: "
1263                  << llvm::format_hex(RecalculatedHash, 16) << "\n"
1264                  << "Hashes match: " << (HashMatch ? "Yes" : "No") << "\n";
1265   }
1266 
1267   return llvm::MemoryBuffer::getMemBufferCopy(
1268       llvm::toStringRef(DecompressedData));
1269 }
1270 
1271 // List bundle IDs. Return true if an error was found.
1272 Error OffloadBundler::ListBundleIDsInFile(
1273     StringRef InputFileName, const OffloadBundlerConfig &BundlerConfig) {
1274   // Open Input file.
1275   ErrorOr<std::unique_ptr<MemoryBuffer>> CodeOrErr =
1276       MemoryBuffer::getFileOrSTDIN(InputFileName, /*IsText=*/true);
1277   if (std::error_code EC = CodeOrErr.getError())
1278     return createFileError(InputFileName, EC);
1279 
1280   // Decompress the input if necessary.
1281   Expected<std::unique_ptr<MemoryBuffer>> DecompressedBufferOrErr =
1282       CompressedOffloadBundle::decompress(**CodeOrErr, BundlerConfig.Verbose);
1283   if (!DecompressedBufferOrErr)
1284     return createStringError(
1285         inconvertibleErrorCode(),
1286         "Failed to decompress input: " +
1287             llvm::toString(DecompressedBufferOrErr.takeError()));
1288 
1289   MemoryBuffer &DecompressedInput = **DecompressedBufferOrErr;
1290 
1291   // Select the right files handler.
1292   Expected<std::unique_ptr<FileHandler>> FileHandlerOrErr =
1293       CreateFileHandler(DecompressedInput, BundlerConfig);
1294   if (!FileHandlerOrErr)
1295     return FileHandlerOrErr.takeError();
1296 
1297   std::unique_ptr<FileHandler> &FH = *FileHandlerOrErr;
1298   assert(FH);
1299   return FH->listBundleIDs(DecompressedInput);
1300 }
1301 
1302 /// @brief Checks if a code object \p CodeObjectInfo is compatible with a given
1303 /// target \p TargetInfo.
1304 /// @link https://clang.llvm.org/docs/ClangOffloadBundler.html#bundle-entry-id
1305 bool isCodeObjectCompatible(const OffloadTargetInfo &CodeObjectInfo,
1306                             const OffloadTargetInfo &TargetInfo) {
1307 
1308   // Compatible in case of exact match.
1309   if (CodeObjectInfo == TargetInfo) {
1310     DEBUG_WITH_TYPE("CodeObjectCompatibility",
1311                     dbgs() << "Compatible: Exact match: \t[CodeObject: "
1312                            << CodeObjectInfo.str()
1313                            << "]\t:\t[Target: " << TargetInfo.str() << "]\n");
1314     return true;
1315   }
1316 
1317   // Incompatible if Kinds or Triples mismatch.
1318   if (!CodeObjectInfo.isOffloadKindCompatible(TargetInfo.OffloadKind) ||
1319       !CodeObjectInfo.Triple.isCompatibleWith(TargetInfo.Triple)) {
1320     DEBUG_WITH_TYPE(
1321         "CodeObjectCompatibility",
1322         dbgs() << "Incompatible: Kind/Triple mismatch \t[CodeObject: "
1323                << CodeObjectInfo.str() << "]\t:\t[Target: " << TargetInfo.str()
1324                << "]\n");
1325     return false;
1326   }
1327 
1328   // Incompatible if Processors mismatch.
1329   llvm::StringMap<bool> CodeObjectFeatureMap, TargetFeatureMap;
1330   std::optional<StringRef> CodeObjectProc = clang::parseTargetID(
1331       CodeObjectInfo.Triple, CodeObjectInfo.TargetID, &CodeObjectFeatureMap);
1332   std::optional<StringRef> TargetProc = clang::parseTargetID(
1333       TargetInfo.Triple, TargetInfo.TargetID, &TargetFeatureMap);
1334 
1335   // Both TargetProc and CodeObjectProc can't be empty here.
1336   if (!TargetProc || !CodeObjectProc ||
1337       CodeObjectProc.value() != TargetProc.value()) {
1338     DEBUG_WITH_TYPE("CodeObjectCompatibility",
1339                     dbgs() << "Incompatible: Processor mismatch \t[CodeObject: "
1340                            << CodeObjectInfo.str()
1341                            << "]\t:\t[Target: " << TargetInfo.str() << "]\n");
1342     return false;
1343   }
1344 
1345   // Incompatible if CodeObject has more features than Target, irrespective of
1346   // type or sign of features.
1347   if (CodeObjectFeatureMap.getNumItems() > TargetFeatureMap.getNumItems()) {
1348     DEBUG_WITH_TYPE("CodeObjectCompatibility",
1349                     dbgs() << "Incompatible: CodeObject has more features "
1350                               "than target \t[CodeObject: "
1351                            << CodeObjectInfo.str()
1352                            << "]\t:\t[Target: " << TargetInfo.str() << "]\n");
1353     return false;
1354   }
1355 
1356   // Compatible if each target feature specified by target is compatible with
1357   // target feature of code object. The target feature is compatible if the
1358   // code object does not specify it (meaning Any), or if it specifies it
1359   // with the same value (meaning On or Off).
1360   for (const auto &CodeObjectFeature : CodeObjectFeatureMap) {
1361     auto TargetFeature = TargetFeatureMap.find(CodeObjectFeature.getKey());
1362     if (TargetFeature == TargetFeatureMap.end()) {
1363       DEBUG_WITH_TYPE(
1364           "CodeObjectCompatibility",
1365           dbgs()
1366               << "Incompatible: Value of CodeObject's non-ANY feature is "
1367                  "not matching with Target feature's ANY value \t[CodeObject: "
1368               << CodeObjectInfo.str() << "]\t:\t[Target: " << TargetInfo.str()
1369               << "]\n");
1370       return false;
1371     } else if (TargetFeature->getValue() != CodeObjectFeature.getValue()) {
1372       DEBUG_WITH_TYPE(
1373           "CodeObjectCompatibility",
1374           dbgs() << "Incompatible: Value of CodeObject's non-ANY feature is "
1375                     "not matching with Target feature's non-ANY value "
1376                     "\t[CodeObject: "
1377                  << CodeObjectInfo.str()
1378                  << "]\t:\t[Target: " << TargetInfo.str() << "]\n");
1379       return false;
1380     }
1381   }
1382 
1383   // CodeObject is compatible if all features of Target are:
1384   //   - either, present in the Code Object's features map with the same sign,
1385   //   - or, the feature is missing from CodeObjects's features map i.e. it is
1386   //   set to ANY
1387   DEBUG_WITH_TYPE(
1388       "CodeObjectCompatibility",
1389       dbgs() << "Compatible: Target IDs are compatible \t[CodeObject: "
1390              << CodeObjectInfo.str() << "]\t:\t[Target: " << TargetInfo.str()
1391              << "]\n");
1392   return true;
1393 }
1394 
1395 /// Bundle the files. Return true if an error was found.
1396 Error OffloadBundler::BundleFiles() {
1397   std::error_code EC;
1398 
1399   // Create a buffer to hold the content before compressing.
1400   SmallVector<char, 0> Buffer;
1401   llvm::raw_svector_ostream BufferStream(Buffer);
1402 
1403   // Open input files.
1404   SmallVector<std::unique_ptr<MemoryBuffer>, 8u> InputBuffers;
1405   InputBuffers.reserve(BundlerConfig.InputFileNames.size());
1406   for (auto &I : BundlerConfig.InputFileNames) {
1407     ErrorOr<std::unique_ptr<MemoryBuffer>> CodeOrErr =
1408         MemoryBuffer::getFileOrSTDIN(I, /*IsText=*/true);
1409     if (std::error_code EC = CodeOrErr.getError())
1410       return createFileError(I, EC);
1411     InputBuffers.emplace_back(std::move(*CodeOrErr));
1412   }
1413 
1414   // Get the file handler. We use the host buffer as reference.
1415   assert((BundlerConfig.HostInputIndex != ~0u || BundlerConfig.AllowNoHost) &&
1416          "Host input index undefined??");
1417   Expected<std::unique_ptr<FileHandler>> FileHandlerOrErr = CreateFileHandler(
1418       *InputBuffers[BundlerConfig.AllowNoHost ? 0
1419                                               : BundlerConfig.HostInputIndex],
1420       BundlerConfig);
1421   if (!FileHandlerOrErr)
1422     return FileHandlerOrErr.takeError();
1423 
1424   std::unique_ptr<FileHandler> &FH = *FileHandlerOrErr;
1425   assert(FH);
1426 
1427   // Write header.
1428   if (Error Err = FH->WriteHeader(BufferStream, InputBuffers))
1429     return Err;
1430 
1431   // Write all bundles along with the start/end markers. If an error was found
1432   // writing the end of the bundle component, abort the bundle writing.
1433   auto Input = InputBuffers.begin();
1434   for (auto &Triple : BundlerConfig.TargetNames) {
1435     if (Error Err = FH->WriteBundleStart(BufferStream, Triple))
1436       return Err;
1437     if (Error Err = FH->WriteBundle(BufferStream, **Input))
1438       return Err;
1439     if (Error Err = FH->WriteBundleEnd(BufferStream, Triple))
1440       return Err;
1441     ++Input;
1442   }
1443 
1444   raw_fd_ostream OutputFile(BundlerConfig.OutputFileNames.front(), EC,
1445                             sys::fs::OF_None);
1446   if (EC)
1447     return createFileError(BundlerConfig.OutputFileNames.front(), EC);
1448 
1449   SmallVector<char, 0> CompressedBuffer;
1450   if (BundlerConfig.Compress) {
1451     std::unique_ptr<llvm::MemoryBuffer> BufferMemory =
1452         llvm::MemoryBuffer::getMemBufferCopy(
1453             llvm::StringRef(Buffer.data(), Buffer.size()));
1454     auto CompressionResult = CompressedOffloadBundle::compress(
1455         {BundlerConfig.CompressionFormat, BundlerConfig.CompressionLevel,
1456          /*zstdEnableLdm=*/true},
1457         *BufferMemory, BundlerConfig.CompressedBundleVersion,
1458         BundlerConfig.Verbose);
1459     if (auto Error = CompressionResult.takeError())
1460       return Error;
1461 
1462     auto CompressedMemBuffer = std::move(CompressionResult.get());
1463     CompressedBuffer.assign(CompressedMemBuffer->getBufferStart(),
1464                             CompressedMemBuffer->getBufferEnd());
1465   } else
1466     CompressedBuffer = Buffer;
1467 
1468   OutputFile.write(CompressedBuffer.data(), CompressedBuffer.size());
1469 
1470   return FH->finalizeOutputFile();
1471 }
1472 
// Unbundle the files. Failures are reported through the returned Error.
1474 Error OffloadBundler::UnbundleFiles() {
1475   // Open Input file.
1476   ErrorOr<std::unique_ptr<MemoryBuffer>> CodeOrErr =
1477       MemoryBuffer::getFileOrSTDIN(BundlerConfig.InputFileNames.front(),
1478                                    /*IsText=*/true);
1479   if (std::error_code EC = CodeOrErr.getError())
1480     return createFileError(BundlerConfig.InputFileNames.front(), EC);
1481 
1482   // Decompress the input if necessary.
1483   Expected<std::unique_ptr<MemoryBuffer>> DecompressedBufferOrErr =
1484       CompressedOffloadBundle::decompress(**CodeOrErr, BundlerConfig.Verbose);
1485   if (!DecompressedBufferOrErr)
1486     return createStringError(
1487         inconvertibleErrorCode(),
1488         "Failed to decompress input: " +
1489             llvm::toString(DecompressedBufferOrErr.takeError()));
1490 
1491   MemoryBuffer &Input = **DecompressedBufferOrErr;
1492 
1493   // Select the right files handler.
1494   Expected<std::unique_ptr<FileHandler>> FileHandlerOrErr =
1495       CreateFileHandler(Input, BundlerConfig);
1496   if (!FileHandlerOrErr)
1497     return FileHandlerOrErr.takeError();
1498 
1499   std::unique_ptr<FileHandler> &FH = *FileHandlerOrErr;
1500   assert(FH);
1501 
1502   // Read the header of the bundled file.
1503   if (Error Err = FH->ReadHeader(Input))
1504     return Err;
1505 
1506   // Create a work list that consist of the map triple/output file.
1507   StringMap<StringRef> Worklist;
1508   auto Output = BundlerConfig.OutputFileNames.begin();
1509   for (auto &Triple : BundlerConfig.TargetNames) {
1510     Worklist[Triple] = *Output;
1511     ++Output;
1512   }
1513 
1514   // Read all the bundles that are in the work list. If we find no bundles we
1515   // assume the file is meant for the host target.
1516   bool FoundHostBundle = false;
1517   while (!Worklist.empty()) {
1518     Expected<std::optional<StringRef>> CurTripleOrErr =
1519         FH->ReadBundleStart(Input);
1520     if (!CurTripleOrErr)
1521       return CurTripleOrErr.takeError();
1522 
1523     // We don't have more bundles.
1524     if (!*CurTripleOrErr)
1525       break;
1526 
1527     StringRef CurTriple = **CurTripleOrErr;
1528     assert(!CurTriple.empty());
1529 
1530     auto Output = Worklist.begin();
1531     for (auto E = Worklist.end(); Output != E; Output++) {
1532       if (isCodeObjectCompatible(
1533               OffloadTargetInfo(CurTriple, BundlerConfig),
1534               OffloadTargetInfo((*Output).first(), BundlerConfig))) {
1535         break;
1536       }
1537     }
1538 
1539     if (Output == Worklist.end())
1540       continue;
1541     // Check if the output file can be opened and copy the bundle to it.
1542     std::error_code EC;
1543     raw_fd_ostream OutputFile((*Output).second, EC, sys::fs::OF_None);
1544     if (EC)
1545       return createFileError((*Output).second, EC);
1546     if (Error Err = FH->ReadBundle(OutputFile, Input))
1547       return Err;
1548     if (Error Err = FH->ReadBundleEnd(Input))
1549       return Err;
1550     Worklist.erase(Output);
1551 
1552     // Record if we found the host bundle.
1553     auto OffloadInfo = OffloadTargetInfo(CurTriple, BundlerConfig);
1554     if (OffloadInfo.hasHostKind())
1555       FoundHostBundle = true;
1556   }
1557 
1558   if (!BundlerConfig.AllowMissingBundles && !Worklist.empty()) {
1559     std::string ErrMsg = "Can't find bundles for";
1560     std::set<StringRef> Sorted;
1561     for (auto &E : Worklist)
1562       Sorted.insert(E.first());
1563     unsigned I = 0;
1564     unsigned Last = Sorted.size() - 1;
1565     for (auto &E : Sorted) {
1566       if (I != 0 && Last > 1)
1567         ErrMsg += ",";
1568       ErrMsg += " ";
1569       if (I == Last && I != 0)
1570         ErrMsg += "and ";
1571       ErrMsg += E.str();
1572       ++I;
1573     }
1574     return createStringError(inconvertibleErrorCode(), ErrMsg);
1575   }
1576 
1577   // If no bundles were found, assume the input file is the host bundle and
1578   // create empty files for the remaining targets.
1579   if (Worklist.size() == BundlerConfig.TargetNames.size()) {
1580     for (auto &E : Worklist) {
1581       std::error_code EC;
1582       raw_fd_ostream OutputFile(E.second, EC, sys::fs::OF_None);
1583       if (EC)
1584         return createFileError(E.second, EC);
1585 
1586       // If this entry has a host kind, copy the input file to the output file.
1587       auto OffloadInfo = OffloadTargetInfo(E.getKey(), BundlerConfig);
1588       if (OffloadInfo.hasHostKind())
1589         OutputFile.write(Input.getBufferStart(), Input.getBufferSize());
1590     }
1591     return Error::success();
1592   }
1593 
1594   // If we found elements, we emit an error if none of those were for the host
1595   // in case host bundle name was provided in command line.
1596   if (!(FoundHostBundle || BundlerConfig.HostInputIndex == ~0u ||
1597         BundlerConfig.AllowMissingBundles))
1598     return createStringError(inconvertibleErrorCode(),
1599                              "Can't find bundle for the host target");
1600 
1601   // If we still have any elements in the worklist, create empty files for them.
1602   for (auto &E : Worklist) {
1603     std::error_code EC;
1604     raw_fd_ostream OutputFile(E.second, EC, sys::fs::OF_None);
1605     if (EC)
1606       return createFileError(E.second, EC);
1607   }
1608 
1609   return Error::success();
1610 }
1611 
1612 static Archive::Kind getDefaultArchiveKindForHost() {
1613   return Triple(sys::getDefaultTargetTriple()).isOSDarwin() ? Archive::K_DARWIN
1614                                                             : Archive::K_GNU;
1615 }
1616 
1617 /// @brief Computes a list of targets among all given targets which are
1618 /// compatible with this code object
1619 /// @param [in] CodeObjectInfo Code Object
1620 /// @param [out] CompatibleTargets List of all compatible targets among all
1621 /// given targets
1622 /// @return false, if no compatible target is found.
1623 static bool
1624 getCompatibleOffloadTargets(OffloadTargetInfo &CodeObjectInfo,
1625                             SmallVectorImpl<StringRef> &CompatibleTargets,
1626                             const OffloadBundlerConfig &BundlerConfig) {
1627   if (!CompatibleTargets.empty()) {
1628     DEBUG_WITH_TYPE("CodeObjectCompatibility",
1629                     dbgs() << "CompatibleTargets list should be empty\n");
1630     return false;
1631   }
1632   for (auto &Target : BundlerConfig.TargetNames) {
1633     auto TargetInfo = OffloadTargetInfo(Target, BundlerConfig);
1634     if (isCodeObjectCompatible(CodeObjectInfo, TargetInfo))
1635       CompatibleTargets.push_back(Target);
1636   }
1637   return !CompatibleTargets.empty();
1638 }
1639 
1640 // Check that each code object file in the input archive conforms to following
1641 // rule: for a specific processor, a feature either shows up in all target IDs,
1642 // or does not show up in any target IDs. Otherwise the target ID combination is
1643 // invalid.
1644 static Error
1645 CheckHeterogeneousArchive(StringRef ArchiveName,
1646                           const OffloadBundlerConfig &BundlerConfig) {
1647   std::vector<std::unique_ptr<MemoryBuffer>> ArchiveBuffers;
1648   ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
1649       MemoryBuffer::getFileOrSTDIN(ArchiveName, true, false);
1650   if (std::error_code EC = BufOrErr.getError())
1651     return createFileError(ArchiveName, EC);
1652 
1653   ArchiveBuffers.push_back(std::move(*BufOrErr));
1654   Expected<std::unique_ptr<llvm::object::Archive>> LibOrErr =
1655       Archive::create(ArchiveBuffers.back()->getMemBufferRef());
1656   if (!LibOrErr)
1657     return LibOrErr.takeError();
1658 
1659   auto Archive = std::move(*LibOrErr);
1660 
1661   Error ArchiveErr = Error::success();
1662   auto ChildEnd = Archive->child_end();
1663 
1664   /// Iterate over all bundled code object files in the input archive.
1665   for (auto ArchiveIter = Archive->child_begin(ArchiveErr);
1666        ArchiveIter != ChildEnd; ++ArchiveIter) {
1667     if (ArchiveErr)
1668       return ArchiveErr;
1669     auto ArchiveChildNameOrErr = (*ArchiveIter).getName();
1670     if (!ArchiveChildNameOrErr)
1671       return ArchiveChildNameOrErr.takeError();
1672 
1673     auto CodeObjectBufferRefOrErr = (*ArchiveIter).getMemoryBufferRef();
1674     if (!CodeObjectBufferRefOrErr)
1675       return CodeObjectBufferRefOrErr.takeError();
1676 
1677     auto CodeObjectBuffer =
1678         MemoryBuffer::getMemBuffer(*CodeObjectBufferRefOrErr, false);
1679 
1680     Expected<std::unique_ptr<FileHandler>> FileHandlerOrErr =
1681         CreateFileHandler(*CodeObjectBuffer, BundlerConfig);
1682     if (!FileHandlerOrErr)
1683       return FileHandlerOrErr.takeError();
1684 
1685     std::unique_ptr<FileHandler> &FileHandler = *FileHandlerOrErr;
1686     assert(FileHandler);
1687 
1688     std::set<StringRef> BundleIds;
1689     auto CodeObjectFileError =
1690         FileHandler->getBundleIDs(*CodeObjectBuffer, BundleIds);
1691     if (CodeObjectFileError)
1692       return CodeObjectFileError;
1693 
1694     auto &&ConflictingArchs = clang::getConflictTargetIDCombination(BundleIds);
1695     if (ConflictingArchs) {
1696       std::string ErrMsg =
1697           Twine("conflicting TargetIDs [" + ConflictingArchs.value().first +
1698                 ", " + ConflictingArchs.value().second + "] found in " +
1699                 ArchiveChildNameOrErr.get() + " of " + ArchiveName)
1700               .str();
1701       return createStringError(inconvertibleErrorCode(), ErrMsg);
1702     }
1703   }
1704 
1705   return ArchiveErr;
1706 }
1707 
1708 /// UnbundleArchive takes an archive file (".a") as input containing bundled
1709 /// code object files, and a list of offload targets (not host), and extracts
1710 /// the code objects into a new archive file for each offload target. Each
1711 /// resulting archive file contains all code object files corresponding to that
1712 /// particular offload target. The created archive file does not
1713 /// contain an index of the symbols and code object files are named as
1714 /// <<Parent Bundle Name>-<CodeObject's TargetID>>, with ':' replaced with '_'.
1715 Error OffloadBundler::UnbundleArchive() {
1716   std::vector<std::unique_ptr<MemoryBuffer>> ArchiveBuffers;
1717 
1718   /// Map of target names with list of object files that will form the device
1719   /// specific archive for that target
1720   StringMap<std::vector<NewArchiveMember>> OutputArchivesMap;
1721 
1722   // Map of target names and output archive filenames
1723   StringMap<StringRef> TargetOutputFileNameMap;
1724 
1725   auto Output = BundlerConfig.OutputFileNames.begin();
1726   for (auto &Target : BundlerConfig.TargetNames) {
1727     TargetOutputFileNameMap[Target] = *Output;
1728     ++Output;
1729   }
1730 
1731   StringRef IFName = BundlerConfig.InputFileNames.front();
1732 
1733   if (BundlerConfig.CheckInputArchive) {
1734     // For a specific processor, a feature either shows up in all target IDs, or
1735     // does not show up in any target IDs. Otherwise the target ID combination
1736     // is invalid.
1737     auto ArchiveError = CheckHeterogeneousArchive(IFName, BundlerConfig);
1738     if (ArchiveError) {
1739       return ArchiveError;
1740     }
1741   }
1742 
1743   ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
1744       MemoryBuffer::getFileOrSTDIN(IFName, true, false);
1745   if (std::error_code EC = BufOrErr.getError())
1746     return createFileError(BundlerConfig.InputFileNames.front(), EC);
1747 
1748   ArchiveBuffers.push_back(std::move(*BufOrErr));
1749   Expected<std::unique_ptr<llvm::object::Archive>> LibOrErr =
1750       Archive::create(ArchiveBuffers.back()->getMemBufferRef());
1751   if (!LibOrErr)
1752     return LibOrErr.takeError();
1753 
1754   auto Archive = std::move(*LibOrErr);
1755 
1756   Error ArchiveErr = Error::success();
1757   auto ChildEnd = Archive->child_end();
1758 
1759   /// Iterate over all bundled code object files in the input archive.
1760   for (auto ArchiveIter = Archive->child_begin(ArchiveErr);
1761        ArchiveIter != ChildEnd; ++ArchiveIter) {
1762     if (ArchiveErr)
1763       return ArchiveErr;
1764     auto ArchiveChildNameOrErr = (*ArchiveIter).getName();
1765     if (!ArchiveChildNameOrErr)
1766       return ArchiveChildNameOrErr.takeError();
1767 
1768     StringRef BundledObjectFile = sys::path::filename(*ArchiveChildNameOrErr);
1769 
1770     auto CodeObjectBufferRefOrErr = (*ArchiveIter).getMemoryBufferRef();
1771     if (!CodeObjectBufferRefOrErr)
1772       return CodeObjectBufferRefOrErr.takeError();
1773 
1774     auto TempCodeObjectBuffer =
1775         MemoryBuffer::getMemBuffer(*CodeObjectBufferRefOrErr, false);
1776 
1777     // Decompress the buffer if necessary.
1778     Expected<std::unique_ptr<MemoryBuffer>> DecompressedBufferOrErr =
1779         CompressedOffloadBundle::decompress(*TempCodeObjectBuffer,
1780                                             BundlerConfig.Verbose);
1781     if (!DecompressedBufferOrErr)
1782       return createStringError(
1783           inconvertibleErrorCode(),
1784           "Failed to decompress code object: " +
1785               llvm::toString(DecompressedBufferOrErr.takeError()));
1786 
1787     MemoryBuffer &CodeObjectBuffer = **DecompressedBufferOrErr;
1788 
1789     Expected<std::unique_ptr<FileHandler>> FileHandlerOrErr =
1790         CreateFileHandler(CodeObjectBuffer, BundlerConfig);
1791     if (!FileHandlerOrErr)
1792       return FileHandlerOrErr.takeError();
1793 
1794     std::unique_ptr<FileHandler> &FileHandler = *FileHandlerOrErr;
1795     assert(FileHandler &&
1796            "FileHandle creation failed for file in the archive!");
1797 
1798     if (Error ReadErr = FileHandler->ReadHeader(CodeObjectBuffer))
1799       return ReadErr;
1800 
1801     Expected<std::optional<StringRef>> CurBundleIDOrErr =
1802         FileHandler->ReadBundleStart(CodeObjectBuffer);
1803     if (!CurBundleIDOrErr)
1804       return CurBundleIDOrErr.takeError();
1805 
1806     std::optional<StringRef> OptionalCurBundleID = *CurBundleIDOrErr;
1807     // No device code in this child, skip.
1808     if (!OptionalCurBundleID)
1809       continue;
1810     StringRef CodeObject = *OptionalCurBundleID;
1811 
1812     // Process all bundle entries (CodeObjects) found in this child of input
1813     // archive.
1814     while (!CodeObject.empty()) {
1815       SmallVector<StringRef> CompatibleTargets;
1816       auto CodeObjectInfo = OffloadTargetInfo(CodeObject, BundlerConfig);
1817       if (getCompatibleOffloadTargets(CodeObjectInfo, CompatibleTargets,
1818                                       BundlerConfig)) {
1819         std::string BundleData;
1820         raw_string_ostream DataStream(BundleData);
1821         if (Error Err = FileHandler->ReadBundle(DataStream, CodeObjectBuffer))
1822           return Err;
1823 
1824         for (auto &CompatibleTarget : CompatibleTargets) {
1825           SmallString<128> BundledObjectFileName;
1826           BundledObjectFileName.assign(BundledObjectFile);
1827           auto OutputBundleName =
1828               Twine(llvm::sys::path::stem(BundledObjectFileName) + "-" +
1829                     CodeObject +
1830                     getDeviceLibraryFileName(BundledObjectFileName,
1831                                              CodeObjectInfo.TargetID))
1832                   .str();
1833           // Replace ':' in optional target feature list with '_' to ensure
1834           // cross-platform validity.
1835           std::replace(OutputBundleName.begin(), OutputBundleName.end(), ':',
1836                        '_');
1837 
1838           std::unique_ptr<MemoryBuffer> MemBuf = MemoryBuffer::getMemBufferCopy(
1839               DataStream.str(), OutputBundleName);
1840           ArchiveBuffers.push_back(std::move(MemBuf));
1841           llvm::MemoryBufferRef MemBufRef =
1842               MemoryBufferRef(*(ArchiveBuffers.back()));
1843 
1844           // For inserting <CompatibleTarget, list<CodeObject>> entry in
1845           // OutputArchivesMap.
1846           OutputArchivesMap[CompatibleTarget].push_back(
1847               NewArchiveMember(MemBufRef));
1848         }
1849       }
1850 
1851       if (Error Err = FileHandler->ReadBundleEnd(CodeObjectBuffer))
1852         return Err;
1853 
1854       Expected<std::optional<StringRef>> NextTripleOrErr =
1855           FileHandler->ReadBundleStart(CodeObjectBuffer);
1856       if (!NextTripleOrErr)
1857         return NextTripleOrErr.takeError();
1858 
1859       CodeObject = ((*NextTripleOrErr).has_value()) ? **NextTripleOrErr : "";
1860     } // End of processing of all bundle entries of this child of input archive.
1861   }   // End of while over children of input archive.
1862 
1863   assert(!ArchiveErr && "Error occurred while reading archive!");
1864 
1865   /// Write out an archive for each target
1866   for (auto &Target : BundlerConfig.TargetNames) {
1867     StringRef FileName = TargetOutputFileNameMap[Target];
1868     StringMapIterator<std::vector<llvm::NewArchiveMember>> CurArchiveMembers =
1869         OutputArchivesMap.find(Target);
1870     if (CurArchiveMembers != OutputArchivesMap.end()) {
1871       if (Error WriteErr = writeArchive(FileName, CurArchiveMembers->getValue(),
1872                                         SymtabWritingMode::NormalSymtab,
1873                                         getDefaultArchiveKindForHost(), true,
1874                                         false, nullptr))
1875         return WriteErr;
1876     } else if (!BundlerConfig.AllowMissingBundles) {
1877       std::string ErrMsg =
1878           Twine("no compatible code object found for the target '" + Target +
1879                 "' in heterogeneous archive library: " + IFName)
1880               .str();
1881       return createStringError(inconvertibleErrorCode(), ErrMsg);
1882     } else { // Create an empty archive file if no compatible code object is
1883              // found and "allow-missing-bundles" is enabled. It ensures that
1884              // the linker using output of this step doesn't complain about
1885              // the missing input file.
1886       std::vector<llvm::NewArchiveMember> EmptyArchive;
1887       EmptyArchive.clear();
1888       if (Error WriteErr = writeArchive(
1889               FileName, EmptyArchive, SymtabWritingMode::NormalSymtab,
1890               getDefaultArchiveKindForHost(), true, false, nullptr))
1891         return WriteErr;
1892     }
1893   }
1894 
1895   return Error::success();
1896 }
1897