10b57cec5SDimitry Andric //===-- TarWriter.cpp - Tar archive file creator --------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // TarWriter class provides a feature to create a tar archive file.
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric // I put emphasis on simplicity over comprehensiveness when implementing this
120b57cec5SDimitry Andric // class because we don't need a full-fledged archive file generator in LLVM
130b57cec5SDimitry Andric // at the moment.
140b57cec5SDimitry Andric //
150b57cec5SDimitry Andric // The filename field in the Unix V7 tar header is 100 bytes. Longer filenames
160b57cec5SDimitry Andric // are stored using the PAX extension. The PAX header is standardized in
170b57cec5SDimitry Andric // POSIX.1-2001.
180b57cec5SDimitry Andric //
190b57cec5SDimitry Andric // The struct definition of UstarHeader is copied from
200b57cec5SDimitry Andric // https://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5
210b57cec5SDimitry Andric //
220b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
230b57cec5SDimitry Andric
240b57cec5SDimitry Andric #include "llvm/Support/TarWriter.h"
250b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h"
260b57cec5SDimitry Andric #include "llvm/Support/FileSystem.h"
270b57cec5SDimitry Andric #include "llvm/Support/MathExtras.h"
280b57cec5SDimitry Andric #include "llvm/Support/Path.h"
290b57cec5SDimitry Andric
300b57cec5SDimitry Andric using namespace llvm;
310b57cec5SDimitry Andric
// Tar archives are organized in fixed-size blocks: every header and every
// file body in an archive starts on a multiple of this many bytes.
static constexpr int BlockSize = 512;

// Byte layout of one Ustar header block, following the field list given in
// FreeBSD's tar(5). Every member is a plain char array, so the compiler
// inserts no padding and the struct occupies exactly one block (checked by
// the static_assert below).
struct UstarHeader {
  char Name[100];     // File name, or the trailing part of a split path.
  char Mode[8];       // Permission bits as octal text.
  char Uid[8];        // Owner user id.
  char Gid[8];        // Owner group id.
  char Size[12];      // Payload size in bytes as octal text.
  char Mtime[12];     // Modification time.
  char Checksum[8];   // Sum of all header bytes as octal text.
  char TypeFlag;      // Entry type; 'x' marks a PAX extended header.
  char Linkname[100]; // Link target.
  char Magic[6];      // "ustar" followed by a NUL.
  char Version[2];    // "00", not NUL-terminated.
  char Uname[32];     // Owner user name.
  char Gname[32];     // Owner group name.
  char DevMajor[8];   // Device major number.
  char DevMinor[8];   // Device minor number.
  char Prefix[155];   // Leading part of a split path.
  char Pad[12];       // Filler that rounds the header up to a full block.
};
static_assert(sizeof(UstarHeader) == BlockSize, "invalid Ustar header");
550b57cec5SDimitry Andric
makeUstarHeader()560b57cec5SDimitry Andric static UstarHeader makeUstarHeader() {
570b57cec5SDimitry Andric UstarHeader Hdr = {};
580b57cec5SDimitry Andric memcpy(Hdr.Magic, "ustar", 5); // Ustar magic
590b57cec5SDimitry Andric memcpy(Hdr.Version, "00", 2); // Ustar version
600b57cec5SDimitry Andric return Hdr;
610b57cec5SDimitry Andric }
620b57cec5SDimitry Andric
630b57cec5SDimitry Andric // A PAX attribute is in the form of "<length> <key>=<value>\n"
640b57cec5SDimitry Andric // where <length> is the length of the entire string including
650b57cec5SDimitry Andric // the length field itself. An example string is this.
660b57cec5SDimitry Andric //
670b57cec5SDimitry Andric // 25 ctime=1084839148.1212\n
680b57cec5SDimitry Andric //
690b57cec5SDimitry Andric // This function create such string.
formatPax(StringRef Key,StringRef Val)700b57cec5SDimitry Andric static std::string formatPax(StringRef Key, StringRef Val) {
710b57cec5SDimitry Andric int Len = Key.size() + Val.size() + 3; // +3 for " ", "=" and "\n"
720b57cec5SDimitry Andric
730b57cec5SDimitry Andric // We need to compute total size twice because appending
740b57cec5SDimitry Andric // a length field could change total size by one.
750b57cec5SDimitry Andric int Total = Len + Twine(Len).str().size();
760b57cec5SDimitry Andric Total = Len + Twine(Total).str().size();
770b57cec5SDimitry Andric return (Twine(Total) + " " + Key + "=" + Val + "\n").str();
780b57cec5SDimitry Andric }
790b57cec5SDimitry Andric
800b57cec5SDimitry Andric // Headers in tar files must be aligned to 512 byte boundaries.
810b57cec5SDimitry Andric // This function forwards the current file position to the next boundary.
pad(raw_fd_ostream & OS)820b57cec5SDimitry Andric static void pad(raw_fd_ostream &OS) {
830b57cec5SDimitry Andric uint64_t Pos = OS.tell();
840b57cec5SDimitry Andric OS.seek(alignTo(Pos, BlockSize));
850b57cec5SDimitry Andric }
860b57cec5SDimitry Andric
870b57cec5SDimitry Andric // Computes a checksum for a tar header.
computeChecksum(UstarHeader & Hdr)880b57cec5SDimitry Andric static void computeChecksum(UstarHeader &Hdr) {
890b57cec5SDimitry Andric // Before computing a checksum, checksum field must be
900b57cec5SDimitry Andric // filled with space characters.
910b57cec5SDimitry Andric memset(Hdr.Checksum, ' ', sizeof(Hdr.Checksum));
920b57cec5SDimitry Andric
930b57cec5SDimitry Andric // Compute a checksum and set it to the checksum field.
940b57cec5SDimitry Andric unsigned Chksum = 0;
950b57cec5SDimitry Andric for (size_t I = 0; I < sizeof(Hdr); ++I)
960b57cec5SDimitry Andric Chksum += reinterpret_cast<uint8_t *>(&Hdr)[I];
970b57cec5SDimitry Andric snprintf(Hdr.Checksum, sizeof(Hdr.Checksum), "%06o", Chksum);
980b57cec5SDimitry Andric }
990b57cec5SDimitry Andric
1000b57cec5SDimitry Andric // Create a tar header and write it to a given output stream.
writePaxHeader(raw_fd_ostream & OS,StringRef Path)1010b57cec5SDimitry Andric static void writePaxHeader(raw_fd_ostream &OS, StringRef Path) {
1020b57cec5SDimitry Andric // A PAX header consists of a 512-byte header followed
1030b57cec5SDimitry Andric // by key-value strings. First, create key-value strings.
1040b57cec5SDimitry Andric std::string PaxAttr = formatPax("path", Path);
1050b57cec5SDimitry Andric
1060b57cec5SDimitry Andric // Create a 512-byte header.
1070b57cec5SDimitry Andric UstarHeader Hdr = makeUstarHeader();
1080b57cec5SDimitry Andric snprintf(Hdr.Size, sizeof(Hdr.Size), "%011zo", PaxAttr.size());
1090b57cec5SDimitry Andric Hdr.TypeFlag = 'x'; // PAX magic
1100b57cec5SDimitry Andric computeChecksum(Hdr);
1110b57cec5SDimitry Andric
1120b57cec5SDimitry Andric // Write them down.
1130b57cec5SDimitry Andric OS << StringRef(reinterpret_cast<char *>(&Hdr), sizeof(Hdr));
1140b57cec5SDimitry Andric OS << PaxAttr;
1150b57cec5SDimitry Andric pad(OS);
1160b57cec5SDimitry Andric }
1170b57cec5SDimitry Andric
1180b57cec5SDimitry Andric // Path fits in a Ustar header if
1190b57cec5SDimitry Andric //
1200b57cec5SDimitry Andric // - Path is less than 100 characters long, or
1210b57cec5SDimitry Andric // - Path is in the form of "<prefix>/<name>" where <prefix> is less
1220b57cec5SDimitry Andric // than or equal to 155 characters long and <name> is less than 100
1230b57cec5SDimitry Andric // characters long. Both <prefix> and <name> can contain extra '/'.
1240b57cec5SDimitry Andric //
1250b57cec5SDimitry Andric // If Path fits in a Ustar header, updates Prefix and Name and returns true.
1260b57cec5SDimitry Andric // Otherwise, returns false.
splitUstar(StringRef Path,StringRef & Prefix,StringRef & Name)1270b57cec5SDimitry Andric static bool splitUstar(StringRef Path, StringRef &Prefix, StringRef &Name) {
1280b57cec5SDimitry Andric if (Path.size() < sizeof(UstarHeader::Name)) {
1290b57cec5SDimitry Andric Prefix = "";
1300b57cec5SDimitry Andric Name = Path;
1310b57cec5SDimitry Andric return true;
1320b57cec5SDimitry Andric }
1330b57cec5SDimitry Andric
134*5ffd83dbSDimitry Andric // tar 1.13 and earlier unconditionally look at the tar header interpreted
135*5ffd83dbSDimitry Andric // as an 'oldgnu_header', which has an 'isextended' byte at offset 482 in the
136*5ffd83dbSDimitry Andric // header, corresponding to offset 137 in the prefix. That's the version of
137*5ffd83dbSDimitry Andric // tar in gnuwin, so only use 137 of the 155 bytes in the prefix. This means
138*5ffd83dbSDimitry Andric // we'll need a pax header after 237 bytes of path instead of after 255,
139*5ffd83dbSDimitry Andric // but in return paths up to 237 bytes work with gnuwin, instead of just
140*5ffd83dbSDimitry Andric // 137 bytes of directory + 100 bytes of basename previously.
141*5ffd83dbSDimitry Andric // (tar-1.13 also doesn't support pax headers, but in practice all paths in
142*5ffd83dbSDimitry Andric // llvm's test suite are short enough for that to not matter.)
143*5ffd83dbSDimitry Andric const int MaxPrefix = 137;
144*5ffd83dbSDimitry Andric size_t Sep = Path.rfind('/', MaxPrefix + 1);
1450b57cec5SDimitry Andric if (Sep == StringRef::npos)
1460b57cec5SDimitry Andric return false;
1470b57cec5SDimitry Andric if (Path.size() - Sep - 1 >= sizeof(UstarHeader::Name))
1480b57cec5SDimitry Andric return false;
1490b57cec5SDimitry Andric
1500b57cec5SDimitry Andric Prefix = Path.substr(0, Sep);
1510b57cec5SDimitry Andric Name = Path.substr(Sep + 1);
1520b57cec5SDimitry Andric return true;
1530b57cec5SDimitry Andric }
1540b57cec5SDimitry Andric
1550b57cec5SDimitry Andric // The PAX header is an extended format, so a PAX header needs
1560b57cec5SDimitry Andric // to be followed by a "real" header.
writeUstarHeader(raw_fd_ostream & OS,StringRef Prefix,StringRef Name,size_t Size)1570b57cec5SDimitry Andric static void writeUstarHeader(raw_fd_ostream &OS, StringRef Prefix,
1580b57cec5SDimitry Andric StringRef Name, size_t Size) {
1590b57cec5SDimitry Andric UstarHeader Hdr = makeUstarHeader();
1600b57cec5SDimitry Andric memcpy(Hdr.Name, Name.data(), Name.size());
1610b57cec5SDimitry Andric memcpy(Hdr.Mode, "0000664", 8);
1620b57cec5SDimitry Andric snprintf(Hdr.Size, sizeof(Hdr.Size), "%011zo", Size);
1630b57cec5SDimitry Andric memcpy(Hdr.Prefix, Prefix.data(), Prefix.size());
1640b57cec5SDimitry Andric computeChecksum(Hdr);
1650b57cec5SDimitry Andric OS << StringRef(reinterpret_cast<char *>(&Hdr), sizeof(Hdr));
1660b57cec5SDimitry Andric }
1670b57cec5SDimitry Andric
1680b57cec5SDimitry Andric // Creates a TarWriter instance and returns it.
create(StringRef OutputPath,StringRef BaseDir)1690b57cec5SDimitry Andric Expected<std::unique_ptr<TarWriter>> TarWriter::create(StringRef OutputPath,
1700b57cec5SDimitry Andric StringRef BaseDir) {
1710b57cec5SDimitry Andric using namespace sys::fs;
1720b57cec5SDimitry Andric int FD;
1730b57cec5SDimitry Andric if (std::error_code EC =
1740b57cec5SDimitry Andric openFileForWrite(OutputPath, FD, CD_CreateAlways, OF_None))
1750b57cec5SDimitry Andric return make_error<StringError>("cannot open " + OutputPath, EC);
1760b57cec5SDimitry Andric return std::unique_ptr<TarWriter>(new TarWriter(FD, BaseDir));
1770b57cec5SDimitry Andric }
1780b57cec5SDimitry Andric
TarWriter(int FD,StringRef BaseDir)1790b57cec5SDimitry Andric TarWriter::TarWriter(int FD, StringRef BaseDir)
180*5ffd83dbSDimitry Andric : OS(FD, /*shouldClose=*/true, /*unbuffered=*/false),
181*5ffd83dbSDimitry Andric BaseDir(std::string(BaseDir)) {}
1820b57cec5SDimitry Andric
1830b57cec5SDimitry Andric // Append a given file to an archive.
append(StringRef Path,StringRef Data)1840b57cec5SDimitry Andric void TarWriter::append(StringRef Path, StringRef Data) {
1850b57cec5SDimitry Andric // Write Path and Data.
1860b57cec5SDimitry Andric std::string Fullpath = BaseDir + "/" + sys::path::convert_to_slash(Path);
1870b57cec5SDimitry Andric
1880b57cec5SDimitry Andric // We do not want to include the same file more than once.
1890b57cec5SDimitry Andric if (!Files.insert(Fullpath).second)
1900b57cec5SDimitry Andric return;
1910b57cec5SDimitry Andric
1920b57cec5SDimitry Andric StringRef Prefix;
1930b57cec5SDimitry Andric StringRef Name;
1940b57cec5SDimitry Andric if (splitUstar(Fullpath, Prefix, Name)) {
1950b57cec5SDimitry Andric writeUstarHeader(OS, Prefix, Name, Data.size());
1960b57cec5SDimitry Andric } else {
1970b57cec5SDimitry Andric writePaxHeader(OS, Fullpath);
1980b57cec5SDimitry Andric writeUstarHeader(OS, "", "", Data.size());
1990b57cec5SDimitry Andric }
2000b57cec5SDimitry Andric
2010b57cec5SDimitry Andric OS << Data;
2020b57cec5SDimitry Andric pad(OS);
2030b57cec5SDimitry Andric
2040b57cec5SDimitry Andric // POSIX requires tar archives end with two null blocks.
2050b57cec5SDimitry Andric // Here, we write the terminator and then seek back, so that
2060b57cec5SDimitry Andric // the file being output is terminated correctly at any moment.
2070b57cec5SDimitry Andric uint64_t Pos = OS.tell();
2080b57cec5SDimitry Andric OS << std::string(BlockSize * 2, '\0');
2090b57cec5SDimitry Andric OS.seek(Pos);
2100b57cec5SDimitry Andric OS.flush();
2110b57cec5SDimitry Andric }
212