xref: /llvm-project/llvm/utils/split-file/split-file.cpp (revision 74dcf0b595d4d230f65a7bba7b0164c019d3c08b)
13e119c0eSMarkus Böck //===- split-file.cpp - Input splitting utility ---------------------------===//
23e119c0eSMarkus Böck //
33e119c0eSMarkus Böck // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
43e119c0eSMarkus Böck // See https://llvm.org/LICENSE.txt for license information.
53e119c0eSMarkus Böck // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
63e119c0eSMarkus Böck //
73e119c0eSMarkus Böck //===----------------------------------------------------------------------===//
83e119c0eSMarkus Böck //
// Split input into multiple parts separated by regex '^(.|//)--- ' and extract
103e119c0eSMarkus Böck // the specified part.
113e119c0eSMarkus Böck //
123e119c0eSMarkus Böck //===----------------------------------------------------------------------===//
133e119c0eSMarkus Böck 
143e119c0eSMarkus Böck #include "llvm/ADT/DenseMap.h"
153e119c0eSMarkus Böck #include "llvm/ADT/StringExtras.h"
163e119c0eSMarkus Böck #include "llvm/ADT/StringRef.h"
173e119c0eSMarkus Böck #include "llvm/Support/CommandLine.h"
183e119c0eSMarkus Böck #include "llvm/Support/FileOutputBuffer.h"
193e119c0eSMarkus Böck #include "llvm/Support/FileSystem.h"
203e119c0eSMarkus Böck #include "llvm/Support/LineIterator.h"
213e119c0eSMarkus Böck #include "llvm/Support/MemoryBuffer.h"
223e119c0eSMarkus Böck #include "llvm/Support/Path.h"
233e119c0eSMarkus Böck #include "llvm/Support/ToolOutputFile.h"
243e119c0eSMarkus Böck #include "llvm/Support/WithColor.h"
253e119c0eSMarkus Böck #include <string>
263e119c0eSMarkus Böck #include <system_error>
273e119c0eSMarkus Böck 
283e119c0eSMarkus Böck using namespace llvm;
293e119c0eSMarkus Böck 
303e119c0eSMarkus Böck static cl::OptionCategory cat("split-file Options");
313e119c0eSMarkus Böck 
323e119c0eSMarkus Böck static cl::opt<std::string> input(cl::Positional, cl::desc("filename"),
333e119c0eSMarkus Böck                                   cl::cat(cat));
343e119c0eSMarkus Böck 
353e119c0eSMarkus Böck static cl::opt<std::string> output(cl::Positional, cl::desc("directory"),
363e119c0eSMarkus Böck                                    cl::value_desc("directory"), cl::cat(cat));
373e119c0eSMarkus Böck 
383e119c0eSMarkus Böck static cl::opt<bool> leadingLines("leading-lines",
393e119c0eSMarkus Böck                                     cl::desc("Preserve line numbers"),
403e119c0eSMarkus Böck                                     cl::cat(cat));
413e119c0eSMarkus Böck 
423e119c0eSMarkus Böck static cl::opt<bool> noLeadingLines("no-leading-lines",
433e119c0eSMarkus Böck                                     cl::desc("Don't preserve line numbers (default)"),
443e119c0eSMarkus Böck                                     cl::cat(cat));
453e119c0eSMarkus Böck 
463e119c0eSMarkus Böck static StringRef toolName;
473e119c0eSMarkus Böck static int errorCount;
483e119c0eSMarkus Böck 
493e119c0eSMarkus Böck [[noreturn]] static void fatal(StringRef filename, const Twine &message) {
503e119c0eSMarkus Böck   if (filename.empty())
513e119c0eSMarkus Böck     WithColor::error(errs(), toolName) << message << '\n';
523e119c0eSMarkus Böck   else
533e119c0eSMarkus Böck     WithColor::error(errs(), toolName) << filename << ": " << message << '\n';
543e119c0eSMarkus Böck   exit(1);
553e119c0eSMarkus Böck }
563e119c0eSMarkus Böck 
573e119c0eSMarkus Böck static void error(StringRef filename, int64_t line, const Twine &message) {
583e119c0eSMarkus Böck   ++errorCount;
593e119c0eSMarkus Böck   errs() << filename << ':' << line << ": ";
603e119c0eSMarkus Böck   WithColor::error(errs()) << message << '\n';
613e119c0eSMarkus Böck }
623e119c0eSMarkus Böck 
namespace {
// Describes one extracted part as a [begin, end) byte range of the input
// buffer.
struct Part {
  // First byte of the part's contents; remains null when the separator is the
  // last line of the input, i.e. the part has no contents.
  const char *begin{nullptr};
  // One past the last byte of the part's contents.
  const char *end{nullptr};
  // Number of line terminators to emit before the contents.
  int64_t leadingLines{0};
};
} // namespace
703e119c0eSMarkus Böck 
713e119c0eSMarkus Böck static int handle(MemoryBuffer &inputBuf, StringRef input) {
723e119c0eSMarkus Böck   DenseMap<StringRef, Part> partToBegin;
733e119c0eSMarkus Böck   StringRef lastPart, separator;
743e119c0eSMarkus Böck   StringRef EOL = inputBuf.getBuffer().detectEOL();
753e119c0eSMarkus Böck   for (line_iterator i(inputBuf, /*SkipBlanks=*/false, '\0'); !i.is_at_eof();) {
763e119c0eSMarkus Böck     const int64_t lineNo = i.line_number();
773e119c0eSMarkus Böck     const StringRef line = *i++;
78586ecdf2SKazu Hirata     const size_t markerLen = line.starts_with("//") ? 6 : 5;
793e119c0eSMarkus Böck     if (!(line.size() >= markerLen &&
80586ecdf2SKazu Hirata           line.substr(markerLen - 4).starts_with("--- ")))
813e119c0eSMarkus Böck       continue;
823e119c0eSMarkus Böck     separator = line.substr(0, markerLen);
833e119c0eSMarkus Böck     const StringRef partName = line.substr(markerLen);
843e119c0eSMarkus Böck     if (partName.empty()) {
853e119c0eSMarkus Böck       error(input, lineNo, "empty part name");
863e119c0eSMarkus Böck       continue;
873e119c0eSMarkus Böck     }
883e119c0eSMarkus Böck     if (isSpace(partName.front()) || isSpace(partName.back())) {
893e119c0eSMarkus Böck       error(input, lineNo, "part name cannot have leading or trailing space");
903e119c0eSMarkus Böck       continue;
913e119c0eSMarkus Böck     }
923e119c0eSMarkus Böck 
933e119c0eSMarkus Böck     auto res = partToBegin.try_emplace(partName);
943e119c0eSMarkus Böck     if (!res.second) {
953e119c0eSMarkus Böck       error(input, lineNo,
963e119c0eSMarkus Böck             "'" + separator + partName + "' occurs more than once");
973e119c0eSMarkus Böck       continue;
983e119c0eSMarkus Böck     }
993e119c0eSMarkus Böck     if (!lastPart.empty())
1003e119c0eSMarkus Böck       partToBegin[lastPart].end = line.data();
1013e119c0eSMarkus Böck     Part &cur = res.first->second;
1023e119c0eSMarkus Böck     if (!i.is_at_eof())
1033e119c0eSMarkus Böck       cur.begin = i->data();
1043e119c0eSMarkus Böck     // If --leading-lines is specified, numEmptyLines is 0. Append newlines so
1053e119c0eSMarkus Böck     // that the extracted part preserves line numbers.
1063e119c0eSMarkus Böck     cur.leadingLines = leadingLines ? i.line_number() - 1 : 0;
1073e119c0eSMarkus Böck 
1083e119c0eSMarkus Böck     lastPart = partName;
1093e119c0eSMarkus Böck   }
1103e119c0eSMarkus Böck   if (lastPart.empty())
1113e119c0eSMarkus Böck     fatal(input, "no part separator was found");
1123e119c0eSMarkus Böck   if (errorCount)
1133e119c0eSMarkus Böck     return 1;
1143e119c0eSMarkus Böck   partToBegin[lastPart].end = inputBuf.getBufferEnd();
1153e119c0eSMarkus Böck 
1163e119c0eSMarkus Böck   std::vector<std::unique_ptr<ToolOutputFile>> outputFiles;
1173e119c0eSMarkus Böck   SmallString<256> partPath;
1183e119c0eSMarkus Böck   for (auto &keyValue : partToBegin) {
1193e119c0eSMarkus Böck     partPath.clear();
1203e119c0eSMarkus Böck     sys::path::append(partPath, output, keyValue.first);
1213e119c0eSMarkus Böck     std::error_code ec =
1223e119c0eSMarkus Böck         sys::fs::create_directories(sys::path::parent_path(partPath));
1233e119c0eSMarkus Böck     if (ec)
1243e119c0eSMarkus Böck       fatal(input, ec.message());
1253e119c0eSMarkus Böck     auto f = std::make_unique<ToolOutputFile>(partPath.str(), ec,
126*74dcf0b5SAbhina Sree                                               llvm::sys::fs::OF_Text);
1273e119c0eSMarkus Böck     if (!f)
1283e119c0eSMarkus Böck       fatal(input, ec.message());
1293e119c0eSMarkus Böck 
1303e119c0eSMarkus Böck     Part &part = keyValue.second;
1313e119c0eSMarkus Böck     for (int64_t i = 0; i != part.leadingLines; ++i)
1323e119c0eSMarkus Böck       (*f).os() << EOL;
1333e119c0eSMarkus Böck     if (part.begin)
1343e119c0eSMarkus Böck       (*f).os().write(part.begin, part.end - part.begin);
1353e119c0eSMarkus Böck     outputFiles.push_back(std::move(f));
1363e119c0eSMarkus Böck   }
1373e119c0eSMarkus Böck 
1383e119c0eSMarkus Böck   for (std::unique_ptr<ToolOutputFile> &outputFile : outputFiles)
1393e119c0eSMarkus Böck     outputFile->keep();
1403e119c0eSMarkus Böck   return 0;
1413e119c0eSMarkus Böck }
1423e119c0eSMarkus Böck 
1433e119c0eSMarkus Böck int main(int argc, const char **argv) {
1443e119c0eSMarkus Böck   toolName = sys::path::stem(argv[0]);
1453e119c0eSMarkus Böck   cl::HideUnrelatedOptions({&cat});
1463e119c0eSMarkus Böck   cl::ParseCommandLineOptions(
1473e119c0eSMarkus Böck       argc, argv,
1483e119c0eSMarkus Böck       "Split input into multiple parts separated by regex '^(.|//)--- ' and "
1493e119c0eSMarkus Böck       "extract the part specified by '^(.|//)--- <part>'\n",
1503e119c0eSMarkus Böck       nullptr,
1513e119c0eSMarkus Böck       /*EnvVar=*/nullptr,
1523e119c0eSMarkus Böck       /*LongOptionsUseDoubleDash=*/true);
1533e119c0eSMarkus Böck 
1543e119c0eSMarkus Böck   if (input.empty())
1553e119c0eSMarkus Böck     fatal("", "input filename is not specified");
1563e119c0eSMarkus Böck   if (output.empty())
1573e119c0eSMarkus Böck     fatal("", "output directory is not specified");
1583e119c0eSMarkus Böck   ErrorOr<std::unique_ptr<MemoryBuffer>> bufferOrErr =
159*74dcf0b5SAbhina Sree       MemoryBuffer::getFileOrSTDIN(input, /*IsText=*/true);
1603e119c0eSMarkus Böck   if (std::error_code ec = bufferOrErr.getError())
1613e119c0eSMarkus Böck     fatal(input, ec.message());
1623e119c0eSMarkus Böck 
1633e119c0eSMarkus Böck   // Delete output if it is a file or an empty directory, so that we can create
1643e119c0eSMarkus Böck   // a directory.
1653e119c0eSMarkus Böck   sys::fs::file_status status;
1663e119c0eSMarkus Böck   if (std::error_code ec = sys::fs::status(output, status))
1673e119c0eSMarkus Böck     if (ec.value() != static_cast<int>(std::errc::no_such_file_or_directory))
1683e119c0eSMarkus Böck       fatal(output, ec.message());
1693e119c0eSMarkus Böck   if (status.type() != sys::fs::file_type::file_not_found &&
1703e119c0eSMarkus Böck       status.type() != sys::fs::file_type::directory_file &&
1713e119c0eSMarkus Böck       status.type() != sys::fs::file_type::regular_file)
1723e119c0eSMarkus Böck     fatal(output, "output cannot be a special file");
1733e119c0eSMarkus Böck   if (std::error_code ec = sys::fs::remove(output, /*IgnoreNonExisting=*/true))
1743e119c0eSMarkus Böck     if (ec.value() != static_cast<int>(std::errc::directory_not_empty) &&
1753e119c0eSMarkus Böck         ec.value() != static_cast<int>(std::errc::file_exists))
1763e119c0eSMarkus Böck       fatal(output, ec.message());
1773e119c0eSMarkus Böck   return handle(**bufferOrErr, input);
1783e119c0eSMarkus Böck }
179