xref: /llvm-project/clang-tools-extra/modularize/ModularizeUtilities.cpp (revision 61946687bc68ccba763571cb420049b9a3749dfe)
1 //===--- extra/modularize/ModularizeUtilities.cpp -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a class for loading and validating a module map or
10 // header list by checking that all headers in the corresponding directories
11 // are accounted for.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/Basic/SourceManager.h"
16 #include "clang/Driver/Options.h"
17 #include "clang/Frontend/CompilerInstance.h"
18 #include "clang/Frontend/FrontendActions.h"
19 #include "CoverageChecker.h"
20 #include "llvm/ADT/SmallString.h"
21 #include "llvm/Support/FileUtilities.h"
22 #include "llvm/Support/MemoryBuffer.h"
23 #include "llvm/Support/Path.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include "ModularizeUtilities.h"
26 
27 using namespace clang;
28 using namespace llvm;
29 using namespace Modularize;
30 
31 namespace {
32 // Subclass TargetOptions so we can construct it inline with
33 // the minimal option, the triple.
34 class ModuleMapTargetOptions : public clang::TargetOptions {
35 public:
36   ModuleMapTargetOptions() { Triple = llvm::sys::getDefaultTargetTriple(); }
37 };
38 } // namespace
39 
40 // ModularizeUtilities class implementation.
41 
// Constructor.
// Records the input paths, the header prefix, and the problem-files list
// path, and constructs the clang objects (language options, diagnostics,
// target info, file/source managers, header search) that the module map
// loading code in this file uses. No files are read here.
// \param InputPaths Paths of the header list and/or module map files.
// \param Prefix Directory used in place of the list file's directory when
//   resolving relative header paths; may be empty.
// \param ProblemFilesListPath Path of a problem-files list; may be empty.
ModularizeUtilities::ModularizeUtilities(std::vector<std::string> &InputPaths,
                                         llvm::StringRef Prefix,
                                         llvm::StringRef ProblemFilesListPath)
    : InputFilePaths(InputPaths), HeaderPrefix(Prefix),
      ProblemFilesPath(ProblemFilesListPath), HasModuleMap(false),
      MissingHeaderCount(0),
      // Init clang stuff needed for loading the module map and preprocessing.
      LangOpts(new LangOptions()), DiagIDs(new DiagnosticIDs()),
      DiagnosticOpts(new DiagnosticOptions()),
      // The diagnostic consumer writes to llvm::errs().
      DC(llvm::errs(), DiagnosticOpts.get()),
      Diagnostics(
          new DiagnosticsEngine(DiagIDs, DiagnosticOpts.get(), &DC, false)),
      // Target options carry only the default target triple.
      TargetOpts(new ModuleMapTargetOptions()),
      Target(TargetInfo::CreateTargetInfo(*Diagnostics, TargetOpts)),
      FileMgr(new FileManager(FileSystemOpts)),
      SourceMgr(new SourceManager(*Diagnostics, *FileMgr, false)),
      // Header search uses default-constructed HeaderSearchOptions.
      HeaderInfo(new HeaderSearch(std::make_shared<HeaderSearchOptions>(),
                                  *SourceMgr, *Diagnostics, *LangOpts,
                                  Target.get())) {}
62 
63 // Create instance of ModularizeUtilities, to simplify setting up
64 // subordinate objects.
65 ModularizeUtilities *ModularizeUtilities::createModularizeUtilities(
66     std::vector<std::string> &InputPaths, llvm::StringRef Prefix,
67     llvm::StringRef ProblemFilesListPath) {
68 
69   return new ModularizeUtilities(InputPaths, Prefix, ProblemFilesListPath);
70 }
71 
72 // Load all header lists and dependencies.
73 std::error_code ModularizeUtilities::loadAllHeaderListsAndDependencies() {
74   // For each input file.
75   for (auto I = InputFilePaths.begin(), E = InputFilePaths.end(); I != E; ++I) {
76     llvm::StringRef InputPath = *I;
77     // If it's a module map.
78     if (InputPath.ends_with(".modulemap")) {
79       // Load the module map.
80       if (std::error_code EC = loadModuleMap(InputPath))
81         return EC;
82     } else {
83       // Else we assume it's a header list and load it.
84       if (std::error_code EC = loadSingleHeaderListsAndDependencies(InputPath)) {
85         errs() << "modularize: error: Unable to get header list '" << InputPath
86           << "': " << EC.message() << '\n';
87         return EC;
88       }
89     }
90   }
91   // If we have a problem files list.
92   if (ProblemFilesPath.size() != 0) {
93     // Load problem files list.
94     if (std::error_code EC = loadProblemHeaderList(ProblemFilesPath)) {
95       errs() << "modularize: error: Unable to get problem header list '" << ProblemFilesPath
96         << "': " << EC.message() << '\n';
97       return EC;
98     }
99   }
100   return std::error_code();
101 }
102 
103 // Do coverage checks.
104 // For each loaded module map, do header coverage check.
105 // Starting from the directory of the module.modulemap file,
106 // Find all header files, optionally looking only at files
107 // covered by the include path options, and compare against
108 // the headers referenced by the module.modulemap file.
109 // Display warnings for unaccounted-for header files.
110 // Returns 0 if there were no errors or warnings, 1 if there
111 // were warnings, 2 if any other problem, such as a bad
112 // module map path argument was specified.
113 std::error_code ModularizeUtilities::doCoverageCheck(
114     std::vector<std::string> &IncludePaths,
115     llvm::ArrayRef<std::string> CommandLine) {
116   int ModuleMapCount = ModuleMaps.size();
117   int ModuleMapIndex;
118   std::error_code EC;
119   for (ModuleMapIndex = 0; ModuleMapIndex < ModuleMapCount; ++ModuleMapIndex) {
120     std::unique_ptr<clang::ModuleMap> &ModMap = ModuleMaps[ModuleMapIndex];
121     auto Checker = CoverageChecker::createCoverageChecker(
122         InputFilePaths[ModuleMapIndex], IncludePaths, CommandLine,
123         ModMap.get());
124     std::error_code LocalEC = Checker->doChecks();
125     if (LocalEC.value() > 0)
126       EC = LocalEC;
127   }
128   return EC;
129 }
130 
131 // Load single header list and dependencies.
132 std::error_code ModularizeUtilities::loadSingleHeaderListsAndDependencies(
133     llvm::StringRef InputPath) {
134 
135   // By default, use the path component of the list file name.
136   SmallString<256> HeaderDirectory(InputPath);
137   llvm::sys::path::remove_filename(HeaderDirectory);
138   SmallString<256> CurrentDirectory;
139   llvm::sys::fs::current_path(CurrentDirectory);
140 
141   // Get the prefix if we have one.
142   if (HeaderPrefix.size() != 0)
143     HeaderDirectory = HeaderPrefix;
144 
145   // Read the header list file into a buffer.
146   ErrorOr<std::unique_ptr<MemoryBuffer>> listBuffer =
147     MemoryBuffer::getFile(InputPath);
148   if (std::error_code EC = listBuffer.getError())
149     return EC;
150 
151   // Parse the header list into strings.
152   SmallVector<StringRef, 32> Strings;
153   listBuffer.get()->getBuffer().split(Strings, "\n", -1, false);
154 
155   // Collect the header file names from the string list.
156   for (SmallVectorImpl<StringRef>::iterator I = Strings.begin(),
157     E = Strings.end();
158     I != E; ++I) {
159     StringRef Line = I->trim();
160     // Ignore comments and empty lines.
161     if (Line.empty() || (Line[0] == '#'))
162       continue;
163     std::pair<StringRef, StringRef> TargetAndDependents = Line.split(':');
164     SmallString<256> HeaderFileName;
165     // Prepend header file name prefix if it's not absolute.
166     if (llvm::sys::path::is_absolute(TargetAndDependents.first))
167       llvm::sys::path::native(TargetAndDependents.first, HeaderFileName);
168     else {
169       if (HeaderDirectory.size() != 0)
170         HeaderFileName = HeaderDirectory;
171       else
172         HeaderFileName = CurrentDirectory;
173       llvm::sys::path::append(HeaderFileName, TargetAndDependents.first);
174       llvm::sys::path::native(HeaderFileName);
175     }
176     // Handle optional dependencies.
177     DependentsVector Dependents;
178     SmallVector<StringRef, 4> DependentsList;
179     TargetAndDependents.second.split(DependentsList, " ", -1, false);
180     int Count = DependentsList.size();
181     for (int Index = 0; Index < Count; ++Index) {
182       SmallString<256> Dependent;
183       if (llvm::sys::path::is_absolute(DependentsList[Index]))
184         Dependent = DependentsList[Index];
185       else {
186         if (HeaderDirectory.size() != 0)
187           Dependent = HeaderDirectory;
188         else
189           Dependent = CurrentDirectory;
190         llvm::sys::path::append(Dependent, DependentsList[Index]);
191       }
192       llvm::sys::path::native(Dependent);
193       Dependents.push_back(getCanonicalPath(Dependent.str()));
194     }
195     // Get canonical form.
196     HeaderFileName = getCanonicalPath(HeaderFileName);
197     // Save the resulting header file path and dependencies.
198     HeaderFileNames.push_back(std::string(HeaderFileName));
199     Dependencies[HeaderFileName.str()] = Dependents;
200   }
201   return std::error_code();
202 }
203 
204 // Load problem header list.
205 std::error_code ModularizeUtilities::loadProblemHeaderList(
206   llvm::StringRef InputPath) {
207 
208   // By default, use the path component of the list file name.
209   SmallString<256> HeaderDirectory(InputPath);
210   llvm::sys::path::remove_filename(HeaderDirectory);
211   SmallString<256> CurrentDirectory;
212   llvm::sys::fs::current_path(CurrentDirectory);
213 
214   // Get the prefix if we have one.
215   if (HeaderPrefix.size() != 0)
216     HeaderDirectory = HeaderPrefix;
217 
218   // Read the header list file into a buffer.
219   ErrorOr<std::unique_ptr<MemoryBuffer>> listBuffer =
220     MemoryBuffer::getFile(InputPath);
221   if (std::error_code EC = listBuffer.getError())
222     return EC;
223 
224   // Parse the header list into strings.
225   SmallVector<StringRef, 32> Strings;
226   listBuffer.get()->getBuffer().split(Strings, "\n", -1, false);
227 
228   // Collect the header file names from the string list.
229   for (SmallVectorImpl<StringRef>::iterator I = Strings.begin(),
230     E = Strings.end();
231     I != E; ++I) {
232     StringRef Line = I->trim();
233     // Ignore comments and empty lines.
234     if (Line.empty() || (Line[0] == '#'))
235       continue;
236     SmallString<256> HeaderFileName;
237     // Prepend header file name prefix if it's not absolute.
238     if (llvm::sys::path::is_absolute(Line))
239       llvm::sys::path::native(Line, HeaderFileName);
240     else {
241       if (HeaderDirectory.size() != 0)
242         HeaderFileName = HeaderDirectory;
243       else
244         HeaderFileName = CurrentDirectory;
245       llvm::sys::path::append(HeaderFileName, Line);
246       llvm::sys::path::native(HeaderFileName);
247     }
248     // Get canonical form.
249     HeaderFileName = getCanonicalPath(HeaderFileName);
250     // Save the resulting header file path.
251     ProblemFileNames.push_back(std::string(HeaderFileName));
252   }
253   return std::error_code();
254 }
255 
256 // Load single module map and extract header file list.
257 std::error_code ModularizeUtilities::loadModuleMap(
258     llvm::StringRef InputPath) {
259   // Get file entry for module.modulemap file.
260   auto ModuleMapEntryOrErr = SourceMgr->getFileManager().getFileRef(InputPath);
261 
262   // return error if not found.
263   if (!ModuleMapEntryOrErr) {
264     llvm::errs() << "error: File \"" << InputPath << "\" not found.\n";
265     return errorToErrorCode(ModuleMapEntryOrErr.takeError());
266   }
267   FileEntryRef ModuleMapEntry = *ModuleMapEntryOrErr;
268 
269   // Because the module map parser uses a ForwardingDiagnosticConsumer,
270   // which doesn't forward the BeginSourceFile call, we do it explicitly here.
271   DC.BeginSourceFile(*LangOpts, nullptr);
272 
273   // Figure out the home directory for the module map file.
274   DirectoryEntryRef Dir = ModuleMapEntry.getDir();
275   StringRef DirName(Dir.getName());
276   if (llvm::sys::path::filename(DirName) == "Modules") {
277     DirName = llvm::sys::path::parent_path(DirName);
278     if (DirName.ends_with(".framework")) {
279       auto FrameworkDirOrErr = FileMgr->getDirectoryRef(DirName);
280       if (!FrameworkDirOrErr) {
281         // This can happen if there's a race between the above check and the
282         // removal of the directory.
283         return errorToErrorCode(FrameworkDirOrErr.takeError());
284       }
285       Dir = *FrameworkDirOrErr;
286     }
287   }
288 
289   std::unique_ptr<ModuleMap> ModMap;
290   ModMap.reset(new ModuleMap(*SourceMgr, *Diagnostics, *LangOpts,
291     Target.get(), *HeaderInfo));
292 
293   // Parse module.modulemap file into module map.
294   if (ModMap->parseModuleMapFile(ModuleMapEntry, false, Dir)) {
295     return std::error_code(1, std::generic_category());
296   }
297 
298   // Do matching end call.
299   DC.EndSourceFile();
300 
301   // Reset missing header count.
302   MissingHeaderCount = 0;
303 
304   if (!collectModuleMapHeaders(ModMap.get()))
305     return std::error_code(1, std::generic_category());
306 
307   // Save module map.
308   ModuleMaps.push_back(std::move(ModMap));
309 
310   // Indicate we are using module maps.
311   HasModuleMap = true;
312 
313   // Return code of 1 for missing headers.
314   if (MissingHeaderCount)
315     return std::error_code(1, std::generic_category());
316 
317   return std::error_code();
318 }
319 
320 // Collect module map headers.
321 // Walks the modules and collects referenced headers into
322 // HeaderFileNames.
323 bool ModularizeUtilities::collectModuleMapHeaders(clang::ModuleMap *ModMap) {
324   SmallVector<std::pair<StringRef, const clang::Module *>, 0> Vec;
325   for (auto &M : ModMap->modules())
326     Vec.emplace_back(M.first(), M.second);
327   llvm::sort(Vec, llvm::less_first());
328   for (auto &I : Vec)
329     if (!collectModuleHeaders(*I.second))
330       return false;
331   return true;
332 }
333 
334 // Collect referenced headers from one module.
335 // Collects the headers referenced in the given module into
336 // HeaderFileNames.
337 bool ModularizeUtilities::collectModuleHeaders(const clang::Module &Mod) {
338 
339   // Ignore explicit modules because they often have dependencies
340   // we can't know.
341   if (Mod.IsExplicit)
342     return true;
343 
344   // Treat headers in umbrella directory as dependencies.
345   DependentsVector UmbrellaDependents;
346 
347   // Recursively do submodules.
348   for (auto *Submodule : Mod.submodules())
349     collectModuleHeaders(*Submodule);
350 
351   if (std::optional<clang::Module::Header> UmbrellaHeader =
352           Mod.getUmbrellaHeaderAsWritten()) {
353     std::string HeaderPath = getCanonicalPath(UmbrellaHeader->Entry.getName());
354     // Collect umbrella header.
355     HeaderFileNames.push_back(HeaderPath);
356 
357     // FUTURE: When needed, umbrella header header collection goes here.
358   } else if (std::optional<clang::Module::DirectoryName> UmbrellaDir =
359                  Mod.getUmbrellaDirAsWritten()) {
360     // If there normal headers, assume these are umbrellas and skip collection.
361     if (Mod.getHeaders(Module::HK_Normal).empty()) {
362       // Collect headers in umbrella directory.
363       if (!collectUmbrellaHeaders(UmbrellaDir->Entry.getName(),
364                                   UmbrellaDependents))
365         return false;
366     }
367   }
368 
369   // We ignore HK_Private, HK_Textual, HK_PrivateTextual, and HK_Excluded,
370   // assuming they are marked as such either because of unsuitability for
371   // modules or because they are meant to be included by another header,
372   // and thus should be ignored by modularize.
373 
374   for (const auto &Header : Mod.getHeaders(clang::Module::HK_Normal))
375     HeaderFileNames.push_back(getCanonicalPath(Header.Entry.getName()));
376 
377   int MissingCountThisModule = Mod.MissingHeaders.size();
378 
379   for (int Index = 0; Index < MissingCountThisModule; ++Index) {
380     std::string MissingFile = Mod.MissingHeaders[Index].FileName;
381     SourceLocation Loc = Mod.MissingHeaders[Index].FileNameLoc;
382     errs() << Loc.printToString(*SourceMgr)
383       << ": error : Header not found: " << MissingFile << "\n";
384   }
385 
386   MissingHeaderCount += MissingCountThisModule;
387 
388   return true;
389 }
390 
391 // Collect headers from an umbrella directory.
392 bool ModularizeUtilities::collectUmbrellaHeaders(StringRef UmbrellaDirName,
393   DependentsVector &Dependents) {
394   // Initialize directory name.
395   SmallString<256> Directory(UmbrellaDirName);
396   // Walk the directory.
397   std::error_code EC;
398   for (llvm::sys::fs::directory_iterator I(Directory.str(), EC), E; I != E;
399     I.increment(EC)) {
400     if (EC)
401       return false;
402     std::string File(I->path());
403     llvm::ErrorOr<llvm::sys::fs::basic_file_status> Status = I->status();
404     if (!Status)
405       return false;
406     llvm::sys::fs::file_type Type = Status->type();
407     // If the file is a directory, ignore the name and recurse.
408     if (Type == llvm::sys::fs::file_type::directory_file) {
409       if (!collectUmbrellaHeaders(File, Dependents))
410         return false;
411       continue;
412     }
413     // If the file does not have a common header extension, ignore it.
414     if (!isHeader(File))
415       continue;
416     // Save header name.
417     std::string HeaderPath = getCanonicalPath(File);
418     Dependents.push_back(HeaderPath);
419   }
420   return true;
421 }
422 
423 // Replace .. embedded in path for purposes of having
424 // a canonical path.
425 static std::string replaceDotDot(StringRef Path) {
426   SmallString<128> Buffer;
427   llvm::sys::path::const_iterator B = llvm::sys::path::begin(Path),
428     E = llvm::sys::path::end(Path);
429   while (B != E) {
430     if (*B == "..")
431       llvm::sys::path::remove_filename(Buffer);
432     else if (*B != ".")
433       llvm::sys::path::append(Buffer, *B);
434     ++B;
435   }
436   if (Path.ends_with("/") || Path.ends_with("\\"))
437     Buffer.append(1, Path.back());
438   return Buffer.c_str();
439 }
440 
441 // Convert header path to canonical form.
442 // The canonical form is basically just use forward slashes, and remove "./".
443 // \param FilePath The file path, relative to the module map directory.
444 // \returns The file path in canonical form.
445 std::string ModularizeUtilities::getCanonicalPath(StringRef FilePath) {
446   std::string Tmp(replaceDotDot(FilePath));
447   std::replace(Tmp.begin(), Tmp.end(), '\\', '/');
448   StringRef Tmp2(Tmp);
449   if (Tmp2.starts_with("./"))
450     Tmp = std::string(Tmp2.substr(2));
451   return Tmp;
452 }
453 
454 // Check for header file extension.
455 // If the file extension is .h, .inc, or missing, it's
456 // assumed to be a header.
457 // \param FileName The file name.  Must not be a directory.
458 // \returns true if it has a header extension or no extension.
459 bool ModularizeUtilities::isHeader(StringRef FileName) {
460   StringRef Extension = llvm::sys::path::extension(FileName);
461   if (Extension.size() == 0)
462     return true;
463   if (Extension.equals_insensitive(".h"))
464     return true;
465   if (Extension.equals_insensitive(".inc"))
466     return true;
467   return false;
468 }
469 
470 // Get directory path component from file path.
471 // \returns the component of the given path, which will be
472 // relative if the given path is relative, absolute if the
473 // given path is absolute, or "." if the path has no leading
474 // path component.
475 std::string ModularizeUtilities::getDirectoryFromPath(StringRef Path) {
476   SmallString<256> Directory(Path);
477   sys::path::remove_filename(Directory);
478   if (Directory.size() == 0)
479     return ".";
480   return std::string(Directory);
481 }
482 
483 // Add unique problem file.
484 // Also standardizes the path.
485 void ModularizeUtilities::addUniqueProblemFile(std::string FilePath) {
486   FilePath = getCanonicalPath(FilePath);
487   // Don't add if already present.
488   for(auto &TestFilePath : ProblemFileNames) {
489     if (TestFilePath == FilePath)
490       return;
491   }
492   ProblemFileNames.push_back(FilePath);
493 }
494 
495 // Add file with no compile errors.
496 // Also standardizes the path.
497 void ModularizeUtilities::addNoCompileErrorsFile(std::string FilePath) {
498   FilePath = getCanonicalPath(FilePath);
499   GoodFileNames.push_back(FilePath);
500 }
501 
502 // List problem files.
503 void ModularizeUtilities::displayProblemFiles() {
504   errs() << "\nThese are the files with possible errors:\n\n";
505   for (auto &ProblemFile : ProblemFileNames) {
506     errs() << ProblemFile << "\n";
507   }
508 }
509 
510 // List files with no problems.
511 void ModularizeUtilities::displayGoodFiles() {
512   errs() << "\nThese are the files with no detected errors:\n\n";
513   for (auto &GoodFile : HeaderFileNames) {
514     bool Good = true;
515     for (auto &ProblemFile : ProblemFileNames) {
516       if (ProblemFile == GoodFile) {
517         Good = false;
518         break;
519       }
520     }
521     if (Good)
522       errs() << GoodFile << "\n";
523   }
524 }
525 
526 // List files with problem files commented out.
527 void ModularizeUtilities::displayCombinedFiles() {
528   errs() <<
529     "\nThese are the combined files, with problem files preceded by #:\n\n";
530   for (auto &File : HeaderFileNames) {
531     bool Good = true;
532     for (auto &ProblemFile : ProblemFileNames) {
533       if (ProblemFile == File) {
534         Good = false;
535         break;
536       }
537     }
538     errs() << (Good ? "" : "#") << File << "\n";
539   }
540 }
541