1 //===--- extra/modularize/ModularizeUtilities.cpp -------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a class for loading and validating a module map or 10 // header list by checking that all headers in the corresponding directories 11 // are accounted for. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/Basic/SourceManager.h" 16 #include "clang/Driver/Options.h" 17 #include "clang/Frontend/CompilerInstance.h" 18 #include "clang/Frontend/FrontendActions.h" 19 #include "CoverageChecker.h" 20 #include "llvm/ADT/SmallString.h" 21 #include "llvm/Support/FileUtilities.h" 22 #include "llvm/Support/MemoryBuffer.h" 23 #include "llvm/Support/Path.h" 24 #include "llvm/Support/raw_ostream.h" 25 #include "ModularizeUtilities.h" 26 27 using namespace clang; 28 using namespace llvm; 29 using namespace Modularize; 30 31 namespace { 32 // Subclass TargetOptions so we can construct it inline with 33 // the minimal option, the triple. 34 class ModuleMapTargetOptions : public clang::TargetOptions { 35 public: 36 ModuleMapTargetOptions() { Triple = llvm::sys::getDefaultTargetTriple(); } 37 }; 38 } // namespace 39 40 // ModularizeUtilities class implementation. 41 42 // Constructor. 43 ModularizeUtilities::ModularizeUtilities(std::vector<std::string> &InputPaths, 44 llvm::StringRef Prefix, 45 llvm::StringRef ProblemFilesListPath) 46 : InputFilePaths(InputPaths), HeaderPrefix(Prefix), 47 ProblemFilesPath(ProblemFilesListPath), HasModuleMap(false), 48 MissingHeaderCount(0), 49 // Init clang stuff needed for loading the module map and preprocessing. 50 LangOpts(new LangOptions()), DiagIDs(new DiagnosticIDs()), 51 DiagnosticOpts(new DiagnosticOptions()), 52 DC(llvm::errs(), DiagnosticOpts.get()), 53 Diagnostics( 54 new DiagnosticsEngine(DiagIDs, DiagnosticOpts.get(), &DC, false)), 55 TargetOpts(new ModuleMapTargetOptions()), 56 Target(TargetInfo::CreateTargetInfo(*Diagnostics, TargetOpts)), 57 FileMgr(new FileManager(FileSystemOpts)), 58 SourceMgr(new SourceManager(*Diagnostics, *FileMgr, false)), 59 HeaderInfo(new HeaderSearch(std::make_shared<HeaderSearchOptions>(), 60 *SourceMgr, *Diagnostics, *LangOpts, 61 Target.get())) {} 62 63 // Create instance of ModularizeUtilities, to simplify setting up 64 // subordinate objects. 65 ModularizeUtilities *ModularizeUtilities::createModularizeUtilities( 66 std::vector<std::string> &InputPaths, llvm::StringRef Prefix, 67 llvm::StringRef ProblemFilesListPath) { 68 69 return new ModularizeUtilities(InputPaths, Prefix, ProblemFilesListPath); 70 } 71 72 // Load all header lists and dependencies. 73 std::error_code ModularizeUtilities::loadAllHeaderListsAndDependencies() { 74 // For each input file. 75 for (auto I = InputFilePaths.begin(), E = InputFilePaths.end(); I != E; ++I) { 76 llvm::StringRef InputPath = *I; 77 // If it's a module map. 78 if (InputPath.ends_with(".modulemap")) { 79 // Load the module map. 80 if (std::error_code EC = loadModuleMap(InputPath)) 81 return EC; 82 } else { 83 // Else we assume it's a header list and load it. 84 if (std::error_code EC = loadSingleHeaderListsAndDependencies(InputPath)) { 85 errs() << "modularize: error: Unable to get header list '" << InputPath 86 << "': " << EC.message() << '\n'; 87 return EC; 88 } 89 } 90 } 91 // If we have a problem files list. 92 if (ProblemFilesPath.size() != 0) { 93 // Load problem files list. 94 if (std::error_code EC = loadProblemHeaderList(ProblemFilesPath)) { 95 errs() << "modularize: error: Unable to get problem header list '" << ProblemFilesPath 96 << "': " << EC.message() << '\n'; 97 return EC; 98 } 99 } 100 return std::error_code(); 101 } 102 103 // Do coverage checks. 104 // For each loaded module map, do header coverage check. 105 // Starting from the directory of the module.modulemap file, 106 // Find all header files, optionally looking only at files 107 // covered by the include path options, and compare against 108 // the headers referenced by the module.modulemap file. 109 // Display warnings for unaccounted-for header files. 110 // Returns 0 if there were no errors or warnings, 1 if there 111 // were warnings, 2 if any other problem, such as a bad 112 // module map path argument was specified. 113 std::error_code ModularizeUtilities::doCoverageCheck( 114 std::vector<std::string> &IncludePaths, 115 llvm::ArrayRef<std::string> CommandLine) { 116 int ModuleMapCount = ModuleMaps.size(); 117 int ModuleMapIndex; 118 std::error_code EC; 119 for (ModuleMapIndex = 0; ModuleMapIndex < ModuleMapCount; ++ModuleMapIndex) { 120 std::unique_ptr<clang::ModuleMap> &ModMap = ModuleMaps[ModuleMapIndex]; 121 auto Checker = CoverageChecker::createCoverageChecker( 122 InputFilePaths[ModuleMapIndex], IncludePaths, CommandLine, 123 ModMap.get()); 124 std::error_code LocalEC = Checker->doChecks(); 125 if (LocalEC.value() > 0) 126 EC = LocalEC; 127 } 128 return EC; 129 } 130 131 // Load single header list and dependencies. 132 std::error_code ModularizeUtilities::loadSingleHeaderListsAndDependencies( 133 llvm::StringRef InputPath) { 134 135 // By default, use the path component of the list file name. 136 SmallString<256> HeaderDirectory(InputPath); 137 llvm::sys::path::remove_filename(HeaderDirectory); 138 SmallString<256> CurrentDirectory; 139 llvm::sys::fs::current_path(CurrentDirectory); 140 141 // Get the prefix if we have one. 142 if (HeaderPrefix.size() != 0) 143 HeaderDirectory = HeaderPrefix; 144 145 // Read the header list file into a buffer. 146 ErrorOr<std::unique_ptr<MemoryBuffer>> listBuffer = 147 MemoryBuffer::getFile(InputPath); 148 if (std::error_code EC = listBuffer.getError()) 149 return EC; 150 151 // Parse the header list into strings. 152 SmallVector<StringRef, 32> Strings; 153 listBuffer.get()->getBuffer().split(Strings, "\n", -1, false); 154 155 // Collect the header file names from the string list. 156 for (SmallVectorImpl<StringRef>::iterator I = Strings.begin(), 157 E = Strings.end(); 158 I != E; ++I) { 159 StringRef Line = I->trim(); 160 // Ignore comments and empty lines. 161 if (Line.empty() || (Line[0] == '#')) 162 continue; 163 std::pair<StringRef, StringRef> TargetAndDependents = Line.split(':'); 164 SmallString<256> HeaderFileName; 165 // Prepend header file name prefix if it's not absolute. 166 if (llvm::sys::path::is_absolute(TargetAndDependents.first)) 167 llvm::sys::path::native(TargetAndDependents.first, HeaderFileName); 168 else { 169 if (HeaderDirectory.size() != 0) 170 HeaderFileName = HeaderDirectory; 171 else 172 HeaderFileName = CurrentDirectory; 173 llvm::sys::path::append(HeaderFileName, TargetAndDependents.first); 174 llvm::sys::path::native(HeaderFileName); 175 } 176 // Handle optional dependencies. 177 DependentsVector Dependents; 178 SmallVector<StringRef, 4> DependentsList; 179 TargetAndDependents.second.split(DependentsList, " ", -1, false); 180 int Count = DependentsList.size(); 181 for (int Index = 0; Index < Count; ++Index) { 182 SmallString<256> Dependent; 183 if (llvm::sys::path::is_absolute(DependentsList[Index])) 184 Dependent = DependentsList[Index]; 185 else { 186 if (HeaderDirectory.size() != 0) 187 Dependent = HeaderDirectory; 188 else 189 Dependent = CurrentDirectory; 190 llvm::sys::path::append(Dependent, DependentsList[Index]); 191 } 192 llvm::sys::path::native(Dependent); 193 Dependents.push_back(getCanonicalPath(Dependent.str())); 194 } 195 // Get canonical form. 196 HeaderFileName = getCanonicalPath(HeaderFileName); 197 // Save the resulting header file path and dependencies. 198 HeaderFileNames.push_back(std::string(HeaderFileName)); 199 Dependencies[HeaderFileName.str()] = Dependents; 200 } 201 return std::error_code(); 202 } 203 204 // Load problem header list. 205 std::error_code ModularizeUtilities::loadProblemHeaderList( 206 llvm::StringRef InputPath) { 207 208 // By default, use the path component of the list file name. 209 SmallString<256> HeaderDirectory(InputPath); 210 llvm::sys::path::remove_filename(HeaderDirectory); 211 SmallString<256> CurrentDirectory; 212 llvm::sys::fs::current_path(CurrentDirectory); 213 214 // Get the prefix if we have one. 215 if (HeaderPrefix.size() != 0) 216 HeaderDirectory = HeaderPrefix; 217 218 // Read the header list file into a buffer. 219 ErrorOr<std::unique_ptr<MemoryBuffer>> listBuffer = 220 MemoryBuffer::getFile(InputPath); 221 if (std::error_code EC = listBuffer.getError()) 222 return EC; 223 224 // Parse the header list into strings. 225 SmallVector<StringRef, 32> Strings; 226 listBuffer.get()->getBuffer().split(Strings, "\n", -1, false); 227 228 // Collect the header file names from the string list. 229 for (SmallVectorImpl<StringRef>::iterator I = Strings.begin(), 230 E = Strings.end(); 231 I != E; ++I) { 232 StringRef Line = I->trim(); 233 // Ignore comments and empty lines. 234 if (Line.empty() || (Line[0] == '#')) 235 continue; 236 SmallString<256> HeaderFileName; 237 // Prepend header file name prefix if it's not absolute. 238 if (llvm::sys::path::is_absolute(Line)) 239 llvm::sys::path::native(Line, HeaderFileName); 240 else { 241 if (HeaderDirectory.size() != 0) 242 HeaderFileName = HeaderDirectory; 243 else 244 HeaderFileName = CurrentDirectory; 245 llvm::sys::path::append(HeaderFileName, Line); 246 llvm::sys::path::native(HeaderFileName); 247 } 248 // Get canonical form. 249 HeaderFileName = getCanonicalPath(HeaderFileName); 250 // Save the resulting header file path. 251 ProblemFileNames.push_back(std::string(HeaderFileName)); 252 } 253 return std::error_code(); 254 } 255 256 // Load single module map and extract header file list. 257 std::error_code ModularizeUtilities::loadModuleMap( 258 llvm::StringRef InputPath) { 259 // Get file entry for module.modulemap file. 260 auto ModuleMapEntryOrErr = SourceMgr->getFileManager().getFileRef(InputPath); 261 262 // return error if not found. 263 if (!ModuleMapEntryOrErr) { 264 llvm::errs() << "error: File \"" << InputPath << "\" not found.\n"; 265 return errorToErrorCode(ModuleMapEntryOrErr.takeError()); 266 } 267 FileEntryRef ModuleMapEntry = *ModuleMapEntryOrErr; 268 269 // Because the module map parser uses a ForwardingDiagnosticConsumer, 270 // which doesn't forward the BeginSourceFile call, we do it explicitly here. 271 DC.BeginSourceFile(*LangOpts, nullptr); 272 273 // Figure out the home directory for the module map file. 274 DirectoryEntryRef Dir = ModuleMapEntry.getDir(); 275 StringRef DirName(Dir.getName()); 276 if (llvm::sys::path::filename(DirName) == "Modules") { 277 DirName = llvm::sys::path::parent_path(DirName); 278 if (DirName.ends_with(".framework")) { 279 auto FrameworkDirOrErr = FileMgr->getDirectoryRef(DirName); 280 if (!FrameworkDirOrErr) { 281 // This can happen if there's a race between the above check and the 282 // removal of the directory. 283 return errorToErrorCode(FrameworkDirOrErr.takeError()); 284 } 285 Dir = *FrameworkDirOrErr; 286 } 287 } 288 289 std::unique_ptr<ModuleMap> ModMap; 290 ModMap.reset(new ModuleMap(*SourceMgr, *Diagnostics, *LangOpts, 291 Target.get(), *HeaderInfo)); 292 293 // Parse module.modulemap file into module map. 294 if (ModMap->parseModuleMapFile(ModuleMapEntry, false, Dir)) { 295 return std::error_code(1, std::generic_category()); 296 } 297 298 // Do matching end call. 299 DC.EndSourceFile(); 300 301 // Reset missing header count. 302 MissingHeaderCount = 0; 303 304 if (!collectModuleMapHeaders(ModMap.get())) 305 return std::error_code(1, std::generic_category()); 306 307 // Save module map. 308 ModuleMaps.push_back(std::move(ModMap)); 309 310 // Indicate we are using module maps. 311 HasModuleMap = true; 312 313 // Return code of 1 for missing headers. 314 if (MissingHeaderCount) 315 return std::error_code(1, std::generic_category()); 316 317 return std::error_code(); 318 } 319 320 // Collect module map headers. 321 // Walks the modules and collects referenced headers into 322 // HeaderFileNames. 323 bool ModularizeUtilities::collectModuleMapHeaders(clang::ModuleMap *ModMap) { 324 SmallVector<std::pair<StringRef, const clang::Module *>, 0> Vec; 325 for (auto &M : ModMap->modules()) 326 Vec.emplace_back(M.first(), M.second); 327 llvm::sort(Vec, llvm::less_first()); 328 for (auto &I : Vec) 329 if (!collectModuleHeaders(*I.second)) 330 return false; 331 return true; 332 } 333 334 // Collect referenced headers from one module. 335 // Collects the headers referenced in the given module into 336 // HeaderFileNames. 337 bool ModularizeUtilities::collectModuleHeaders(const clang::Module &Mod) { 338 339 // Ignore explicit modules because they often have dependencies 340 // we can't know. 341 if (Mod.IsExplicit) 342 return true; 343 344 // Treat headers in umbrella directory as dependencies. 345 DependentsVector UmbrellaDependents; 346 347 // Recursively do submodules. 348 for (auto *Submodule : Mod.submodules()) 349 collectModuleHeaders(*Submodule); 350 351 if (std::optional<clang::Module::Header> UmbrellaHeader = 352 Mod.getUmbrellaHeaderAsWritten()) { 353 std::string HeaderPath = getCanonicalPath(UmbrellaHeader->Entry.getName()); 354 // Collect umbrella header. 355 HeaderFileNames.push_back(HeaderPath); 356 357 // FUTURE: When needed, umbrella header header collection goes here. 358 } else if (std::optional<clang::Module::DirectoryName> UmbrellaDir = 359 Mod.getUmbrellaDirAsWritten()) { 360 // If there normal headers, assume these are umbrellas and skip collection. 361 if (Mod.getHeaders(Module::HK_Normal).empty()) { 362 // Collect headers in umbrella directory. 363 if (!collectUmbrellaHeaders(UmbrellaDir->Entry.getName(), 364 UmbrellaDependents)) 365 return false; 366 } 367 } 368 369 // We ignore HK_Private, HK_Textual, HK_PrivateTextual, and HK_Excluded, 370 // assuming they are marked as such either because of unsuitability for 371 // modules or because they are meant to be included by another header, 372 // and thus should be ignored by modularize. 373 374 for (const auto &Header : Mod.getHeaders(clang::Module::HK_Normal)) 375 HeaderFileNames.push_back(getCanonicalPath(Header.Entry.getName())); 376 377 int MissingCountThisModule = Mod.MissingHeaders.size(); 378 379 for (int Index = 0; Index < MissingCountThisModule; ++Index) { 380 std::string MissingFile = Mod.MissingHeaders[Index].FileName; 381 SourceLocation Loc = Mod.MissingHeaders[Index].FileNameLoc; 382 errs() << Loc.printToString(*SourceMgr) 383 << ": error : Header not found: " << MissingFile << "\n"; 384 } 385 386 MissingHeaderCount += MissingCountThisModule; 387 388 return true; 389 } 390 391 // Collect headers from an umbrella directory. 392 bool ModularizeUtilities::collectUmbrellaHeaders(StringRef UmbrellaDirName, 393 DependentsVector &Dependents) { 394 // Initialize directory name. 395 SmallString<256> Directory(UmbrellaDirName); 396 // Walk the directory. 397 std::error_code EC; 398 for (llvm::sys::fs::directory_iterator I(Directory.str(), EC), E; I != E; 399 I.increment(EC)) { 400 if (EC) 401 return false; 402 std::string File(I->path()); 403 llvm::ErrorOr<llvm::sys::fs::basic_file_status> Status = I->status(); 404 if (!Status) 405 return false; 406 llvm::sys::fs::file_type Type = Status->type(); 407 // If the file is a directory, ignore the name and recurse. 408 if (Type == llvm::sys::fs::file_type::directory_file) { 409 if (!collectUmbrellaHeaders(File, Dependents)) 410 return false; 411 continue; 412 } 413 // If the file does not have a common header extension, ignore it. 414 if (!isHeader(File)) 415 continue; 416 // Save header name. 417 std::string HeaderPath = getCanonicalPath(File); 418 Dependents.push_back(HeaderPath); 419 } 420 return true; 421 } 422 423 // Replace .. embedded in path for purposes of having 424 // a canonical path. 425 static std::string replaceDotDot(StringRef Path) { 426 SmallString<128> Buffer; 427 llvm::sys::path::const_iterator B = llvm::sys::path::begin(Path), 428 E = llvm::sys::path::end(Path); 429 while (B != E) { 430 if (*B == "..") 431 llvm::sys::path::remove_filename(Buffer); 432 else if (*B != ".") 433 llvm::sys::path::append(Buffer, *B); 434 ++B; 435 } 436 if (Path.ends_with("/") || Path.ends_with("\\")) 437 Buffer.append(1, Path.back()); 438 return Buffer.c_str(); 439 } 440 441 // Convert header path to canonical form. 442 // The canonical form is basically just use forward slashes, and remove "./". 443 // \param FilePath The file path, relative to the module map directory. 444 // \returns The file path in canonical form. 445 std::string ModularizeUtilities::getCanonicalPath(StringRef FilePath) { 446 std::string Tmp(replaceDotDot(FilePath)); 447 std::replace(Tmp.begin(), Tmp.end(), '\\', '/'); 448 StringRef Tmp2(Tmp); 449 if (Tmp2.starts_with("./")) 450 Tmp = std::string(Tmp2.substr(2)); 451 return Tmp; 452 } 453 454 // Check for header file extension. 455 // If the file extension is .h, .inc, or missing, it's 456 // assumed to be a header. 457 // \param FileName The file name. Must not be a directory. 458 // \returns true if it has a header extension or no extension. 459 bool ModularizeUtilities::isHeader(StringRef FileName) { 460 StringRef Extension = llvm::sys::path::extension(FileName); 461 if (Extension.size() == 0) 462 return true; 463 if (Extension.equals_insensitive(".h")) 464 return true; 465 if (Extension.equals_insensitive(".inc")) 466 return true; 467 return false; 468 } 469 470 // Get directory path component from file path. 471 // \returns the component of the given path, which will be 472 // relative if the given path is relative, absolute if the 473 // given path is absolute, or "." if the path has no leading 474 // path component. 475 std::string ModularizeUtilities::getDirectoryFromPath(StringRef Path) { 476 SmallString<256> Directory(Path); 477 sys::path::remove_filename(Directory); 478 if (Directory.size() == 0) 479 return "."; 480 return std::string(Directory); 481 } 482 483 // Add unique problem file. 484 // Also standardizes the path. 485 void ModularizeUtilities::addUniqueProblemFile(std::string FilePath) { 486 FilePath = getCanonicalPath(FilePath); 487 // Don't add if already present. 488 for(auto &TestFilePath : ProblemFileNames) { 489 if (TestFilePath == FilePath) 490 return; 491 } 492 ProblemFileNames.push_back(FilePath); 493 } 494 495 // Add file with no compile errors. 496 // Also standardizes the path. 497 void ModularizeUtilities::addNoCompileErrorsFile(std::string FilePath) { 498 FilePath = getCanonicalPath(FilePath); 499 GoodFileNames.push_back(FilePath); 500 } 501 502 // List problem files. 503 void ModularizeUtilities::displayProblemFiles() { 504 errs() << "\nThese are the files with possible errors:\n\n"; 505 for (auto &ProblemFile : ProblemFileNames) { 506 errs() << ProblemFile << "\n"; 507 } 508 } 509 510 // List files with no problems. 511 void ModularizeUtilities::displayGoodFiles() { 512 errs() << "\nThese are the files with no detected errors:\n\n"; 513 for (auto &GoodFile : HeaderFileNames) { 514 bool Good = true; 515 for (auto &ProblemFile : ProblemFileNames) { 516 if (ProblemFile == GoodFile) { 517 Good = false; 518 break; 519 } 520 } 521 if (Good) 522 errs() << GoodFile << "\n"; 523 } 524 } 525 526 // List files with problem files commented out. 527 void ModularizeUtilities::displayCombinedFiles() { 528 errs() << 529 "\nThese are the combined files, with problem files preceded by #:\n\n"; 530 for (auto &File : HeaderFileNames) { 531 bool Good = true; 532 for (auto &ProblemFile : ProblemFileNames) { 533 if (ProblemFile == File) { 534 Good = false; 535 break; 536 } 537 } 538 errs() << (Good ? "" : "#") << File << "\n"; 539 } 540 } 541