1 //===- ExtractAPI/ExtractAPIConsumer.cpp ------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements the ExtractAPIAction, and ASTConsumer to collect API 11 /// information. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/AST/ASTConcept.h" 16 #include "clang/AST/ASTConsumer.h" 17 #include "clang/AST/ASTContext.h" 18 #include "clang/AST/DeclObjC.h" 19 #include "clang/Basic/DiagnosticFrontend.h" 20 #include "clang/Basic/FileEntry.h" 21 #include "clang/Basic/SourceLocation.h" 22 #include "clang/Basic/SourceManager.h" 23 #include "clang/Basic/TargetInfo.h" 24 #include "clang/ExtractAPI/API.h" 25 #include "clang/ExtractAPI/APIIgnoresList.h" 26 #include "clang/ExtractAPI/ExtractAPIVisitor.h" 27 #include "clang/ExtractAPI/FrontendActions.h" 28 #include "clang/ExtractAPI/Serialization/SymbolGraphSerializer.h" 29 #include "clang/Frontend/ASTConsumers.h" 30 #include "clang/Frontend/CompilerInstance.h" 31 #include "clang/Frontend/FrontendOptions.h" 32 #include "clang/Frontend/MultiplexConsumer.h" 33 #include "clang/InstallAPI/HeaderFile.h" 34 #include "clang/Lex/MacroInfo.h" 35 #include "clang/Lex/PPCallbacks.h" 36 #include "clang/Lex/Preprocessor.h" 37 #include "clang/Lex/PreprocessorOptions.h" 38 #include "llvm/ADT/DenseSet.h" 39 #include "llvm/ADT/STLExtras.h" 40 #include "llvm/ADT/SmallString.h" 41 #include "llvm/ADT/SmallVector.h" 42 #include "llvm/Support/Casting.h" 43 #include "llvm/Support/Error.h" 44 #include "llvm/Support/FileSystem.h" 45 #include "llvm/Support/MemoryBuffer.h" 46 #include "llvm/Support/Path.h" 47 #include "llvm/Support/Regex.h" 48 #include "llvm/Support/raw_ostream.h" 49 #include <memory> 50 #include <optional> 51 #include <utility> 52 53 using namespace clang; 54 using namespace extractapi; 55 56 namespace { 57 58 std::optional<std::string> getRelativeIncludeName(const CompilerInstance &CI, 59 StringRef File, 60 bool *IsQuoted = nullptr) { 61 assert(CI.hasFileManager() && 62 "CompilerInstance does not have a FileNamager!"); 63 64 using namespace llvm::sys; 65 const auto &FS = CI.getVirtualFileSystem(); 66 67 SmallString<128> FilePath(File.begin(), File.end()); 68 FS.makeAbsolute(FilePath); 69 path::remove_dots(FilePath, true); 70 FilePath = path::convert_to_slash(FilePath); 71 File = FilePath; 72 73 // Checks whether `Dir` is a strict path prefix of `File`. If so returns 74 // the prefix length. Otherwise return 0. 75 auto CheckDir = [&](llvm::StringRef Dir) -> unsigned { 76 llvm::SmallString<32> DirPath(Dir.begin(), Dir.end()); 77 FS.makeAbsolute(DirPath); 78 path::remove_dots(DirPath, true); 79 Dir = DirPath; 80 for (auto NI = path::begin(File), NE = path::end(File), 81 DI = path::begin(Dir), DE = path::end(Dir); 82 /*termination condition in loop*/; ++NI, ++DI) { 83 // '.' components in File are ignored. 84 while (NI != NE && *NI == ".") 85 ++NI; 86 if (NI == NE) 87 break; 88 89 // '.' components in Dir are ignored. 90 while (DI != DE && *DI == ".") 91 ++DI; 92 93 // Dir is a prefix of File, up to '.' components and choice of path 94 // separators. 95 if (DI == DE) 96 return NI - path::begin(File); 97 98 // Consider all path separators equal. 99 if (NI->size() == 1 && DI->size() == 1 && 100 path::is_separator(NI->front()) && path::is_separator(DI->front())) 101 continue; 102 103 // Special case Apple .sdk folders since the search path is typically a 104 // symlink like `iPhoneSimulator14.5.sdk` while the file is instead 105 // located in `iPhoneSimulator.sdk` (the real folder). 106 if (NI->ends_with(".sdk") && DI->ends_with(".sdk")) { 107 StringRef NBasename = path::stem(*NI); 108 StringRef DBasename = path::stem(*DI); 109 if (DBasename.starts_with(NBasename)) 110 continue; 111 } 112 113 if (*NI != *DI) 114 break; 115 } 116 return 0; 117 }; 118 119 unsigned PrefixLength = 0; 120 121 // Go through the search paths and find the first one that is a prefix of 122 // the header. 123 for (const auto &Entry : CI.getHeaderSearchOpts().UserEntries) { 124 // Note whether the match is found in a quoted entry. 125 if (IsQuoted) 126 *IsQuoted = Entry.Group == frontend::Quoted; 127 128 if (auto EntryFile = CI.getFileManager().getOptionalFileRef(Entry.Path)) { 129 if (auto HMap = HeaderMap::Create(*EntryFile, CI.getFileManager())) { 130 // If this is a headermap entry, try to reverse lookup the full path 131 // for a spelled name before mapping. 132 StringRef SpelledFilename = HMap->reverseLookupFilename(File); 133 if (!SpelledFilename.empty()) 134 return SpelledFilename.str(); 135 136 // No matching mapping in this headermap, try next search entry. 137 continue; 138 } 139 } 140 141 // Entry is a directory search entry, try to check if it's a prefix of File. 142 PrefixLength = CheckDir(Entry.Path); 143 if (PrefixLength > 0) { 144 // The header is found in a framework path, construct the framework-style 145 // include name `<Framework/Header.h>` 146 if (Entry.IsFramework) { 147 SmallVector<StringRef, 4> Matches; 148 clang::installapi::HeaderFile::getFrameworkIncludeRule().match( 149 File, &Matches); 150 // Returned matches are always in stable order. 151 if (Matches.size() != 4) 152 return std::nullopt; 153 154 return path::convert_to_slash( 155 (Matches[1].drop_front(Matches[1].rfind('/') + 1) + "/" + 156 Matches[3]) 157 .str()); 158 } 159 160 // The header is found in a normal search path, strip the search path 161 // prefix to get an include name. 162 return path::convert_to_slash(File.drop_front(PrefixLength)); 163 } 164 } 165 166 // Couldn't determine a include name, use full path instead. 167 return std::nullopt; 168 } 169 170 std::optional<std::string> getRelativeIncludeName(const CompilerInstance &CI, 171 FileEntryRef FE, 172 bool *IsQuoted = nullptr) { 173 return getRelativeIncludeName(CI, FE.getNameAsRequested(), IsQuoted); 174 } 175 176 struct LocationFileChecker { 177 bool operator()(SourceLocation Loc) { 178 // If the loc refers to a macro expansion we need to first get the file 179 // location of the expansion. 180 auto &SM = CI.getSourceManager(); 181 auto FileLoc = SM.getFileLoc(Loc); 182 FileID FID = SM.getFileID(FileLoc); 183 if (FID.isInvalid()) 184 return false; 185 186 OptionalFileEntryRef File = SM.getFileEntryRefForID(FID); 187 if (!File) 188 return false; 189 190 if (KnownFileEntries.count(*File)) 191 return true; 192 193 if (ExternalFileEntries.count(*File)) 194 return false; 195 196 // Try to reduce the include name the same way we tried to include it. 197 bool IsQuoted = false; 198 if (auto IncludeName = getRelativeIncludeName(CI, *File, &IsQuoted)) 199 if (llvm::any_of(KnownFiles, 200 [&IsQuoted, &IncludeName](const auto &KnownFile) { 201 return KnownFile.first.equals(*IncludeName) && 202 KnownFile.second == IsQuoted; 203 })) { 204 KnownFileEntries.insert(*File); 205 return true; 206 } 207 208 // Record that the file was not found to avoid future reverse lookup for 209 // the same file. 210 ExternalFileEntries.insert(*File); 211 return false; 212 } 213 214 LocationFileChecker(const CompilerInstance &CI, 215 SmallVector<std::pair<SmallString<32>, bool>> &KnownFiles) 216 : CI(CI), KnownFiles(KnownFiles), ExternalFileEntries() { 217 for (const auto &KnownFile : KnownFiles) 218 if (auto FileEntry = CI.getFileManager().getFile(KnownFile.first)) 219 KnownFileEntries.insert(*FileEntry); 220 } 221 222 private: 223 const CompilerInstance &CI; 224 SmallVector<std::pair<SmallString<32>, bool>> &KnownFiles; 225 llvm::DenseSet<const FileEntry *> KnownFileEntries; 226 llvm::DenseSet<const FileEntry *> ExternalFileEntries; 227 }; 228 229 struct BatchExtractAPIVisitor : ExtractAPIVisitor<BatchExtractAPIVisitor> { 230 bool shouldDeclBeIncluded(const Decl *D) const { 231 bool ShouldBeIncluded = true; 232 // Check that we have the definition for redeclarable types. 233 if (auto *TD = llvm::dyn_cast<TagDecl>(D)) 234 ShouldBeIncluded = TD->isThisDeclarationADefinition(); 235 else if (auto *Interface = llvm::dyn_cast<ObjCInterfaceDecl>(D)) 236 ShouldBeIncluded = Interface->isThisDeclarationADefinition(); 237 else if (auto *Protocol = llvm::dyn_cast<ObjCProtocolDecl>(D)) 238 ShouldBeIncluded = Protocol->isThisDeclarationADefinition(); 239 240 ShouldBeIncluded = ShouldBeIncluded && LCF(D->getLocation()); 241 return ShouldBeIncluded; 242 } 243 244 BatchExtractAPIVisitor(LocationFileChecker &LCF, ASTContext &Context, 245 APISet &API) 246 : ExtractAPIVisitor<BatchExtractAPIVisitor>(Context, API), LCF(LCF) {} 247 248 private: 249 LocationFileChecker &LCF; 250 }; 251 252 class WrappingExtractAPIConsumer : public ASTConsumer { 253 public: 254 WrappingExtractAPIConsumer(ASTContext &Context, APISet &API) 255 : Visitor(Context, API) {} 256 257 void HandleTranslationUnit(ASTContext &Context) override { 258 // Use ExtractAPIVisitor to traverse symbol declarations in the context. 259 Visitor.TraverseDecl(Context.getTranslationUnitDecl()); 260 } 261 262 private: 263 ExtractAPIVisitor<> Visitor; 264 }; 265 266 class ExtractAPIConsumer : public ASTConsumer { 267 public: 268 ExtractAPIConsumer(ASTContext &Context, 269 std::unique_ptr<LocationFileChecker> LCF, APISet &API) 270 : Visitor(*LCF, Context, API), LCF(std::move(LCF)) {} 271 272 void HandleTranslationUnit(ASTContext &Context) override { 273 // Use ExtractAPIVisitor to traverse symbol declarations in the context. 274 Visitor.TraverseDecl(Context.getTranslationUnitDecl()); 275 } 276 277 private: 278 BatchExtractAPIVisitor Visitor; 279 std::unique_ptr<LocationFileChecker> LCF; 280 }; 281 282 class MacroCallback : public PPCallbacks { 283 public: 284 MacroCallback(const SourceManager &SM, APISet &API, Preprocessor &PP) 285 : SM(SM), API(API), PP(PP) {} 286 287 void MacroDefined(const Token &MacroNameToken, 288 const MacroDirective *MD) override { 289 auto *MacroInfo = MD->getMacroInfo(); 290 291 if (MacroInfo->isBuiltinMacro()) 292 return; 293 294 auto SourceLoc = MacroNameToken.getLocation(); 295 if (SM.isWrittenInBuiltinFile(SourceLoc) || 296 SM.isWrittenInCommandLineFile(SourceLoc)) 297 return; 298 299 PendingMacros.emplace_back(MacroNameToken, MD); 300 } 301 302 // If a macro gets undefined at some point during preprocessing of the inputs 303 // it means that it isn't an exposed API and we should therefore not add a 304 // macro definition for it. 305 void MacroUndefined(const Token &MacroNameToken, const MacroDefinition &MD, 306 const MacroDirective *Undef) override { 307 // If this macro wasn't previously defined we don't need to do anything 308 // here. 309 if (!Undef) 310 return; 311 312 llvm::erase_if(PendingMacros, [&MD, this](const PendingMacro &PM) { 313 return MD.getMacroInfo()->isIdenticalTo(*PM.MD->getMacroInfo(), PP, 314 /*Syntactically*/ false); 315 }); 316 } 317 318 void EndOfMainFile() override { 319 for (auto &PM : PendingMacros) { 320 // `isUsedForHeaderGuard` is only set when the preprocessor leaves the 321 // file so check for it here. 322 if (PM.MD->getMacroInfo()->isUsedForHeaderGuard()) 323 continue; 324 325 if (!shouldMacroBeIncluded(PM)) 326 continue; 327 328 StringRef Name = PM.MacroNameToken.getIdentifierInfo()->getName(); 329 PresumedLoc Loc = SM.getPresumedLoc(PM.MacroNameToken.getLocation()); 330 StringRef USR = 331 API.recordUSRForMacro(Name, PM.MacroNameToken.getLocation(), SM); 332 333 API.addMacroDefinition( 334 Name, USR, Loc, 335 DeclarationFragmentsBuilder::getFragmentsForMacro(Name, PM.MD), 336 DeclarationFragmentsBuilder::getSubHeadingForMacro(Name), 337 SM.isInSystemHeader(PM.MacroNameToken.getLocation())); 338 } 339 340 PendingMacros.clear(); 341 } 342 343 protected: 344 struct PendingMacro { 345 Token MacroNameToken; 346 const MacroDirective *MD; 347 348 PendingMacro(const Token &MacroNameToken, const MacroDirective *MD) 349 : MacroNameToken(MacroNameToken), MD(MD) {} 350 }; 351 352 virtual bool shouldMacroBeIncluded(const PendingMacro &PM) { return true; } 353 354 const SourceManager &SM; 355 APISet &API; 356 Preprocessor &PP; 357 llvm::SmallVector<PendingMacro> PendingMacros; 358 }; 359 360 class APIMacroCallback : public MacroCallback { 361 public: 362 APIMacroCallback(const SourceManager &SM, APISet &API, Preprocessor &PP, 363 LocationFileChecker &LCF) 364 : MacroCallback(SM, API, PP), LCF(LCF) {} 365 366 bool shouldMacroBeIncluded(const PendingMacro &PM) override { 367 // Do not include macros from external files 368 return LCF(PM.MacroNameToken.getLocation()); 369 } 370 371 private: 372 LocationFileChecker &LCF; 373 }; 374 375 } // namespace 376 377 void ExtractAPIActionBase::ImplEndSourceFileAction() { 378 if (!OS) 379 return; 380 381 // Setup a SymbolGraphSerializer to write out collected API information in 382 // the Symbol Graph format. 383 // FIXME: Make the kind of APISerializer configurable. 384 SymbolGraphSerializer SGSerializer(*API, IgnoresList); 385 SGSerializer.serialize(*OS); 386 OS.reset(); 387 } 388 389 std::unique_ptr<raw_pwrite_stream> 390 ExtractAPIAction::CreateOutputFile(CompilerInstance &CI, StringRef InFile) { 391 std::unique_ptr<raw_pwrite_stream> OS; 392 OS = CI.createDefaultOutputFile(/*Binary=*/false, InFile, 393 /*Extension=*/"json", 394 /*RemoveFileOnSignal=*/false); 395 if (!OS) 396 return nullptr; 397 return OS; 398 } 399 400 std::unique_ptr<ASTConsumer> 401 ExtractAPIAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { 402 OS = CreateOutputFile(CI, InFile); 403 404 if (!OS) 405 return nullptr; 406 407 auto ProductName = CI.getFrontendOpts().ProductName; 408 409 // Now that we have enough information about the language options and the 410 // target triple, let's create the APISet before anyone uses it. 411 API = std::make_unique<APISet>( 412 CI.getTarget().getTriple(), 413 CI.getFrontendOpts().Inputs.back().getKind().getLanguage(), ProductName); 414 415 auto LCF = std::make_unique<LocationFileChecker>(CI, KnownInputFiles); 416 417 CI.getPreprocessor().addPPCallbacks(std::make_unique<APIMacroCallback>( 418 CI.getSourceManager(), *API, CI.getPreprocessor(), *LCF)); 419 420 // Do not include location in anonymous decls. 421 PrintingPolicy Policy = CI.getASTContext().getPrintingPolicy(); 422 Policy.AnonymousTagLocations = false; 423 CI.getASTContext().setPrintingPolicy(Policy); 424 425 if (!CI.getFrontendOpts().ExtractAPIIgnoresFileList.empty()) { 426 llvm::handleAllErrors( 427 APIIgnoresList::create(CI.getFrontendOpts().ExtractAPIIgnoresFileList, 428 CI.getFileManager()) 429 .moveInto(IgnoresList), 430 [&CI](const IgnoresFileNotFound &Err) { 431 CI.getDiagnostics().Report( 432 diag::err_extract_api_ignores_file_not_found) 433 << Err.Path; 434 }); 435 } 436 437 return std::make_unique<ExtractAPIConsumer>(CI.getASTContext(), 438 std::move(LCF), *API); 439 } 440 441 bool ExtractAPIAction::PrepareToExecuteAction(CompilerInstance &CI) { 442 auto &Inputs = CI.getFrontendOpts().Inputs; 443 if (Inputs.empty()) 444 return true; 445 446 if (!CI.hasFileManager()) 447 if (!CI.createFileManager()) 448 return false; 449 450 auto Kind = Inputs[0].getKind(); 451 452 // Convert the header file inputs into a single input buffer. 453 SmallString<256> HeaderContents; 454 bool IsQuoted = false; 455 for (const FrontendInputFile &FIF : Inputs) { 456 if (Kind.isObjectiveC()) 457 HeaderContents += "#import"; 458 else 459 HeaderContents += "#include"; 460 461 StringRef FilePath = FIF.getFile(); 462 if (auto RelativeName = getRelativeIncludeName(CI, FilePath, &IsQuoted)) { 463 if (IsQuoted) 464 HeaderContents += " \""; 465 else 466 HeaderContents += " <"; 467 468 HeaderContents += *RelativeName; 469 470 if (IsQuoted) 471 HeaderContents += "\"\n"; 472 else 473 HeaderContents += ">\n"; 474 KnownInputFiles.emplace_back(static_cast<SmallString<32>>(*RelativeName), 475 IsQuoted); 476 } else { 477 HeaderContents += " \""; 478 HeaderContents += FilePath; 479 HeaderContents += "\"\n"; 480 KnownInputFiles.emplace_back(FilePath, true); 481 } 482 } 483 484 if (CI.getHeaderSearchOpts().Verbose) 485 CI.getVerboseOutputStream() << getInputBufferName() << ":\n" 486 << HeaderContents << "\n"; 487 488 Buffer = llvm::MemoryBuffer::getMemBufferCopy(HeaderContents, 489 getInputBufferName()); 490 491 // Set that buffer up as our "real" input in the CompilerInstance. 492 Inputs.clear(); 493 Inputs.emplace_back(Buffer->getMemBufferRef(), Kind, /*IsSystem*/ false); 494 495 return true; 496 } 497 498 void ExtractAPIAction::EndSourceFileAction() { ImplEndSourceFileAction(); } 499 500 std::unique_ptr<ASTConsumer> 501 WrappingExtractAPIAction::CreateASTConsumer(CompilerInstance &CI, 502 StringRef InFile) { 503 auto OtherConsumer = WrapperFrontendAction::CreateASTConsumer(CI, InFile); 504 if (!OtherConsumer) 505 return nullptr; 506 507 CreatedASTConsumer = true; 508 509 OS = CreateOutputFile(CI, InFile); 510 if (!OS) 511 return nullptr; 512 513 auto ProductName = CI.getFrontendOpts().ProductName; 514 515 // Now that we have enough information about the language options and the 516 // target triple, let's create the APISet before anyone uses it. 517 API = std::make_unique<APISet>( 518 CI.getTarget().getTriple(), 519 CI.getFrontendOpts().Inputs.back().getKind().getLanguage(), ProductName); 520 521 CI.getPreprocessor().addPPCallbacks(std::make_unique<MacroCallback>( 522 CI.getSourceManager(), *API, CI.getPreprocessor())); 523 524 // Do not include location in anonymous decls. 525 PrintingPolicy Policy = CI.getASTContext().getPrintingPolicy(); 526 Policy.AnonymousTagLocations = false; 527 CI.getASTContext().setPrintingPolicy(Policy); 528 529 if (!CI.getFrontendOpts().ExtractAPIIgnoresFileList.empty()) { 530 llvm::handleAllErrors( 531 APIIgnoresList::create(CI.getFrontendOpts().ExtractAPIIgnoresFileList, 532 CI.getFileManager()) 533 .moveInto(IgnoresList), 534 [&CI](const IgnoresFileNotFound &Err) { 535 CI.getDiagnostics().Report( 536 diag::err_extract_api_ignores_file_not_found) 537 << Err.Path; 538 }); 539 } 540 541 auto WrappingConsumer = 542 std::make_unique<WrappingExtractAPIConsumer>(CI.getASTContext(), *API); 543 std::vector<std::unique_ptr<ASTConsumer>> Consumers; 544 Consumers.push_back(std::move(OtherConsumer)); 545 Consumers.push_back(std::move(WrappingConsumer)); 546 547 return std::make_unique<MultiplexConsumer>(std::move(Consumers)); 548 } 549 550 void WrappingExtractAPIAction::EndSourceFileAction() { 551 // Invoke wrapped action's method. 552 WrapperFrontendAction::EndSourceFileAction(); 553 554 if (CreatedASTConsumer) { 555 ImplEndSourceFileAction(); 556 } 557 } 558 559 std::unique_ptr<raw_pwrite_stream> 560 WrappingExtractAPIAction::CreateOutputFile(CompilerInstance &CI, 561 StringRef InFile) { 562 std::unique_ptr<raw_pwrite_stream> OS; 563 std::string OutputDir = CI.getFrontendOpts().SymbolGraphOutputDir; 564 565 // The symbol graphs need to be generated as a side effect of regular 566 // compilation so the output should be dumped in the directory provided with 567 // the command line option. 568 llvm::SmallString<128> OutFilePath(OutputDir); 569 auto Seperator = llvm::sys::path::get_separator(); 570 auto Infilename = llvm::sys::path::filename(InFile); 571 OutFilePath.append({Seperator, Infilename}); 572 llvm::sys::path::replace_extension(OutFilePath, "json"); 573 // StringRef outputFilePathref = *OutFilePath; 574 575 // don't use the default output file 576 OS = CI.createOutputFile(/*OutputPath=*/OutFilePath, /*Binary=*/false, 577 /*RemoveFileOnSignal=*/true, 578 /*UseTemporary=*/true, 579 /*CreateMissingDirectories=*/true); 580 if (!OS) 581 return nullptr; 582 return OS; 583 } 584