xref: /llvm-project/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp (revision 209a1e8dfdf1c104dd53b50eb196d6bc0dd01659)
1 //===- ExtractAPI/ExtractAPIConsumer.cpp ------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements the ExtractAPIAction, and ASTConsumer to collect API
11 /// information.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/AST/ASTConcept.h"
16 #include "clang/AST/ASTConsumer.h"
17 #include "clang/AST/ASTContext.h"
18 #include "clang/AST/DeclObjC.h"
19 #include "clang/Basic/DiagnosticFrontend.h"
20 #include "clang/Basic/FileEntry.h"
21 #include "clang/Basic/SourceLocation.h"
22 #include "clang/Basic/SourceManager.h"
23 #include "clang/Basic/TargetInfo.h"
24 #include "clang/ExtractAPI/API.h"
25 #include "clang/ExtractAPI/APIIgnoresList.h"
26 #include "clang/ExtractAPI/ExtractAPIVisitor.h"
27 #include "clang/ExtractAPI/FrontendActions.h"
28 #include "clang/ExtractAPI/Serialization/SymbolGraphSerializer.h"
29 #include "clang/Frontend/ASTConsumers.h"
30 #include "clang/Frontend/CompilerInstance.h"
31 #include "clang/Frontend/FrontendOptions.h"
32 #include "clang/Frontend/MultiplexConsumer.h"
33 #include "clang/InstallAPI/HeaderFile.h"
34 #include "clang/Lex/MacroInfo.h"
35 #include "clang/Lex/PPCallbacks.h"
36 #include "clang/Lex/Preprocessor.h"
37 #include "clang/Lex/PreprocessorOptions.h"
38 #include "llvm/ADT/DenseSet.h"
39 #include "llvm/ADT/STLExtras.h"
40 #include "llvm/ADT/SmallString.h"
41 #include "llvm/ADT/SmallVector.h"
42 #include "llvm/Support/Casting.h"
43 #include "llvm/Support/Error.h"
44 #include "llvm/Support/FileSystem.h"
45 #include "llvm/Support/MemoryBuffer.h"
46 #include "llvm/Support/Path.h"
47 #include "llvm/Support/Regex.h"
48 #include "llvm/Support/raw_ostream.h"
49 #include <memory>
50 #include <optional>
51 #include <utility>
52 
53 using namespace clang;
54 using namespace extractapi;
55 
56 namespace {
57 
58 std::optional<std::string> getRelativeIncludeName(const CompilerInstance &CI,
59                                                   StringRef File,
60                                                   bool *IsQuoted = nullptr) {
61   assert(CI.hasFileManager() &&
62          "CompilerInstance does not have a FileNamager!");
63 
64   using namespace llvm::sys;
65   const auto &FS = CI.getVirtualFileSystem();
66 
67   SmallString<128> FilePath(File.begin(), File.end());
68   FS.makeAbsolute(FilePath);
69   path::remove_dots(FilePath, true);
70   FilePath = path::convert_to_slash(FilePath);
71   File = FilePath;
72 
73   // Checks whether `Dir` is a strict path prefix of `File`. If so returns
74   // the prefix length. Otherwise return 0.
75   auto CheckDir = [&](llvm::StringRef Dir) -> unsigned {
76     llvm::SmallString<32> DirPath(Dir.begin(), Dir.end());
77     FS.makeAbsolute(DirPath);
78     path::remove_dots(DirPath, true);
79     Dir = DirPath;
80     for (auto NI = path::begin(File), NE = path::end(File),
81               DI = path::begin(Dir), DE = path::end(Dir);
82          /*termination condition in loop*/; ++NI, ++DI) {
83       // '.' components in File are ignored.
84       while (NI != NE && *NI == ".")
85         ++NI;
86       if (NI == NE)
87         break;
88 
89       // '.' components in Dir are ignored.
90       while (DI != DE && *DI == ".")
91         ++DI;
92 
93       // Dir is a prefix of File, up to '.' components and choice of path
94       // separators.
95       if (DI == DE)
96         return NI - path::begin(File);
97 
98       // Consider all path separators equal.
99       if (NI->size() == 1 && DI->size() == 1 &&
100           path::is_separator(NI->front()) && path::is_separator(DI->front()))
101         continue;
102 
103       // Special case Apple .sdk folders since the search path is typically a
104       // symlink like `iPhoneSimulator14.5.sdk` while the file is instead
105       // located in `iPhoneSimulator.sdk` (the real folder).
106       if (NI->ends_with(".sdk") && DI->ends_with(".sdk")) {
107         StringRef NBasename = path::stem(*NI);
108         StringRef DBasename = path::stem(*DI);
109         if (DBasename.starts_with(NBasename))
110           continue;
111       }
112 
113       if (*NI != *DI)
114         break;
115     }
116     return 0;
117   };
118 
119   unsigned PrefixLength = 0;
120 
121   // Go through the search paths and find the first one that is a prefix of
122   // the header.
123   for (const auto &Entry : CI.getHeaderSearchOpts().UserEntries) {
124     // Note whether the match is found in a quoted entry.
125     if (IsQuoted)
126       *IsQuoted = Entry.Group == frontend::Quoted;
127 
128     if (auto EntryFile = CI.getFileManager().getOptionalFileRef(Entry.Path)) {
129       if (auto HMap = HeaderMap::Create(*EntryFile, CI.getFileManager())) {
130         // If this is a headermap entry, try to reverse lookup the full path
131         // for a spelled name before mapping.
132         StringRef SpelledFilename = HMap->reverseLookupFilename(File);
133         if (!SpelledFilename.empty())
134           return SpelledFilename.str();
135 
136         // No matching mapping in this headermap, try next search entry.
137         continue;
138       }
139     }
140 
141     // Entry is a directory search entry, try to check if it's a prefix of File.
142     PrefixLength = CheckDir(Entry.Path);
143     if (PrefixLength > 0) {
144       // The header is found in a framework path, construct the framework-style
145       // include name `<Framework/Header.h>`
146       if (Entry.IsFramework) {
147         SmallVector<StringRef, 4> Matches;
148         clang::installapi::HeaderFile::getFrameworkIncludeRule().match(
149             File, &Matches);
150         // Returned matches are always in stable order.
151         if (Matches.size() != 4)
152           return std::nullopt;
153 
154         return path::convert_to_slash(
155             (Matches[1].drop_front(Matches[1].rfind('/') + 1) + "/" +
156              Matches[3])
157                 .str());
158       }
159 
160       // The header is found in a normal search path, strip the search path
161       // prefix to get an include name.
162       return path::convert_to_slash(File.drop_front(PrefixLength));
163     }
164   }
165 
166   // Couldn't determine a include name, use full path instead.
167   return std::nullopt;
168 }
169 
170 std::optional<std::string> getRelativeIncludeName(const CompilerInstance &CI,
171                                                   FileEntryRef FE,
172                                                   bool *IsQuoted = nullptr) {
173   return getRelativeIncludeName(CI, FE.getNameAsRequested(), IsQuoted);
174 }
175 
176 struct LocationFileChecker {
177   bool operator()(SourceLocation Loc) {
178     // If the loc refers to a macro expansion we need to first get the file
179     // location of the expansion.
180     auto &SM = CI.getSourceManager();
181     auto FileLoc = SM.getFileLoc(Loc);
182     FileID FID = SM.getFileID(FileLoc);
183     if (FID.isInvalid())
184       return false;
185 
186     OptionalFileEntryRef File = SM.getFileEntryRefForID(FID);
187     if (!File)
188       return false;
189 
190     if (KnownFileEntries.count(*File))
191       return true;
192 
193     if (ExternalFileEntries.count(*File))
194       return false;
195 
196     // Try to reduce the include name the same way we tried to include it.
197     bool IsQuoted = false;
198     if (auto IncludeName = getRelativeIncludeName(CI, *File, &IsQuoted))
199       if (llvm::any_of(KnownFiles,
200                        [&IsQuoted, &IncludeName](const auto &KnownFile) {
201                          return KnownFile.first.equals(*IncludeName) &&
202                                 KnownFile.second == IsQuoted;
203                        })) {
204         KnownFileEntries.insert(*File);
205         return true;
206       }
207 
208     // Record that the file was not found to avoid future reverse lookup for
209     // the same file.
210     ExternalFileEntries.insert(*File);
211     return false;
212   }
213 
214   LocationFileChecker(const CompilerInstance &CI,
215                       SmallVector<std::pair<SmallString<32>, bool>> &KnownFiles)
216       : CI(CI), KnownFiles(KnownFiles), ExternalFileEntries() {
217     for (const auto &KnownFile : KnownFiles)
218       if (auto FileEntry = CI.getFileManager().getFile(KnownFile.first))
219         KnownFileEntries.insert(*FileEntry);
220   }
221 
222 private:
223   const CompilerInstance &CI;
224   SmallVector<std::pair<SmallString<32>, bool>> &KnownFiles;
225   llvm::DenseSet<const FileEntry *> KnownFileEntries;
226   llvm::DenseSet<const FileEntry *> ExternalFileEntries;
227 };
228 
229 struct BatchExtractAPIVisitor : ExtractAPIVisitor<BatchExtractAPIVisitor> {
230   bool shouldDeclBeIncluded(const Decl *D) const {
231     bool ShouldBeIncluded = true;
232     // Check that we have the definition for redeclarable types.
233     if (auto *TD = llvm::dyn_cast<TagDecl>(D))
234       ShouldBeIncluded = TD->isThisDeclarationADefinition();
235     else if (auto *Interface = llvm::dyn_cast<ObjCInterfaceDecl>(D))
236       ShouldBeIncluded = Interface->isThisDeclarationADefinition();
237     else if (auto *Protocol = llvm::dyn_cast<ObjCProtocolDecl>(D))
238       ShouldBeIncluded = Protocol->isThisDeclarationADefinition();
239 
240     ShouldBeIncluded = ShouldBeIncluded && LCF(D->getLocation());
241     return ShouldBeIncluded;
242   }
243 
244   BatchExtractAPIVisitor(LocationFileChecker &LCF, ASTContext &Context,
245                          APISet &API)
246       : ExtractAPIVisitor<BatchExtractAPIVisitor>(Context, API), LCF(LCF) {}
247 
248 private:
249   LocationFileChecker &LCF;
250 };
251 
252 class WrappingExtractAPIConsumer : public ASTConsumer {
253 public:
254   WrappingExtractAPIConsumer(ASTContext &Context, APISet &API)
255       : Visitor(Context, API) {}
256 
257   void HandleTranslationUnit(ASTContext &Context) override {
258     // Use ExtractAPIVisitor to traverse symbol declarations in the context.
259     Visitor.TraverseDecl(Context.getTranslationUnitDecl());
260   }
261 
262 private:
263   ExtractAPIVisitor<> Visitor;
264 };
265 
266 class ExtractAPIConsumer : public ASTConsumer {
267 public:
268   ExtractAPIConsumer(ASTContext &Context,
269                      std::unique_ptr<LocationFileChecker> LCF, APISet &API)
270       : Visitor(*LCF, Context, API), LCF(std::move(LCF)) {}
271 
272   void HandleTranslationUnit(ASTContext &Context) override {
273     // Use ExtractAPIVisitor to traverse symbol declarations in the context.
274     Visitor.TraverseDecl(Context.getTranslationUnitDecl());
275   }
276 
277 private:
278   BatchExtractAPIVisitor Visitor;
279   std::unique_ptr<LocationFileChecker> LCF;
280 };
281 
282 class MacroCallback : public PPCallbacks {
283 public:
284   MacroCallback(const SourceManager &SM, APISet &API, Preprocessor &PP)
285       : SM(SM), API(API), PP(PP) {}
286 
287   void MacroDefined(const Token &MacroNameToken,
288                     const MacroDirective *MD) override {
289     auto *MacroInfo = MD->getMacroInfo();
290 
291     if (MacroInfo->isBuiltinMacro())
292       return;
293 
294     auto SourceLoc = MacroNameToken.getLocation();
295     if (SM.isWrittenInBuiltinFile(SourceLoc) ||
296         SM.isWrittenInCommandLineFile(SourceLoc))
297       return;
298 
299     PendingMacros.emplace_back(MacroNameToken, MD);
300   }
301 
302   // If a macro gets undefined at some point during preprocessing of the inputs
303   // it means that it isn't an exposed API and we should therefore not add a
304   // macro definition for it.
305   void MacroUndefined(const Token &MacroNameToken, const MacroDefinition &MD,
306                       const MacroDirective *Undef) override {
307     // If this macro wasn't previously defined we don't need to do anything
308     // here.
309     if (!Undef)
310       return;
311 
312     llvm::erase_if(PendingMacros, [&MD, this](const PendingMacro &PM) {
313       return MD.getMacroInfo()->isIdenticalTo(*PM.MD->getMacroInfo(), PP,
314                                               /*Syntactically*/ false);
315     });
316   }
317 
318   void EndOfMainFile() override {
319     for (auto &PM : PendingMacros) {
320       // `isUsedForHeaderGuard` is only set when the preprocessor leaves the
321       // file so check for it here.
322       if (PM.MD->getMacroInfo()->isUsedForHeaderGuard())
323         continue;
324 
325       if (!shouldMacroBeIncluded(PM))
326         continue;
327 
328       StringRef Name = PM.MacroNameToken.getIdentifierInfo()->getName();
329       PresumedLoc Loc = SM.getPresumedLoc(PM.MacroNameToken.getLocation());
330       StringRef USR =
331           API.recordUSRForMacro(Name, PM.MacroNameToken.getLocation(), SM);
332 
333       API.addMacroDefinition(
334           Name, USR, Loc,
335           DeclarationFragmentsBuilder::getFragmentsForMacro(Name, PM.MD),
336           DeclarationFragmentsBuilder::getSubHeadingForMacro(Name),
337           SM.isInSystemHeader(PM.MacroNameToken.getLocation()));
338     }
339 
340     PendingMacros.clear();
341   }
342 
343 protected:
344   struct PendingMacro {
345     Token MacroNameToken;
346     const MacroDirective *MD;
347 
348     PendingMacro(const Token &MacroNameToken, const MacroDirective *MD)
349         : MacroNameToken(MacroNameToken), MD(MD) {}
350   };
351 
352   virtual bool shouldMacroBeIncluded(const PendingMacro &PM) { return true; }
353 
354   const SourceManager &SM;
355   APISet &API;
356   Preprocessor &PP;
357   llvm::SmallVector<PendingMacro> PendingMacros;
358 };
359 
360 class APIMacroCallback : public MacroCallback {
361 public:
362   APIMacroCallback(const SourceManager &SM, APISet &API, Preprocessor &PP,
363                    LocationFileChecker &LCF)
364       : MacroCallback(SM, API, PP), LCF(LCF) {}
365 
366   bool shouldMacroBeIncluded(const PendingMacro &PM) override {
367     // Do not include macros from external files
368     return LCF(PM.MacroNameToken.getLocation());
369   }
370 
371 private:
372   LocationFileChecker &LCF;
373 };
374 
375 } // namespace
376 
377 void ExtractAPIActionBase::ImplEndSourceFileAction() {
378   if (!OS)
379     return;
380 
381   // Setup a SymbolGraphSerializer to write out collected API information in
382   // the Symbol Graph format.
383   // FIXME: Make the kind of APISerializer configurable.
384   SymbolGraphSerializer SGSerializer(*API, IgnoresList);
385   SGSerializer.serialize(*OS);
386   OS.reset();
387 }
388 
389 std::unique_ptr<raw_pwrite_stream>
390 ExtractAPIAction::CreateOutputFile(CompilerInstance &CI, StringRef InFile) {
391   std::unique_ptr<raw_pwrite_stream> OS;
392   OS = CI.createDefaultOutputFile(/*Binary=*/false, InFile,
393                                   /*Extension=*/"json",
394                                   /*RemoveFileOnSignal=*/false);
395   if (!OS)
396     return nullptr;
397   return OS;
398 }
399 
400 std::unique_ptr<ASTConsumer>
401 ExtractAPIAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
402   OS = CreateOutputFile(CI, InFile);
403 
404   if (!OS)
405     return nullptr;
406 
407   auto ProductName = CI.getFrontendOpts().ProductName;
408 
409   // Now that we have enough information about the language options and the
410   // target triple, let's create the APISet before anyone uses it.
411   API = std::make_unique<APISet>(
412       CI.getTarget().getTriple(),
413       CI.getFrontendOpts().Inputs.back().getKind().getLanguage(), ProductName);
414 
415   auto LCF = std::make_unique<LocationFileChecker>(CI, KnownInputFiles);
416 
417   CI.getPreprocessor().addPPCallbacks(std::make_unique<APIMacroCallback>(
418       CI.getSourceManager(), *API, CI.getPreprocessor(), *LCF));
419 
420   // Do not include location in anonymous decls.
421   PrintingPolicy Policy = CI.getASTContext().getPrintingPolicy();
422   Policy.AnonymousTagLocations = false;
423   CI.getASTContext().setPrintingPolicy(Policy);
424 
425   if (!CI.getFrontendOpts().ExtractAPIIgnoresFileList.empty()) {
426     llvm::handleAllErrors(
427         APIIgnoresList::create(CI.getFrontendOpts().ExtractAPIIgnoresFileList,
428                                CI.getFileManager())
429             .moveInto(IgnoresList),
430         [&CI](const IgnoresFileNotFound &Err) {
431           CI.getDiagnostics().Report(
432               diag::err_extract_api_ignores_file_not_found)
433               << Err.Path;
434         });
435   }
436 
437   return std::make_unique<ExtractAPIConsumer>(CI.getASTContext(),
438                                               std::move(LCF), *API);
439 }
440 
441 bool ExtractAPIAction::PrepareToExecuteAction(CompilerInstance &CI) {
442   auto &Inputs = CI.getFrontendOpts().Inputs;
443   if (Inputs.empty())
444     return true;
445 
446   if (!CI.hasFileManager())
447     if (!CI.createFileManager())
448       return false;
449 
450   auto Kind = Inputs[0].getKind();
451 
452   // Convert the header file inputs into a single input buffer.
453   SmallString<256> HeaderContents;
454   bool IsQuoted = false;
455   for (const FrontendInputFile &FIF : Inputs) {
456     if (Kind.isObjectiveC())
457       HeaderContents += "#import";
458     else
459       HeaderContents += "#include";
460 
461     StringRef FilePath = FIF.getFile();
462     if (auto RelativeName = getRelativeIncludeName(CI, FilePath, &IsQuoted)) {
463       if (IsQuoted)
464         HeaderContents += " \"";
465       else
466         HeaderContents += " <";
467 
468       HeaderContents += *RelativeName;
469 
470       if (IsQuoted)
471         HeaderContents += "\"\n";
472       else
473         HeaderContents += ">\n";
474       KnownInputFiles.emplace_back(static_cast<SmallString<32>>(*RelativeName),
475                                    IsQuoted);
476     } else {
477       HeaderContents += " \"";
478       HeaderContents += FilePath;
479       HeaderContents += "\"\n";
480       KnownInputFiles.emplace_back(FilePath, true);
481     }
482   }
483 
484   if (CI.getHeaderSearchOpts().Verbose)
485     CI.getVerboseOutputStream() << getInputBufferName() << ":\n"
486                                 << HeaderContents << "\n";
487 
488   Buffer = llvm::MemoryBuffer::getMemBufferCopy(HeaderContents,
489                                                 getInputBufferName());
490 
491   // Set that buffer up as our "real" input in the CompilerInstance.
492   Inputs.clear();
493   Inputs.emplace_back(Buffer->getMemBufferRef(), Kind, /*IsSystem*/ false);
494 
495   return true;
496 }
497 
498 void ExtractAPIAction::EndSourceFileAction() { ImplEndSourceFileAction(); }
499 
500 std::unique_ptr<ASTConsumer>
501 WrappingExtractAPIAction::CreateASTConsumer(CompilerInstance &CI,
502                                             StringRef InFile) {
503   auto OtherConsumer = WrapperFrontendAction::CreateASTConsumer(CI, InFile);
504   if (!OtherConsumer)
505     return nullptr;
506 
507   CreatedASTConsumer = true;
508 
509   OS = CreateOutputFile(CI, InFile);
510   if (!OS)
511     return nullptr;
512 
513   auto ProductName = CI.getFrontendOpts().ProductName;
514 
515   // Now that we have enough information about the language options and the
516   // target triple, let's create the APISet before anyone uses it.
517   API = std::make_unique<APISet>(
518       CI.getTarget().getTriple(),
519       CI.getFrontendOpts().Inputs.back().getKind().getLanguage(), ProductName);
520 
521   CI.getPreprocessor().addPPCallbacks(std::make_unique<MacroCallback>(
522       CI.getSourceManager(), *API, CI.getPreprocessor()));
523 
524   // Do not include location in anonymous decls.
525   PrintingPolicy Policy = CI.getASTContext().getPrintingPolicy();
526   Policy.AnonymousTagLocations = false;
527   CI.getASTContext().setPrintingPolicy(Policy);
528 
529   if (!CI.getFrontendOpts().ExtractAPIIgnoresFileList.empty()) {
530     llvm::handleAllErrors(
531         APIIgnoresList::create(CI.getFrontendOpts().ExtractAPIIgnoresFileList,
532                                CI.getFileManager())
533             .moveInto(IgnoresList),
534         [&CI](const IgnoresFileNotFound &Err) {
535           CI.getDiagnostics().Report(
536               diag::err_extract_api_ignores_file_not_found)
537               << Err.Path;
538         });
539   }
540 
541   auto WrappingConsumer =
542       std::make_unique<WrappingExtractAPIConsumer>(CI.getASTContext(), *API);
543   std::vector<std::unique_ptr<ASTConsumer>> Consumers;
544   Consumers.push_back(std::move(OtherConsumer));
545   Consumers.push_back(std::move(WrappingConsumer));
546 
547   return std::make_unique<MultiplexConsumer>(std::move(Consumers));
548 }
549 
550 void WrappingExtractAPIAction::EndSourceFileAction() {
551   // Invoke wrapped action's method.
552   WrapperFrontendAction::EndSourceFileAction();
553 
554   if (CreatedASTConsumer) {
555     ImplEndSourceFileAction();
556   }
557 }
558 
559 std::unique_ptr<raw_pwrite_stream>
560 WrappingExtractAPIAction::CreateOutputFile(CompilerInstance &CI,
561                                            StringRef InFile) {
562   std::unique_ptr<raw_pwrite_stream> OS;
563   std::string OutputDir = CI.getFrontendOpts().SymbolGraphOutputDir;
564 
565   // The symbol graphs need to be generated as a side effect of regular
566   // compilation so the output should be dumped in the directory provided with
567   // the command line option.
568   llvm::SmallString<128> OutFilePath(OutputDir);
569   auto Seperator = llvm::sys::path::get_separator();
570   auto Infilename = llvm::sys::path::filename(InFile);
571   OutFilePath.append({Seperator, Infilename});
572   llvm::sys::path::replace_extension(OutFilePath, "json");
573   // StringRef outputFilePathref = *OutFilePath;
574 
575   // don't use the default output file
576   OS = CI.createOutputFile(/*OutputPath=*/OutFilePath, /*Binary=*/false,
577                            /*RemoveFileOnSignal=*/true,
578                            /*UseTemporary=*/true,
579                            /*CreateMissingDirectories=*/true);
580   if (!OS)
581     return nullptr;
582   return OS;
583 }
584