xref: /llvm-project/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp (revision 5a3130e3b645cf5fc179d9274eb1b62b7f0c7438)
1 //===- ExtractAPI/ExtractAPIConsumer.cpp ------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements the ExtractAPIAction, and ASTConsumer to collect API
11 /// information.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/AST/ASTConcept.h"
16 #include "clang/AST/ASTConsumer.h"
17 #include "clang/AST/ASTContext.h"
18 #include "clang/AST/DeclObjC.h"
19 #include "clang/Basic/DiagnosticFrontend.h"
20 #include "clang/Basic/SourceLocation.h"
21 #include "clang/Basic/SourceManager.h"
22 #include "clang/Basic/TargetInfo.h"
23 #include "clang/ExtractAPI/API.h"
24 #include "clang/ExtractAPI/APIIgnoresList.h"
25 #include "clang/ExtractAPI/ExtractAPIVisitor.h"
26 #include "clang/ExtractAPI/FrontendActions.h"
27 #include "clang/ExtractAPI/Serialization/SymbolGraphSerializer.h"
28 #include "clang/Frontend/ASTConsumers.h"
29 #include "clang/Frontend/CompilerInstance.h"
30 #include "clang/Frontend/FrontendOptions.h"
31 #include "clang/Frontend/MultiplexConsumer.h"
32 #include "clang/Lex/MacroInfo.h"
33 #include "clang/Lex/PPCallbacks.h"
34 #include "clang/Lex/Preprocessor.h"
35 #include "clang/Lex/PreprocessorOptions.h"
36 #include "llvm/ADT/DenseSet.h"
37 #include "llvm/ADT/STLExtras.h"
38 #include "llvm/ADT/SmallString.h"
39 #include "llvm/ADT/SmallVector.h"
40 #include "llvm/Support/Casting.h"
41 #include "llvm/Support/Error.h"
42 #include "llvm/Support/FileSystem.h"
43 #include "llvm/Support/MemoryBuffer.h"
44 #include "llvm/Support/Path.h"
45 #include "llvm/Support/Regex.h"
46 #include "llvm/Support/raw_ostream.h"
47 #include <memory>
48 #include <optional>
49 #include <utility>
50 
51 using namespace clang;
52 using namespace extractapi;
53 
54 namespace {
55 
56 std::optional<std::string> getRelativeIncludeName(const CompilerInstance &CI,
57                                                   StringRef File,
58                                                   bool *IsQuoted = nullptr) {
59   assert(CI.hasFileManager() &&
60          "CompilerInstance does not have a FileNamager!");
61 
62   using namespace llvm::sys;
63   // Matches framework include patterns
64   const llvm::Regex Rule("/(.+)\\.framework/(.+)?Headers/(.+)");
65 
66   const auto &FS = CI.getVirtualFileSystem();
67 
68   SmallString<128> FilePath(File.begin(), File.end());
69   FS.makeAbsolute(FilePath);
70   path::remove_dots(FilePath, true);
71   FilePath = path::convert_to_slash(FilePath);
72   File = FilePath;
73 
74   // Checks whether `Dir` is a strict path prefix of `File`. If so returns
75   // the prefix length. Otherwise return 0.
76   auto CheckDir = [&](llvm::StringRef Dir) -> unsigned {
77     llvm::SmallString<32> DirPath(Dir.begin(), Dir.end());
78     FS.makeAbsolute(DirPath);
79     path::remove_dots(DirPath, true);
80     Dir = DirPath;
81     for (auto NI = path::begin(File), NE = path::end(File),
82               DI = path::begin(Dir), DE = path::end(Dir);
83          /*termination condition in loop*/; ++NI, ++DI) {
84       // '.' components in File are ignored.
85       while (NI != NE && *NI == ".")
86         ++NI;
87       if (NI == NE)
88         break;
89 
90       // '.' components in Dir are ignored.
91       while (DI != DE && *DI == ".")
92         ++DI;
93 
94       // Dir is a prefix of File, up to '.' components and choice of path
95       // separators.
96       if (DI == DE)
97         return NI - path::begin(File);
98 
99       // Consider all path separators equal.
100       if (NI->size() == 1 && DI->size() == 1 &&
101           path::is_separator(NI->front()) && path::is_separator(DI->front()))
102         continue;
103 
104       // Special case Apple .sdk folders since the search path is typically a
105       // symlink like `iPhoneSimulator14.5.sdk` while the file is instead
106       // located in `iPhoneSimulator.sdk` (the real folder).
107       if (NI->endswith(".sdk") && DI->endswith(".sdk")) {
108         StringRef NBasename = path::stem(*NI);
109         StringRef DBasename = path::stem(*DI);
110         if (DBasename.startswith(NBasename))
111           continue;
112       }
113 
114       if (*NI != *DI)
115         break;
116     }
117     return 0;
118   };
119 
120   unsigned PrefixLength = 0;
121 
122   // Go through the search paths and find the first one that is a prefix of
123   // the header.
124   for (const auto &Entry : CI.getHeaderSearchOpts().UserEntries) {
125     // Note whether the match is found in a quoted entry.
126     if (IsQuoted)
127       *IsQuoted = Entry.Group == frontend::Quoted;
128 
129     if (auto EntryFile = CI.getFileManager().getOptionalFileRef(Entry.Path)) {
130       if (auto HMap = HeaderMap::Create(*EntryFile, CI.getFileManager())) {
131         // If this is a headermap entry, try to reverse lookup the full path
132         // for a spelled name before mapping.
133         StringRef SpelledFilename = HMap->reverseLookupFilename(File);
134         if (!SpelledFilename.empty())
135           return SpelledFilename.str();
136 
137         // No matching mapping in this headermap, try next search entry.
138         continue;
139       }
140     }
141 
142     // Entry is a directory search entry, try to check if it's a prefix of File.
143     PrefixLength = CheckDir(Entry.Path);
144     if (PrefixLength > 0) {
145       // The header is found in a framework path, construct the framework-style
146       // include name `<Framework/Header.h>`
147       if (Entry.IsFramework) {
148         SmallVector<StringRef, 4> Matches;
149         Rule.match(File, &Matches);
150         // Returned matches are always in stable order.
151         if (Matches.size() != 4)
152           return std::nullopt;
153 
154         return path::convert_to_slash(
155             (Matches[1].drop_front(Matches[1].rfind('/') + 1) + "/" +
156              Matches[3])
157                 .str());
158       }
159 
160       // The header is found in a normal search path, strip the search path
161       // prefix to get an include name.
162       return path::convert_to_slash(File.drop_front(PrefixLength));
163     }
164   }
165 
166   // Couldn't determine a include name, use full path instead.
167   return std::nullopt;
168 }
169 
170 struct LocationFileChecker {
171   bool operator()(SourceLocation Loc) {
172     // If the loc refers to a macro expansion we need to first get the file
173     // location of the expansion.
174     auto &SM = CI.getSourceManager();
175     auto FileLoc = SM.getFileLoc(Loc);
176     FileID FID = SM.getFileID(FileLoc);
177     if (FID.isInvalid())
178       return false;
179 
180     OptionalFileEntryRef File = SM.getFileEntryRefForID(FID);
181     if (!File)
182       return false;
183 
184     if (KnownFileEntries.count(*File))
185       return true;
186 
187     if (ExternalFileEntries.count(*File))
188       return false;
189 
190     StringRef FileName = SM.getFileManager().getCanonicalName(*File);
191 
192     // Try to reduce the include name the same way we tried to include it.
193     bool IsQuoted = false;
194     if (auto IncludeName = getRelativeIncludeName(CI, FileName, &IsQuoted))
195       if (llvm::any_of(KnownFiles,
196                        [&IsQuoted, &IncludeName](const auto &KnownFile) {
197                          return KnownFile.first.equals(*IncludeName) &&
198                                 KnownFile.second == IsQuoted;
199                        })) {
200         KnownFileEntries.insert(*File);
201         return true;
202       }
203 
204     // Record that the file was not found to avoid future reverse lookup for
205     // the same file.
206     ExternalFileEntries.insert(*File);
207     return false;
208   }
209 
210   LocationFileChecker(const CompilerInstance &CI,
211                       SmallVector<std::pair<SmallString<32>, bool>> &KnownFiles)
212       : CI(CI), KnownFiles(KnownFiles), ExternalFileEntries() {
213     for (const auto &KnownFile : KnownFiles)
214       if (auto FileEntry = CI.getFileManager().getFile(KnownFile.first))
215         KnownFileEntries.insert(*FileEntry);
216   }
217 
218 private:
219   const CompilerInstance &CI;
220   SmallVector<std::pair<SmallString<32>, bool>> &KnownFiles;
221   llvm::DenseSet<const FileEntry *> KnownFileEntries;
222   llvm::DenseSet<const FileEntry *> ExternalFileEntries;
223 };
224 
225 struct BatchExtractAPIVisitor : ExtractAPIVisitor<BatchExtractAPIVisitor> {
226   bool shouldDeclBeIncluded(const Decl *D) const {
227     bool ShouldBeIncluded = true;
228     // Check that we have the definition for redeclarable types.
229     if (auto *TD = llvm::dyn_cast<TagDecl>(D))
230       ShouldBeIncluded = TD->isThisDeclarationADefinition();
231     else if (auto *Interface = llvm::dyn_cast<ObjCInterfaceDecl>(D))
232       ShouldBeIncluded = Interface->isThisDeclarationADefinition();
233     else if (auto *Protocol = llvm::dyn_cast<ObjCProtocolDecl>(D))
234       ShouldBeIncluded = Protocol->isThisDeclarationADefinition();
235 
236     ShouldBeIncluded = ShouldBeIncluded && LCF(D->getLocation());
237     return ShouldBeIncluded;
238   }
239 
240   BatchExtractAPIVisitor(LocationFileChecker &LCF, ASTContext &Context,
241                          APISet &API)
242       : ExtractAPIVisitor<BatchExtractAPIVisitor>(Context, API), LCF(LCF) {}
243 
244 private:
245   LocationFileChecker &LCF;
246 };
247 
248 class WrappingExtractAPIConsumer : public ASTConsumer {
249 public:
250   WrappingExtractAPIConsumer(ASTContext &Context, APISet &API)
251       : Visitor(Context, API) {}
252 
253   void HandleTranslationUnit(ASTContext &Context) override {
254     // Use ExtractAPIVisitor to traverse symbol declarations in the context.
255     Visitor.TraverseDecl(Context.getTranslationUnitDecl());
256   }
257 
258 private:
259   ExtractAPIVisitor<> Visitor;
260 };
261 
262 class ExtractAPIConsumer : public ASTConsumer {
263 public:
264   ExtractAPIConsumer(ASTContext &Context,
265                      std::unique_ptr<LocationFileChecker> LCF, APISet &API)
266       : Visitor(*LCF, Context, API), LCF(std::move(LCF)) {}
267 
268   void HandleTranslationUnit(ASTContext &Context) override {
269     // Use ExtractAPIVisitor to traverse symbol declarations in the context.
270     Visitor.TraverseDecl(Context.getTranslationUnitDecl());
271   }
272 
273 private:
274   BatchExtractAPIVisitor Visitor;
275   std::unique_ptr<LocationFileChecker> LCF;
276 };
277 
278 class MacroCallback : public PPCallbacks {
279 public:
280   MacroCallback(const SourceManager &SM, APISet &API, Preprocessor &PP)
281       : SM(SM), API(API), PP(PP) {}
282 
283   void MacroDefined(const Token &MacroNameToken,
284                     const MacroDirective *MD) override {
285     auto *MacroInfo = MD->getMacroInfo();
286 
287     if (MacroInfo->isBuiltinMacro())
288       return;
289 
290     auto SourceLoc = MacroNameToken.getLocation();
291     if (SM.isWrittenInBuiltinFile(SourceLoc) ||
292         SM.isWrittenInCommandLineFile(SourceLoc))
293       return;
294 
295     PendingMacros.emplace_back(MacroNameToken, MD);
296   }
297 
298   // If a macro gets undefined at some point during preprocessing of the inputs
299   // it means that it isn't an exposed API and we should therefore not add a
300   // macro definition for it.
301   void MacroUndefined(const Token &MacroNameToken, const MacroDefinition &MD,
302                       const MacroDirective *Undef) override {
303     // If this macro wasn't previously defined we don't need to do anything
304     // here.
305     if (!Undef)
306       return;
307 
308     llvm::erase_if(PendingMacros, [&MD, this](const PendingMacro &PM) {
309       return MD.getMacroInfo()->isIdenticalTo(*PM.MD->getMacroInfo(), PP,
310                                               /*Syntactically*/ false);
311     });
312   }
313 
314   void EndOfMainFile() override {
315     for (auto &PM : PendingMacros) {
316       // `isUsedForHeaderGuard` is only set when the preprocessor leaves the
317       // file so check for it here.
318       if (PM.MD->getMacroInfo()->isUsedForHeaderGuard())
319         continue;
320 
321       if (!shouldMacroBeIncluded(PM))
322         continue;
323 
324       StringRef Name = PM.MacroNameToken.getIdentifierInfo()->getName();
325       PresumedLoc Loc = SM.getPresumedLoc(PM.MacroNameToken.getLocation());
326       StringRef USR =
327           API.recordUSRForMacro(Name, PM.MacroNameToken.getLocation(), SM);
328 
329       API.addMacroDefinition(
330           Name, USR, Loc,
331           DeclarationFragmentsBuilder::getFragmentsForMacro(Name, PM.MD),
332           DeclarationFragmentsBuilder::getSubHeadingForMacro(Name),
333           SM.isInSystemHeader(PM.MacroNameToken.getLocation()));
334     }
335 
336     PendingMacros.clear();
337   }
338 
339 protected:
340   struct PendingMacro {
341     Token MacroNameToken;
342     const MacroDirective *MD;
343 
344     PendingMacro(const Token &MacroNameToken, const MacroDirective *MD)
345         : MacroNameToken(MacroNameToken), MD(MD) {}
346   };
347 
348   virtual bool shouldMacroBeIncluded(const PendingMacro &PM) { return true; }
349 
350   const SourceManager &SM;
351   APISet &API;
352   Preprocessor &PP;
353   llvm::SmallVector<PendingMacro> PendingMacros;
354 };
355 
356 class APIMacroCallback : public MacroCallback {
357 public:
358   APIMacroCallback(const SourceManager &SM, APISet &API, Preprocessor &PP,
359                    LocationFileChecker &LCF)
360       : MacroCallback(SM, API, PP), LCF(LCF) {}
361 
362   bool shouldMacroBeIncluded(const PendingMacro &PM) override {
363     // Do not include macros from external files
364     return LCF(PM.MacroNameToken.getLocation());
365   }
366 
367 private:
368   LocationFileChecker &LCF;
369 };
370 
371 } // namespace
372 
373 void ExtractAPIActionBase::ImplEndSourceFileAction() {
374   if (!OS)
375     return;
376 
377   // Setup a SymbolGraphSerializer to write out collected API information in
378   // the Symbol Graph format.
379   // FIXME: Make the kind of APISerializer configurable.
380   SymbolGraphSerializer SGSerializer(*API, IgnoresList);
381   SGSerializer.serialize(*OS);
382   OS.reset();
383 }
384 
385 std::unique_ptr<raw_pwrite_stream>
386 ExtractAPIAction::CreateOutputFile(CompilerInstance &CI, StringRef InFile) {
387   std::unique_ptr<raw_pwrite_stream> OS;
388   OS = CI.createDefaultOutputFile(/*Binary=*/false, InFile,
389                                   /*Extension=*/"json",
390                                   /*RemoveFileOnSignal=*/false);
391   if (!OS)
392     return nullptr;
393   return OS;
394 }
395 
396 std::unique_ptr<ASTConsumer>
397 ExtractAPIAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
398   OS = CreateOutputFile(CI, InFile);
399 
400   if (!OS)
401     return nullptr;
402 
403   auto ProductName = CI.getFrontendOpts().ProductName;
404 
405   // Now that we have enough information about the language options and the
406   // target triple, let's create the APISet before anyone uses it.
407   API = std::make_unique<APISet>(
408       CI.getTarget().getTriple(),
409       CI.getFrontendOpts().Inputs.back().getKind().getLanguage(), ProductName);
410 
411   auto LCF = std::make_unique<LocationFileChecker>(CI, KnownInputFiles);
412 
413   CI.getPreprocessor().addPPCallbacks(std::make_unique<APIMacroCallback>(
414       CI.getSourceManager(), *API, CI.getPreprocessor(), *LCF));
415 
416   // Do not include location in anonymous decls.
417   PrintingPolicy Policy = CI.getASTContext().getPrintingPolicy();
418   Policy.AnonymousTagLocations = false;
419   CI.getASTContext().setPrintingPolicy(Policy);
420 
421   if (!CI.getFrontendOpts().ExtractAPIIgnoresFileList.empty()) {
422     llvm::handleAllErrors(
423         APIIgnoresList::create(CI.getFrontendOpts().ExtractAPIIgnoresFileList,
424                                CI.getFileManager())
425             .moveInto(IgnoresList),
426         [&CI](const IgnoresFileNotFound &Err) {
427           CI.getDiagnostics().Report(
428               diag::err_extract_api_ignores_file_not_found)
429               << Err.Path;
430         });
431   }
432 
433   return std::make_unique<ExtractAPIConsumer>(CI.getASTContext(),
434                                               std::move(LCF), *API);
435 }
436 
437 bool ExtractAPIAction::PrepareToExecuteAction(CompilerInstance &CI) {
438   auto &Inputs = CI.getFrontendOpts().Inputs;
439   if (Inputs.empty())
440     return true;
441 
442   if (!CI.hasFileManager())
443     if (!CI.createFileManager())
444       return false;
445 
446   auto Kind = Inputs[0].getKind();
447 
448   // Convert the header file inputs into a single input buffer.
449   SmallString<256> HeaderContents;
450   bool IsQuoted = false;
451   for (const FrontendInputFile &FIF : Inputs) {
452     if (Kind.isObjectiveC())
453       HeaderContents += "#import";
454     else
455       HeaderContents += "#include";
456 
457     StringRef FilePath = FIF.getFile();
458     if (auto RelativeName = getRelativeIncludeName(CI, FilePath, &IsQuoted)) {
459       if (IsQuoted)
460         HeaderContents += " \"";
461       else
462         HeaderContents += " <";
463 
464       HeaderContents += *RelativeName;
465 
466       if (IsQuoted)
467         HeaderContents += "\"\n";
468       else
469         HeaderContents += ">\n";
470       KnownInputFiles.emplace_back(static_cast<SmallString<32>>(*RelativeName),
471                                    IsQuoted);
472     } else {
473       HeaderContents += " \"";
474       HeaderContents += FilePath;
475       HeaderContents += "\"\n";
476       KnownInputFiles.emplace_back(FilePath, true);
477     }
478   }
479 
480   if (CI.getHeaderSearchOpts().Verbose)
481     CI.getVerboseOutputStream() << getInputBufferName() << ":\n"
482                                 << HeaderContents << "\n";
483 
484   Buffer = llvm::MemoryBuffer::getMemBufferCopy(HeaderContents,
485                                                 getInputBufferName());
486 
487   // Set that buffer up as our "real" input in the CompilerInstance.
488   Inputs.clear();
489   Inputs.emplace_back(Buffer->getMemBufferRef(), Kind, /*IsSystem*/ false);
490 
491   return true;
492 }
493 
494 void ExtractAPIAction::EndSourceFileAction() { ImplEndSourceFileAction(); }
495 
496 std::unique_ptr<ASTConsumer>
497 WrappingExtractAPIAction::CreateASTConsumer(CompilerInstance &CI,
498                                             StringRef InFile) {
499   auto OtherConsumer = WrapperFrontendAction::CreateASTConsumer(CI, InFile);
500   if (!OtherConsumer)
501     return nullptr;
502 
503   CreatedASTConsumer = true;
504 
505   OS = CreateOutputFile(CI, InFile);
506   if (!OS)
507     return nullptr;
508 
509   auto ProductName = CI.getFrontendOpts().ProductName;
510 
511   // Now that we have enough information about the language options and the
512   // target triple, let's create the APISet before anyone uses it.
513   API = std::make_unique<APISet>(
514       CI.getTarget().getTriple(),
515       CI.getFrontendOpts().Inputs.back().getKind().getLanguage(), ProductName);
516 
517   CI.getPreprocessor().addPPCallbacks(std::make_unique<MacroCallback>(
518       CI.getSourceManager(), *API, CI.getPreprocessor()));
519 
520   // Do not include location in anonymous decls.
521   PrintingPolicy Policy = CI.getASTContext().getPrintingPolicy();
522   Policy.AnonymousTagLocations = false;
523   CI.getASTContext().setPrintingPolicy(Policy);
524 
525   if (!CI.getFrontendOpts().ExtractAPIIgnoresFileList.empty()) {
526     llvm::handleAllErrors(
527         APIIgnoresList::create(CI.getFrontendOpts().ExtractAPIIgnoresFileList,
528                                CI.getFileManager())
529             .moveInto(IgnoresList),
530         [&CI](const IgnoresFileNotFound &Err) {
531           CI.getDiagnostics().Report(
532               diag::err_extract_api_ignores_file_not_found)
533               << Err.Path;
534         });
535   }
536 
537   auto WrappingConsumer =
538       std::make_unique<WrappingExtractAPIConsumer>(CI.getASTContext(), *API);
539   std::vector<std::unique_ptr<ASTConsumer>> Consumers;
540   Consumers.push_back(std::move(OtherConsumer));
541   Consumers.push_back(std::move(WrappingConsumer));
542 
543   return std::make_unique<MultiplexConsumer>(std::move(Consumers));
544 }
545 
546 void WrappingExtractAPIAction::EndSourceFileAction() {
547   // Invoke wrapped action's method.
548   WrapperFrontendAction::EndSourceFileAction();
549 
550   if (CreatedASTConsumer) {
551     ImplEndSourceFileAction();
552   }
553 }
554 
555 std::unique_ptr<raw_pwrite_stream>
556 WrappingExtractAPIAction::CreateOutputFile(CompilerInstance &CI,
557                                            StringRef InFile) {
558   std::unique_ptr<raw_pwrite_stream> OS;
559   std::string OutputDir = CI.getFrontendOpts().SymbolGraphOutputDir;
560 
561   // The symbol graphs need to be generated as a side effect of regular
562   // compilation so the output should be dumped in the directory provided with
563   // the command line option.
564   llvm::SmallString<128> OutFilePath(OutputDir);
565   auto Seperator = llvm::sys::path::get_separator();
566   auto Infilename = llvm::sys::path::filename(InFile);
567   OutFilePath.append({Seperator, Infilename});
568   llvm::sys::path::replace_extension(OutFilePath, "json");
569   // StringRef outputFilePathref = *OutFilePath;
570 
571   // don't use the default output file
572   OS = CI.createOutputFile(/*OutputPath=*/OutFilePath, /*Binary=*/false,
573                            /*RemoveFileOnSignal=*/true,
574                            /*UseTemporary=*/true,
575                            /*CreateMissingDirectories=*/true);
576   if (!OS)
577     return nullptr;
578   return OS;
579 }
580