1 //===- ExtractAPI/ExtractAPIConsumer.cpp ------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements the ExtractAPIAction, and ASTConsumer to collect API
11 /// information.
12 ///
13 //===----------------------------------------------------------------------===//
14
15 #include "clang/AST/ASTConsumer.h"
16 #include "clang/AST/ASTContext.h"
17 #include "clang/Basic/DiagnosticFrontend.h"
18 #include "clang/Basic/SourceLocation.h"
19 #include "clang/Basic/SourceManager.h"
20 #include "clang/Basic/TargetInfo.h"
21 #include "clang/ExtractAPI/API.h"
22 #include "clang/ExtractAPI/APIIgnoresList.h"
23 #include "clang/ExtractAPI/ExtractAPIVisitor.h"
24 #include "clang/ExtractAPI/FrontendActions.h"
25 #include "clang/ExtractAPI/Serialization/SymbolGraphSerializer.h"
26 #include "clang/Frontend/ASTConsumers.h"
27 #include "clang/Frontend/CompilerInstance.h"
28 #include "clang/Frontend/FrontendOptions.h"
29 #include "clang/Lex/MacroInfo.h"
30 #include "clang/Lex/PPCallbacks.h"
31 #include "clang/Lex/Preprocessor.h"
32 #include "clang/Lex/PreprocessorOptions.h"
33 #include "llvm/ADT/DenseSet.h"
34 #include "llvm/ADT/STLExtras.h"
35 #include "llvm/ADT/SmallVector.h"
36 #include "llvm/Support/Error.h"
37 #include "llvm/Support/FileSystem.h"
38 #include "llvm/Support/MemoryBuffer.h"
39 #include "llvm/Support/Path.h"
40 #include "llvm/Support/Regex.h"
41 #include "llvm/Support/raw_ostream.h"
42 #include <memory>
43 #include <optional>
44 #include <utility>
45
46 using namespace clang;
47 using namespace extractapi;
48
49 namespace {
50
getRelativeIncludeName(const CompilerInstance & CI,StringRef File,bool * IsQuoted=nullptr)51 std::optional<std::string> getRelativeIncludeName(const CompilerInstance &CI,
52 StringRef File,
53 bool *IsQuoted = nullptr) {
54 assert(CI.hasFileManager() &&
55 "CompilerInstance does not have a FileNamager!");
56
57 using namespace llvm::sys;
58 // Matches framework include patterns
59 const llvm::Regex Rule("/(.+)\\.framework/(.+)?Headers/(.+)");
60
61 const auto &FS = CI.getVirtualFileSystem();
62
63 SmallString<128> FilePath(File.begin(), File.end());
64 FS.makeAbsolute(FilePath);
65 path::remove_dots(FilePath, true);
66 FilePath = path::convert_to_slash(FilePath);
67 File = FilePath;
68
69 // Checks whether `Dir` is a strict path prefix of `File`. If so returns
70 // the prefix length. Otherwise return 0.
71 auto CheckDir = [&](llvm::StringRef Dir) -> unsigned {
72 llvm::SmallString<32> DirPath(Dir.begin(), Dir.end());
73 FS.makeAbsolute(DirPath);
74 path::remove_dots(DirPath, true);
75 Dir = DirPath;
76 for (auto NI = path::begin(File), NE = path::end(File),
77 DI = path::begin(Dir), DE = path::end(Dir);
78 /*termination condition in loop*/; ++NI, ++DI) {
79 // '.' components in File are ignored.
80 while (NI != NE && *NI == ".")
81 ++NI;
82 if (NI == NE)
83 break;
84
85 // '.' components in Dir are ignored.
86 while (DI != DE && *DI == ".")
87 ++DI;
88
89 // Dir is a prefix of File, up to '.' components and choice of path
90 // separators.
91 if (DI == DE)
92 return NI - path::begin(File);
93
94 // Consider all path separators equal.
95 if (NI->size() == 1 && DI->size() == 1 &&
96 path::is_separator(NI->front()) && path::is_separator(DI->front()))
97 continue;
98
99 // Special case Apple .sdk folders since the search path is typically a
100 // symlink like `iPhoneSimulator14.5.sdk` while the file is instead
101 // located in `iPhoneSimulator.sdk` (the real folder).
102 if (NI->endswith(".sdk") && DI->endswith(".sdk")) {
103 StringRef NBasename = path::stem(*NI);
104 StringRef DBasename = path::stem(*DI);
105 if (DBasename.startswith(NBasename))
106 continue;
107 }
108
109 if (*NI != *DI)
110 break;
111 }
112 return 0;
113 };
114
115 unsigned PrefixLength = 0;
116
117 // Go through the search paths and find the first one that is a prefix of
118 // the header.
119 for (const auto &Entry : CI.getHeaderSearchOpts().UserEntries) {
120 // Note whether the match is found in a quoted entry.
121 if (IsQuoted)
122 *IsQuoted = Entry.Group == frontend::Quoted;
123
124 if (auto EntryFile = CI.getFileManager().getOptionalFileRef(Entry.Path)) {
125 if (auto HMap = HeaderMap::Create(*EntryFile, CI.getFileManager())) {
126 // If this is a headermap entry, try to reverse lookup the full path
127 // for a spelled name before mapping.
128 StringRef SpelledFilename = HMap->reverseLookupFilename(File);
129 if (!SpelledFilename.empty())
130 return SpelledFilename.str();
131
132 // No matching mapping in this headermap, try next search entry.
133 continue;
134 }
135 }
136
137 // Entry is a directory search entry, try to check if it's a prefix of File.
138 PrefixLength = CheckDir(Entry.Path);
139 if (PrefixLength > 0) {
140 // The header is found in a framework path, construct the framework-style
141 // include name `<Framework/Header.h>`
142 if (Entry.IsFramework) {
143 SmallVector<StringRef, 4> Matches;
144 Rule.match(File, &Matches);
145 // Returned matches are always in stable order.
146 if (Matches.size() != 4)
147 return std::nullopt;
148
149 return path::convert_to_slash(
150 (Matches[1].drop_front(Matches[1].rfind('/') + 1) + "/" +
151 Matches[3])
152 .str());
153 }
154
155 // The header is found in a normal search path, strip the search path
156 // prefix to get an include name.
157 return path::convert_to_slash(File.drop_front(PrefixLength));
158 }
159 }
160
161 // Couldn't determine a include name, use full path instead.
162 return std::nullopt;
163 }
164
165 struct LocationFileChecker {
operator ()__anon1533bdf60111::LocationFileChecker166 bool operator()(SourceLocation Loc) {
167 // If the loc refers to a macro expansion we need to first get the file
168 // location of the expansion.
169 auto &SM = CI.getSourceManager();
170 auto FileLoc = SM.getFileLoc(Loc);
171 FileID FID = SM.getFileID(FileLoc);
172 if (FID.isInvalid())
173 return false;
174
175 const auto *File = SM.getFileEntryForID(FID);
176 if (!File)
177 return false;
178
179 if (KnownFileEntries.count(File))
180 return true;
181
182 if (ExternalFileEntries.count(File))
183 return false;
184
185 StringRef FileName = File->tryGetRealPathName().empty()
186 ? File->getName()
187 : File->tryGetRealPathName();
188
189 // Try to reduce the include name the same way we tried to include it.
190 bool IsQuoted = false;
191 if (auto IncludeName = getRelativeIncludeName(CI, FileName, &IsQuoted))
192 if (llvm::any_of(KnownFiles,
193 [&IsQuoted, &IncludeName](const auto &KnownFile) {
194 return KnownFile.first.equals(*IncludeName) &&
195 KnownFile.second == IsQuoted;
196 })) {
197 KnownFileEntries.insert(File);
198 return true;
199 }
200
201 // Record that the file was not found to avoid future reverse lookup for
202 // the same file.
203 ExternalFileEntries.insert(File);
204 return false;
205 }
206
LocationFileChecker__anon1533bdf60111::LocationFileChecker207 LocationFileChecker(const CompilerInstance &CI,
208 SmallVector<std::pair<SmallString<32>, bool>> &KnownFiles)
209 : CI(CI), KnownFiles(KnownFiles), ExternalFileEntries() {
210 for (const auto &KnownFile : KnownFiles)
211 if (auto FileEntry = CI.getFileManager().getFile(KnownFile.first))
212 KnownFileEntries.insert(*FileEntry);
213 }
214
215 private:
216 const CompilerInstance &CI;
217 SmallVector<std::pair<SmallString<32>, bool>> &KnownFiles;
218 llvm::DenseSet<const FileEntry *> KnownFileEntries;
219 llvm::DenseSet<const FileEntry *> ExternalFileEntries;
220 };
221
222 class ExtractAPIConsumer : public ASTConsumer {
223 public:
ExtractAPIConsumer(ASTContext & Context,std::unique_ptr<LocationFileChecker> LCF,APISet & API)224 ExtractAPIConsumer(ASTContext &Context,
225 std::unique_ptr<LocationFileChecker> LCF, APISet &API)
226 : Visitor(Context, *LCF, API), LCF(std::move(LCF)) {}
227
HandleTranslationUnit(ASTContext & Context)228 void HandleTranslationUnit(ASTContext &Context) override {
229 // Use ExtractAPIVisitor to traverse symbol declarations in the context.
230 Visitor.TraverseDecl(Context.getTranslationUnitDecl());
231 }
232
233 private:
234 ExtractAPIVisitor Visitor;
235 std::unique_ptr<LocationFileChecker> LCF;
236 };
237
238 class MacroCallback : public PPCallbacks {
239 public:
MacroCallback(const SourceManager & SM,LocationFileChecker & LCF,APISet & API,Preprocessor & PP)240 MacroCallback(const SourceManager &SM, LocationFileChecker &LCF, APISet &API,
241 Preprocessor &PP)
242 : SM(SM), LCF(LCF), API(API), PP(PP) {}
243
MacroDefined(const Token & MacroNameToken,const MacroDirective * MD)244 void MacroDefined(const Token &MacroNameToken,
245 const MacroDirective *MD) override {
246 auto *MacroInfo = MD->getMacroInfo();
247
248 if (MacroInfo->isBuiltinMacro())
249 return;
250
251 auto SourceLoc = MacroNameToken.getLocation();
252 if (SM.isWrittenInBuiltinFile(SourceLoc) ||
253 SM.isWrittenInCommandLineFile(SourceLoc))
254 return;
255
256 PendingMacros.emplace_back(MacroNameToken, MD);
257 }
258
259 // If a macro gets undefined at some point during preprocessing of the inputs
260 // it means that it isn't an exposed API and we should therefore not add a
261 // macro definition for it.
MacroUndefined(const Token & MacroNameToken,const MacroDefinition & MD,const MacroDirective * Undef)262 void MacroUndefined(const Token &MacroNameToken, const MacroDefinition &MD,
263 const MacroDirective *Undef) override {
264 // If this macro wasn't previously defined we don't need to do anything
265 // here.
266 if (!Undef)
267 return;
268
269 llvm::erase_if(PendingMacros, [&MD, this](const PendingMacro &PM) {
270 return MD.getMacroInfo()->isIdenticalTo(*PM.MD->getMacroInfo(), PP,
271 /*Syntactically*/ false);
272 });
273 }
274
EndOfMainFile()275 void EndOfMainFile() override {
276 for (auto &PM : PendingMacros) {
277 // `isUsedForHeaderGuard` is only set when the preprocessor leaves the
278 // file so check for it here.
279 if (PM.MD->getMacroInfo()->isUsedForHeaderGuard())
280 continue;
281
282 if (!LCF(PM.MacroNameToken.getLocation()))
283 continue;
284
285 StringRef Name = PM.MacroNameToken.getIdentifierInfo()->getName();
286 PresumedLoc Loc = SM.getPresumedLoc(PM.MacroNameToken.getLocation());
287 StringRef USR =
288 API.recordUSRForMacro(Name, PM.MacroNameToken.getLocation(), SM);
289
290 API.addMacroDefinition(
291 Name, USR, Loc,
292 DeclarationFragmentsBuilder::getFragmentsForMacro(Name, PM.MD),
293 DeclarationFragmentsBuilder::getSubHeadingForMacro(Name),
294 SM.isInSystemHeader(PM.MacroNameToken.getLocation()));
295 }
296
297 PendingMacros.clear();
298 }
299
300 private:
301 struct PendingMacro {
302 Token MacroNameToken;
303 const MacroDirective *MD;
304
PendingMacro__anon1533bdf60111::MacroCallback::PendingMacro305 PendingMacro(const Token &MacroNameToken, const MacroDirective *MD)
306 : MacroNameToken(MacroNameToken), MD(MD) {}
307 };
308
309 const SourceManager &SM;
310 LocationFileChecker &LCF;
311 APISet &API;
312 Preprocessor &PP;
313 llvm::SmallVector<PendingMacro> PendingMacros;
314 };
315
316 } // namespace
317
318 std::unique_ptr<ASTConsumer>
CreateASTConsumer(CompilerInstance & CI,StringRef InFile)319 ExtractAPIAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
320 OS = CreateOutputFile(CI, InFile);
321 if (!OS)
322 return nullptr;
323
324 auto ProductName = CI.getFrontendOpts().ProductName;
325
326 // Now that we have enough information about the language options and the
327 // target triple, let's create the APISet before anyone uses it.
328 API = std::make_unique<APISet>(
329 CI.getTarget().getTriple(),
330 CI.getFrontendOpts().Inputs.back().getKind().getLanguage(), ProductName);
331
332 auto LCF = std::make_unique<LocationFileChecker>(CI, KnownInputFiles);
333
334 CI.getPreprocessor().addPPCallbacks(std::make_unique<MacroCallback>(
335 CI.getSourceManager(), *LCF, *API, CI.getPreprocessor()));
336
337 // Do not include location in anonymous decls.
338 PrintingPolicy Policy = CI.getASTContext().getPrintingPolicy();
339 Policy.AnonymousTagLocations = false;
340 CI.getASTContext().setPrintingPolicy(Policy);
341
342 if (!CI.getFrontendOpts().ExtractAPIIgnoresFile.empty()) {
343 llvm::handleAllErrors(
344 APIIgnoresList::create(CI.getFrontendOpts().ExtractAPIIgnoresFile,
345 CI.getFileManager())
346 .moveInto(IgnoresList),
347 [&CI](const IgnoresFileNotFound &Err) {
348 CI.getDiagnostics().Report(
349 diag::err_extract_api_ignores_file_not_found)
350 << Err.Path;
351 });
352 }
353
354 return std::make_unique<ExtractAPIConsumer>(CI.getASTContext(),
355 std::move(LCF), *API);
356 }
357
PrepareToExecuteAction(CompilerInstance & CI)358 bool ExtractAPIAction::PrepareToExecuteAction(CompilerInstance &CI) {
359 auto &Inputs = CI.getFrontendOpts().Inputs;
360 if (Inputs.empty())
361 return true;
362
363 if (!CI.hasFileManager())
364 if (!CI.createFileManager())
365 return false;
366
367 auto Kind = Inputs[0].getKind();
368
369 // Convert the header file inputs into a single input buffer.
370 SmallString<256> HeaderContents;
371 bool IsQuoted = false;
372 for (const FrontendInputFile &FIF : Inputs) {
373 if (Kind.isObjectiveC())
374 HeaderContents += "#import";
375 else
376 HeaderContents += "#include";
377
378 StringRef FilePath = FIF.getFile();
379 if (auto RelativeName = getRelativeIncludeName(CI, FilePath, &IsQuoted)) {
380 if (IsQuoted)
381 HeaderContents += " \"";
382 else
383 HeaderContents += " <";
384
385 HeaderContents += *RelativeName;
386
387 if (IsQuoted)
388 HeaderContents += "\"\n";
389 else
390 HeaderContents += ">\n";
391 KnownInputFiles.emplace_back(static_cast<SmallString<32>>(*RelativeName),
392 IsQuoted);
393 } else {
394 HeaderContents += " \"";
395 HeaderContents += FilePath;
396 HeaderContents += "\"\n";
397 KnownInputFiles.emplace_back(FilePath, true);
398 }
399 }
400
401 if (CI.getHeaderSearchOpts().Verbose)
402 CI.getVerboseOutputStream() << getInputBufferName() << ":\n"
403 << HeaderContents << "\n";
404
405 Buffer = llvm::MemoryBuffer::getMemBufferCopy(HeaderContents,
406 getInputBufferName());
407
408 // Set that buffer up as our "real" input in the CompilerInstance.
409 Inputs.clear();
410 Inputs.emplace_back(Buffer->getMemBufferRef(), Kind, /*IsSystem*/ false);
411
412 return true;
413 }
414
EndSourceFileAction()415 void ExtractAPIAction::EndSourceFileAction() {
416 if (!OS)
417 return;
418
419 // Setup a SymbolGraphSerializer to write out collected API information in
420 // the Symbol Graph format.
421 // FIXME: Make the kind of APISerializer configurable.
422 SymbolGraphSerializer SGSerializer(*API, IgnoresList);
423 SGSerializer.serialize(*OS);
424 OS.reset();
425 }
426
427 std::unique_ptr<raw_pwrite_stream>
CreateOutputFile(CompilerInstance & CI,StringRef InFile)428 ExtractAPIAction::CreateOutputFile(CompilerInstance &CI, StringRef InFile) {
429 std::unique_ptr<raw_pwrite_stream> OS =
430 CI.createDefaultOutputFile(/*Binary=*/false, InFile, /*Extension=*/"json",
431 /*RemoveFileOnSignal=*/false);
432 if (!OS)
433 return nullptr;
434 return OS;
435 }
436