xref: /llvm-project/clang-tools-extra/clangd/Headers.cpp (revision 1f90797f6a9d91d61e0f66b465b0467e4c66d0e0)
1 //===--- Headers.cpp - Include headers ---------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Headers.h"
10 #include "Preamble.h"
11 #include "SourceCode.h"
12 #include "support/Logger.h"
13 #include "clang/Basic/SourceLocation.h"
14 #include "clang/Basic/SourceManager.h"
15 #include "clang/Frontend/CompilerInstance.h"
16 #include "clang/Lex/DirectoryLookup.h"
17 #include "clang/Lex/HeaderSearch.h"
18 #include "clang/Lex/PPCallbacks.h"
19 #include "clang/Lex/Preprocessor.h"
20 #include "clang/Tooling/Inclusions/HeaderAnalysis.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/StringRef.h"
23 #include "llvm/Support/Path.h"
24 #include <cstring>
25 #include <optional>
26 #include <string>
27 
28 namespace clang {
29 namespace clangd {
30 
31 class IncludeStructure::RecordHeaders : public PPCallbacks {
32 public:
33   RecordHeaders(const CompilerInstance &CI, IncludeStructure *Out)
34       : SM(CI.getSourceManager()), Out(Out) {}
35 
36   // Record existing #includes - both written and resolved paths. Only #includes
37   // in the main file are collected.
38   void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
39                           llvm::StringRef FileName, bool IsAngled,
40                           CharSourceRange /*FilenameRange*/,
41                           OptionalFileEntryRef File,
42                           llvm::StringRef /*SearchPath*/,
43                           llvm::StringRef /*RelativePath*/,
44                           const clang::Module * /*SuggestedModule*/,
45                           bool /*ModuleImported*/,
46                           SrcMgr::CharacteristicKind FileKind) override {
47     auto MainFID = SM.getMainFileID();
48     // If an include is part of the preamble patch, translate #line directives.
49     if (InBuiltinFile)
50       HashLoc = translatePreamblePatchLocation(HashLoc, SM);
51 
52     // Record main-file inclusions (including those mapped from the preamble
53     // patch).
54     if (isInsideMainFile(HashLoc, SM)) {
55       Out->MainFileIncludes.emplace_back();
56       auto &Inc = Out->MainFileIncludes.back();
57       Inc.Written =
58           (IsAngled ? "<" + FileName + ">" : "\"" + FileName + "\"").str();
59       Inc.Resolved = std::string(
60           File ? getCanonicalPath(*File, SM.getFileManager()).value_or("")
61                : "");
62       Inc.HashOffset = SM.getFileOffset(HashLoc);
63       Inc.HashLine =
64           SM.getLineNumber(SM.getFileID(HashLoc), Inc.HashOffset) - 1;
65       Inc.FileKind = FileKind;
66       Inc.Directive = IncludeTok.getIdentifierInfo()->getPPKeywordID();
67       if (File) {
68         IncludeStructure::HeaderID HID = Out->getOrCreateID(*File);
69         Inc.HeaderID = static_cast<unsigned>(HID);
70         if (IsAngled)
71           if (auto StdlibHeader = tooling::stdlib::Header::named(Inc.Written)) {
72             auto &IDs = Out->StdlibHeaders[*StdlibHeader];
73             // Few physical files for one stdlib header name, linear scan is ok.
74             if (!llvm::is_contained(IDs, HID))
75               IDs.push_back(HID);
76           }
77       }
78       Out->MainFileIncludesBySpelling[Inc.Written].push_back(
79           Out->MainFileIncludes.size() - 1);
80     }
81 
82     // Record include graph (not just for main-file includes)
83     if (File) {
84       auto IncludingFileEntry = SM.getFileEntryRefForID(SM.getFileID(HashLoc));
85       if (!IncludingFileEntry) {
86         assert(SM.getBufferName(HashLoc).starts_with("<") &&
87                "Expected #include location to be a file or <built-in>");
88         // Treat as if included from the main file.
89         IncludingFileEntry = SM.getFileEntryRefForID(MainFID);
90       }
91       auto IncludingID = Out->getOrCreateID(*IncludingFileEntry),
92            IncludedID = Out->getOrCreateID(*File);
93       Out->IncludeChildren[IncludingID].push_back(IncludedID);
94     }
95   }
96 
97   void FileChanged(SourceLocation Loc, FileChangeReason Reason,
98                    SrcMgr::CharacteristicKind FileType,
99                    FileID PrevFID) override {
100     switch (Reason) {
101     case PPCallbacks::EnterFile:
102       ++Level;
103       if (BuiltinFile.isInvalid() && SM.isWrittenInBuiltinFile(Loc)) {
104         BuiltinFile = SM.getFileID(Loc);
105         InBuiltinFile = true;
106       }
107       break;
108     case PPCallbacks::ExitFile: {
109       --Level;
110       if (PrevFID == BuiltinFile)
111         InBuiltinFile = false;
112       break;
113     }
114     case PPCallbacks::RenameFile:
115     case PPCallbacks::SystemHeaderPragma:
116       break;
117     }
118   }
119 
120 private:
121   // Keeps track of include depth for the current file. It's 1 for main file.
122   int Level = 0;
123   bool inMainFile() const { return Level == 1; }
124 
125   const SourceManager &SM;
126   // Set after entering the <built-in> file.
127   FileID BuiltinFile;
128   // Indicates whether <built-in> file is part of include stack.
129   bool InBuiltinFile = false;
130 
131   IncludeStructure *Out;
132 };
133 
134 bool isLiteralInclude(llvm::StringRef Include) {
135   return Include.starts_with("<") || Include.starts_with("\"");
136 }
137 
138 bool HeaderFile::valid() const {
139   return (Verbatim && isLiteralInclude(File)) ||
140          (!Verbatim && llvm::sys::path::is_absolute(File));
141 }
142 
143 llvm::Expected<HeaderFile> toHeaderFile(llvm::StringRef Header,
144                                         llvm::StringRef HintPath) {
145   if (isLiteralInclude(Header))
146     return HeaderFile{Header.str(), /*Verbatim=*/true};
147   auto U = URI::parse(Header);
148   if (!U)
149     return U.takeError();
150 
151   auto IncludePath = URI::includeSpelling(*U);
152   if (!IncludePath)
153     return IncludePath.takeError();
154   if (!IncludePath->empty())
155     return HeaderFile{std::move(*IncludePath), /*Verbatim=*/true};
156 
157   auto Resolved = URI::resolve(*U, HintPath);
158   if (!Resolved)
159     return Resolved.takeError();
160   return HeaderFile{std::move(*Resolved), /*Verbatim=*/false};
161 }
162 
163 llvm::SmallVector<SymbolInclude, 1> getRankedIncludes(const Symbol &Sym) {
164   auto Includes = Sym.IncludeHeaders;
165   // Sort in descending order by reference count and header length.
166   llvm::sort(Includes, [](const Symbol::IncludeHeaderWithReferences &LHS,
167                           const Symbol::IncludeHeaderWithReferences &RHS) {
168     if (LHS.References == RHS.References)
169       return LHS.IncludeHeader.size() < RHS.IncludeHeader.size();
170     return LHS.References > RHS.References;
171   });
172   llvm::SmallVector<SymbolInclude, 1> Headers;
173   for (const auto &Include : Includes)
174     Headers.push_back({Include.IncludeHeader, Include.supportedDirectives()});
175   return Headers;
176 }
177 
178 void IncludeStructure::collect(const CompilerInstance &CI) {
179   auto &SM = CI.getSourceManager();
180   MainFileEntry = SM.getFileEntryForID(SM.getMainFileID());
181   auto Collector = std::make_unique<RecordHeaders>(CI, this);
182   CI.getPreprocessor().addPPCallbacks(std::move(Collector));
183 
184   // If we're reusing a preamble, don't repopulate SearchPathsCanonical.
185   // The entries will be the same, but canonicalizing to find out is expensive!
186   if (SearchPathsCanonical.empty()) {
187     for (const auto &Dir :
188          CI.getPreprocessor().getHeaderSearchInfo().search_dir_range()) {
189       if (Dir.getLookupType() == DirectoryLookup::LT_NormalDir)
190         SearchPathsCanonical.emplace_back(
191             SM.getFileManager().getCanonicalName(*Dir.getDirRef()));
192     }
193   }
194 }
195 
196 std::optional<IncludeStructure::HeaderID>
197 IncludeStructure::getID(const FileEntry *Entry) const {
198   // HeaderID of the main file is always 0;
199   if (Entry == MainFileEntry) {
200     return static_cast<IncludeStructure::HeaderID>(0u);
201   }
202   auto It = UIDToIndex.find(Entry->getUniqueID());
203   if (It == UIDToIndex.end())
204     return std::nullopt;
205   return It->second;
206 }
207 
208 IncludeStructure::HeaderID IncludeStructure::getOrCreateID(FileEntryRef Entry) {
209   // Main file's FileEntry was not known at IncludeStructure creation time.
210   if (&Entry.getFileEntry() == MainFileEntry) {
211     if (RealPathNames.front().empty())
212       RealPathNames.front() = MainFileEntry->tryGetRealPathName().str();
213     return MainFileID;
214   }
215   auto R = UIDToIndex.try_emplace(
216       Entry.getUniqueID(),
217       static_cast<IncludeStructure::HeaderID>(RealPathNames.size()));
218   if (R.second)
219     RealPathNames.emplace_back();
220   IncludeStructure::HeaderID Result = R.first->getSecond();
221   std::string &RealPathName = RealPathNames[static_cast<unsigned>(Result)];
222   if (RealPathName.empty())
223     RealPathName = Entry.getFileEntry().tryGetRealPathName().str();
224   return Result;
225 }
226 
227 llvm::DenseMap<IncludeStructure::HeaderID, unsigned>
228 IncludeStructure::includeDepth(HeaderID Root) const {
229   // Include depth 0 is the main file only.
230   llvm::DenseMap<HeaderID, unsigned> Result;
231   assert(static_cast<unsigned>(Root) < RealPathNames.size());
232   Result[Root] = 0;
233   std::vector<IncludeStructure::HeaderID> CurrentLevel;
234   CurrentLevel.push_back(Root);
235   llvm::DenseSet<IncludeStructure::HeaderID> Seen;
236   Seen.insert(Root);
237 
238   // Each round of BFS traversal finds the next depth level.
239   std::vector<IncludeStructure::HeaderID> PreviousLevel;
240   for (unsigned Level = 1; !CurrentLevel.empty(); ++Level) {
241     PreviousLevel.clear();
242     PreviousLevel.swap(CurrentLevel);
243     for (const auto &Parent : PreviousLevel) {
244       for (const auto &Child : IncludeChildren.lookup(Parent)) {
245         if (Seen.insert(Child).second) {
246           CurrentLevel.push_back(Child);
247           Result[Child] = Level;
248         }
249       }
250     }
251   }
252   return Result;
253 }
254 
255 llvm::SmallVector<const Inclusion *>
256 IncludeStructure::mainFileIncludesWithSpelling(llvm::StringRef Spelling) const {
257   llvm::SmallVector<const Inclusion *> Includes;
258   for (auto Idx : MainFileIncludesBySpelling.lookup(Spelling))
259     Includes.push_back(&MainFileIncludes[Idx]);
260   return Includes;
261 }
262 
263 void IncludeInserter::addExisting(const Inclusion &Inc) {
264   IncludedHeaders.insert(Inc.Written);
265   if (!Inc.Resolved.empty())
266     IncludedHeaders.insert(Inc.Resolved);
267 }
268 
269 /// FIXME(ioeric): we might not want to insert an absolute include path if the
270 /// path is not shortened.
271 bool IncludeInserter::shouldInsertInclude(
272     PathRef DeclaringHeader, const HeaderFile &InsertedHeader) const {
273   assert(InsertedHeader.valid());
274   if (!HeaderSearchInfo && !InsertedHeader.Verbatim)
275     return false;
276   if (FileName == DeclaringHeader || FileName == InsertedHeader.File)
277     return false;
278   auto Included = [&](llvm::StringRef Header) {
279     return IncludedHeaders.contains(Header);
280   };
281   return !Included(DeclaringHeader) && !Included(InsertedHeader.File);
282 }
283 
284 std::optional<std::string>
285 IncludeInserter::calculateIncludePath(const HeaderFile &InsertedHeader,
286                                       llvm::StringRef IncludingFile) const {
287   assert(InsertedHeader.valid());
288   if (InsertedHeader.Verbatim)
289     return InsertedHeader.File;
290   bool IsAngledByDefault = false;
291   std::string Suggested;
292   if (HeaderSearchInfo) {
293     Suggested = HeaderSearchInfo->suggestPathToFileForDiagnostics(
294         InsertedHeader.File, BuildDir, IncludingFile, &IsAngledByDefault);
295   } else {
296     // Calculate include relative to including file only.
297     StringRef IncludingDir = llvm::sys::path::parent_path(IncludingFile);
298     SmallString<256> RelFile(InsertedHeader.File);
299     // Replacing with "" leaves "/RelFile" if IncludingDir doesn't end in "/".
300     llvm::sys::path::replace_path_prefix(RelFile, IncludingDir, "./");
301     Suggested = llvm::sys::path::convert_to_slash(
302         llvm::sys::path::remove_leading_dotslash(RelFile));
303   }
304   // FIXME: should we allow (some limited number of) "../header.h"?
305   if (llvm::sys::path::is_absolute(Suggested))
306     return std::nullopt;
307   bool IsAngled = false;
308   for (auto Filter : AngledHeaders) {
309     if (Filter(Suggested)) {
310       IsAngled = true;
311       break;
312     }
313   }
314   bool IsQuoted = false;
315   for (auto Filter : QuotedHeaders) {
316     if (Filter(Suggested)) {
317       IsQuoted = true;
318       break;
319     }
320   }
321   // No filters apply, or both filters apply (a bug), use system default.
322   if (IsAngled == IsQuoted) {
323     // Probably a bug in the config regex.
324     if (IsAngled && IsQuoted) {
325       elog("Header '{0}' matches both quoted and angled regexes, default will "
326            "be used.",
327            Suggested);
328     }
329     IsAngled = IsAngledByDefault;
330   }
331   if (IsAngled)
332     Suggested = "<" + Suggested + ">";
333   else // if (IsQuoted)
334     Suggested = "\"" + Suggested + "\"";
335   return Suggested;
336 }
337 
338 std::optional<TextEdit>
339 IncludeInserter::insert(llvm::StringRef VerbatimHeader,
340                         tooling::IncludeDirective Directive) const {
341   std::optional<TextEdit> Edit;
342   if (auto Insertion =
343           Inserter.insert(VerbatimHeader.trim("\"<>"),
344                           VerbatimHeader.starts_with("<"), Directive))
345     Edit = replacementToEdit(Code, *Insertion);
346   return Edit;
347 }
348 
349 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Inclusion &Inc) {
350   return OS << Inc.Written << " = "
351             << (!Inc.Resolved.empty() ? Inc.Resolved : "[unresolved]")
352             << " at line" << Inc.HashLine;
353 }
354 
355 bool operator==(const Inclusion &LHS, const Inclusion &RHS) {
356   return std::tie(LHS.Directive, LHS.FileKind, LHS.HashOffset, LHS.HashLine,
357                   LHS.Resolved, LHS.Written) ==
358          std::tie(RHS.Directive, RHS.FileKind, RHS.HashOffset, RHS.HashLine,
359                   RHS.Resolved, RHS.Written);
360 }
361 
362 } // namespace clangd
363 } // namespace clang
364