xref: /llvm-project/clang-tools-extra/clangd/indexer/IndexerMain.cpp (revision 109bc024c8d741e57fa6bb5a028d8a4ed4e64a61)
1 //===--- IndexerMain.cpp -----------------------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // clangd-indexer is a tool to gather index data (symbols, xrefs) from source.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CompileCommands.h"
14 #include "Compiler.h"
15 #include "index/IndexAction.h"
16 #include "index/Merge.h"
17 #include "index/Ref.h"
18 #include "index/Serialization.h"
19 #include "index/Symbol.h"
20 #include "index/SymbolCollector.h"
21 #include "support/Logger.h"
22 #include "clang/Tooling/ArgumentsAdjusters.h"
23 #include "clang/Tooling/Execution.h"
24 #include "clang/Tooling/Tooling.h"
25 #include "llvm/Support/CommandLine.h"
26 #include "llvm/Support/Signals.h"
27 #include <utility>
28 
29 namespace clang {
30 namespace clangd {
31 namespace {
32 
33 static llvm::cl::opt<IndexFileFormat>
34     Format("format", llvm::cl::desc("Format of the index to be written"),
35            llvm::cl::values(clEnumValN(IndexFileFormat::YAML, "yaml",
36                                        "human-readable YAML format"),
37                             clEnumValN(IndexFileFormat::RIFF, "binary",
38                                        "binary RIFF format")),
39            llvm::cl::init(IndexFileFormat::RIFF));
40 
41 static llvm::cl::list<std::string> QueryDriverGlobs{
42     "query-driver",
43     llvm::cl::desc(
44         "Comma separated list of globs for white-listing gcc-compatible "
45         "drivers that are safe to execute. Drivers matching any of these globs "
46         "will be used to extract system includes. e.g. "
47         "/usr/bin/**/clang-*,/path/to/repo/**/g++-*"),
48     llvm::cl::CommaSeparated,
49 };
50 
51 class IndexActionFactory : public tooling::FrontendActionFactory {
52 public:
IndexActionFactory(IndexFileIn & Result)53   IndexActionFactory(IndexFileIn &Result) : Result(Result) {}
54 
create()55   std::unique_ptr<FrontendAction> create() override {
56     SymbolCollector::Options Opts;
57     Opts.CountReferences = true;
58     Opts.FileFilter = [&](const SourceManager &SM, FileID FID) {
59       const auto F = SM.getFileEntryRefForID(FID);
60       if (!F)
61         return false; // Skip invalid files.
62       auto AbsPath = getCanonicalPath(*F, SM.getFileManager());
63       if (!AbsPath)
64         return false; // Skip files without absolute path.
65       std::lock_guard<std::mutex> Lock(FilesMu);
66       return Files.insert(*AbsPath).second; // Skip already processed files.
67     };
68     return createStaticIndexingAction(
69         Opts,
70         [&](SymbolSlab S) {
71           // Merge as we go.
72           std::lock_guard<std::mutex> Lock(SymbolsMu);
73           for (const auto &Sym : S) {
74             if (const auto *Existing = Symbols.find(Sym.ID))
75               Symbols.insert(mergeSymbol(*Existing, Sym));
76             else
77               Symbols.insert(Sym);
78           }
79         },
80         [&](RefSlab S) {
81           std::lock_guard<std::mutex> Lock(RefsMu);
82           for (const auto &Sym : S) {
83             // Deduplication happens during insertion.
84             for (const auto &Ref : Sym.second)
85               Refs.insert(Sym.first, Ref);
86           }
87         },
88         [&](RelationSlab S) {
89           std::lock_guard<std::mutex> Lock(RelsMu);
90           for (const auto &R : S) {
91             Relations.insert(R);
92           }
93         },
94         /*IncludeGraphCallback=*/nullptr);
95   }
96 
runInvocation(std::shared_ptr<CompilerInvocation> Invocation,FileManager * Files,std::shared_ptr<PCHContainerOperations> PCHContainerOps,DiagnosticConsumer * DiagConsumer)97   bool runInvocation(std::shared_ptr<CompilerInvocation> Invocation,
98                      FileManager *Files,
99                      std::shared_ptr<PCHContainerOperations> PCHContainerOps,
100                      DiagnosticConsumer *DiagConsumer) override {
101     disableUnsupportedOptions(*Invocation);
102     return tooling::FrontendActionFactory::runInvocation(
103         std::move(Invocation), Files, std::move(PCHContainerOps), DiagConsumer);
104   }
105 
106   // Awkward: we write the result in the destructor, because the executor
107   // takes ownership so it's the easiest way to get our data back out.
~IndexActionFactory()108   ~IndexActionFactory() {
109     Result.Symbols = std::move(Symbols).build();
110     Result.Refs = std::move(Refs).build();
111     Result.Relations = std::move(Relations).build();
112   }
113 
114 private:
115   IndexFileIn &Result;
116   std::mutex FilesMu;
117   llvm::StringSet<> Files;
118   std::mutex SymbolsMu;
119   SymbolSlab::Builder Symbols;
120   std::mutex RefsMu;
121   RefSlab::Builder Refs;
122   std::mutex RelsMu;
123   RelationSlab::Builder Relations;
124 };
125 
126 } // namespace
127 } // namespace clangd
128 } // namespace clang
129 
main(int argc,const char ** argv)130 int main(int argc, const char **argv) {
131   llvm::sys::PrintStackTraceOnErrorSignal(argv[0]);
132 
133   const char *Overview = R"(
134   Creates an index of symbol information etc in a whole project.
135 
136   Example usage for a project using CMake compile commands:
137 
138   $ clangd-indexer --executor=all-TUs compile_commands.json > clangd.dex
139 
140   Example usage for file sequence index without flags:
141 
142   $ clangd-indexer File1.cpp File2.cpp ... FileN.cpp > clangd.dex
143 
144   Note: only symbols from header files will be indexed.
145   )";
146 
147   auto Executor = clang::tooling::createExecutorFromCommandLineArgs(
148       argc, argv, llvm::cl::getGeneralCategory(), Overview);
149 
150   if (!Executor) {
151     llvm::errs() << llvm::toString(Executor.takeError()) << "\n";
152     return 1;
153   }
154 
155   // Collect symbols found in each translation unit, merging as we go.
156   clang::clangd::IndexFileIn Data;
157   auto Mangler = std::make_shared<clang::clangd::CommandMangler>(
158       clang::clangd::CommandMangler::detect());
159   Mangler->SystemIncludeExtractor = clang::clangd::getSystemIncludeExtractor(
160       static_cast<llvm::ArrayRef<std::string>>(
161           clang::clangd::QueryDriverGlobs));
162   auto Err = Executor->get()->execute(
163       std::make_unique<clang::clangd::IndexActionFactory>(Data),
164       clang::tooling::ArgumentsAdjuster(
165           [Mangler = std::move(Mangler)](const std::vector<std::string> &Args,
166                                          llvm::StringRef File) {
167             clang::tooling::CompileCommand Cmd;
168             Cmd.CommandLine = Args;
169             Mangler->operator()(Cmd, File);
170             return Cmd.CommandLine;
171           }));
172   if (Err) {
173     clang::clangd::elog("{0}", std::move(Err));
174   }
175 
176   // Emit collected data.
177   clang::clangd::IndexFileOut Out(Data);
178   Out.Format = clang::clangd::Format;
179   llvm::outs() << Out;
180   return 0;
181 }
182