xref: /llvm-project/clang-tools-extra/clangd/index/dex/dexp/Dexp.cpp (revision 61fe67a4017375fd675f75652e857e837f77fa51)
1 //===--- Dexp.cpp - Dex EXPloration tool ------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a simple interactive tool which can be used to manually
10 // evaluate symbol search quality of Clangd index.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "index/Index.h"
15 #include "index/Relation.h"
16 #include "index/Serialization.h"
17 #include "index/remote/Client.h"
18 #include "llvm/ADT/ScopeExit.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/LineEditor/LineEditor.h"
22 #include "llvm/Support/CommandLine.h"
23 #include "llvm/Support/Signals.h"
24 #include <optional>
25 
26 namespace clang {
27 namespace clangd {
28 namespace {
29 
30 llvm::cl::opt<std::string> IndexLocation(
31     llvm::cl::desc("<path to index file | remote:server.address>"),
32     llvm::cl::Positional);
33 
34 llvm::cl::opt<std::string>
35     ExecCommand("c", llvm::cl::desc("Command to execute and then exit."));
36 
37 llvm::cl::opt<std::string> ProjectRoot(
38     "project-root",
39     llvm::cl::desc(
40         "Path to the project. Required when connecting using remote index."));
41 
// Tool overview text, passed to llvm::cl::ParseCommandLineOptions in main().
static constexpr char Overview[] = R"(
This is an **experimental** interactive tool to process user-provided search
queries over given symbol collection obtained via clangd-indexer. The
tool can be used to evaluate search quality of existing index implementations
and manually construct non-trivial test cases.

You can connect to remote index by passing remote:address to dexp. Example:

$ dexp remote:0.0.0.0:9000

Use the "help" request to get information about the details.
)";
54 
55 void reportTime(llvm::StringRef Name, llvm::function_ref<void()> F) {
56   const auto TimerStart = std::chrono::high_resolution_clock::now();
57   F();
58   const auto TimerStop = std::chrono::high_resolution_clock::now();
59   const auto Duration = std::chrono::duration_cast<std::chrono::milliseconds>(
60       TimerStop - TimerStart);
61   llvm::outs() << llvm::formatv("{0} took {1:ms+n}.\n", Name, Duration);
62 }
63 
64 std::vector<SymbolID> getSymbolIDsFromIndex(llvm::StringRef QualifiedName,
65                                             const SymbolIndex *Index) {
66   FuzzyFindRequest Request;
67   // Remove leading "::" qualifier as FuzzyFind doesn't need leading "::"
68   // qualifier for global scope.
69   bool IsGlobalScope = QualifiedName.consume_front("::");
70   auto Names = splitQualifiedName(QualifiedName);
71   if (IsGlobalScope || !Names.first.empty())
72     Request.Scopes = {std::string(Names.first)};
73   else
74     // QualifiedName refers to a symbol in global scope (e.g. "GlobalSymbol"),
75     // add the global scope to the request.
76     Request.Scopes = {""};
77 
78   Request.Query = std::string(Names.second);
79   std::vector<SymbolID> SymIDs;
80   Index->fuzzyFind(Request, [&](const Symbol &Sym) {
81     std::string SymQualifiedName = (Sym.Scope + Sym.Name).str();
82     if (QualifiedName == SymQualifiedName)
83       SymIDs.push_back(Sym.ID);
84   });
85   return SymIDs;
86 }
87 
88 // REPL commands inherit from Command and contain their options as members.
89 // Creating a Command populates parser options, parseAndRun() resets them.
90 class Command {
91   // By resetting the parser options, we lost the standard -help flag.
92   llvm::cl::opt<bool, false, llvm::cl::parser<bool>> Help{
93       "help", llvm::cl::desc("Display available options"),
94       llvm::cl::ValueDisallowed, llvm::cl::cat(llvm::cl::getGeneralCategory())};
95   // FIXME: Allow commands to signal failure.
96   virtual void run() = 0;
97 
98 protected:
99   const SymbolIndex *Index;
100 
101 public:
102   virtual ~Command() = default;
103   bool parseAndRun(llvm::ArrayRef<const char *> Argv, const char *Overview,
104                    const SymbolIndex &Index) {
105     std::string ParseErrs;
106     llvm::raw_string_ostream OS(ParseErrs);
107     bool Ok = llvm::cl::ParseCommandLineOptions(Argv.size(), Argv.data(),
108                                                 Overview, &OS);
109     // must do this before opts are destroyed
110     auto Cleanup = llvm::make_scope_exit(llvm::cl::ResetCommandLineParser);
111     if (Help.getNumOccurrences() > 0) {
112       // Avoid printing parse errors in this case.
113       // (Well, in theory. A bunch get printed to llvm::errs() regardless!)
114       llvm::cl::PrintHelpMessage();
115       return true;
116     }
117 
118     llvm::outs() << OS.str();
119     if (Ok) {
120       this->Index = &Index;
121       reportTime(Argv[0], [&] { run(); });
122     }
123     return Ok;
124   }
125 };
126 
127 // FIXME(kbobyrev): Ideas for more commands:
128 // * load/swap/reload index: this would make it possible to get rid of llvm::cl
129 //   usages in the tool driver and actually use llvm::cl library in the REPL.
// * show posting list density histogram (or dump data somewhere so that user
//   could build one)
132 // * show number of tokens of each kind
133 // * print out tokens with the most dense posting lists
134 // * print out tokens with least dense posting lists
135 
136 class FuzzyFind : public Command {
137   llvm::cl::opt<std::string> Query{
138       "query",
139       llvm::cl::Positional,
140       llvm::cl::Required,
141       llvm::cl::desc("Query string to be fuzzy-matched"),
142   };
143   llvm::cl::opt<std::string> Scopes{
144       "scopes",
145       llvm::cl::desc("Allowed symbol scopes (comma-separated list)"),
146   };
147   llvm::cl::opt<unsigned> Limit{
148       "limit",
149       llvm::cl::init(10),
150       llvm::cl::desc("Max results to display"),
151   };
152 
153   void run() override {
154     FuzzyFindRequest Request;
155     Request.Limit = Limit;
156     Request.Query = Query;
157     if (Scopes.getNumOccurrences() > 0) {
158       llvm::SmallVector<llvm::StringRef> Scopes;
159       llvm::StringRef(this->Scopes).split(Scopes, ',');
160       Request.Scopes = {Scopes.begin(), Scopes.end()};
161     }
162     Request.AnyScope = Request.Scopes.empty();
163     // FIXME(kbobyrev): Print symbol final scores to see the distribution.
164     static const auto *OutputFormat = "{0,-4} | {1,-40} | {2,-25}\n";
165     llvm::outs() << llvm::formatv(OutputFormat, "Rank", "Symbol ID",
166                                   "Symbol Name");
167     size_t Rank = 0;
168     Index->fuzzyFind(Request, [&](const Symbol &Sym) {
169       llvm::outs() << llvm::formatv(OutputFormat, Rank++, Sym.ID.str(),
170                                     Sym.Scope + Sym.Name);
171     });
172   }
173 };
174 
175 class Lookup : public Command {
176   llvm::cl::opt<std::string> ID{
177       "id",
178       llvm::cl::Positional,
179       llvm::cl::desc("Symbol ID to look up (hex)"),
180   };
181   llvm::cl::opt<std::string> Name{
182       "name",
183       llvm::cl::desc("Qualified name to look up."),
184   };
185 
186   void run() override {
187     if (ID.getNumOccurrences() == 0 && Name.getNumOccurrences() == 0) {
188       llvm::errs()
189           << "Missing required argument: please provide id or -name.\n";
190       return;
191     }
192     std::vector<SymbolID> IDs;
193     if (ID.getNumOccurrences()) {
194       auto SID = SymbolID::fromStr(ID);
195       if (!SID) {
196         llvm::errs() << llvm::toString(SID.takeError()) << "\n";
197         return;
198       }
199       IDs.push_back(*SID);
200     } else {
201       IDs = getSymbolIDsFromIndex(Name, Index);
202     }
203 
204     LookupRequest Request;
205     Request.IDs.insert(IDs.begin(), IDs.end());
206     bool FoundSymbol = false;
207     Index->lookup(Request, [&](const Symbol &Sym) {
208       FoundSymbol = true;
209       llvm::outs() << toYAML(Sym);
210     });
211     if (!FoundSymbol)
212       llvm::errs() << "not found\n";
213   }
214 };
215 
216 class Refs : public Command {
217   llvm::cl::opt<std::string> ID{
218       "id",
219       llvm::cl::Positional,
220       llvm::cl::desc("Symbol ID of the symbol being queried (hex)."),
221   };
222   llvm::cl::opt<std::string> Name{
223       "name",
224       llvm::cl::desc("Qualified name of the symbol being queried."),
225   };
226   llvm::cl::opt<std::string> Filter{
227       "filter",
228       llvm::cl::init(".*"),
229       llvm::cl::desc(
230           "Print all results from files matching this regular expression."),
231   };
232 
233   void run() override {
234     if (ID.getNumOccurrences() == 0 && Name.getNumOccurrences() == 0) {
235       llvm::errs()
236           << "Missing required argument: please provide id or -name.\n";
237       return;
238     }
239     std::vector<SymbolID> IDs;
240     if (ID.getNumOccurrences()) {
241       auto SID = SymbolID::fromStr(ID);
242       if (!SID) {
243         llvm::errs() << llvm::toString(SID.takeError()) << "\n";
244         return;
245       }
246       IDs.push_back(*SID);
247     } else {
248       IDs = getSymbolIDsFromIndex(Name, Index);
249       if (IDs.size() > 1) {
250         llvm::errs() << llvm::formatv(
251             "The name {0} is ambiguous, found {1} different "
252             "symbols. Please use id flag to disambiguate.\n",
253             Name, IDs.size());
254         return;
255       }
256     }
257     RefsRequest RefRequest;
258     RefRequest.IDs.insert(IDs.begin(), IDs.end());
259     llvm::Regex RegexFilter(Filter);
260     Index->refs(RefRequest, [&RegexFilter](const Ref &R) {
261       auto U = URI::parse(R.Location.FileURI);
262       if (!U) {
263         llvm::errs() << U.takeError();
264         return;
265       }
266       if (RegexFilter.match(U->body()))
267         llvm::outs() << R << "\n";
268     });
269   }
270 };
271 
272 class Relations : public Command {
273   llvm::cl::opt<std::string> ID{
274       "id",
275       llvm::cl::Positional,
276       llvm::cl::desc("Symbol ID of the symbol being queried (hex)."),
277   };
278   llvm::cl::opt<RelationKind> Relation{
279       "relation",
280       llvm::cl::desc("Relation kind for the predicate."),
281       values(clEnumValN(RelationKind::BaseOf, "base_of",
282                         "Find subclasses of a class."),
283              clEnumValN(RelationKind::OverriddenBy, "overridden_by",
284                         "Find methods that overrides a virtual method.")),
285   };
286 
287   void run() override {
288     if (ID.getNumOccurrences() == 0 || Relation.getNumOccurrences() == 0) {
289       llvm::errs()
290           << "Missing required argument: please provide id and -relation.\n";
291       return;
292     }
293     RelationsRequest Req;
294     if (ID.getNumOccurrences()) {
295       auto SID = SymbolID::fromStr(ID);
296       if (!SID) {
297         llvm::errs() << llvm::toString(SID.takeError()) << "\n";
298         return;
299       }
300       Req.Subjects.insert(*SID);
301     }
302     Req.Predicate = Relation.getValue();
303     Index->relations(Req, [](const SymbolID &SID, const Symbol &S) {
304       llvm::outs() << toYAML(S);
305     });
306   }
307 };
308 
309 class Export : public Command {
310   llvm::cl::opt<IndexFileFormat> Format{
311       "format",
312       llvm::cl::desc("Format of index export"),
313       llvm::cl::values(
314           clEnumValN(IndexFileFormat::YAML, "yaml",
315                      "human-readable YAML format"),
316           clEnumValN(IndexFileFormat::RIFF, "binary", "binary RIFF format")),
317       llvm::cl::init(IndexFileFormat::YAML),
318   };
319   llvm::cl::opt<std::string> OutputFile{
320       "output-file",
321       llvm::cl::Positional,
322       llvm::cl::Required,
323       llvm::cl::desc("Output file for export"),
324   };
325 
326 public:
327   void run() override {
328     using namespace clang::clangd;
329     // Read input file (as specified in global option)
330     auto Buffer = llvm::MemoryBuffer::getFile(IndexLocation);
331     if (!Buffer) {
332       llvm::errs() << llvm::formatv("Can't open {0}", IndexLocation) << "\n";
333       return;
334     }
335 
336     // Auto-detects input format when parsing
337     auto IndexIn = clang::clangd::readIndexFile(Buffer->get()->getBuffer(),
338                                                 SymbolOrigin::Static);
339     if (!IndexIn) {
340       llvm::errs() << llvm::toString(IndexIn.takeError()) << "\n";
341       return;
342     }
343 
344     // Prepare output file
345     std::error_code EC;
346     llvm::raw_fd_ostream OutputStream(OutputFile, EC);
347     if (EC) {
348       llvm::errs() << llvm::formatv("Can't open {0} for writing", OutputFile)
349                    << "\n";
350       return;
351     }
352 
353     // Export
354     clang::clangd::IndexFileOut IndexOut(IndexIn.get());
355     IndexOut.Format = Format;
356     OutputStream << IndexOut;
357   }
358 };
359 
360 struct {
361   const char *Name;
362   const char *Description;
363   std::function<std::unique_ptr<Command>()> Implementation;
364 } CommandInfo[] = {
365     {"find", "Search for symbols with fuzzyFind", std::make_unique<FuzzyFind>},
366     {"lookup", "Dump symbol details by ID or qualified name",
367      std::make_unique<Lookup>},
368     {"refs", "Find references by ID or qualified name", std::make_unique<Refs>},
369     {"relations", "Find relations by ID and relation kind",
370      std::make_unique<Relations>},
371     {"export", "Export index", std::make_unique<Export>},
372 };
373 
374 std::unique_ptr<SymbolIndex> openIndex(llvm::StringRef Index) {
375   return Index.starts_with("remote:")
376              ? remote::getClient(Index.drop_front(strlen("remote:")),
377                                  ProjectRoot)
378              : loadIndex(Index, SymbolOrigin::Static, /*UseDex=*/true,
379                          /*SupportContainedRefs=*/true);
380 }
381 
382 bool runCommand(std::string Request, const SymbolIndex &Index) {
383   // Split on spaces and add required null-termination.
384   std::replace(Request.begin(), Request.end(), ' ', '\0');
385   llvm::SmallVector<llvm::StringRef> Args;
386   llvm::StringRef(Request).split(Args, '\0', /*MaxSplit=*/-1,
387                                  /*KeepEmpty=*/false);
388   if (Args.empty())
389     return false;
390   if (Args.front() == "help") {
391     llvm::outs() << "dexp - Index explorer\nCommands:\n";
392     for (const auto &C : CommandInfo)
393       llvm::outs() << llvm::formatv("{0,16} - {1}\n", C.Name, C.Description);
394     llvm::outs() << "Get detailed command help with e.g. `find -help`.\n";
395     return true;
396   }
397   llvm::SmallVector<const char *> FakeArgv;
398   for (llvm::StringRef S : Args)
399     FakeArgv.push_back(S.data()); // Terminated by separator or end of string.
400 
401   for (const auto &Cmd : CommandInfo) {
402     if (Cmd.Name == Args.front())
403       return Cmd.Implementation()->parseAndRun(FakeArgv, Cmd.Description,
404                                                Index);
405   }
406   llvm::errs() << "Unknown command. Try 'help'.\n";
407   return false;
408 }
409 
410 } // namespace
411 } // namespace clangd
412 } // namespace clang
413 
414 int main(int argc, const char *argv[]) {
415   using namespace clang::clangd;
416 
417   llvm::cl::ParseCommandLineOptions(argc, argv, Overview);
418 
419   // Preserve global options when flag parser is reset, so commands can use
420   // them.
421   IndexLocation.setValue(IndexLocation, /*initial=*/true);
422   ExecCommand.setValue(ExecCommand, /*initial=*/true);
423   ProjectRoot.setValue(ProjectRoot, /*initial=*/true);
424 
425   llvm::cl::ResetCommandLineParser(); // We reuse it for REPL commands.
426   llvm::sys::PrintStackTraceOnErrorSignal(argv[0]);
427 
428   bool RemoteMode = llvm::StringRef(IndexLocation).starts_with("remote:");
429   if (RemoteMode && ProjectRoot.empty()) {
430     llvm::errs() << "--project-root is required in remote mode\n";
431     return -1;
432   }
433 
434   std::unique_ptr<SymbolIndex> Index;
435   reportTime(RemoteMode ? "Remote index client creation" : "Dex build",
436              [&]() { Index = openIndex(IndexLocation); });
437 
438   if (!Index) {
439     llvm::errs() << "Failed to open the index.\n";
440     return -1;
441   }
442 
443   if (!ExecCommand.empty())
444     return runCommand(ExecCommand, *Index) ? 0 : 1;
445 
446   llvm::LineEditor LE("dexp");
447   while (std::optional<std::string> Request = LE.readLine())
448     runCommand(std::move(*Request), *Index);
449 }
450