1 //===--- Dexp.cpp - Dex EXPloration tool ------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a simple interactive tool which can be used to manually 10 // evaluate symbol search quality of Clangd index. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "index/Index.h" 15 #include "index/Relation.h" 16 #include "index/Serialization.h" 17 #include "index/remote/Client.h" 18 #include "llvm/ADT/ScopeExit.h" 19 #include "llvm/ADT/SmallVector.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/LineEditor/LineEditor.h" 22 #include "llvm/Support/CommandLine.h" 23 #include "llvm/Support/Signals.h" 24 #include <optional> 25 26 namespace clang { 27 namespace clangd { 28 namespace { 29 30 llvm::cl::opt<std::string> IndexLocation( 31 llvm::cl::desc("<path to index file | remote:server.address>"), 32 llvm::cl::Positional); 33 34 llvm::cl::opt<std::string> 35 ExecCommand("c", llvm::cl::desc("Command to execute and then exit.")); 36 37 llvm::cl::opt<std::string> ProjectRoot( 38 "project-root", 39 llvm::cl::desc( 40 "Path to the project. Required when connecting using remote index.")); 41 42 static constexpr char Overview[] = R"( 43 This is an **experimental** interactive tool to process user-provided search 44 queries over given symbol collection obtained via clangd-indexer. The 45 tool can be used to evaluate search quality of existing index implementations 46 and manually construct non-trivial test cases. 47 48 You can connect to remote index by passing remote:address to dexp. Example: 49 50 $ dexp remote:0.0.0.0:9000 51 52 Type use "help" request to get information about the details. 53 )"; 54 55 void reportTime(llvm::StringRef Name, llvm::function_ref<void()> F) { 56 const auto TimerStart = std::chrono::high_resolution_clock::now(); 57 F(); 58 const auto TimerStop = std::chrono::high_resolution_clock::now(); 59 const auto Duration = std::chrono::duration_cast<std::chrono::milliseconds>( 60 TimerStop - TimerStart); 61 llvm::outs() << llvm::formatv("{0} took {1:ms+n}.\n", Name, Duration); 62 } 63 64 std::vector<SymbolID> getSymbolIDsFromIndex(llvm::StringRef QualifiedName, 65 const SymbolIndex *Index) { 66 FuzzyFindRequest Request; 67 // Remove leading "::" qualifier as FuzzyFind doesn't need leading "::" 68 // qualifier for global scope. 69 bool IsGlobalScope = QualifiedName.consume_front("::"); 70 auto Names = splitQualifiedName(QualifiedName); 71 if (IsGlobalScope || !Names.first.empty()) 72 Request.Scopes = {std::string(Names.first)}; 73 else 74 // QualifiedName refers to a symbol in global scope (e.g. "GlobalSymbol"), 75 // add the global scope to the request. 76 Request.Scopes = {""}; 77 78 Request.Query = std::string(Names.second); 79 std::vector<SymbolID> SymIDs; 80 Index->fuzzyFind(Request, [&](const Symbol &Sym) { 81 std::string SymQualifiedName = (Sym.Scope + Sym.Name).str(); 82 if (QualifiedName == SymQualifiedName) 83 SymIDs.push_back(Sym.ID); 84 }); 85 return SymIDs; 86 } 87 88 // REPL commands inherit from Command and contain their options as members. 89 // Creating a Command populates parser options, parseAndRun() resets them. 90 class Command { 91 // By resetting the parser options, we lost the standard -help flag. 92 llvm::cl::opt<bool, false, llvm::cl::parser<bool>> Help{ 93 "help", llvm::cl::desc("Display available options"), 94 llvm::cl::ValueDisallowed, llvm::cl::cat(llvm::cl::getGeneralCategory())}; 95 // FIXME: Allow commands to signal failure. 96 virtual void run() = 0; 97 98 protected: 99 const SymbolIndex *Index; 100 101 public: 102 virtual ~Command() = default; 103 bool parseAndRun(llvm::ArrayRef<const char *> Argv, const char *Overview, 104 const SymbolIndex &Index) { 105 std::string ParseErrs; 106 llvm::raw_string_ostream OS(ParseErrs); 107 bool Ok = llvm::cl::ParseCommandLineOptions(Argv.size(), Argv.data(), 108 Overview, &OS); 109 // must do this before opts are destroyed 110 auto Cleanup = llvm::make_scope_exit(llvm::cl::ResetCommandLineParser); 111 if (Help.getNumOccurrences() > 0) { 112 // Avoid printing parse errors in this case. 113 // (Well, in theory. A bunch get printed to llvm::errs() regardless!) 114 llvm::cl::PrintHelpMessage(); 115 return true; 116 } 117 118 llvm::outs() << OS.str(); 119 if (Ok) { 120 this->Index = &Index; 121 reportTime(Argv[0], [&] { run(); }); 122 } 123 return Ok; 124 } 125 }; 126 127 // FIXME(kbobyrev): Ideas for more commands: 128 // * load/swap/reload index: this would make it possible to get rid of llvm::cl 129 // usages in the tool driver and actually use llvm::cl library in the REPL. 130 // * show posting list density histogram (our dump data somewhere so that user 131 // could build one) 132 // * show number of tokens of each kind 133 // * print out tokens with the most dense posting lists 134 // * print out tokens with least dense posting lists 135 136 class FuzzyFind : public Command { 137 llvm::cl::opt<std::string> Query{ 138 "query", 139 llvm::cl::Positional, 140 llvm::cl::Required, 141 llvm::cl::desc("Query string to be fuzzy-matched"), 142 }; 143 llvm::cl::opt<std::string> Scopes{ 144 "scopes", 145 llvm::cl::desc("Allowed symbol scopes (comma-separated list)"), 146 }; 147 llvm::cl::opt<unsigned> Limit{ 148 "limit", 149 llvm::cl::init(10), 150 llvm::cl::desc("Max results to display"), 151 }; 152 153 void run() override { 154 FuzzyFindRequest Request; 155 Request.Limit = Limit; 156 Request.Query = Query; 157 if (Scopes.getNumOccurrences() > 0) { 158 llvm::SmallVector<llvm::StringRef> Scopes; 159 llvm::StringRef(this->Scopes).split(Scopes, ','); 160 Request.Scopes = {Scopes.begin(), Scopes.end()}; 161 } 162 Request.AnyScope = Request.Scopes.empty(); 163 // FIXME(kbobyrev): Print symbol final scores to see the distribution. 164 static const auto *OutputFormat = "{0,-4} | {1,-40} | {2,-25}\n"; 165 llvm::outs() << llvm::formatv(OutputFormat, "Rank", "Symbol ID", 166 "Symbol Name"); 167 size_t Rank = 0; 168 Index->fuzzyFind(Request, [&](const Symbol &Sym) { 169 llvm::outs() << llvm::formatv(OutputFormat, Rank++, Sym.ID.str(), 170 Sym.Scope + Sym.Name); 171 }); 172 } 173 }; 174 175 class Lookup : public Command { 176 llvm::cl::opt<std::string> ID{ 177 "id", 178 llvm::cl::Positional, 179 llvm::cl::desc("Symbol ID to look up (hex)"), 180 }; 181 llvm::cl::opt<std::string> Name{ 182 "name", 183 llvm::cl::desc("Qualified name to look up."), 184 }; 185 186 void run() override { 187 if (ID.getNumOccurrences() == 0 && Name.getNumOccurrences() == 0) { 188 llvm::errs() 189 << "Missing required argument: please provide id or -name.\n"; 190 return; 191 } 192 std::vector<SymbolID> IDs; 193 if (ID.getNumOccurrences()) { 194 auto SID = SymbolID::fromStr(ID); 195 if (!SID) { 196 llvm::errs() << llvm::toString(SID.takeError()) << "\n"; 197 return; 198 } 199 IDs.push_back(*SID); 200 } else { 201 IDs = getSymbolIDsFromIndex(Name, Index); 202 } 203 204 LookupRequest Request; 205 Request.IDs.insert(IDs.begin(), IDs.end()); 206 bool FoundSymbol = false; 207 Index->lookup(Request, [&](const Symbol &Sym) { 208 FoundSymbol = true; 209 llvm::outs() << toYAML(Sym); 210 }); 211 if (!FoundSymbol) 212 llvm::errs() << "not found\n"; 213 } 214 }; 215 216 class Refs : public Command { 217 llvm::cl::opt<std::string> ID{ 218 "id", 219 llvm::cl::Positional, 220 llvm::cl::desc("Symbol ID of the symbol being queried (hex)."), 221 }; 222 llvm::cl::opt<std::string> Name{ 223 "name", 224 llvm::cl::desc("Qualified name of the symbol being queried."), 225 }; 226 llvm::cl::opt<std::string> Filter{ 227 "filter", 228 llvm::cl::init(".*"), 229 llvm::cl::desc( 230 "Print all results from files matching this regular expression."), 231 }; 232 233 void run() override { 234 if (ID.getNumOccurrences() == 0 && Name.getNumOccurrences() == 0) { 235 llvm::errs() 236 << "Missing required argument: please provide id or -name.\n"; 237 return; 238 } 239 std::vector<SymbolID> IDs; 240 if (ID.getNumOccurrences()) { 241 auto SID = SymbolID::fromStr(ID); 242 if (!SID) { 243 llvm::errs() << llvm::toString(SID.takeError()) << "\n"; 244 return; 245 } 246 IDs.push_back(*SID); 247 } else { 248 IDs = getSymbolIDsFromIndex(Name, Index); 249 if (IDs.size() > 1) { 250 llvm::errs() << llvm::formatv( 251 "The name {0} is ambiguous, found {1} different " 252 "symbols. Please use id flag to disambiguate.\n", 253 Name, IDs.size()); 254 return; 255 } 256 } 257 RefsRequest RefRequest; 258 RefRequest.IDs.insert(IDs.begin(), IDs.end()); 259 llvm::Regex RegexFilter(Filter); 260 Index->refs(RefRequest, [&RegexFilter](const Ref &R) { 261 auto U = URI::parse(R.Location.FileURI); 262 if (!U) { 263 llvm::errs() << U.takeError(); 264 return; 265 } 266 if (RegexFilter.match(U->body())) 267 llvm::outs() << R << "\n"; 268 }); 269 } 270 }; 271 272 class Relations : public Command { 273 llvm::cl::opt<std::string> ID{ 274 "id", 275 llvm::cl::Positional, 276 llvm::cl::desc("Symbol ID of the symbol being queried (hex)."), 277 }; 278 llvm::cl::opt<RelationKind> Relation{ 279 "relation", 280 llvm::cl::desc("Relation kind for the predicate."), 281 values(clEnumValN(RelationKind::BaseOf, "base_of", 282 "Find subclasses of a class."), 283 clEnumValN(RelationKind::OverriddenBy, "overridden_by", 284 "Find methods that overrides a virtual method.")), 285 }; 286 287 void run() override { 288 if (ID.getNumOccurrences() == 0 || Relation.getNumOccurrences() == 0) { 289 llvm::errs() 290 << "Missing required argument: please provide id and -relation.\n"; 291 return; 292 } 293 RelationsRequest Req; 294 if (ID.getNumOccurrences()) { 295 auto SID = SymbolID::fromStr(ID); 296 if (!SID) { 297 llvm::errs() << llvm::toString(SID.takeError()) << "\n"; 298 return; 299 } 300 Req.Subjects.insert(*SID); 301 } 302 Req.Predicate = Relation.getValue(); 303 Index->relations(Req, [](const SymbolID &SID, const Symbol &S) { 304 llvm::outs() << toYAML(S); 305 }); 306 } 307 }; 308 309 class Export : public Command { 310 llvm::cl::opt<IndexFileFormat> Format{ 311 "format", 312 llvm::cl::desc("Format of index export"), 313 llvm::cl::values( 314 clEnumValN(IndexFileFormat::YAML, "yaml", 315 "human-readable YAML format"), 316 clEnumValN(IndexFileFormat::RIFF, "binary", "binary RIFF format")), 317 llvm::cl::init(IndexFileFormat::YAML), 318 }; 319 llvm::cl::opt<std::string> OutputFile{ 320 "output-file", 321 llvm::cl::Positional, 322 llvm::cl::Required, 323 llvm::cl::desc("Output file for export"), 324 }; 325 326 public: 327 void run() override { 328 using namespace clang::clangd; 329 // Read input file (as specified in global option) 330 auto Buffer = llvm::MemoryBuffer::getFile(IndexLocation); 331 if (!Buffer) { 332 llvm::errs() << llvm::formatv("Can't open {0}", IndexLocation) << "\n"; 333 return; 334 } 335 336 // Auto-detects input format when parsing 337 auto IndexIn = clang::clangd::readIndexFile(Buffer->get()->getBuffer(), 338 SymbolOrigin::Static); 339 if (!IndexIn) { 340 llvm::errs() << llvm::toString(IndexIn.takeError()) << "\n"; 341 return; 342 } 343 344 // Prepare output file 345 std::error_code EC; 346 llvm::raw_fd_ostream OutputStream(OutputFile, EC); 347 if (EC) { 348 llvm::errs() << llvm::formatv("Can't open {0} for writing", OutputFile) 349 << "\n"; 350 return; 351 } 352 353 // Export 354 clang::clangd::IndexFileOut IndexOut(IndexIn.get()); 355 IndexOut.Format = Format; 356 OutputStream << IndexOut; 357 } 358 }; 359 360 struct { 361 const char *Name; 362 const char *Description; 363 std::function<std::unique_ptr<Command>()> Implementation; 364 } CommandInfo[] = { 365 {"find", "Search for symbols with fuzzyFind", std::make_unique<FuzzyFind>}, 366 {"lookup", "Dump symbol details by ID or qualified name", 367 std::make_unique<Lookup>}, 368 {"refs", "Find references by ID or qualified name", std::make_unique<Refs>}, 369 {"relations", "Find relations by ID and relation kind", 370 std::make_unique<Relations>}, 371 {"export", "Export index", std::make_unique<Export>}, 372 }; 373 374 std::unique_ptr<SymbolIndex> openIndex(llvm::StringRef Index) { 375 return Index.starts_with("remote:") 376 ? remote::getClient(Index.drop_front(strlen("remote:")), 377 ProjectRoot) 378 : loadIndex(Index, SymbolOrigin::Static, /*UseDex=*/true, 379 /*SupportContainedRefs=*/true); 380 } 381 382 bool runCommand(std::string Request, const SymbolIndex &Index) { 383 // Split on spaces and add required null-termination. 384 std::replace(Request.begin(), Request.end(), ' ', '\0'); 385 llvm::SmallVector<llvm::StringRef> Args; 386 llvm::StringRef(Request).split(Args, '\0', /*MaxSplit=*/-1, 387 /*KeepEmpty=*/false); 388 if (Args.empty()) 389 return false; 390 if (Args.front() == "help") { 391 llvm::outs() << "dexp - Index explorer\nCommands:\n"; 392 for (const auto &C : CommandInfo) 393 llvm::outs() << llvm::formatv("{0,16} - {1}\n", C.Name, C.Description); 394 llvm::outs() << "Get detailed command help with e.g. `find -help`.\n"; 395 return true; 396 } 397 llvm::SmallVector<const char *> FakeArgv; 398 for (llvm::StringRef S : Args) 399 FakeArgv.push_back(S.data()); // Terminated by separator or end of string. 400 401 for (const auto &Cmd : CommandInfo) { 402 if (Cmd.Name == Args.front()) 403 return Cmd.Implementation()->parseAndRun(FakeArgv, Cmd.Description, 404 Index); 405 } 406 llvm::errs() << "Unknown command. Try 'help'.\n"; 407 return false; 408 } 409 410 } // namespace 411 } // namespace clangd 412 } // namespace clang 413 414 int main(int argc, const char *argv[]) { 415 using namespace clang::clangd; 416 417 llvm::cl::ParseCommandLineOptions(argc, argv, Overview); 418 419 // Preserve global options when flag parser is reset, so commands can use 420 // them. 421 IndexLocation.setValue(IndexLocation, /*initial=*/true); 422 ExecCommand.setValue(ExecCommand, /*initial=*/true); 423 ProjectRoot.setValue(ProjectRoot, /*initial=*/true); 424 425 llvm::cl::ResetCommandLineParser(); // We reuse it for REPL commands. 426 llvm::sys::PrintStackTraceOnErrorSignal(argv[0]); 427 428 bool RemoteMode = llvm::StringRef(IndexLocation).starts_with("remote:"); 429 if (RemoteMode && ProjectRoot.empty()) { 430 llvm::errs() << "--project-root is required in remote mode\n"; 431 return -1; 432 } 433 434 std::unique_ptr<SymbolIndex> Index; 435 reportTime(RemoteMode ? "Remote index client creation" : "Dex build", 436 [&]() { Index = openIndex(IndexLocation); }); 437 438 if (!Index) { 439 llvm::errs() << "Failed to open the index.\n"; 440 return -1; 441 } 442 443 if (!ExecCommand.empty()) 444 return runCommand(ExecCommand, *Index) ? 0 : 1; 445 446 llvm::LineEditor LE("dexp"); 447 while (std::optional<std::string> Request = LE.readLine()) 448 runCommand(std::move(*Request), *Index); 449 } 450