xref: /llvm-project/clang-tools-extra/clang-include-fixer/FuzzySymbolIndex.h (revision 43356f56bd2ede05c70db537e3f8b50a31444487)
1*43356f56SNico Weber //===--- FuzzySymbolIndex.h - Lookup symbols for autocomplete ---*- C++ -*-===//
2*43356f56SNico Weber //
3*43356f56SNico Weber // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*43356f56SNico Weber // See https://llvm.org/LICENSE.txt for license information.
5*43356f56SNico Weber // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*43356f56SNico Weber //
7*43356f56SNico Weber //===----------------------------------------------------------------------===//
8*43356f56SNico Weber 
9*43356f56SNico Weber #ifndef LLVM_CLANG_TOOLS_EXTRA_INCLUDE_FIXER_FUZZY_SYMBOL_INDEX_H
10*43356f56SNico Weber #define LLVM_CLANG_TOOLS_EXTRA_INCLUDE_FIXER_FUZZY_SYMBOL_INDEX_H
11*43356f56SNico Weber 
#include "SymbolIndex.h"
#include "find-all-symbols/SymbolInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
#include <memory>
#include <string>
#include <vector>
19*43356f56SNico Weber 
20*43356f56SNico Weber namespace clang {
21*43356f56SNico Weber namespace include_fixer {
22*43356f56SNico Weber 
23*43356f56SNico Weber // A FuzzySymbolIndex retrieves top-level symbols matching a query string.
24*43356f56SNico Weber //
25*43356f56SNico Weber // It refines the contract of SymbolIndex::search to do fuzzy matching:
26*43356f56SNico Weber // - symbol names are tokenized: "unique ptr", "string ref".
27*43356f56SNico Weber // - query must match prefixes of symbol tokens: [upt]
28*43356f56SNico Weber // - if the query has multiple tokens, splits must match: [StR], not [STr].
29*43356f56SNico Weber // Helpers for tokenization and regex matching are provided.
30*43356f56SNico Weber //
31*43356f56SNico Weber // Implementations may choose to truncate results, refuse short queries, etc.
32*43356f56SNico Weber class FuzzySymbolIndex : public SymbolIndex {
33*43356f56SNico Weber public:
34*43356f56SNico Weber   // Loads the specified clang-include-fixer database and returns an index serving it.
35*43356f56SNico Weber   static llvm::Expected<std::unique_ptr<FuzzySymbolIndex>>
36*43356f56SNico Weber   createFromYAML(llvm::StringRef File);
37*43356f56SNico Weber 
38*43356f56SNico Weber   // Helpers for implementing indexes:
39*43356f56SNico Weber 
40*43356f56SNico Weber   // Transforms a symbol name or query into a sequence of tokens.
41*43356f56SNico Weber   // - URLHandlerCallback --> [url, handler, callback]
42*43356f56SNico Weber   // - snake_case11 --> [snake, case, 11]
43*43356f56SNico Weber   // - _WTF$ --> [wtf]
44*43356f56SNico Weber   static std::vector<std::string> tokenize(llvm::StringRef Text);
45*43356f56SNico Weber 
46*43356f56SNico Weber   // Transforms query tokens into an unanchored regexp to match symbol tokens.
47*43356f56SNico Weber   // - [fe f] --> /f(\w* )?e\w* f/, matches [fee fie foe].
48*43356f56SNico Weber   static std::string queryRegexp(const std::vector<std::string> &Tokens);
49*43356f56SNico Weber };
50*43356f56SNico Weber 
51*43356f56SNico Weber } // namespace include_fixer
52*43356f56SNico Weber } // namespace clang
53*43356f56SNico Weber 
54*43356f56SNico Weber #endif // LLVM_CLANG_TOOLS_EXTRA_INCLUDE_FIXER_FUZZY_SYMBOL_INDEX_H
55