143356f56SNico Weber //===--- FuzzySymbolIndex.cpp - Lookup symbols for autocomplete -*- C++ -*-===//
243356f56SNico Weber //
343356f56SNico Weber // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
443356f56SNico Weber // See https://llvm.org/LICENSE.txt for license information.
543356f56SNico Weber // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
643356f56SNico Weber //
743356f56SNico Weber //===----------------------------------------------------------------------===//
843356f56SNico Weber #include "FuzzySymbolIndex.h"
943356f56SNico Weber #include "llvm/Support/Regex.h"
1043356f56SNico Weber
1143356f56SNico Weber using clang::find_all_symbols::SymbolAndSignals;
1243356f56SNico Weber using llvm::StringRef;
1343356f56SNico Weber
1443356f56SNico Weber namespace clang {
1543356f56SNico Weber namespace include_fixer {
1643356f56SNico Weber namespace {
1743356f56SNico Weber
1843356f56SNico Weber class MemSymbolIndex : public FuzzySymbolIndex {
1943356f56SNico Weber public:
MemSymbolIndex(std::vector<SymbolAndSignals> Symbols)2043356f56SNico Weber MemSymbolIndex(std::vector<SymbolAndSignals> Symbols) {
2143356f56SNico Weber for (auto &Symbol : Symbols) {
2243356f56SNico Weber auto Tokens = tokenize(Symbol.Symbol.getName());
2343356f56SNico Weber this->Symbols.emplace_back(
2443356f56SNico Weber StringRef(llvm::join(Tokens.begin(), Tokens.end(), " ")),
2543356f56SNico Weber std::move(Symbol));
2643356f56SNico Weber }
2743356f56SNico Weber }
2843356f56SNico Weber
search(StringRef Query)2943356f56SNico Weber std::vector<SymbolAndSignals> search(StringRef Query) override {
3043356f56SNico Weber auto Tokens = tokenize(Query);
3143356f56SNico Weber llvm::Regex Pattern("^" + queryRegexp(Tokens));
3243356f56SNico Weber std::vector<SymbolAndSignals> Results;
3343356f56SNico Weber for (const Entry &E : Symbols)
3443356f56SNico Weber if (Pattern.match(E.first))
3543356f56SNico Weber Results.push_back(E.second);
3643356f56SNico Weber return Results;
3743356f56SNico Weber }
3843356f56SNico Weber
3943356f56SNico Weber private:
4043356f56SNico Weber using Entry = std::pair<llvm::SmallString<32>, SymbolAndSignals>;
4143356f56SNico Weber std::vector<Entry> Symbols;
4243356f56SNico Weber };
4343356f56SNico Weber
4443356f56SNico Weber // Helpers for tokenize state machine.
4543356f56SNico Weber enum TokenizeState {
4643356f56SNico Weber EMPTY, // No pending characters.
4743356f56SNico Weber ONE_BIG, // Read one uppercase letter, could be WORD or Word.
4843356f56SNico Weber BIG_WORD, // Reading an uppercase WORD.
4943356f56SNico Weber SMALL_WORD, // Reading a lowercase word.
5043356f56SNico Weber NUMBER // Reading a number.
5143356f56SNico Weber };
5243356f56SNico Weber
5343356f56SNico Weber enum CharType { UPPER, LOWER, DIGIT, MISC };
classify(char c)5443356f56SNico Weber CharType classify(char c) {
5543356f56SNico Weber if (isupper(c))
5643356f56SNico Weber return UPPER;
5743356f56SNico Weber if (islower(c))
5843356f56SNico Weber return LOWER;
5943356f56SNico Weber if (isdigit(c))
6043356f56SNico Weber return DIGIT;
6143356f56SNico Weber return MISC;
6243356f56SNico Weber }
6343356f56SNico Weber
6443356f56SNico Weber } // namespace
6543356f56SNico Weber
tokenize(StringRef Text)6643356f56SNico Weber std::vector<std::string> FuzzySymbolIndex::tokenize(StringRef Text) {
6743356f56SNico Weber std::vector<std::string> Result;
6843356f56SNico Weber // State describes the treatment of text from Start to I.
6943356f56SNico Weber // Once text is Flush()ed into Result, we're done with it and advance Start.
7043356f56SNico Weber TokenizeState State = EMPTY;
7143356f56SNico Weber size_t Start = 0;
7243356f56SNico Weber auto Flush = [&](size_t End) {
7343356f56SNico Weber if (State != EMPTY) {
7443356f56SNico Weber Result.push_back(Text.substr(Start, End - Start).lower());
7543356f56SNico Weber State = EMPTY;
7643356f56SNico Weber }
7743356f56SNico Weber Start = End;
7843356f56SNico Weber };
7943356f56SNico Weber for (size_t I = 0; I < Text.size(); ++I) {
8043356f56SNico Weber CharType Type = classify(Text[I]);
8143356f56SNico Weber if (Type == MISC)
8243356f56SNico Weber Flush(I);
8343356f56SNico Weber else if (Type == LOWER)
8443356f56SNico Weber switch (State) {
8543356f56SNico Weber case BIG_WORD:
8643356f56SNico Weber Flush(I - 1); // FOOBar: first token is FOO, not FOOB.
87*0972a390SFangrui Song [[fallthrough]];
8843356f56SNico Weber case ONE_BIG:
8943356f56SNico Weber State = SMALL_WORD;
90*0972a390SFangrui Song [[fallthrough]];
9143356f56SNico Weber case SMALL_WORD:
9243356f56SNico Weber break;
9343356f56SNico Weber default:
9443356f56SNico Weber Flush(I);
9543356f56SNico Weber State = SMALL_WORD;
9643356f56SNico Weber }
9743356f56SNico Weber else if (Type == UPPER)
9843356f56SNico Weber switch (State) {
9943356f56SNico Weber case ONE_BIG:
10043356f56SNico Weber State = BIG_WORD;
101*0972a390SFangrui Song [[fallthrough]];
10243356f56SNico Weber case BIG_WORD:
10343356f56SNico Weber break;
10443356f56SNico Weber default:
10543356f56SNico Weber Flush(I);
10643356f56SNico Weber State = ONE_BIG;
10743356f56SNico Weber }
10843356f56SNico Weber else if (Type == DIGIT && State != NUMBER) {
10943356f56SNico Weber Flush(I);
11043356f56SNico Weber State = NUMBER;
11143356f56SNico Weber }
11243356f56SNico Weber }
11343356f56SNico Weber Flush(Text.size());
11443356f56SNico Weber return Result;
11543356f56SNico Weber }
11643356f56SNico Weber
11743356f56SNico Weber std::string
queryRegexp(const std::vector<std::string> & Tokens)11843356f56SNico Weber FuzzySymbolIndex::queryRegexp(const std::vector<std::string> &Tokens) {
11943356f56SNico Weber std::string Result;
12043356f56SNico Weber for (size_t I = 0; I < Tokens.size(); ++I) {
12143356f56SNico Weber if (I)
12243356f56SNico Weber Result.append("[[:alnum:]]* ");
12343356f56SNico Weber for (size_t J = 0; J < Tokens[I].size(); ++J) {
12443356f56SNico Weber if (J)
12543356f56SNico Weber Result.append("([[:alnum:]]* )?");
12643356f56SNico Weber Result.push_back(Tokens[I][J]);
12743356f56SNico Weber }
12843356f56SNico Weber }
12943356f56SNico Weber return Result;
13043356f56SNico Weber }
13143356f56SNico Weber
13243356f56SNico Weber llvm::Expected<std::unique_ptr<FuzzySymbolIndex>>
createFromYAML(StringRef FilePath)13343356f56SNico Weber FuzzySymbolIndex::createFromYAML(StringRef FilePath) {
13443356f56SNico Weber auto Buffer = llvm::MemoryBuffer::getFile(FilePath);
13543356f56SNico Weber if (!Buffer)
13643356f56SNico Weber return llvm::errorCodeToError(Buffer.getError());
1371c705d9cSJonas Devlieghere return std::make_unique<MemSymbolIndex>(
13843356f56SNico Weber find_all_symbols::ReadSymbolInfosFromYAML(Buffer.get()->getBuffer()));
13943356f56SNico Weber }
14043356f56SNico Weber
14143356f56SNico Weber } // namespace include_fixer
14243356f56SNico Weber } // namespace clang
143