1 //===--- Token.h - Symbol Search primitive ----------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// Token objects represent a characteristic of a symbol, which can be used to 11 /// perform efficient search. Tokens are keys for inverted index which are 12 /// mapped to the corresponding posting lists. 13 /// 14 /// The symbol std::cout might have the tokens: 15 /// * Scope "std::" 16 /// * Trigram "cou" 17 /// * Trigram "out" 18 /// * Type "std::ostream" 19 /// 20 //===----------------------------------------------------------------------===// 21 22 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_TOKEN_H 23 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_TOKEN_H 24 25 #include "llvm/ADT/Hashing.h" 26 #include "llvm/Support/raw_ostream.h" 27 #include <string> 28 #include <vector> 29 30 namespace clang { 31 namespace clangd { 32 namespace dex { 33 34 /// A Token represents an attribute of a symbol, such as a particular trigram 35 /// present in the name (used for fuzzy search). 36 /// 37 /// Tokens can be used to perform more sophisticated search queries by 38 /// constructing complex iterator trees. 39 class Token { 40 public: 41 /// Kind specifies Token type which defines semantics for the internal 42 /// representation. Each Kind has different representation stored in Data 43 /// field. 44 // FIXME(kbobyrev): Storing Data hash would be more efficient than storing raw 45 // strings. For example, PathURI store URIs of each directory and its parents, 46 // which induces a lot of overhead because these paths tend to be long and 47 // each parent directory is a prefix. 48 enum class Kind { 49 /// Represents trigram used for fuzzy search of unqualified symbol names. 50 /// 51 /// Data contains 3 bytes with trigram contents. 52 Trigram, 53 /// Scope primitives, e.g. "symbol belongs to namespace foo::bar". 54 /// 55 /// Data stroes full scope name, e.g. "foo::bar::baz::" or "" (for global 56 /// scope). 57 Scope, 58 /// Path Proximity URI to symbol declaration. 59 /// 60 /// Data stores path URI of symbol declaration file or its parent. 61 /// 62 /// Example: "file:///path/to/clang-tools-extra/clangd/index/SymbolIndex.h" 63 /// and some amount of its parents. 64 ProximityURI, 65 /// Type of symbol (see `Symbol::Type`). 66 Type, 67 /// Internal Token type for invalid/special tokens, e.g. empty tokens for 68 /// llvm::DenseMap. 69 Sentinel, 70 }; 71 Token(Kind TokenKind,llvm::StringRef Data)72 Token(Kind TokenKind, llvm::StringRef Data) 73 : Data(Data), TokenKind(TokenKind) {} 74 75 bool operator==(const Token &Other) const { 76 return TokenKind == Other.TokenKind && Data == Other.Data; 77 } 78 79 friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Token &T) { 80 switch (T.TokenKind) { 81 case Kind::Trigram: 82 OS << "T="; 83 break; 84 case Kind::Scope: 85 OS << "S="; 86 break; 87 case Kind::ProximityURI: 88 OS << "U="; 89 break; 90 case Kind::Type: 91 OS << "Ty="; 92 break; 93 case Kind::Sentinel: 94 OS << "?="; 95 break; 96 } 97 return OS << T.Data; 98 } 99 100 private: 101 /// Representation which is unique among Token with the same Kind. 102 std::string Data; 103 Kind TokenKind; 104 hash_value(const Token & Token)105 friend llvm::hash_code hash_value(const Token &Token) { 106 return llvm::hash_combine(static_cast<int>(Token.TokenKind), Token.Data); 107 } 108 }; 109 110 } // namespace dex 111 } // namespace clangd 112 } // namespace clang 113 114 namespace llvm { 115 116 // Support Tokens as DenseMap keys. 117 template <> struct DenseMapInfo<clang::clangd::dex::Token> { 118 static inline clang::clangd::dex::Token getEmptyKey() { 119 return {clang::clangd::dex::Token::Kind::Sentinel, "EmptyKey"}; 120 } 121 122 static inline clang::clangd::dex::Token getTombstoneKey() { 123 return {clang::clangd::dex::Token::Kind::Sentinel, "TombstoneKey"}; 124 } 125 126 static unsigned getHashValue(const clang::clangd::dex::Token &Tag) { 127 return hash_value(Tag); 128 } 129 130 static bool isEqual(const clang::clangd::dex::Token &LHS, 131 const clang::clangd::dex::Token &RHS) { 132 return LHS == RHS; 133 } 134 }; 135 136 } // namespace llvm 137 138 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_TOKEN_H 139