xref: /llvm-project/clang-tools-extra/clangd/index/dex/Token.h (revision e280b97fd7e1b555856367da1fac44b2530fd296)
1 //===--- Token.h - Symbol Search primitive ----------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// Token objects represent a characteristic of a symbol, which can be used to
11 /// perform efficient search. Tokens are keys for inverted index which are
12 /// mapped to the corresponding posting lists.
13 ///
14 /// The symbol std::cout might have the tokens:
15 /// * Scope "std::"
16 /// * Trigram "cou"
17 /// * Trigram "out"
18 /// * Type "std::ostream"
19 ///
20 //===----------------------------------------------------------------------===//
21 
22 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_TOKEN_H
23 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_TOKEN_H
24 
25 #include "llvm/ADT/Hashing.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include <string>
28 #include <vector>
29 
30 namespace clang {
31 namespace clangd {
32 namespace dex {
33 
34 /// A Token represents an attribute of a symbol, such as a particular trigram
35 /// present in the name (used for fuzzy search).
36 ///
37 /// Tokens can be used to perform more sophisticated search queries by
38 /// constructing complex iterator trees.
39 class Token {
40 public:
41   /// Kind specifies Token type which defines semantics for the internal
42   /// representation. Each Kind has different representation stored in Data
43   /// field.
44   // FIXME(kbobyrev): Storing Data hash would be more efficient than storing raw
45   // strings. For example, PathURI store URIs of each directory and its parents,
46   // which induces a lot of overhead because these paths tend to be long and
47   // each parent directory is a prefix.
48   enum class Kind {
49     /// Represents trigram used for fuzzy search of unqualified symbol names.
50     ///
51     /// Data contains 3 bytes with trigram contents.
52     Trigram,
53     /// Scope primitives, e.g. "symbol belongs to namespace foo::bar".
54     ///
55     /// Data stroes full scope name, e.g. "foo::bar::baz::" or "" (for global
56     /// scope).
57     Scope,
58     /// Path Proximity URI to symbol declaration.
59     ///
60     /// Data stores path URI of symbol declaration file or its parent.
61     ///
62     /// Example: "file:///path/to/clang-tools-extra/clangd/index/SymbolIndex.h"
63     /// and some amount of its parents.
64     ProximityURI,
65     /// Type of symbol (see `Symbol::Type`).
66     Type,
67     /// Internal Token type for invalid/special tokens, e.g. empty tokens for
68     /// llvm::DenseMap.
69     Sentinel,
70   };
71 
Token(Kind TokenKind,llvm::StringRef Data)72   Token(Kind TokenKind, llvm::StringRef Data)
73       : Data(Data), TokenKind(TokenKind) {}
74 
75   bool operator==(const Token &Other) const {
76     return TokenKind == Other.TokenKind && Data == Other.Data;
77   }
78 
79   friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Token &T) {
80     switch (T.TokenKind) {
81     case Kind::Trigram:
82       OS << "T=";
83       break;
84     case Kind::Scope:
85       OS << "S=";
86       break;
87     case Kind::ProximityURI:
88       OS << "U=";
89       break;
90     case Kind::Type:
91       OS << "Ty=";
92       break;
93     case Kind::Sentinel:
94       OS << "?=";
95       break;
96     }
97     return OS << T.Data;
98   }
99 
100 private:
101   /// Representation which is unique among Token with the same Kind.
102   std::string Data;
103   Kind TokenKind;
104 
hash_value(const Token & Token)105   friend llvm::hash_code hash_value(const Token &Token) {
106     return llvm::hash_combine(static_cast<int>(Token.TokenKind), Token.Data);
107   }
108 };
109 
110 } // namespace dex
111 } // namespace clangd
112 } // namespace clang
113 
114 namespace llvm {
115 
116 // Support Tokens as DenseMap keys.
117 template <> struct DenseMapInfo<clang::clangd::dex::Token> {
118   static inline clang::clangd::dex::Token getEmptyKey() {
119     return {clang::clangd::dex::Token::Kind::Sentinel, "EmptyKey"};
120   }
121 
122   static inline clang::clangd::dex::Token getTombstoneKey() {
123     return {clang::clangd::dex::Token::Kind::Sentinel, "TombstoneKey"};
124   }
125 
126   static unsigned getHashValue(const clang::clangd::dex::Token &Tag) {
127     return hash_value(Tag);
128   }
129 
130   static bool isEqual(const clang::clangd::dex::Token &LHS,
131                       const clang::clangd::dex::Token &RHS) {
132     return LHS == RHS;
133   }
134 };
135 
136 } // namespace llvm
137 
138 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_TOKEN_H
139