xref: /llvm-project/clang-tools-extra/clangd/index/Symbol.h (revision 0659fd996784cbc2b11379380a03633fa80f7816)
1 //===--- Symbol.h ------------------------------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_H
10 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_H
11 
12 #include "index/SymbolID.h"
13 #include "index/SymbolLocation.h"
14 #include "index/SymbolOrigin.h"
15 #include "clang/Index/IndexSymbol.h"
16 #include "llvm/ADT/BitmaskEnum.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/Support/StringSaver.h"
19 
20 namespace clang {
21 namespace clangd {
22 
// Brings LLVM's bitmask-enum operators into this namespace so that enums
// tagged with LLVM_MARK_AS_BITMASK_ENUM (see Symbol::IncludeDirective) can be
// combined with bitwise operators.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
24 
25 /// The class presents a C++ symbol, e.g. class, function.
26 ///
27 /// WARNING: Symbols do not own much of their underlying data - typically
28 /// strings are owned by a SymbolSlab. They should be treated as non-owning
29 /// references. Copies are shallow.
30 ///
31 /// When adding new unowned data fields to Symbol, remember to update:
32 ///   - SymbolSlab::Builder in Index.cpp, to copy them to the slab's storage.
33 ///   - mergeSymbol in Merge.cpp, to properly combine two Symbols.
34 ///
35 /// A fully documented symbol can be split as:
36 /// size_type std::map<k, t>::count(const K& key) const
37 /// | Return  |     Scope     |Name|    Signature     |
38 /// We split up these components to allow display flexibility later.
39 struct Symbol {
40   /// The ID of the symbol.
41   SymbolID ID;
42   /// The symbol information, like symbol kind.
43   index::SymbolInfo SymInfo = index::SymbolInfo();
44   /// The unqualified name of the symbol, e.g. "bar" (for ns::bar).
45   llvm::StringRef Name;
46   /// The containing namespace. e.g. "" (global), "ns::" (top-level namespace).
47   llvm::StringRef Scope;
48   /// The location of the symbol's definition, if one was found.
49   /// This just covers the symbol name (e.g. without class/function body).
50   SymbolLocation Definition;
51   /// The location of the preferred declaration of the symbol.
52   /// This just covers the symbol name.
53   /// This may be the same as Definition.
54   ///
55   /// A C++ symbol may have multiple declarations, and we pick one to prefer.
56   ///   * For classes, the canonical declaration should be the definition.
57   ///   * For non-inline functions, the canonical declaration typically appears
58   ///     in the ".h" file corresponding to the definition.
59   SymbolLocation CanonicalDeclaration;
60   /// The number of translation units that reference this symbol from their main
61   /// file. This number is only meaningful if aggregated in an index.
62   unsigned References = 0;
63   /// Where this symbol came from. Usually an index provides a constant value.
64   SymbolOrigin Origin = SymbolOrigin::Unknown;
65   /// A brief description of the symbol that can be appended in the completion
66   /// candidate list. For example, "(X x, Y y) const" is a function signature.
67   /// Only set when the symbol is indexed for completion.
68   llvm::StringRef Signature;
69   /// Argument list in human-readable format, will be displayed to help
70   /// disambiguate between different specializations of a template. Empty for
71   /// non-specializations. Example: "<int, bool, 3>"
72   llvm::StringRef TemplateSpecializationArgs;
73   /// What to insert when completing this symbol, after the symbol name.
74   /// This is in LSP snippet syntax (e.g. "({$0})" for a no-args function).
75   /// (When snippets are disabled, the symbol name alone is used).
76   /// Only set when the symbol is indexed for completion.
77   llvm::StringRef CompletionSnippetSuffix;
78   /// Documentation including comment for the symbol declaration.
79   llvm::StringRef Documentation;
80   /// Type when this symbol is used in an expression. (Short display form).
81   /// e.g. return type of a function, or type of a variable.
82   /// Only set when the symbol is indexed for completion.
83   llvm::StringRef ReturnType;
84 
85   /// Raw representation of the OpaqueType of the symbol, used for scoring
86   /// purposes.
87   /// Only set when the symbol is indexed for completion.
88   llvm::StringRef Type;
89 
90   enum IncludeDirective : uint8_t {
91     Invalid = 0,
92     /// `#include "header.h"`
93     Include = 1,
94     /// `#import "header.h"`
95     Import = 2,
96 
97     LLVM_MARK_AS_BITMASK_ENUM(Import)
98   };
99 
100   struct IncludeHeaderWithReferences {
101     IncludeHeaderWithReferences() = default;
102 
103     IncludeHeaderWithReferences(llvm::StringRef IncludeHeader,
104                                 uint32_t References,
105                                 IncludeDirective SupportedDirectives)
106         : IncludeHeader(IncludeHeader), References(References),
107           SupportedDirectives(SupportedDirectives) {}
108 
109     /// This can be either a URI of the header to be #include'd
110     /// for this symbol, or a literal header quoted with <> or "" that is
111     /// suitable to be included directly. When it is a URI, the exact #include
112     /// path needs to be calculated according to the URI scheme.
113     ///
114     /// Note that the include header is a canonical include for the symbol and
115     /// can be different from FileURI in the CanonicalDeclaration.
116     llvm::StringRef IncludeHeader = "";
117     /// The number of translation units that reference this symbol and include
118     /// this header. This number is only meaningful if aggregated in an index.
119     uint32_t References : 30;
120     /// Bitfield of supported directives (IncludeDirective) that can be used
121     /// when including this header.
122     uint32_t SupportedDirectives : 2;
123 
124     IncludeDirective supportedDirectives() const {
125       return static_cast<IncludeDirective>(SupportedDirectives);
126     }
127   };
128   /// One Symbol can potentially be included via different headers.
129   ///   - If we haven't seen a definition, this covers all declarations.
130   ///   - If we have seen a definition, this covers declarations visible from
131   ///   any definition.
132   /// Only set when the symbol is indexed for completion.
133   llvm::SmallVector<IncludeHeaderWithReferences, 1> IncludeHeaders;
134 
135   enum SymbolFlag : uint8_t {
136     None = 0,
137     /// Whether or not this symbol is meant to be used for the code completion.
138     /// See also isIndexedForCodeCompletion().
139     /// Note that we don't store completion information (signature, snippet,
140     /// type, includes) if the symbol is not indexed for code completion.
141     IndexedForCodeCompletion = 1 << 0,
142     /// Indicates if the symbol is deprecated.
143     Deprecated = 1 << 1,
144     /// Symbol is an implementation detail.
145     ImplementationDetail = 1 << 2,
146     /// Symbol is visible to other files (not e.g. a static helper function).
147     VisibleOutsideFile = 1 << 3,
148     /// Symbol has an attached documentation comment.
149     HasDocComment = 1 << 4
150   };
151   SymbolFlag Flags = SymbolFlag::None;
152 
153   /// FIXME: also add deprecation message and fixit?
154 };
155 
156 inline Symbol::SymbolFlag operator|(Symbol::SymbolFlag A,
157                                     Symbol::SymbolFlag B) {
158   return static_cast<Symbol::SymbolFlag>(static_cast<uint8_t>(A) |
159                                          static_cast<uint8_t>(B));
160 }
161 inline Symbol::SymbolFlag &operator|=(Symbol::SymbolFlag &A,
162                                       Symbol::SymbolFlag B) {
163   return A = A | B;
164 }
165 
/// Writes a Symbol to OS and returns OS. Only declared here; the output
/// format is determined by the out-of-line definition.
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S);
/// Writes a SymbolFlag value to OS and returns OS. Only declared here; the
/// output format is determined by the out-of-line definition.
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, Symbol::SymbolFlag);
168 
169 /// Invokes Callback with each StringRef& contained in the Symbol.
170 /// Useful for deduplicating backing strings.
171 template <typename Callback> void visitStrings(Symbol &S, const Callback &CB) {
172   CB(S.Name);
173   CB(S.Scope);
174   CB(S.TemplateSpecializationArgs);
175   CB(S.Signature);
176   CB(S.CompletionSnippetSuffix);
177   CB(S.Documentation);
178   CB(S.ReturnType);
179   CB(S.Type);
180   auto RawCharPointerCB = [&CB](const char *&P) {
181     llvm::StringRef S(P);
182     CB(S);
183     assert(!S.data()[S.size()] && "Visited StringRef must be null-terminated");
184     P = S.data();
185   };
186   RawCharPointerCB(S.CanonicalDeclaration.FileURI);
187   RawCharPointerCB(S.Definition.FileURI);
188 
189   for (auto &Include : S.IncludeHeaders)
190     CB(Include.IncludeHeader);
191 }
192 
/// Computes query-independent quality score for a Symbol.
/// This currently falls in the range [1, ln(#indexed documents)].
/// The score depends only on the symbol \p S itself; the computation is
/// defined out of line.
/// FIXME: this should probably be split into symbol -> signals
///        and signals -> score, so it can be reused for Sema completions.
float quality(const Symbol &S);
198 
199 /// An immutable symbol container that stores a set of symbols.
200 /// The container will maintain the lifetime of the symbols.
201 class SymbolSlab {
202 public:
203   using const_iterator = std::vector<Symbol>::const_iterator;
204   using iterator = const_iterator;
205   using value_type = Symbol;
206 
207   SymbolSlab() = default;
208 
209   const_iterator begin() const { return Symbols.begin(); }
210   const_iterator end() const { return Symbols.end(); }
211   const_iterator find(const SymbolID &SymID) const;
212 
213   using size_type = size_t;
214   size_type size() const { return Symbols.size(); }
215   bool empty() const { return Symbols.empty(); }
216   // Estimates the total memory usage.
217   size_t bytes() const {
218     return sizeof(*this) + Arena.getTotalMemory() +
219            Symbols.capacity() * sizeof(Symbol);
220   }
221 
222   /// SymbolSlab::Builder is a mutable container that can 'freeze' to
223   /// SymbolSlab. The frozen SymbolSlab will use less memory.
224   class Builder {
225   public:
226     Builder() : UniqueStrings(Arena) {}
227 
228     /// Adds a symbol, overwriting any existing one with the same ID.
229     /// This is a deep copy: underlying strings will be owned by the slab.
230     void insert(const Symbol &S);
231 
232     /// Removes the symbol with an ID, if it exists.
233     void erase(const SymbolID &ID) { Symbols.erase(ID); }
234 
235     /// Returns the symbol with an ID, if it exists. Valid until insert/remove.
236     const Symbol *find(const SymbolID &ID) {
237       auto I = Symbols.find(ID);
238       return I == Symbols.end() ? nullptr : &I->second;
239     }
240 
241     /// Consumes the builder to finalize the slab.
242     SymbolSlab build() &&;
243 
244   private:
245     llvm::BumpPtrAllocator Arena;
246     /// Intern table for strings. Contents are on the arena.
247     llvm::UniqueStringSaver UniqueStrings;
248     /// Values are indices into Symbols vector.
249     llvm::DenseMap<SymbolID, Symbol> Symbols;
250   };
251 
252 private:
253   SymbolSlab(llvm::BumpPtrAllocator Arena, std::vector<Symbol> Symbols)
254       : Arena(std::move(Arena)), Symbols(std::move(Symbols)) {}
255 
256   llvm::BumpPtrAllocator Arena; // Owns Symbol data that the Symbols do not.
257   std::vector<Symbol> Symbols;  // Sorted by SymbolID to allow lookup.
258 };
259 
/// Writes a SymbolSlab to OS and returns OS. Only declared here; the output
/// format is determined by the out-of-line definition.
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolSlab &Slab);
261 
262 } // namespace clangd
263 } // namespace clang
264 
265 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_H
266