1 //===--- Symbol.h ------------------------------------------------*- C++-*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_H 10 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_H 11 12 #include "index/SymbolID.h" 13 #include "index/SymbolLocation.h" 14 #include "index/SymbolOrigin.h" 15 #include "clang/Index/IndexSymbol.h" 16 #include "llvm/ADT/BitmaskEnum.h" 17 #include "llvm/ADT/StringRef.h" 18 #include "llvm/Support/StringSaver.h" 19 20 namespace clang { 21 namespace clangd { 22 23 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 24 25 /// The class presents a C++ symbol, e.g. class, function. 26 /// 27 /// WARNING: Symbols do not own much of their underlying data - typically 28 /// strings are owned by a SymbolSlab. They should be treated as non-owning 29 /// references. Copies are shallow. 30 /// 31 /// When adding new unowned data fields to Symbol, remember to update: 32 /// - SymbolSlab::Builder in Index.cpp, to copy them to the slab's storage. 33 /// - mergeSymbol in Merge.cpp, to properly combine two Symbols. 34 /// 35 /// A fully documented symbol can be split as: 36 /// size_type std::map<k, t>::count(const K& key) const 37 /// | Return | Scope |Name| Signature | 38 /// We split up these components to allow display flexibility later. 39 struct Symbol { 40 /// The ID of the symbol. 41 SymbolID ID; 42 /// The symbol information, like symbol kind. 43 index::SymbolInfo SymInfo = index::SymbolInfo(); 44 /// The unqualified name of the symbol, e.g. "bar" (for ns::bar). 45 llvm::StringRef Name; 46 /// The containing namespace. e.g. "" (global), "ns::" (top-level namespace). 47 llvm::StringRef Scope; 48 /// The location of the symbol's definition, if one was found. 49 /// This just covers the symbol name (e.g. without class/function body). 50 SymbolLocation Definition; 51 /// The location of the preferred declaration of the symbol. 52 /// This just covers the symbol name. 53 /// This may be the same as Definition. 54 /// 55 /// A C++ symbol may have multiple declarations, and we pick one to prefer. 56 /// * For classes, the canonical declaration should be the definition. 57 /// * For non-inline functions, the canonical declaration typically appears 58 /// in the ".h" file corresponding to the definition. 59 SymbolLocation CanonicalDeclaration; 60 /// The number of translation units that reference this symbol from their main 61 /// file. This number is only meaningful if aggregated in an index. 62 unsigned References = 0; 63 /// Where this symbol came from. Usually an index provides a constant value. 64 SymbolOrigin Origin = SymbolOrigin::Unknown; 65 /// A brief description of the symbol that can be appended in the completion 66 /// candidate list. For example, "(X x, Y y) const" is a function signature. 67 /// Only set when the symbol is indexed for completion. 68 llvm::StringRef Signature; 69 /// Argument list in human-readable format, will be displayed to help 70 /// disambiguate between different specializations of a template. Empty for 71 /// non-specializations. Example: "<int, bool, 3>" 72 llvm::StringRef TemplateSpecializationArgs; 73 /// What to insert when completing this symbol, after the symbol name. 74 /// This is in LSP snippet syntax (e.g. "({$0})" for a no-args function). 75 /// (When snippets are disabled, the symbol name alone is used). 76 /// Only set when the symbol is indexed for completion. 77 llvm::StringRef CompletionSnippetSuffix; 78 /// Documentation including comment for the symbol declaration. 79 llvm::StringRef Documentation; 80 /// Type when this symbol is used in an expression. (Short display form). 81 /// e.g. return type of a function, or type of a variable. 82 /// Only set when the symbol is indexed for completion. 83 llvm::StringRef ReturnType; 84 85 /// Raw representation of the OpaqueType of the symbol, used for scoring 86 /// purposes. 87 /// Only set when the symbol is indexed for completion. 88 llvm::StringRef Type; 89 90 enum IncludeDirective : uint8_t { 91 Invalid = 0, 92 /// `#include "header.h"` 93 Include = 1, 94 /// `#import "header.h"` 95 Import = 2, 96 97 LLVM_MARK_AS_BITMASK_ENUM(Import) 98 }; 99 100 struct IncludeHeaderWithReferences { 101 IncludeHeaderWithReferences() = default; 102 103 IncludeHeaderWithReferences(llvm::StringRef IncludeHeader, 104 uint32_t References, 105 IncludeDirective SupportedDirectives) 106 : IncludeHeader(IncludeHeader), References(References), 107 SupportedDirectives(SupportedDirectives) {} 108 109 /// This can be either a URI of the header to be #include'd 110 /// for this symbol, or a literal header quoted with <> or "" that is 111 /// suitable to be included directly. When it is a URI, the exact #include 112 /// path needs to be calculated according to the URI scheme. 113 /// 114 /// Note that the include header is a canonical include for the symbol and 115 /// can be different from FileURI in the CanonicalDeclaration. 116 llvm::StringRef IncludeHeader = ""; 117 /// The number of translation units that reference this symbol and include 118 /// this header. This number is only meaningful if aggregated in an index. 119 uint32_t References : 30; 120 /// Bitfield of supported directives (IncludeDirective) that can be used 121 /// when including this header. 122 uint32_t SupportedDirectives : 2; 123 124 IncludeDirective supportedDirectives() const { 125 return static_cast<IncludeDirective>(SupportedDirectives); 126 } 127 }; 128 /// One Symbol can potentially be included via different headers. 129 /// - If we haven't seen a definition, this covers all declarations. 130 /// - If we have seen a definition, this covers declarations visible from 131 /// any definition. 132 /// Only set when the symbol is indexed for completion. 133 llvm::SmallVector<IncludeHeaderWithReferences, 1> IncludeHeaders; 134 135 enum SymbolFlag : uint8_t { 136 None = 0, 137 /// Whether or not this symbol is meant to be used for the code completion. 138 /// See also isIndexedForCodeCompletion(). 139 /// Note that we don't store completion information (signature, snippet, 140 /// type, includes) if the symbol is not indexed for code completion. 141 IndexedForCodeCompletion = 1 << 0, 142 /// Indicates if the symbol is deprecated. 143 Deprecated = 1 << 1, 144 /// Symbol is an implementation detail. 145 ImplementationDetail = 1 << 2, 146 /// Symbol is visible to other files (not e.g. a static helper function). 147 VisibleOutsideFile = 1 << 3, 148 /// Symbol has an attached documentation comment. 149 HasDocComment = 1 << 4 150 }; 151 SymbolFlag Flags = SymbolFlag::None; 152 153 /// FIXME: also add deprecation message and fixit? 154 }; 155 156 inline Symbol::SymbolFlag operator|(Symbol::SymbolFlag A, 157 Symbol::SymbolFlag B) { 158 return static_cast<Symbol::SymbolFlag>(static_cast<uint8_t>(A) | 159 static_cast<uint8_t>(B)); 160 } 161 inline Symbol::SymbolFlag &operator|=(Symbol::SymbolFlag &A, 162 Symbol::SymbolFlag B) { 163 return A = A | B; 164 } 165 166 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S); 167 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, Symbol::SymbolFlag); 168 169 /// Invokes Callback with each StringRef& contained in the Symbol. 170 /// Useful for deduplicating backing strings. 171 template <typename Callback> void visitStrings(Symbol &S, const Callback &CB) { 172 CB(S.Name); 173 CB(S.Scope); 174 CB(S.TemplateSpecializationArgs); 175 CB(S.Signature); 176 CB(S.CompletionSnippetSuffix); 177 CB(S.Documentation); 178 CB(S.ReturnType); 179 CB(S.Type); 180 auto RawCharPointerCB = [&CB](const char *&P) { 181 llvm::StringRef S(P); 182 CB(S); 183 assert(!S.data()[S.size()] && "Visited StringRef must be null-terminated"); 184 P = S.data(); 185 }; 186 RawCharPointerCB(S.CanonicalDeclaration.FileURI); 187 RawCharPointerCB(S.Definition.FileURI); 188 189 for (auto &Include : S.IncludeHeaders) 190 CB(Include.IncludeHeader); 191 } 192 193 /// Computes query-independent quality score for a Symbol. 194 /// This currently falls in the range [1, ln(#indexed documents)]. 195 /// FIXME: this should probably be split into symbol -> signals 196 /// and signals -> score, so it can be reused for Sema completions. 197 float quality(const Symbol &S); 198 199 /// An immutable symbol container that stores a set of symbols. 200 /// The container will maintain the lifetime of the symbols. 201 class SymbolSlab { 202 public: 203 using const_iterator = std::vector<Symbol>::const_iterator; 204 using iterator = const_iterator; 205 using value_type = Symbol; 206 207 SymbolSlab() = default; 208 209 const_iterator begin() const { return Symbols.begin(); } 210 const_iterator end() const { return Symbols.end(); } 211 const_iterator find(const SymbolID &SymID) const; 212 213 using size_type = size_t; 214 size_type size() const { return Symbols.size(); } 215 bool empty() const { return Symbols.empty(); } 216 // Estimates the total memory usage. 217 size_t bytes() const { 218 return sizeof(*this) + Arena.getTotalMemory() + 219 Symbols.capacity() * sizeof(Symbol); 220 } 221 222 /// SymbolSlab::Builder is a mutable container that can 'freeze' to 223 /// SymbolSlab. The frozen SymbolSlab will use less memory. 224 class Builder { 225 public: 226 Builder() : UniqueStrings(Arena) {} 227 228 /// Adds a symbol, overwriting any existing one with the same ID. 229 /// This is a deep copy: underlying strings will be owned by the slab. 230 void insert(const Symbol &S); 231 232 /// Removes the symbol with an ID, if it exists. 233 void erase(const SymbolID &ID) { Symbols.erase(ID); } 234 235 /// Returns the symbol with an ID, if it exists. Valid until insert/remove. 236 const Symbol *find(const SymbolID &ID) { 237 auto I = Symbols.find(ID); 238 return I == Symbols.end() ? nullptr : &I->second; 239 } 240 241 /// Consumes the builder to finalize the slab. 242 SymbolSlab build() &&; 243 244 private: 245 llvm::BumpPtrAllocator Arena; 246 /// Intern table for strings. Contents are on the arena. 247 llvm::UniqueStringSaver UniqueStrings; 248 /// Values are indices into Symbols vector. 249 llvm::DenseMap<SymbolID, Symbol> Symbols; 250 }; 251 252 private: 253 SymbolSlab(llvm::BumpPtrAllocator Arena, std::vector<Symbol> Symbols) 254 : Arena(std::move(Arena)), Symbols(std::move(Symbols)) {} 255 256 llvm::BumpPtrAllocator Arena; // Owns Symbol data that the Symbols do not. 257 std::vector<Symbol> Symbols; // Sorted by SymbolID to allow lookup. 258 }; 259 260 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolSlab &Slab); 261 262 } // namespace clangd 263 } // namespace clang 264 265 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_H 266