1 //===--- Dex.h - Dex Symbol Index Implementation ----------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This defines Dex - a symbol index implementation based on query iterators 11 /// over symbol tokens, such as fuzzy matching trigrams, scopes, types, etc. 12 /// While consuming more memory and having longer build stage due to 13 /// preprocessing, Dex will have substantially lower latency. It will also allow 14 /// efficient symbol searching which is crucial for operations like code 15 /// completion, and can be very important for a number of different code 16 /// transformations which will be eventually supported by Clangd. 17 /// 18 //===----------------------------------------------------------------------===// 19 20 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H 21 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H 22 23 #include "index/dex/Iterator.h" 24 #include "index/Index.h" 25 #include "index/Relation.h" 26 #include "index/dex/PostingList.h" 27 #include "index/dex/Token.h" 28 #include "llvm/ADT/StringSet.h" 29 30 namespace clang { 31 namespace clangd { 32 namespace dex { 33 34 /// In-memory Dex trigram-based index implementation. 35 class Dex : public SymbolIndex { 36 public: 37 // All data must outlive this index. 38 template <typename SymbolRange, typename RefsRange, typename RelationsRange> 39 Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations, 40 bool SupportContainedRefs) 41 : Corpus(0) { 42 for (auto &&Sym : Symbols) 43 this->Symbols.push_back(&Sym); 44 for (auto &&Ref : Refs) 45 this->Refs.try_emplace(Ref.first, Ref.second); 46 for (auto &&Rel : Relations) 47 this->Relations[std::make_pair(Rel.Subject, 48 static_cast<uint8_t>(Rel.Predicate))] 49 .push_back(Rel.Object); 50 buildIndex(SupportContainedRefs); 51 } 52 // Symbols and Refs are owned by BackingData, Index takes ownership. 53 template <typename SymbolRange, typename RefsRange, typename RelationsRange, 54 typename Payload> 55 Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations, 56 Payload &&BackingData, size_t BackingDataSize, bool SupportContainedRefs) 57 : Dex(std::forward<SymbolRange>(Symbols), std::forward<RefsRange>(Refs), 58 std::forward<RelationsRange>(Relations), SupportContainedRefs) { 59 KeepAlive = std::shared_ptr<void>( 60 std::make_shared<Payload>(std::move(BackingData)), nullptr); 61 this->BackingDataSize = BackingDataSize; 62 } 63 64 template <typename SymbolRange, typename RefsRange, typename RelationsRange, 65 typename FileRange, typename Payload> 66 Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations, 67 FileRange &&Files, IndexContents IdxContents, Payload &&BackingData, 68 size_t BackingDataSize, bool SupportContainedRefs) 69 : Dex(std::forward<SymbolRange>(Symbols), std::forward<RefsRange>(Refs), 70 std::forward<RelationsRange>(Relations), 71 std::forward<Payload>(BackingData), BackingDataSize, 72 SupportContainedRefs) { 73 this->Files = std::forward<FileRange>(Files); 74 this->IdxContents = IdxContents; 75 } 76 77 /// Builds an index from slabs. The index takes ownership of the slab. 78 static std::unique_ptr<SymbolIndex> build(SymbolSlab, RefSlab, RelationSlab, 79 bool SupportContainedRefs); 80 81 bool 82 fuzzyFind(const FuzzyFindRequest &Req, 83 llvm::function_ref<void(const Symbol &)> Callback) const override; 84 85 void lookup(const LookupRequest &Req, 86 llvm::function_ref<void(const Symbol &)> Callback) const override; 87 88 bool refs(const RefsRequest &Req, 89 llvm::function_ref<void(const Ref &)> Callback) const override; 90 91 bool containedRefs(const ContainedRefsRequest &Req, 92 llvm::function_ref<void(const ContainedRefsResult &)> 93 Callback) const override; 94 95 void relations(const RelationsRequest &Req, 96 llvm::function_ref<void(const SymbolID &, const Symbol &)> 97 Callback) const override; 98 99 llvm::unique_function<IndexContents(llvm::StringRef) const> 100 indexedFiles() const override; 101 102 size_t estimateMemoryUsage() const override; 103 104 private: 105 class RevRef { 106 const Ref *Reference; 107 SymbolID Target; 108 109 public: 110 RevRef(const Ref &Reference, SymbolID Target) 111 : Reference(&Reference), Target(Target) {} 112 const Ref &ref() const { return *Reference; } 113 ContainedRefsResult containedRefsResult() const { 114 return {ref().Location, ref().Kind, Target}; 115 } 116 }; 117 118 void buildIndex(bool EnableOutgoingCalls); 119 llvm::iterator_range<std::vector<RevRef>::const_iterator> 120 lookupRevRefs(const SymbolID &Container) const; 121 std::unique_ptr<Iterator> iterator(const Token &Tok) const; 122 std::unique_ptr<Iterator> 123 createFileProximityIterator(llvm::ArrayRef<std::string> ProximityPaths) const; 124 std::unique_ptr<Iterator> 125 createTypeBoostingIterator(llvm::ArrayRef<std::string> Types) const; 126 127 /// Stores symbols sorted in the descending order of symbol quality.. 128 std::vector<const Symbol *> Symbols; 129 /// SymbolQuality[I] is the quality of Symbols[I]. 130 std::vector<float> SymbolQuality; 131 llvm::DenseMap<SymbolID, const Symbol *> LookupTable; 132 /// Inverted index is a mapping from the search token to the posting list, 133 /// which contains all items which can be characterized by such search token. 134 /// For example, if the search token is scope "std::", the corresponding 135 /// posting list would contain all indices of symbols defined in namespace 136 /// std. Inverted index is used to retrieve posting lists which are processed 137 /// during the fuzzyFind process. 138 llvm::DenseMap<Token, PostingList> InvertedIndex; 139 dex::Corpus Corpus; 140 llvm::DenseMap<SymbolID, llvm::ArrayRef<Ref>> Refs; 141 std::vector<RevRef> RevRefs; // sorted by container ID 142 static_assert(sizeof(RelationKind) == sizeof(uint8_t), 143 "RelationKind should be of same size as a uint8_t"); 144 llvm::DenseMap<std::pair<SymbolID, uint8_t>, std::vector<SymbolID>> Relations; 145 std::shared_ptr<void> KeepAlive; // poor man's move-only std::any 146 // Set of files which were used during this index build. 147 llvm::StringSet<> Files; 148 // Contents of the index (symbols, references, etc.) 149 // This is only populated if `Files` is, which applies to some but not all 150 // consumers of this class. 151 IndexContents IdxContents = IndexContents::None; 152 // Size of memory retained by KeepAlive. 153 size_t BackingDataSize = 0; 154 }; 155 156 /// Returns Search Token for a number of parent directories of given Path. 157 /// Should be used within the index build process. 158 /// 159 /// This function is exposed for testing only. 160 llvm::SmallVector<llvm::StringRef, 5> generateProximityURIs(llvm::StringRef); 161 162 } // namespace dex 163 } // namespace clangd 164 } // namespace clang 165 166 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H 167