xref: /llvm-project/clang-tools-extra/clangd/index/dex/Dex.h (revision 26760c7b907c1012c44d15959319bfa06848e5cd)
1 //===--- Dex.h - Dex Symbol Index Implementation ----------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This defines Dex - a symbol index implementation based on query iterators
11 /// over symbol tokens, such as fuzzy matching trigrams, scopes, types, etc.
12 /// While consuming more memory and having longer build stage due to
13 /// preprocessing, Dex will have substantially lower latency. It will also allow
14 /// efficient symbol searching which is crucial for operations like code
15 /// completion, and can be very important for a number of different code
16 /// transformations which will be eventually supported by Clangd.
17 ///
18 //===----------------------------------------------------------------------===//
19 
20 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
21 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
22 
23 #include "index/dex/Iterator.h"
24 #include "index/Index.h"
25 #include "index/Relation.h"
26 #include "index/dex/PostingList.h"
27 #include "index/dex/Token.h"
28 #include "llvm/ADT/StringSet.h"
29 
30 namespace clang {
31 namespace clangd {
32 namespace dex {
33 
34 /// In-memory Dex trigram-based index implementation.
35 class Dex : public SymbolIndex {
36 public:
37   // All data must outlive this index.
38   template <typename SymbolRange, typename RefsRange, typename RelationsRange>
39   Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations,
40       bool SupportContainedRefs)
41       : Corpus(0) {
42     for (auto &&Sym : Symbols)
43       this->Symbols.push_back(&Sym);
44     for (auto &&Ref : Refs)
45       this->Refs.try_emplace(Ref.first, Ref.second);
46     for (auto &&Rel : Relations)
47       this->Relations[std::make_pair(Rel.Subject,
48                                      static_cast<uint8_t>(Rel.Predicate))]
49           .push_back(Rel.Object);
50     buildIndex(SupportContainedRefs);
51   }
52   // Symbols and Refs are owned by BackingData, Index takes ownership.
53   template <typename SymbolRange, typename RefsRange, typename RelationsRange,
54             typename Payload>
55   Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations,
56       Payload &&BackingData, size_t BackingDataSize, bool SupportContainedRefs)
57       : Dex(std::forward<SymbolRange>(Symbols), std::forward<RefsRange>(Refs),
58             std::forward<RelationsRange>(Relations), SupportContainedRefs) {
59     KeepAlive = std::shared_ptr<void>(
60         std::make_shared<Payload>(std::move(BackingData)), nullptr);
61     this->BackingDataSize = BackingDataSize;
62   }
63 
64   template <typename SymbolRange, typename RefsRange, typename RelationsRange,
65             typename FileRange, typename Payload>
66   Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations,
67       FileRange &&Files, IndexContents IdxContents, Payload &&BackingData,
68       size_t BackingDataSize, bool SupportContainedRefs)
69       : Dex(std::forward<SymbolRange>(Symbols), std::forward<RefsRange>(Refs),
70             std::forward<RelationsRange>(Relations),
71             std::forward<Payload>(BackingData), BackingDataSize,
72             SupportContainedRefs) {
73     this->Files = std::forward<FileRange>(Files);
74     this->IdxContents = IdxContents;
75   }
76 
77   /// Builds an index from slabs. The index takes ownership of the slab.
78   static std::unique_ptr<SymbolIndex> build(SymbolSlab, RefSlab, RelationSlab,
79                                             bool SupportContainedRefs);
80 
81   bool
82   fuzzyFind(const FuzzyFindRequest &Req,
83             llvm::function_ref<void(const Symbol &)> Callback) const override;
84 
85   void lookup(const LookupRequest &Req,
86               llvm::function_ref<void(const Symbol &)> Callback) const override;
87 
88   bool refs(const RefsRequest &Req,
89             llvm::function_ref<void(const Ref &)> Callback) const override;
90 
91   bool containedRefs(const ContainedRefsRequest &Req,
92                      llvm::function_ref<void(const ContainedRefsResult &)>
93                          Callback) const override;
94 
95   void relations(const RelationsRequest &Req,
96                  llvm::function_ref<void(const SymbolID &, const Symbol &)>
97                      Callback) const override;
98 
99   llvm::unique_function<IndexContents(llvm::StringRef) const>
100   indexedFiles() const override;
101 
102   size_t estimateMemoryUsage() const override;
103 
104 private:
105   class RevRef {
106     const Ref *Reference;
107     SymbolID Target;
108 
109   public:
110     RevRef(const Ref &Reference, SymbolID Target)
111         : Reference(&Reference), Target(Target) {}
112     const Ref &ref() const { return *Reference; }
113     ContainedRefsResult containedRefsResult() const {
114       return {ref().Location, ref().Kind, Target};
115     }
116   };
117 
118   void buildIndex(bool EnableOutgoingCalls);
119   llvm::iterator_range<std::vector<RevRef>::const_iterator>
120   lookupRevRefs(const SymbolID &Container) const;
121   std::unique_ptr<Iterator> iterator(const Token &Tok) const;
122   std::unique_ptr<Iterator>
123   createFileProximityIterator(llvm::ArrayRef<std::string> ProximityPaths) const;
124   std::unique_ptr<Iterator>
125   createTypeBoostingIterator(llvm::ArrayRef<std::string> Types) const;
126 
127   /// Stores symbols sorted in the descending order of symbol quality..
128   std::vector<const Symbol *> Symbols;
129   /// SymbolQuality[I] is the quality of Symbols[I].
130   std::vector<float> SymbolQuality;
131   llvm::DenseMap<SymbolID, const Symbol *> LookupTable;
132   /// Inverted index is a mapping from the search token to the posting list,
133   /// which contains all items which can be characterized by such search token.
134   /// For example, if the search token is scope "std::", the corresponding
135   /// posting list would contain all indices of symbols defined in namespace
136   /// std. Inverted index is used to retrieve posting lists which are processed
137   /// during the fuzzyFind process.
138   llvm::DenseMap<Token, PostingList> InvertedIndex;
139   dex::Corpus Corpus;
140   llvm::DenseMap<SymbolID, llvm::ArrayRef<Ref>> Refs;
141   std::vector<RevRef> RevRefs; // sorted by container ID
142   static_assert(sizeof(RelationKind) == sizeof(uint8_t),
143                 "RelationKind should be of same size as a uint8_t");
144   llvm::DenseMap<std::pair<SymbolID, uint8_t>, std::vector<SymbolID>> Relations;
145   std::shared_ptr<void> KeepAlive; // poor man's move-only std::any
146   // Set of files which were used during this index build.
147   llvm::StringSet<> Files;
148   // Contents of the index (symbols, references, etc.)
149   // This is only populated if `Files` is, which applies to some but not all
150   // consumers of this class.
151   IndexContents IdxContents = IndexContents::None;
152   // Size of memory retained by KeepAlive.
153   size_t BackingDataSize = 0;
154 };
155 
156 /// Returns Search Token for a number of parent directories of given Path.
157 /// Should be used within the index build process.
158 ///
159 /// This function is exposed for testing only.
160 llvm::SmallVector<llvm::StringRef, 5> generateProximityURIs(llvm::StringRef);
161 
162 } // namespace dex
163 } // namespace clangd
164 } // namespace clang
165 
166 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
167