xref: /llvm-project/clang-tools-extra/clangd/index/dex/Iterator.cpp (revision 4a5ff88fdbd7d8646de3ec6ecfbc5fd1036dcb50)
1 //===--- Iterator.cpp - Query Symbol Retrieval ------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "Iterator.h"
11 #include "llvm/Support/Casting.h"
12 #include <algorithm>
13 #include <cassert>
14 #include <numeric>
15 
16 namespace clang {
17 namespace clangd {
18 namespace dex {
19 
20 namespace {
21 
22 /// Implements Iterator over the intersection of other iterators.
23 ///
24 /// AndIterator iterates through common items among all children. It becomes
25 /// exhausted as soon as any child becomes exhausted. After each mutation, the
26 /// iterator restores the invariant: all children must point to the same item.
27 class AndIterator : public Iterator {
28 public:
29   explicit AndIterator(std::vector<std::unique_ptr<Iterator>> AllChildren)
30       : Iterator(Kind::And), Children(std::move(AllChildren)) {
31     assert(!Children.empty() && "AND iterator should have at least one child.");
32     // Establish invariants.
33     for (const auto &Child : Children)
34       ReachedEnd |= Child->reachedEnd();
35     sync();
36     // When children are sorted by the estimateSize(), sync() calls are more
37     // effective. Each sync() starts with the first child and makes sure all
38     // children point to the same element. If any child is "above" the previous
39     // ones, the algorithm resets and and advances the children to the next
40     // highest element starting from the front. When child iterators in the
41     // beginning have smaller estimated size, the sync() will have less restarts
42     // and become more effective.
43     llvm::sort(Children, [](const std::unique_ptr<Iterator> &LHS,
44                             const std::unique_ptr<Iterator> &RHS) {
45       return LHS->estimateSize() < RHS->estimateSize();
46     });
47   }
48 
49   bool reachedEnd() const override { return ReachedEnd; }
50 
51   /// Advances all children to the next common item.
52   void advance() override {
53     assert(!reachedEnd() && "AND iterator can't advance() at the end.");
54     Children.front()->advance();
55     sync();
56   }
57 
58   /// Advances all children to the next common item with DocumentID >= ID.
59   void advanceTo(DocID ID) override {
60     assert(!reachedEnd() && "AND iterator can't advanceTo() at the end.");
61     Children.front()->advanceTo(ID);
62     sync();
63   }
64 
65   DocID peek() const override { return Children.front()->peek(); }
66 
67   float consume() override {
68     assert(!reachedEnd() && "AND iterator can't consume() at the end.");
69     float Boost = 1;
70     for (const auto &Child : Children)
71       Boost *= Child->consume();
72     return Boost;
73   }
74 
75   size_t estimateSize() const override {
76     return Children.front()->estimateSize();
77   }
78 
79 private:
80   llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
81     OS << "(& ";
82     auto Separator = "";
83     for (const auto &Child : Children) {
84       OS << Separator << *Child;
85       Separator = " ";
86     }
87     OS << ')';
88     return OS;
89   }
90 
91   /// Restores class invariants: each child will point to the same element after
92   /// sync.
93   void sync() {
94     ReachedEnd |= Children.front()->reachedEnd();
95     if (ReachedEnd)
96       return;
97     auto SyncID = Children.front()->peek();
98     // Indicates whether any child needs to be advanced to new SyncID.
99     bool NeedsAdvance = false;
100     do {
101       NeedsAdvance = false;
102       for (auto &Child : Children) {
103         Child->advanceTo(SyncID);
104         ReachedEnd |= Child->reachedEnd();
105         // If any child reaches end And iterator can not match any other items.
106         // In this case, just terminate the process.
107         if (ReachedEnd)
108           return;
109         // If any child goes beyond given ID (i.e. ID is not the common item),
110         // all children should be advanced to the next common item.
111         if (Child->peek() > SyncID) {
112           SyncID = Child->peek();
113           NeedsAdvance = true;
114         }
115       }
116     } while (NeedsAdvance);
117   }
118 
119   /// AndIterator owns its children and ensures that all of them point to the
120   /// same element. As soon as one child gets exhausted, AndIterator can no
121   /// longer advance and has reached its end.
122   std::vector<std::unique_ptr<Iterator>> Children;
123   /// Indicates whether any child is exhausted. It is cheaper to maintain and
124   /// update the field, rather than traversing the whole subtree in each
125   /// reachedEnd() call.
126   bool ReachedEnd = false;
127   friend Corpus; // For optimizations.
128 };
129 
130 /// Implements Iterator over the union of other iterators.
131 ///
132 /// OrIterator iterates through all items which can be pointed to by at least
133 /// one child. To preserve the sorted order, this iterator always advances the
134 /// child with smallest Child->peek() value. OrIterator becomes exhausted as
135 /// soon as all of its children are exhausted.
136 class OrIterator : public Iterator {
137 public:
138   explicit OrIterator(std::vector<std::unique_ptr<Iterator>> AllChildren)
139       : Iterator(Kind::Or), Children(std::move(AllChildren)) {
140     assert(!Children.empty() && "OR iterator should have at least one child.");
141   }
142 
143   /// Returns true if all children are exhausted.
144   bool reachedEnd() const override {
145     for (const auto &Child : Children)
146       if (!Child->reachedEnd())
147         return false;
148     return true;
149   }
150 
151   /// Moves each child pointing to the smallest DocID to the next item.
152   void advance() override {
153     assert(!reachedEnd() && "OR iterator can't advance() at the end.");
154     const auto SmallestID = peek();
155     for (const auto &Child : Children)
156       if (!Child->reachedEnd() && Child->peek() == SmallestID)
157         Child->advance();
158   }
159 
160   /// Advances each child to the next existing element with DocumentID >= ID.
161   void advanceTo(DocID ID) override {
162     assert(!reachedEnd() && "OR iterator can't advanceTo() at the end.");
163     for (const auto &Child : Children)
164       if (!Child->reachedEnd())
165         Child->advanceTo(ID);
166   }
167 
168   /// Returns the element under cursor of the child with smallest Child->peek()
169   /// value.
170   DocID peek() const override {
171     assert(!reachedEnd() && "OR iterator can't peek() at the end.");
172     DocID Result = std::numeric_limits<DocID>::max();
173 
174     for (const auto &Child : Children)
175       if (!Child->reachedEnd())
176         Result = std::min(Result, Child->peek());
177 
178     return Result;
179   }
180 
181   // Returns the maximum boosting score among all Children when iterator
182   // points to the current ID.
183   float consume() override {
184     assert(!reachedEnd() && "OR iterator can't consume() at the end.");
185     const DocID ID = peek();
186     float Boost = 1;
187     for (const auto &Child : Children)
188       if (!Child->reachedEnd() && Child->peek() == ID)
189         Boost = std::max(Boost, Child->consume());
190     return Boost;
191   }
192 
193   size_t estimateSize() const override {
194     size_t Size = 0;
195     for (const auto &Child : Children)
196       Size = std::max(Size, Child->estimateSize());
197     return Size;
198   }
199 
200 private:
201   llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
202     OS << "(| ";
203     auto Separator = "";
204     for (const auto &Child : Children) {
205       OS << Separator << *Child;
206       Separator = " ";
207     }
208     OS << ')';
209     return OS;
210   }
211 
212   // FIXME(kbobyrev): Would storing Children in min-heap be faster?
213   std::vector<std::unique_ptr<Iterator>> Children;
214   friend Corpus; // For optimizations.
215 };
216 
217 /// TrueIterator handles PostingLists which contain all items of the index. It
218 /// stores size of the virtual posting list, and all operations are performed
219 /// in O(1).
220 class TrueIterator : public Iterator {
221 public:
222   explicit TrueIterator(DocID Size) : Iterator(Kind::True), Size(Size) {}
223 
224   bool reachedEnd() const override { return Index >= Size; }
225 
226   void advance() override {
227     assert(!reachedEnd() && "TRUE iterator can't advance() at the end.");
228     ++Index;
229   }
230 
231   void advanceTo(DocID ID) override {
232     assert(!reachedEnd() && "TRUE iterator can't advanceTo() at the end.");
233     Index = std::min(ID, Size);
234   }
235 
236   DocID peek() const override {
237     assert(!reachedEnd() && "TRUE iterator can't peek() at the end.");
238     return Index;
239   }
240 
241   float consume() override {
242     assert(!reachedEnd() && "TRUE iterator can't consume() at the end.");
243     return 1;
244   }
245 
246   size_t estimateSize() const override { return Size; }
247 
248 private:
249   llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
250     return OS << "true";
251   }
252 
253   DocID Index = 0;
254   /// Size of the underlying virtual PostingList.
255   DocID Size;
256 };
257 
258 /// FalseIterator yields no results.
259 class FalseIterator : public Iterator {
260 public:
261   FalseIterator() : Iterator(Kind::False) {}
262   bool reachedEnd() const override { return true; }
263   void advance() override { assert(false); }
264   void advanceTo(DocID ID) override { assert(false); }
265   DocID peek() const override {
266     assert(false);
267     return 0;
268   }
269   float consume() override {
270     assert(false);
271     return 1;
272   }
273   size_t estimateSize() const override { return 0; }
274 
275 private:
276   llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
277     return OS << "false";
278   }
279 };
280 
281 /// Boost iterator is a wrapper around its child which multiplies scores of
282 /// each retrieved item by a given factor.
283 class BoostIterator : public Iterator {
284 public:
285   BoostIterator(std::unique_ptr<Iterator> Child, float Factor)
286       : Child(std::move(Child)), Factor(Factor) {}
287 
288   bool reachedEnd() const override { return Child->reachedEnd(); }
289 
290   void advance() override { Child->advance(); }
291 
292   void advanceTo(DocID ID) override { Child->advanceTo(ID); }
293 
294   DocID peek() const override { return Child->peek(); }
295 
296   float consume() override { return Child->consume() * Factor; }
297 
298   size_t estimateSize() const override { return Child->estimateSize(); }
299 
300 private:
301   llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
302     return OS << "(* " << Factor << ' ' << *Child << ')';
303   }
304 
305   std::unique_ptr<Iterator> Child;
306   float Factor;
307 };
308 
309 /// This iterator limits the number of items retrieved from the child iterator
310 /// on top of the query tree. To ensure that query tree with LIMIT iterators
311 /// inside works correctly, users have to call Root->consume(Root->peek()) each
312 /// time item is retrieved at the root of query tree.
313 class LimitIterator : public Iterator {
314 public:
315   LimitIterator(std::unique_ptr<Iterator> Child, size_t Limit)
316       : Child(std::move(Child)), Limit(Limit), ItemsLeft(Limit) {}
317 
318   bool reachedEnd() const override {
319     return ItemsLeft == 0 || Child->reachedEnd();
320   }
321 
322   void advance() override { Child->advance(); }
323 
324   void advanceTo(DocID ID) override { Child->advanceTo(ID); }
325 
326   DocID peek() const override { return Child->peek(); }
327 
328   /// Decreases the limit in case the element consumed at top of the query tree
329   /// comes from the underlying iterator.
330   float consume() override {
331     assert(!reachedEnd() && "LimitIterator can't consume() at the end.");
332     --ItemsLeft;
333     return Child->consume();
334   }
335 
336   size_t estimateSize() const override {
337     return std::min(Child->estimateSize(), Limit);
338   }
339 
340 private:
341   llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
342     return OS << "(LIMIT " << Limit << " " << *Child << ')';
343   }
344 
345   std::unique_ptr<Iterator> Child;
346   size_t Limit;
347   size_t ItemsLeft;
348 };
349 
350 } // end namespace
351 
352 std::vector<std::pair<DocID, float>> consume(Iterator &It) {
353   std::vector<std::pair<DocID, float>> Result;
354   for (; !It.reachedEnd(); It.advance())
355     Result.emplace_back(It.peek(), It.consume());
356   return Result;
357 }
358 
359 std::unique_ptr<Iterator>
360 Corpus::intersect(std::vector<std::unique_ptr<Iterator>> Children) const {
361   std::vector<std::unique_ptr<Iterator>> RealChildren;
362   for (auto &Child : Children) {
363     switch (Child->kind()) {
364     case Iterator::Kind::True:
365       break; // No effect, drop the iterator.
366     case Iterator::Kind::False:
367       return std::move(Child); // Intersection is empty.
368     case Iterator::Kind::And: {
369       // Inline nested AND into parent AND.
370       auto &NewChildren = static_cast<AndIterator *>(Child.get())->Children;
371       std::move(NewChildren.begin(), NewChildren.end(),
372                 std::back_inserter(RealChildren));
373       break;
374     }
375     default:
376       RealChildren.push_back(std::move(Child));
377     }
378   }
379   switch (RealChildren.size()) {
380   case 0:
381     return all();
382   case 1:
383     return std::move(RealChildren.front());
384   default:
385     return llvm::make_unique<AndIterator>(std::move(RealChildren));
386   }
387 }
388 
389 std::unique_ptr<Iterator>
390 Corpus::unionOf(std::vector<std::unique_ptr<Iterator>> Children) const {
391   std::vector<std::unique_ptr<Iterator>> RealChildren;
392   for (auto &Child : Children) {
393     switch (Child->kind()) {
394     case Iterator::Kind::False:
395       break; // No effect, drop the iterator.
396     case Iterator::Kind::Or: {
397       // Inline nested OR into parent OR.
398       auto &NewChildren = static_cast<OrIterator *>(Child.get())->Children;
399       std::move(NewChildren.begin(), NewChildren.end(),
400                 std::back_inserter(RealChildren));
401       break;
402     }
403     case Iterator::Kind::True:
404       // Don't return all(), which would discard sibling boosts.
405     default:
406       RealChildren.push_back(std::move(Child));
407     }
408   }
409   switch (RealChildren.size()) {
410   case 0:
411     return none();
412   case 1:
413     return std::move(RealChildren.front());
414   default:
415     return llvm::make_unique<OrIterator>(std::move(RealChildren));
416   }
417 }
418 
419 std::unique_ptr<Iterator> Corpus::all() const {
420   return llvm::make_unique<TrueIterator>(Size);
421 }
422 
423 std::unique_ptr<Iterator> Corpus::none() const {
424   return llvm::make_unique<FalseIterator>();
425 }
426 
427 std::unique_ptr<Iterator> Corpus::boost(std::unique_ptr<Iterator> Child,
428                                         float Factor) const {
429   if (Factor == 1)
430     return Child;
431   if (Child->kind() == Iterator::Kind::False)
432     return Child;
433   return llvm::make_unique<BoostIterator>(std::move(Child), Factor);
434 }
435 
436 std::unique_ptr<Iterator> Corpus::limit(std::unique_ptr<Iterator> Child,
437                                         size_t Limit) const {
438   if (Child->kind() == Iterator::Kind::False)
439     return Child;
440   return llvm::make_unique<LimitIterator>(std::move(Child), Limit);
441 }
442 
443 } // namespace dex
444 } // namespace clangd
445 } // namespace clang
446