1 //===--- Iterator.cpp - Query Symbol Retrieval ------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "Iterator.h" 10 #include "llvm/Support/Casting.h" 11 #include <algorithm> 12 #include <cassert> 13 #include <numeric> 14 15 namespace clang { 16 namespace clangd { 17 namespace dex { 18 namespace { 19 20 /// Implements Iterator over the intersection of other iterators. 21 /// 22 /// AndIterator iterates through common items among all children. It becomes 23 /// exhausted as soon as any child becomes exhausted. After each mutation, the 24 /// iterator restores the invariant: all children must point to the same item. 25 class AndIterator : public Iterator { 26 public: 27 explicit AndIterator(std::vector<std::unique_ptr<Iterator>> AllChildren) 28 : Iterator(Kind::And), Children(std::move(AllChildren)) { 29 assert(!Children.empty() && "AND iterator should have at least one child."); 30 // Establish invariants. 31 for (const auto &Child : Children) 32 ReachedEnd |= Child->reachedEnd(); 33 sync(); 34 // When children are sorted by the estimateSize(), sync() calls are more 35 // effective. Each sync() starts with the first child and makes sure all 36 // children point to the same element. If any child is "above" the previous 37 // ones, the algorithm resets and and advances the children to the next 38 // highest element starting from the front. When child iterators in the 39 // beginning have smaller estimated size, the sync() will have less restarts 40 // and become more effective. 41 llvm::sort(Children, [](const std::unique_ptr<Iterator> &LHS, 42 const std::unique_ptr<Iterator> &RHS) { 43 return LHS->estimateSize() < RHS->estimateSize(); 44 }); 45 } 46 47 bool reachedEnd() const override { return ReachedEnd; } 48 49 /// Advances all children to the next common item. 50 void advance() override { 51 assert(!reachedEnd() && "AND iterator can't advance() at the end."); 52 Children.front()->advance(); 53 sync(); 54 } 55 56 /// Advances all children to the next common item with DocumentID >= ID. 57 void advanceTo(DocID ID) override { 58 assert(!reachedEnd() && "AND iterator can't advanceTo() at the end."); 59 Children.front()->advanceTo(ID); 60 sync(); 61 } 62 63 DocID peek() const override { return Children.front()->peek(); } 64 65 float consume() override { 66 assert(!reachedEnd() && "AND iterator can't consume() at the end."); 67 float Boost = 1; 68 for (const auto &Child : Children) 69 Boost *= Child->consume(); 70 return Boost; 71 } 72 73 size_t estimateSize() const override { 74 return Children.front()->estimateSize(); 75 } 76 77 private: 78 llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { 79 OS << "(& "; 80 auto Separator = ""; 81 for (const auto &Child : Children) { 82 OS << Separator << *Child; 83 Separator = " "; 84 } 85 OS << ')'; 86 return OS; 87 } 88 89 /// Restores class invariants: each child will point to the same element after 90 /// sync. 91 void sync() { 92 ReachedEnd |= Children.front()->reachedEnd(); 93 if (ReachedEnd) 94 return; 95 auto SyncID = Children.front()->peek(); 96 // Indicates whether any child needs to be advanced to new SyncID. 97 bool NeedsAdvance = false; 98 do { 99 NeedsAdvance = false; 100 for (auto &Child : Children) { 101 Child->advanceTo(SyncID); 102 ReachedEnd |= Child->reachedEnd(); 103 // If any child reaches end And iterator can not match any other items. 104 // In this case, just terminate the process. 105 if (ReachedEnd) 106 return; 107 // If any child goes beyond given ID (i.e. ID is not the common item), 108 // all children should be advanced to the next common item. 109 if (Child->peek() > SyncID) { 110 SyncID = Child->peek(); 111 NeedsAdvance = true; 112 } 113 } 114 } while (NeedsAdvance); 115 } 116 117 /// AndIterator owns its children and ensures that all of them point to the 118 /// same element. As soon as one child gets exhausted, AndIterator can no 119 /// longer advance and has reached its end. 120 std::vector<std::unique_ptr<Iterator>> Children; 121 /// Indicates whether any child is exhausted. It is cheaper to maintain and 122 /// update the field, rather than traversing the whole subtree in each 123 /// reachedEnd() call. 124 bool ReachedEnd = false; 125 friend Corpus; // For optimizations. 126 }; 127 128 /// Implements Iterator over the union of other iterators. 129 /// 130 /// OrIterator iterates through all items which can be pointed to by at least 131 /// one child. To preserve the sorted order, this iterator always advances the 132 /// child with smallest Child->peek() value. OrIterator becomes exhausted as 133 /// soon as all of its children are exhausted. 134 class OrIterator : public Iterator { 135 public: 136 explicit OrIterator(std::vector<std::unique_ptr<Iterator>> AllChildren) 137 : Iterator(Kind::Or), Children(std::move(AllChildren)) { 138 assert(!Children.empty() && "OR iterator should have at least one child."); 139 } 140 141 /// Returns true if all children are exhausted. 142 bool reachedEnd() const override { 143 for (const auto &Child : Children) 144 if (!Child->reachedEnd()) 145 return false; 146 return true; 147 } 148 149 /// Moves each child pointing to the smallest DocID to the next item. 150 void advance() override { 151 assert(!reachedEnd() && "OR iterator can't advance() at the end."); 152 const auto SmallestID = peek(); 153 for (const auto &Child : Children) 154 if (!Child->reachedEnd() && Child->peek() == SmallestID) 155 Child->advance(); 156 } 157 158 /// Advances each child to the next existing element with DocumentID >= ID. 159 void advanceTo(DocID ID) override { 160 assert(!reachedEnd() && "OR iterator can't advanceTo() at the end."); 161 for (const auto &Child : Children) 162 if (!Child->reachedEnd()) 163 Child->advanceTo(ID); 164 } 165 166 /// Returns the element under cursor of the child with smallest Child->peek() 167 /// value. 168 DocID peek() const override { 169 assert(!reachedEnd() && "OR iterator can't peek() at the end."); 170 DocID Result = std::numeric_limits<DocID>::max(); 171 172 for (const auto &Child : Children) 173 if (!Child->reachedEnd()) 174 Result = std::min(Result, Child->peek()); 175 176 return Result; 177 } 178 179 // Returns the maximum boosting score among all Children when iterator 180 // points to the current ID. 181 float consume() override { 182 assert(!reachedEnd() && "OR iterator can't consume() at the end."); 183 const DocID ID = peek(); 184 float Boost = 1; 185 for (const auto &Child : Children) 186 if (!Child->reachedEnd() && Child->peek() == ID) 187 Boost = std::max(Boost, Child->consume()); 188 return Boost; 189 } 190 191 size_t estimateSize() const override { 192 size_t Size = 0; 193 for (const auto &Child : Children) 194 Size = std::max(Size, Child->estimateSize()); 195 return Size; 196 } 197 198 private: 199 llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { 200 OS << "(| "; 201 auto Separator = ""; 202 for (const auto &Child : Children) { 203 OS << Separator << *Child; 204 Separator = " "; 205 } 206 OS << ')'; 207 return OS; 208 } 209 210 // FIXME(kbobyrev): Would storing Children in min-heap be faster? 211 std::vector<std::unique_ptr<Iterator>> Children; 212 friend Corpus; // For optimizations. 213 }; 214 215 /// TrueIterator handles PostingLists which contain all items of the index. It 216 /// stores size of the virtual posting list, and all operations are performed 217 /// in O(1). 218 class TrueIterator : public Iterator { 219 public: 220 explicit TrueIterator(DocID Size) : Iterator(Kind::True), Size(Size) {} 221 222 bool reachedEnd() const override { return Index >= Size; } 223 224 void advance() override { 225 assert(!reachedEnd() && "TRUE iterator can't advance() at the end."); 226 ++Index; 227 } 228 229 void advanceTo(DocID ID) override { 230 assert(!reachedEnd() && "TRUE iterator can't advanceTo() at the end."); 231 Index = std::min(ID, Size); 232 } 233 234 DocID peek() const override { 235 assert(!reachedEnd() && "TRUE iterator can't peek() at the end."); 236 return Index; 237 } 238 239 float consume() override { 240 assert(!reachedEnd() && "TRUE iterator can't consume() at the end."); 241 return 1; 242 } 243 244 size_t estimateSize() const override { return Size; } 245 246 private: 247 llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { 248 return OS << "true"; 249 } 250 251 DocID Index = 0; 252 /// Size of the underlying virtual PostingList. 253 DocID Size; 254 }; 255 256 /// FalseIterator yields no results. 257 class FalseIterator : public Iterator { 258 public: 259 FalseIterator() : Iterator(Kind::False) {} 260 bool reachedEnd() const override { return true; } 261 void advance() override { assert(false); } 262 void advanceTo(DocID ID) override { assert(false); } 263 DocID peek() const override { 264 assert(false); 265 return 0; 266 } 267 float consume() override { 268 assert(false); 269 return 1; 270 } 271 size_t estimateSize() const override { return 0; } 272 273 private: 274 llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { 275 return OS << "false"; 276 } 277 }; 278 279 /// Boost iterator is a wrapper around its child which multiplies scores of 280 /// each retrieved item by a given factor. 281 class BoostIterator : public Iterator { 282 public: 283 BoostIterator(std::unique_ptr<Iterator> Child, float Factor) 284 : Child(std::move(Child)), Factor(Factor) {} 285 286 bool reachedEnd() const override { return Child->reachedEnd(); } 287 288 void advance() override { Child->advance(); } 289 290 void advanceTo(DocID ID) override { Child->advanceTo(ID); } 291 292 DocID peek() const override { return Child->peek(); } 293 294 float consume() override { return Child->consume() * Factor; } 295 296 size_t estimateSize() const override { return Child->estimateSize(); } 297 298 private: 299 llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { 300 return OS << "(* " << Factor << ' ' << *Child << ')'; 301 } 302 303 std::unique_ptr<Iterator> Child; 304 float Factor; 305 }; 306 307 /// This iterator limits the number of items retrieved from the child iterator 308 /// on top of the query tree. To ensure that query tree with LIMIT iterators 309 /// inside works correctly, users have to call Root->consume(Root->peek()) each 310 /// time item is retrieved at the root of query tree. 311 class LimitIterator : public Iterator { 312 public: 313 LimitIterator(std::unique_ptr<Iterator> Child, size_t Limit) 314 : Child(std::move(Child)), Limit(Limit), ItemsLeft(Limit) {} 315 316 bool reachedEnd() const override { 317 return ItemsLeft == 0 || Child->reachedEnd(); 318 } 319 320 void advance() override { Child->advance(); } 321 322 void advanceTo(DocID ID) override { Child->advanceTo(ID); } 323 324 DocID peek() const override { return Child->peek(); } 325 326 /// Decreases the limit in case the element consumed at top of the query tree 327 /// comes from the underlying iterator. 328 float consume() override { 329 assert(!reachedEnd() && "LimitIterator can't consume() at the end."); 330 --ItemsLeft; 331 return Child->consume(); 332 } 333 334 size_t estimateSize() const override { 335 return std::min(Child->estimateSize(), Limit); 336 } 337 338 private: 339 llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { 340 return OS << "(LIMIT " << Limit << " " << *Child << ')'; 341 } 342 343 std::unique_ptr<Iterator> Child; 344 size_t Limit; 345 size_t ItemsLeft; 346 }; 347 348 } // end namespace 349 350 std::vector<std::pair<DocID, float>> consume(Iterator &It) { 351 std::vector<std::pair<DocID, float>> Result; 352 for (; !It.reachedEnd(); It.advance()) 353 Result.emplace_back(It.peek(), It.consume()); 354 return Result; 355 } 356 357 std::unique_ptr<Iterator> 358 Corpus::intersect(std::vector<std::unique_ptr<Iterator>> Children) const { 359 std::vector<std::unique_ptr<Iterator>> RealChildren; 360 for (auto &Child : Children) { 361 switch (Child->kind()) { 362 case Iterator::Kind::True: 363 break; // No effect, drop the iterator. 364 case Iterator::Kind::False: 365 return std::move(Child); // Intersection is empty. 366 case Iterator::Kind::And: { 367 // Inline nested AND into parent AND. 368 auto &NewChildren = static_cast<AndIterator *>(Child.get())->Children; 369 std::move(NewChildren.begin(), NewChildren.end(), 370 std::back_inserter(RealChildren)); 371 break; 372 } 373 default: 374 RealChildren.push_back(std::move(Child)); 375 } 376 } 377 switch (RealChildren.size()) { 378 case 0: 379 return all(); 380 case 1: 381 return std::move(RealChildren.front()); 382 default: 383 return std::make_unique<AndIterator>(std::move(RealChildren)); 384 } 385 } 386 387 std::unique_ptr<Iterator> 388 Corpus::unionOf(std::vector<std::unique_ptr<Iterator>> Children) const { 389 std::vector<std::unique_ptr<Iterator>> RealChildren; 390 for (auto &Child : Children) { 391 switch (Child->kind()) { 392 case Iterator::Kind::False: 393 break; // No effect, drop the iterator. 394 case Iterator::Kind::Or: { 395 // Inline nested OR into parent OR. 396 auto &NewChildren = static_cast<OrIterator *>(Child.get())->Children; 397 std::move(NewChildren.begin(), NewChildren.end(), 398 std::back_inserter(RealChildren)); 399 break; 400 } 401 case Iterator::Kind::True: 402 // Don't return all(), which would discard sibling boosts. 403 default: 404 RealChildren.push_back(std::move(Child)); 405 } 406 } 407 switch (RealChildren.size()) { 408 case 0: 409 return none(); 410 case 1: 411 return std::move(RealChildren.front()); 412 default: 413 return std::make_unique<OrIterator>(std::move(RealChildren)); 414 } 415 } 416 417 std::unique_ptr<Iterator> Corpus::all() const { 418 return std::make_unique<TrueIterator>(Size); 419 } 420 421 std::unique_ptr<Iterator> Corpus::none() const { 422 return std::make_unique<FalseIterator>(); 423 } 424 425 std::unique_ptr<Iterator> Corpus::boost(std::unique_ptr<Iterator> Child, 426 float Factor) const { 427 if (Factor == 1) 428 return Child; 429 if (Child->kind() == Iterator::Kind::False) 430 return Child; 431 return std::make_unique<BoostIterator>(std::move(Child), Factor); 432 } 433 434 std::unique_ptr<Iterator> Corpus::limit(std::unique_ptr<Iterator> Child, 435 size_t Limit) const { 436 if (Child->kind() == Iterator::Kind::False) 437 return Child; 438 return std::make_unique<LimitIterator>(std::move(Child), Limit); 439 } 440 441 } // namespace dex 442 } // namespace clangd 443 } // namespace clang 444