1 //===--- Iterator.cpp - Query Symbol Retrieval ------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "Iterator.h" 11 #include "llvm/Support/Casting.h" 12 #include <algorithm> 13 #include <cassert> 14 #include <numeric> 15 16 namespace clang { 17 namespace clangd { 18 namespace dex { 19 20 namespace { 21 22 /// Implements Iterator over the intersection of other iterators. 23 /// 24 /// AndIterator iterates through common items among all children. It becomes 25 /// exhausted as soon as any child becomes exhausted. After each mutation, the 26 /// iterator restores the invariant: all children must point to the same item. 27 class AndIterator : public Iterator { 28 public: 29 explicit AndIterator(std::vector<std::unique_ptr<Iterator>> AllChildren) 30 : Iterator(Kind::And), Children(std::move(AllChildren)) { 31 assert(!Children.empty() && "AND iterator should have at least one child."); 32 // Establish invariants. 33 for (const auto &Child : Children) 34 ReachedEnd |= Child->reachedEnd(); 35 sync(); 36 // When children are sorted by the estimateSize(), sync() calls are more 37 // effective. Each sync() starts with the first child and makes sure all 38 // children point to the same element. If any child is "above" the previous 39 // ones, the algorithm resets and and advances the children to the next 40 // highest element starting from the front. When child iterators in the 41 // beginning have smaller estimated size, the sync() will have less restarts 42 // and become more effective. 43 llvm::sort(Children, [](const std::unique_ptr<Iterator> &LHS, 44 const std::unique_ptr<Iterator> &RHS) { 45 return LHS->estimateSize() < RHS->estimateSize(); 46 }); 47 } 48 49 bool reachedEnd() const override { return ReachedEnd; } 50 51 /// Advances all children to the next common item. 52 void advance() override { 53 assert(!reachedEnd() && "AND iterator can't advance() at the end."); 54 Children.front()->advance(); 55 sync(); 56 } 57 58 /// Advances all children to the next common item with DocumentID >= ID. 59 void advanceTo(DocID ID) override { 60 assert(!reachedEnd() && "AND iterator can't advanceTo() at the end."); 61 Children.front()->advanceTo(ID); 62 sync(); 63 } 64 65 DocID peek() const override { return Children.front()->peek(); } 66 67 float consume() override { 68 assert(!reachedEnd() && "AND iterator can't consume() at the end."); 69 float Boost = 1; 70 for (const auto &Child : Children) 71 Boost *= Child->consume(); 72 return Boost; 73 } 74 75 size_t estimateSize() const override { 76 return Children.front()->estimateSize(); 77 } 78 79 private: 80 llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { 81 OS << "(& "; 82 auto Separator = ""; 83 for (const auto &Child : Children) { 84 OS << Separator << *Child; 85 Separator = " "; 86 } 87 OS << ')'; 88 return OS; 89 } 90 91 /// Restores class invariants: each child will point to the same element after 92 /// sync. 93 void sync() { 94 ReachedEnd |= Children.front()->reachedEnd(); 95 if (ReachedEnd) 96 return; 97 auto SyncID = Children.front()->peek(); 98 // Indicates whether any child needs to be advanced to new SyncID. 99 bool NeedsAdvance = false; 100 do { 101 NeedsAdvance = false; 102 for (auto &Child : Children) { 103 Child->advanceTo(SyncID); 104 ReachedEnd |= Child->reachedEnd(); 105 // If any child reaches end And iterator can not match any other items. 106 // In this case, just terminate the process. 107 if (ReachedEnd) 108 return; 109 // If any child goes beyond given ID (i.e. ID is not the common item), 110 // all children should be advanced to the next common item. 111 if (Child->peek() > SyncID) { 112 SyncID = Child->peek(); 113 NeedsAdvance = true; 114 } 115 } 116 } while (NeedsAdvance); 117 } 118 119 /// AndIterator owns its children and ensures that all of them point to the 120 /// same element. As soon as one child gets exhausted, AndIterator can no 121 /// longer advance and has reached its end. 122 std::vector<std::unique_ptr<Iterator>> Children; 123 /// Indicates whether any child is exhausted. It is cheaper to maintain and 124 /// update the field, rather than traversing the whole subtree in each 125 /// reachedEnd() call. 126 bool ReachedEnd = false; 127 friend Corpus; // For optimizations. 128 }; 129 130 /// Implements Iterator over the union of other iterators. 131 /// 132 /// OrIterator iterates through all items which can be pointed to by at least 133 /// one child. To preserve the sorted order, this iterator always advances the 134 /// child with smallest Child->peek() value. OrIterator becomes exhausted as 135 /// soon as all of its children are exhausted. 136 class OrIterator : public Iterator { 137 public: 138 explicit OrIterator(std::vector<std::unique_ptr<Iterator>> AllChildren) 139 : Iterator(Kind::Or), Children(std::move(AllChildren)) { 140 assert(!Children.empty() && "OR iterator should have at least one child."); 141 } 142 143 /// Returns true if all children are exhausted. 144 bool reachedEnd() const override { 145 for (const auto &Child : Children) 146 if (!Child->reachedEnd()) 147 return false; 148 return true; 149 } 150 151 /// Moves each child pointing to the smallest DocID to the next item. 152 void advance() override { 153 assert(!reachedEnd() && "OR iterator can't advance() at the end."); 154 const auto SmallestID = peek(); 155 for (const auto &Child : Children) 156 if (!Child->reachedEnd() && Child->peek() == SmallestID) 157 Child->advance(); 158 } 159 160 /// Advances each child to the next existing element with DocumentID >= ID. 161 void advanceTo(DocID ID) override { 162 assert(!reachedEnd() && "OR iterator can't advanceTo() at the end."); 163 for (const auto &Child : Children) 164 if (!Child->reachedEnd()) 165 Child->advanceTo(ID); 166 } 167 168 /// Returns the element under cursor of the child with smallest Child->peek() 169 /// value. 170 DocID peek() const override { 171 assert(!reachedEnd() && "OR iterator can't peek() at the end."); 172 DocID Result = std::numeric_limits<DocID>::max(); 173 174 for (const auto &Child : Children) 175 if (!Child->reachedEnd()) 176 Result = std::min(Result, Child->peek()); 177 178 return Result; 179 } 180 181 // Returns the maximum boosting score among all Children when iterator 182 // points to the current ID. 183 float consume() override { 184 assert(!reachedEnd() && "OR iterator can't consume() at the end."); 185 const DocID ID = peek(); 186 float Boost = 1; 187 for (const auto &Child : Children) 188 if (!Child->reachedEnd() && Child->peek() == ID) 189 Boost = std::max(Boost, Child->consume()); 190 return Boost; 191 } 192 193 size_t estimateSize() const override { 194 size_t Size = 0; 195 for (const auto &Child : Children) 196 Size = std::max(Size, Child->estimateSize()); 197 return Size; 198 } 199 200 private: 201 llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { 202 OS << "(| "; 203 auto Separator = ""; 204 for (const auto &Child : Children) { 205 OS << Separator << *Child; 206 Separator = " "; 207 } 208 OS << ')'; 209 return OS; 210 } 211 212 // FIXME(kbobyrev): Would storing Children in min-heap be faster? 213 std::vector<std::unique_ptr<Iterator>> Children; 214 friend Corpus; // For optimizations. 215 }; 216 217 /// TrueIterator handles PostingLists which contain all items of the index. It 218 /// stores size of the virtual posting list, and all operations are performed 219 /// in O(1). 220 class TrueIterator : public Iterator { 221 public: 222 explicit TrueIterator(DocID Size) : Iterator(Kind::True), Size(Size) {} 223 224 bool reachedEnd() const override { return Index >= Size; } 225 226 void advance() override { 227 assert(!reachedEnd() && "TRUE iterator can't advance() at the end."); 228 ++Index; 229 } 230 231 void advanceTo(DocID ID) override { 232 assert(!reachedEnd() && "TRUE iterator can't advanceTo() at the end."); 233 Index = std::min(ID, Size); 234 } 235 236 DocID peek() const override { 237 assert(!reachedEnd() && "TRUE iterator can't peek() at the end."); 238 return Index; 239 } 240 241 float consume() override { 242 assert(!reachedEnd() && "TRUE iterator can't consume() at the end."); 243 return 1; 244 } 245 246 size_t estimateSize() const override { return Size; } 247 248 private: 249 llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { 250 return OS << "true"; 251 } 252 253 DocID Index = 0; 254 /// Size of the underlying virtual PostingList. 255 DocID Size; 256 }; 257 258 /// FalseIterator yields no results. 259 class FalseIterator : public Iterator { 260 public: 261 FalseIterator() : Iterator(Kind::False) {} 262 bool reachedEnd() const override { return true; } 263 void advance() override { assert(false); } 264 void advanceTo(DocID ID) override { assert(false); } 265 DocID peek() const override { 266 assert(false); 267 return 0; 268 } 269 float consume() override { 270 assert(false); 271 return 1; 272 } 273 size_t estimateSize() const override { return 0; } 274 275 private: 276 llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { 277 return OS << "false"; 278 } 279 }; 280 281 /// Boost iterator is a wrapper around its child which multiplies scores of 282 /// each retrieved item by a given factor. 283 class BoostIterator : public Iterator { 284 public: 285 BoostIterator(std::unique_ptr<Iterator> Child, float Factor) 286 : Child(std::move(Child)), Factor(Factor) {} 287 288 bool reachedEnd() const override { return Child->reachedEnd(); } 289 290 void advance() override { Child->advance(); } 291 292 void advanceTo(DocID ID) override { Child->advanceTo(ID); } 293 294 DocID peek() const override { return Child->peek(); } 295 296 float consume() override { return Child->consume() * Factor; } 297 298 size_t estimateSize() const override { return Child->estimateSize(); } 299 300 private: 301 llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { 302 return OS << "(* " << Factor << ' ' << *Child << ')'; 303 } 304 305 std::unique_ptr<Iterator> Child; 306 float Factor; 307 }; 308 309 /// This iterator limits the number of items retrieved from the child iterator 310 /// on top of the query tree. To ensure that query tree with LIMIT iterators 311 /// inside works correctly, users have to call Root->consume(Root->peek()) each 312 /// time item is retrieved at the root of query tree. 313 class LimitIterator : public Iterator { 314 public: 315 LimitIterator(std::unique_ptr<Iterator> Child, size_t Limit) 316 : Child(std::move(Child)), Limit(Limit), ItemsLeft(Limit) {} 317 318 bool reachedEnd() const override { 319 return ItemsLeft == 0 || Child->reachedEnd(); 320 } 321 322 void advance() override { Child->advance(); } 323 324 void advanceTo(DocID ID) override { Child->advanceTo(ID); } 325 326 DocID peek() const override { return Child->peek(); } 327 328 /// Decreases the limit in case the element consumed at top of the query tree 329 /// comes from the underlying iterator. 330 float consume() override { 331 assert(!reachedEnd() && "LimitIterator can't consume() at the end."); 332 --ItemsLeft; 333 return Child->consume(); 334 } 335 336 size_t estimateSize() const override { 337 return std::min(Child->estimateSize(), Limit); 338 } 339 340 private: 341 llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override { 342 return OS << "(LIMIT " << Limit << " " << *Child << ')'; 343 } 344 345 std::unique_ptr<Iterator> Child; 346 size_t Limit; 347 size_t ItemsLeft; 348 }; 349 350 } // end namespace 351 352 std::vector<std::pair<DocID, float>> consume(Iterator &It) { 353 std::vector<std::pair<DocID, float>> Result; 354 for (; !It.reachedEnd(); It.advance()) 355 Result.emplace_back(It.peek(), It.consume()); 356 return Result; 357 } 358 359 std::unique_ptr<Iterator> 360 Corpus::intersect(std::vector<std::unique_ptr<Iterator>> Children) const { 361 std::vector<std::unique_ptr<Iterator>> RealChildren; 362 for (auto &Child : Children) { 363 switch (Child->kind()) { 364 case Iterator::Kind::True: 365 break; // No effect, drop the iterator. 366 case Iterator::Kind::False: 367 return std::move(Child); // Intersection is empty. 368 case Iterator::Kind::And: { 369 // Inline nested AND into parent AND. 370 auto &NewChildren = static_cast<AndIterator *>(Child.get())->Children; 371 std::move(NewChildren.begin(), NewChildren.end(), 372 std::back_inserter(RealChildren)); 373 break; 374 } 375 default: 376 RealChildren.push_back(std::move(Child)); 377 } 378 } 379 switch (RealChildren.size()) { 380 case 0: 381 return all(); 382 case 1: 383 return std::move(RealChildren.front()); 384 default: 385 return llvm::make_unique<AndIterator>(std::move(RealChildren)); 386 } 387 } 388 389 std::unique_ptr<Iterator> 390 Corpus::unionOf(std::vector<std::unique_ptr<Iterator>> Children) const { 391 std::vector<std::unique_ptr<Iterator>> RealChildren; 392 for (auto &Child : Children) { 393 switch (Child->kind()) { 394 case Iterator::Kind::False: 395 break; // No effect, drop the iterator. 396 case Iterator::Kind::Or: { 397 // Inline nested OR into parent OR. 398 auto &NewChildren = static_cast<OrIterator *>(Child.get())->Children; 399 std::move(NewChildren.begin(), NewChildren.end(), 400 std::back_inserter(RealChildren)); 401 break; 402 } 403 case Iterator::Kind::True: 404 // Don't return all(), which would discard sibling boosts. 405 default: 406 RealChildren.push_back(std::move(Child)); 407 } 408 } 409 switch (RealChildren.size()) { 410 case 0: 411 return none(); 412 case 1: 413 return std::move(RealChildren.front()); 414 default: 415 return llvm::make_unique<OrIterator>(std::move(RealChildren)); 416 } 417 } 418 419 std::unique_ptr<Iterator> Corpus::all() const { 420 return llvm::make_unique<TrueIterator>(Size); 421 } 422 423 std::unique_ptr<Iterator> Corpus::none() const { 424 return llvm::make_unique<FalseIterator>(); 425 } 426 427 std::unique_ptr<Iterator> Corpus::boost(std::unique_ptr<Iterator> Child, 428 float Factor) const { 429 if (Factor == 1) 430 return Child; 431 if (Child->kind() == Iterator::Kind::False) 432 return Child; 433 return llvm::make_unique<BoostIterator>(std::move(Child), Factor); 434 } 435 436 std::unique_ptr<Iterator> Corpus::limit(std::unique_ptr<Iterator> Child, 437 size_t Limit) const { 438 if (Child->kind() == Iterator::Kind::False) 439 return Child; 440 return llvm::make_unique<LimitIterator>(std::move(Child), Limit); 441 } 442 443 } // namespace dex 444 } // namespace clangd 445 } // namespace clang 446