1 //===- VPlan.cpp - Vectorizer Plan ----------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This is the LLVM vectorization plan. It represents a candidate for 11 /// vectorization, allowing to plan and optimize how to vectorize a given loop 12 /// before generating LLVM-IR. 13 /// The vectorizer uses vectorization plans to estimate the costs of potential 14 /// candidates and if profitable to execute the desired plan, generating vector 15 /// LLVM-IR code. 16 /// 17 //===----------------------------------------------------------------------===// 18 19 #include "VPlan.h" 20 #include "VPlanDominatorTree.h" 21 #include "llvm/ADT/DepthFirstIterator.h" 22 #include "llvm/ADT/PostOrderIterator.h" 23 #include "llvm/ADT/STLExtras.h" 24 #include "llvm/ADT/SmallVector.h" 25 #include "llvm/ADT/Twine.h" 26 #include "llvm/Analysis/IVDescriptors.h" 27 #include "llvm/Analysis/LoopInfo.h" 28 #include "llvm/IR/BasicBlock.h" 29 #include "llvm/IR/CFG.h" 30 #include "llvm/IR/InstrTypes.h" 31 #include "llvm/IR/Instruction.h" 32 #include "llvm/IR/Instructions.h" 33 #include "llvm/IR/Type.h" 34 #include "llvm/IR/Value.h" 35 #include "llvm/Support/Casting.h" 36 #include "llvm/Support/CommandLine.h" 37 #include "llvm/Support/Debug.h" 38 #include "llvm/Support/ErrorHandling.h" 39 #include "llvm/Support/GenericDomTreeConstruction.h" 40 #include "llvm/Support/GraphWriter.h" 41 #include "llvm/Support/raw_ostream.h" 42 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 43 #include <cassert> 44 #include <iterator> 45 #include <string> 46 #include <vector> 47 48 using namespace llvm; 49 extern cl::opt<bool> EnableVPlanNativePath; 50 51 #define DEBUG_TYPE "vplan" 52 53 raw_ostream &llvm::operator<<(raw_ostream &OS, const VPValue &V) { 54 const VPInstruction *Instr = dyn_cast<VPInstruction>(&V); 55 VPSlotTracker SlotTracker( 56 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr); 57 V.print(OS, SlotTracker); 58 return OS; 59 } 60 61 Value *VPLane::getAsRuntimeExpr(IRBuilder<> &Builder, 62 const ElementCount &VF) const { 63 switch (LaneKind) { 64 case VPLane::Kind::ScalableLast: 65 // Lane = RuntimeVF - VF.getKnownMinValue() + Lane 66 return Builder.CreateSub(getRuntimeVF(Builder, Builder.getInt32Ty(), VF), 67 Builder.getInt32(VF.getKnownMinValue() - Lane)); 68 case VPLane::Kind::First: 69 return Builder.getInt32(Lane); 70 } 71 llvm_unreachable("Unknown lane kind"); 72 } 73 74 VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def) 75 : SubclassID(SC), UnderlyingVal(UV), Def(Def) { 76 if (Def) 77 Def->addDefinedValue(this); 78 } 79 80 VPValue::~VPValue() { 81 assert(Users.empty() && "trying to delete a VPValue with remaining users"); 82 if (Def) 83 Def->removeDefinedValue(this); 84 } 85 86 void VPValue::print(raw_ostream &OS, VPSlotTracker &SlotTracker) const { 87 if (const VPRecipeBase *R = dyn_cast_or_null<VPRecipeBase>(Def)) 88 R->print(OS, "", SlotTracker); 89 else 90 printAsOperand(OS, SlotTracker); 91 } 92 93 void VPValue::dump() const { 94 const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this->Def); 95 VPSlotTracker SlotTracker( 96 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr); 97 print(dbgs(), SlotTracker); 98 dbgs() << "\n"; 99 } 100 101 void VPDef::dump() const { 102 const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this); 103 VPSlotTracker SlotTracker( 104 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr); 105 print(dbgs(), "", SlotTracker); 106 dbgs() << "\n"; 107 } 108 109 // Get the top-most entry block of \p Start. This is the entry block of the 110 // containing VPlan. This function is templated to support both const and non-const blocks 111 template <typename T> static T *getPlanEntry(T *Start) { 112 T *Next = Start; 113 T *Current = Start; 114 while ((Next = Next->getParent())) 115 Current = Next; 116 117 SmallSetVector<T *, 8> WorkList; 118 WorkList.insert(Current); 119 120 for (unsigned i = 0; i < WorkList.size(); i++) { 121 T *Current = WorkList[i]; 122 if (Current->getNumPredecessors() == 0) 123 return Current; 124 auto &Predecessors = Current->getPredecessors(); 125 WorkList.insert(Predecessors.begin(), Predecessors.end()); 126 } 127 128 llvm_unreachable("VPlan without any entry node without predecessors"); 129 } 130 131 VPlan *VPBlockBase::getPlan() { return getPlanEntry(this)->Plan; } 132 133 const VPlan *VPBlockBase::getPlan() const { return getPlanEntry(this)->Plan; } 134 135 /// \return the VPBasicBlock that is the entry of Block, possibly indirectly. 136 const VPBasicBlock *VPBlockBase::getEntryBasicBlock() const { 137 const VPBlockBase *Block = this; 138 while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) 139 Block = Region->getEntry(); 140 return cast<VPBasicBlock>(Block); 141 } 142 143 VPBasicBlock *VPBlockBase::getEntryBasicBlock() { 144 VPBlockBase *Block = this; 145 while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) 146 Block = Region->getEntry(); 147 return cast<VPBasicBlock>(Block); 148 } 149 150 void VPBlockBase::setPlan(VPlan *ParentPlan) { 151 assert(ParentPlan->getEntry() == this && 152 "Can only set plan on its entry block."); 153 Plan = ParentPlan; 154 } 155 156 /// \return the VPBasicBlock that is the exit of Block, possibly indirectly. 157 const VPBasicBlock *VPBlockBase::getExitBasicBlock() const { 158 const VPBlockBase *Block = this; 159 while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) 160 Block = Region->getExit(); 161 return cast<VPBasicBlock>(Block); 162 } 163 164 VPBasicBlock *VPBlockBase::getExitBasicBlock() { 165 VPBlockBase *Block = this; 166 while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) 167 Block = Region->getExit(); 168 return cast<VPBasicBlock>(Block); 169 } 170 171 VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() { 172 if (!Successors.empty() || !Parent) 173 return this; 174 assert(Parent->getExit() == this && 175 "Block w/o successors not the exit of its parent."); 176 return Parent->getEnclosingBlockWithSuccessors(); 177 } 178 179 VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() { 180 if (!Predecessors.empty() || !Parent) 181 return this; 182 assert(Parent->getEntry() == this && 183 "Block w/o predecessors not the entry of its parent."); 184 return Parent->getEnclosingBlockWithPredecessors(); 185 } 186 187 static VPValue *getSingleOperandOrNull(VPUser &U) { 188 if (U.getNumOperands() == 1) 189 return U.getOperand(0); 190 191 return nullptr; 192 } 193 194 static const VPValue *getSingleOperandOrNull(const VPUser &U) { 195 if (U.getNumOperands() == 1) 196 return U.getOperand(0); 197 198 return nullptr; 199 } 200 201 static void resetSingleOpUser(VPUser &U, VPValue *NewVal) { 202 assert(U.getNumOperands() <= 1 && "Didn't expect more than one operand!"); 203 if (!NewVal) { 204 if (U.getNumOperands() == 1) 205 U.removeLastOperand(); 206 return; 207 } 208 209 if (U.getNumOperands() == 1) 210 U.setOperand(0, NewVal); 211 else 212 U.addOperand(NewVal); 213 } 214 215 VPValue *VPBlockBase::getCondBit() { 216 return getSingleOperandOrNull(CondBitUser); 217 } 218 219 const VPValue *VPBlockBase::getCondBit() const { 220 return getSingleOperandOrNull(CondBitUser); 221 } 222 223 void VPBlockBase::setCondBit(VPValue *CV) { 224 resetSingleOpUser(CondBitUser, CV); 225 } 226 227 VPValue *VPBlockBase::getPredicate() { 228 return getSingleOperandOrNull(PredicateUser); 229 } 230 231 const VPValue *VPBlockBase::getPredicate() const { 232 return getSingleOperandOrNull(PredicateUser); 233 } 234 235 void VPBlockBase::setPredicate(VPValue *CV) { 236 resetSingleOpUser(PredicateUser, CV); 237 } 238 239 void VPBlockBase::deleteCFG(VPBlockBase *Entry) { 240 SmallVector<VPBlockBase *, 8> Blocks(depth_first(Entry)); 241 242 for (VPBlockBase *Block : Blocks) 243 delete Block; 244 } 245 246 VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() { 247 iterator It = begin(); 248 while (It != end() && (isa<VPWidenPHIRecipe>(&*It) || 249 isa<VPWidenIntOrFpInductionRecipe>(&*It) || 250 isa<VPPredInstPHIRecipe>(&*It) || 251 isa<VPWidenCanonicalIVRecipe>(&*It))) 252 It++; 253 return It; 254 } 255 256 Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) { 257 if (!Def->getDef()) 258 return Def->getLiveInIRValue(); 259 260 if (hasScalarValue(Def, Instance)) { 261 return Data 262 .PerPartScalars[Def][Instance.Part][Instance.Lane.mapToCacheIndex(VF)]; 263 } 264 265 assert(hasVectorValue(Def, Instance.Part)); 266 auto *VecPart = Data.PerPartOutput[Def][Instance.Part]; 267 if (!VecPart->getType()->isVectorTy()) { 268 assert(Instance.Lane.isFirstLane() && "cannot get lane > 0 for scalar"); 269 return VecPart; 270 } 271 // TODO: Cache created scalar values. 272 Value *Lane = Instance.Lane.getAsRuntimeExpr(Builder, VF); 273 auto *Extract = Builder.CreateExtractElement(VecPart, Lane); 274 // set(Def, Extract, Instance); 275 return Extract; 276 } 277 278 BasicBlock * 279 VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) { 280 // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks. 281 // Pred stands for Predessor. Prev stands for Previous - last visited/created. 282 BasicBlock *PrevBB = CFG.PrevBB; 283 BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(), 284 PrevBB->getParent(), CFG.LastBB); 285 LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n'); 286 287 // Hook up the new basic block to its predecessors. 288 for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) { 289 VPBasicBlock *PredVPBB = PredVPBlock->getExitBasicBlock(); 290 auto &PredVPSuccessors = PredVPBB->getSuccessors(); 291 BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB]; 292 293 // In outer loop vectorization scenario, the predecessor BBlock may not yet 294 // be visited(backedge). Mark the VPBasicBlock for fixup at the end of 295 // vectorization. We do not encounter this case in inner loop vectorization 296 // as we start out by building a loop skeleton with the vector loop header 297 // and latch blocks. As a result, we never enter this function for the 298 // header block in the non VPlan-native path. 299 if (!PredBB) { 300 assert(EnableVPlanNativePath && 301 "Unexpected null predecessor in non VPlan-native path"); 302 CFG.VPBBsToFix.push_back(PredVPBB); 303 continue; 304 } 305 306 assert(PredBB && "Predecessor basic-block not found building successor."); 307 auto *PredBBTerminator = PredBB->getTerminator(); 308 LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n'); 309 if (isa<UnreachableInst>(PredBBTerminator)) { 310 assert(PredVPSuccessors.size() == 1 && 311 "Predecessor ending w/o branch must have single successor."); 312 PredBBTerminator->eraseFromParent(); 313 BranchInst::Create(NewBB, PredBB); 314 } else { 315 assert(PredVPSuccessors.size() == 2 && 316 "Predecessor ending with branch must have two successors."); 317 unsigned idx = PredVPSuccessors.front() == this ? 0 : 1; 318 assert(!PredBBTerminator->getSuccessor(idx) && 319 "Trying to reset an existing successor block."); 320 PredBBTerminator->setSuccessor(idx, NewBB); 321 } 322 } 323 return NewBB; 324 } 325 326 void VPBasicBlock::execute(VPTransformState *State) { 327 bool Replica = State->Instance && !State->Instance->isFirstIteration(); 328 VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB; 329 VPBlockBase *SingleHPred = nullptr; 330 BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible. 331 332 // 1. Create an IR basic block, or reuse the last one if possible. 333 // The last IR basic block is reused, as an optimization, in three cases: 334 // A. the first VPBB reuses the loop header BB - when PrevVPBB is null; 335 // B. when the current VPBB has a single (hierarchical) predecessor which 336 // is PrevVPBB and the latter has a single (hierarchical) successor; and 337 // C. when the current VPBB is an entry of a region replica - where PrevVPBB 338 // is the exit of this region from a previous instance, or the predecessor 339 // of this region. 340 if (PrevVPBB && /* A */ 341 !((SingleHPred = getSingleHierarchicalPredecessor()) && 342 SingleHPred->getExitBasicBlock() == PrevVPBB && 343 PrevVPBB->getSingleHierarchicalSuccessor()) && /* B */ 344 !(Replica && getPredecessors().empty())) { /* C */ 345 NewBB = createEmptyBasicBlock(State->CFG); 346 State->Builder.SetInsertPoint(NewBB); 347 // Temporarily terminate with unreachable until CFG is rewired. 348 UnreachableInst *Terminator = State->Builder.CreateUnreachable(); 349 State->Builder.SetInsertPoint(Terminator); 350 // Register NewBB in its loop. In innermost loops its the same for all BB's. 351 Loop *L = State->LI->getLoopFor(State->CFG.LastBB); 352 L->addBasicBlockToLoop(NewBB, *State->LI); 353 State->CFG.PrevBB = NewBB; 354 } 355 356 // 2. Fill the IR basic block with IR instructions. 357 LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName() 358 << " in BB:" << NewBB->getName() << '\n'); 359 360 State->CFG.VPBB2IRBB[this] = NewBB; 361 State->CFG.PrevVPBB = this; 362 363 for (VPRecipeBase &Recipe : Recipes) 364 Recipe.execute(*State); 365 366 VPValue *CBV; 367 if (EnableVPlanNativePath && (CBV = getCondBit())) { 368 assert(CBV->getUnderlyingValue() && 369 "Unexpected null underlying value for condition bit"); 370 371 // Condition bit value in a VPBasicBlock is used as the branch selector. In 372 // the VPlan-native path case, since all branches are uniform we generate a 373 // branch instruction using the condition value from vector lane 0 and dummy 374 // successors. The successors are fixed later when the successor blocks are 375 // visited. 376 Value *NewCond = State->get(CBV, {0, 0}); 377 378 // Replace the temporary unreachable terminator with the new conditional 379 // branch. 380 auto *CurrentTerminator = NewBB->getTerminator(); 381 assert(isa<UnreachableInst>(CurrentTerminator) && 382 "Expected to replace unreachable terminator with conditional " 383 "branch."); 384 auto *CondBr = BranchInst::Create(NewBB, nullptr, NewCond); 385 CondBr->setSuccessor(0, nullptr); 386 ReplaceInstWithInst(CurrentTerminator, CondBr); 387 } 388 389 LLVM_DEBUG(dbgs() << "LV: filled BB:" << *NewBB); 390 } 391 392 void VPBasicBlock::dropAllReferences(VPValue *NewValue) { 393 for (VPRecipeBase &R : Recipes) { 394 for (auto *Def : R.definedValues()) 395 Def->replaceAllUsesWith(NewValue); 396 397 for (unsigned I = 0, E = R.getNumOperands(); I != E; I++) 398 R.setOperand(I, NewValue); 399 } 400 } 401 402 void VPBasicBlock::print(raw_ostream &O, const Twine &Indent, 403 VPSlotTracker &SlotTracker) const { 404 O << Indent << getName() << ":\n"; 405 if (const VPValue *Pred = getPredicate()) { 406 O << Indent << "BlockPredicate:"; 407 Pred->printAsOperand(O, SlotTracker); 408 if (const auto *PredInst = dyn_cast<VPInstruction>(Pred)) 409 O << " (" << PredInst->getParent()->getName() << ")"; 410 O << '\n'; 411 } 412 413 auto RecipeIndent = Indent + " "; 414 for (const VPRecipeBase &Recipe : *this) { 415 Recipe.print(O, RecipeIndent, SlotTracker); 416 O << '\n'; 417 } 418 419 if (getSuccessors().empty()) { 420 O << Indent << "No successors\n"; 421 } else { 422 O << Indent << "Successor(s): "; 423 ListSeparator LS; 424 for (auto *Succ : getSuccessors()) 425 O << LS << Succ->getName(); 426 O << '\n'; 427 } 428 429 if (const VPValue *CBV = getCondBit()) { 430 O << Indent << "CondBit: "; 431 CBV->printAsOperand(O, SlotTracker); 432 if (const auto *CBI = dyn_cast<VPInstruction>(CBV)) 433 O << " (" << CBI->getParent()->getName() << ")"; 434 O << '\n'; 435 } 436 } 437 438 void VPRegionBlock::dropAllReferences(VPValue *NewValue) { 439 for (VPBlockBase *Block : depth_first(Entry)) 440 // Drop all references in VPBasicBlocks and replace all uses with 441 // DummyValue. 442 Block->dropAllReferences(NewValue); 443 } 444 445 void VPRegionBlock::execute(VPTransformState *State) { 446 ReversePostOrderTraversal<VPBlockBase *> RPOT(Entry); 447 448 if (!isReplicator()) { 449 // Visit the VPBlocks connected to "this", starting from it. 450 for (VPBlockBase *Block : RPOT) { 451 if (EnableVPlanNativePath) { 452 // The inner loop vectorization path does not represent loop preheader 453 // and exit blocks as part of the VPlan. In the VPlan-native path, skip 454 // vectorizing loop preheader block. In future, we may replace this 455 // check with the check for loop preheader. 456 if (Block->getNumPredecessors() == 0) 457 continue; 458 459 // Skip vectorizing loop exit block. In future, we may replace this 460 // check with the check for loop exit. 461 if (Block->getNumSuccessors() == 0) 462 continue; 463 } 464 465 LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n'); 466 Block->execute(State); 467 } 468 return; 469 } 470 471 assert(!State->Instance && "Replicating a Region with non-null instance."); 472 473 // Enter replicating mode. 474 State->Instance = VPIteration(0, 0); 475 476 for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) { 477 State->Instance->Part = Part; 478 assert(!State->VF.isScalable() && "VF is assumed to be non scalable."); 479 for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF; 480 ++Lane) { 481 State->Instance->Lane = VPLane(Lane, VPLane::Kind::First); 482 // Visit the VPBlocks connected to \p this, starting from it. 483 for (VPBlockBase *Block : RPOT) { 484 LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n'); 485 Block->execute(State); 486 } 487 } 488 } 489 490 // Exit replicating mode. 491 State->Instance.reset(); 492 } 493 494 void VPRegionBlock::print(raw_ostream &O, const Twine &Indent, 495 VPSlotTracker &SlotTracker) const { 496 O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {"; 497 auto NewIndent = Indent + " "; 498 for (auto *BlockBase : depth_first(Entry)) { 499 O << '\n'; 500 BlockBase->print(O, NewIndent, SlotTracker); 501 } 502 O << Indent << "}\n"; 503 } 504 505 void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) { 506 assert(!Parent && "Recipe already in some VPBasicBlock"); 507 assert(InsertPos->getParent() && 508 "Insertion position not in any VPBasicBlock"); 509 Parent = InsertPos->getParent(); 510 Parent->getRecipeList().insert(InsertPos->getIterator(), this); 511 } 512 513 void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) { 514 assert(!Parent && "Recipe already in some VPBasicBlock"); 515 assert(InsertPos->getParent() && 516 "Insertion position not in any VPBasicBlock"); 517 Parent = InsertPos->getParent(); 518 Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this); 519 } 520 521 void VPRecipeBase::removeFromParent() { 522 assert(getParent() && "Recipe not in any VPBasicBlock"); 523 getParent()->getRecipeList().remove(getIterator()); 524 Parent = nullptr; 525 } 526 527 iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() { 528 assert(getParent() && "Recipe not in any VPBasicBlock"); 529 return getParent()->getRecipeList().erase(getIterator()); 530 } 531 532 void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) { 533 removeFromParent(); 534 insertAfter(InsertPos); 535 } 536 537 void VPRecipeBase::moveBefore(VPBasicBlock &BB, 538 iplist<VPRecipeBase>::iterator I) { 539 assert(I == BB.end() || I->getParent() == &BB); 540 removeFromParent(); 541 Parent = &BB; 542 BB.getRecipeList().insert(I, this); 543 } 544 545 void VPInstruction::generateInstruction(VPTransformState &State, 546 unsigned Part) { 547 IRBuilder<> &Builder = State.Builder; 548 549 if (Instruction::isBinaryOp(getOpcode())) { 550 Value *A = State.get(getOperand(0), Part); 551 Value *B = State.get(getOperand(1), Part); 552 Value *V = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B); 553 State.set(this, V, Part); 554 return; 555 } 556 557 switch (getOpcode()) { 558 case VPInstruction::Not: { 559 Value *A = State.get(getOperand(0), Part); 560 Value *V = Builder.CreateNot(A); 561 State.set(this, V, Part); 562 break; 563 } 564 case VPInstruction::ICmpULE: { 565 Value *IV = State.get(getOperand(0), Part); 566 Value *TC = State.get(getOperand(1), Part); 567 Value *V = Builder.CreateICmpULE(IV, TC); 568 State.set(this, V, Part); 569 break; 570 } 571 case Instruction::Select: { 572 Value *Cond = State.get(getOperand(0), Part); 573 Value *Op1 = State.get(getOperand(1), Part); 574 Value *Op2 = State.get(getOperand(2), Part); 575 Value *V = Builder.CreateSelect(Cond, Op1, Op2); 576 State.set(this, V, Part); 577 break; 578 } 579 case VPInstruction::ActiveLaneMask: { 580 // Get first lane of vector induction variable. 581 Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0)); 582 // Get the original loop tripcount. 583 Value *ScalarTC = State.TripCount; 584 585 auto *Int1Ty = Type::getInt1Ty(Builder.getContext()); 586 auto *PredTy = FixedVectorType::get(Int1Ty, State.VF.getKnownMinValue()); 587 Instruction *Call = Builder.CreateIntrinsic( 588 Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()}, 589 {VIVElem0, ScalarTC}, nullptr, "active.lane.mask"); 590 State.set(this, Call, Part); 591 break; 592 } 593 default: 594 llvm_unreachable("Unsupported opcode for instruction"); 595 } 596 } 597 598 void VPInstruction::execute(VPTransformState &State) { 599 assert(!State.Instance && "VPInstruction executing an Instance"); 600 for (unsigned Part = 0; Part < State.UF; ++Part) 601 generateInstruction(State, Part); 602 } 603 604 void VPInstruction::dump() const { 605 VPSlotTracker SlotTracker(getParent()->getPlan()); 606 print(dbgs(), "", SlotTracker); 607 } 608 609 void VPInstruction::print(raw_ostream &O, const Twine &Indent, 610 VPSlotTracker &SlotTracker) const { 611 O << Indent << "EMIT "; 612 613 if (hasResult()) { 614 printAsOperand(O, SlotTracker); 615 O << " = "; 616 } 617 618 switch (getOpcode()) { 619 case VPInstruction::Not: 620 O << "not"; 621 break; 622 case VPInstruction::ICmpULE: 623 O << "icmp ule"; 624 break; 625 case VPInstruction::SLPLoad: 626 O << "combined load"; 627 break; 628 case VPInstruction::SLPStore: 629 O << "combined store"; 630 break; 631 case VPInstruction::ActiveLaneMask: 632 O << "active lane mask"; 633 break; 634 635 default: 636 O << Instruction::getOpcodeName(getOpcode()); 637 } 638 639 for (const VPValue *Operand : operands()) { 640 O << " "; 641 Operand->printAsOperand(O, SlotTracker); 642 } 643 } 644 645 /// Generate the code inside the body of the vectorized loop. Assumes a single 646 /// LoopVectorBody basic-block was created for this. Introduce additional 647 /// basic-blocks as needed, and fill them all. 648 void VPlan::execute(VPTransformState *State) { 649 // -1. Check if the backedge taken count is needed, and if so build it. 650 if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) { 651 Value *TC = State->TripCount; 652 IRBuilder<> Builder(State->CFG.PrevBB->getTerminator()); 653 auto *TCMO = Builder.CreateSub(TC, ConstantInt::get(TC->getType(), 1), 654 "trip.count.minus.1"); 655 auto VF = State->VF; 656 Value *VTCMO = 657 VF.isScalar() ? TCMO : Builder.CreateVectorSplat(VF, TCMO, "broadcast"); 658 for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) 659 State->set(BackedgeTakenCount, VTCMO, Part); 660 } 661 662 // 0. Set the reverse mapping from VPValues to Values for code generation. 663 for (auto &Entry : Value2VPValue) 664 State->VPValue2Value[Entry.second] = Entry.first; 665 666 BasicBlock *VectorPreHeaderBB = State->CFG.PrevBB; 667 BasicBlock *VectorHeaderBB = VectorPreHeaderBB->getSingleSuccessor(); 668 assert(VectorHeaderBB && "Loop preheader does not have a single successor."); 669 670 // 1. Make room to generate basic-blocks inside loop body if needed. 671 BasicBlock *VectorLatchBB = VectorHeaderBB->splitBasicBlock( 672 VectorHeaderBB->getFirstInsertionPt(), "vector.body.latch"); 673 Loop *L = State->LI->getLoopFor(VectorHeaderBB); 674 L->addBasicBlockToLoop(VectorLatchBB, *State->LI); 675 // Remove the edge between Header and Latch to allow other connections. 676 // Temporarily terminate with unreachable until CFG is rewired. 677 // Note: this asserts the generated code's assumption that 678 // getFirstInsertionPt() can be dereferenced into an Instruction. 679 VectorHeaderBB->getTerminator()->eraseFromParent(); 680 State->Builder.SetInsertPoint(VectorHeaderBB); 681 UnreachableInst *Terminator = State->Builder.CreateUnreachable(); 682 State->Builder.SetInsertPoint(Terminator); 683 684 // 2. Generate code in loop body. 685 State->CFG.PrevVPBB = nullptr; 686 State->CFG.PrevBB = VectorHeaderBB; 687 State->CFG.LastBB = VectorLatchBB; 688 689 for (VPBlockBase *Block : depth_first(Entry)) 690 Block->execute(State); 691 692 // Setup branch terminator successors for VPBBs in VPBBsToFix based on 693 // VPBB's successors. 694 for (auto VPBB : State->CFG.VPBBsToFix) { 695 assert(EnableVPlanNativePath && 696 "Unexpected VPBBsToFix in non VPlan-native path"); 697 BasicBlock *BB = State->CFG.VPBB2IRBB[VPBB]; 698 assert(BB && "Unexpected null basic block for VPBB"); 699 700 unsigned Idx = 0; 701 auto *BBTerminator = BB->getTerminator(); 702 703 for (VPBlockBase *SuccVPBlock : VPBB->getHierarchicalSuccessors()) { 704 VPBasicBlock *SuccVPBB = SuccVPBlock->getEntryBasicBlock(); 705 BBTerminator->setSuccessor(Idx, State->CFG.VPBB2IRBB[SuccVPBB]); 706 ++Idx; 707 } 708 } 709 710 // 3. Merge the temporary latch created with the last basic-block filled. 711 BasicBlock *LastBB = State->CFG.PrevBB; 712 // Connect LastBB to VectorLatchBB to facilitate their merge. 713 assert((EnableVPlanNativePath || 714 isa<UnreachableInst>(LastBB->getTerminator())) && 715 "Expected InnerLoop VPlan CFG to terminate with unreachable"); 716 assert((!EnableVPlanNativePath || isa<BranchInst>(LastBB->getTerminator())) && 717 "Expected VPlan CFG to terminate with branch in NativePath"); 718 LastBB->getTerminator()->eraseFromParent(); 719 BranchInst::Create(VectorLatchBB, LastBB); 720 721 // Merge LastBB with Latch. 722 bool Merged = MergeBlockIntoPredecessor(VectorLatchBB, nullptr, State->LI); 723 (void)Merged; 724 assert(Merged && "Could not merge last basic block with latch."); 725 VectorLatchBB = LastBB; 726 727 // We do not attempt to preserve DT for outer loop vectorization currently. 728 if (!EnableVPlanNativePath) 729 updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB, 730 L->getExitBlock()); 731 } 732 733 // TODO: Wrap those in #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)/#endif. 734 LLVM_DUMP_METHOD 735 void VPlan::print(raw_ostream &O) const { 736 VPSlotTracker SlotTracker(this); 737 738 O << "VPlan {"; 739 for (const VPBlockBase *Block : depth_first(getEntry())) { 740 O << '\n'; 741 Block->print(O, "", SlotTracker); 742 } 743 O << "}\n"; 744 } 745 746 LLVM_DUMP_METHOD 747 void VPlan::printDOT(raw_ostream &O) const { 748 VPlanPrinter Printer(O, *this); 749 Printer.dump(); 750 } 751 752 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 753 LLVM_DUMP_METHOD 754 void VPlan::dump() const { print(dbgs()); } 755 #endif 756 757 void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB, 758 BasicBlock *LoopLatchBB, 759 BasicBlock *LoopExitBB) { 760 BasicBlock *LoopHeaderBB = LoopPreHeaderBB->getSingleSuccessor(); 761 assert(LoopHeaderBB && "Loop preheader does not have a single successor."); 762 // The vector body may be more than a single basic-block by this point. 763 // Update the dominator tree information inside the vector body by propagating 764 // it from header to latch, expecting only triangular control-flow, if any. 765 BasicBlock *PostDomSucc = nullptr; 766 for (auto *BB = LoopHeaderBB; BB != LoopLatchBB; BB = PostDomSucc) { 767 // Get the list of successors of this block. 768 std::vector<BasicBlock *> Succs(succ_begin(BB), succ_end(BB)); 769 assert(Succs.size() <= 2 && 770 "Basic block in vector loop has more than 2 successors."); 771 PostDomSucc = Succs[0]; 772 if (Succs.size() == 1) { 773 assert(PostDomSucc->getSinglePredecessor() && 774 "PostDom successor has more than one predecessor."); 775 DT->addNewBlock(PostDomSucc, BB); 776 continue; 777 } 778 BasicBlock *InterimSucc = Succs[1]; 779 if (PostDomSucc->getSingleSuccessor() == InterimSucc) { 780 PostDomSucc = Succs[1]; 781 InterimSucc = Succs[0]; 782 } 783 assert(InterimSucc->getSingleSuccessor() == PostDomSucc && 784 "One successor of a basic block does not lead to the other."); 785 assert(InterimSucc->getSinglePredecessor() && 786 "Interim successor has more than one predecessor."); 787 assert(PostDomSucc->hasNPredecessors(2) && 788 "PostDom successor has more than two predecessors."); 789 DT->addNewBlock(InterimSucc, BB); 790 DT->addNewBlock(PostDomSucc, BB); 791 } 792 // Latch block is a new dominator for the loop exit. 793 DT->changeImmediateDominator(LoopExitBB, LoopLatchBB); 794 assert(DT->verify(DominatorTree::VerificationLevel::Fast)); 795 } 796 797 const Twine VPlanPrinter::getUID(const VPBlockBase *Block) { 798 return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") + 799 Twine(getOrCreateBID(Block)); 800 } 801 802 const Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) { 803 const std::string &Name = Block->getName(); 804 if (!Name.empty()) 805 return Name; 806 return "VPB" + Twine(getOrCreateBID(Block)); 807 } 808 809 void VPlanPrinter::dump() { 810 Depth = 1; 811 bumpIndent(0); 812 OS << "digraph VPlan {\n"; 813 OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan"; 814 if (!Plan.getName().empty()) 815 OS << "\\n" << DOT::EscapeString(Plan.getName()); 816 if (Plan.BackedgeTakenCount) { 817 OS << ", where:\\n"; 818 Plan.BackedgeTakenCount->print(OS, SlotTracker); 819 OS << " := BackedgeTakenCount"; 820 } 821 OS << "\"]\n"; 822 OS << "node [shape=rect, fontname=Courier, fontsize=30]\n"; 823 OS << "edge [fontname=Courier, fontsize=30]\n"; 824 OS << "compound=true\n"; 825 826 for (const VPBlockBase *Block : depth_first(Plan.getEntry())) 827 dumpBlock(Block); 828 829 OS << "}\n"; 830 } 831 832 void VPlanPrinter::dumpBlock(const VPBlockBase *Block) { 833 if (const VPBasicBlock *BasicBlock = dyn_cast<VPBasicBlock>(Block)) 834 dumpBasicBlock(BasicBlock); 835 else if (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) 836 dumpRegion(Region); 837 else 838 llvm_unreachable("Unsupported kind of VPBlock."); 839 } 840 841 void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To, 842 bool Hidden, const Twine &Label) { 843 // Due to "dot" we print an edge between two regions as an edge between the 844 // exit basic block and the entry basic of the respective regions. 845 const VPBlockBase *Tail = From->getExitBasicBlock(); 846 const VPBlockBase *Head = To->getEntryBasicBlock(); 847 OS << Indent << getUID(Tail) << " -> " << getUID(Head); 848 OS << " [ label=\"" << Label << '\"'; 849 if (Tail != From) 850 OS << " ltail=" << getUID(From); 851 if (Head != To) 852 OS << " lhead=" << getUID(To); 853 if (Hidden) 854 OS << "; splines=none"; 855 OS << "]\n"; 856 } 857 858 void VPlanPrinter::dumpEdges(const VPBlockBase *Block) { 859 auto &Successors = Block->getSuccessors(); 860 if (Successors.size() == 1) 861 drawEdge(Block, Successors.front(), false, ""); 862 else if (Successors.size() == 2) { 863 drawEdge(Block, Successors.front(), false, "T"); 864 drawEdge(Block, Successors.back(), false, "F"); 865 } else { 866 unsigned SuccessorNumber = 0; 867 for (auto *Successor : Successors) 868 drawEdge(Block, Successor, false, Twine(SuccessorNumber++)); 869 } 870 } 871 872 void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) { 873 // Implement dot-formatted dump by performing plain-text dump into the 874 // temporary storage followed by some post-processing. 875 OS << Indent << getUID(BasicBlock) << " [label =\n"; 876 bumpIndent(1); 877 std::string Str; 878 raw_string_ostream SS(Str); 879 // Use no indentation as we need to wrap the lines into quotes ourselves. 880 BasicBlock->print(SS, "", SlotTracker); 881 882 // We need to process each line of the output separately, so split 883 // single-string plain-text dump. 884 SmallVector<StringRef, 0> Lines; 885 StringRef(Str).rtrim('\n').split(Lines, "\n"); 886 887 auto EmitLine = [&](StringRef Line, StringRef Suffix) { 888 OS << Indent << '"' << DOT::EscapeString(Line.str()) << "\\l\"" << Suffix; 889 }; 890 891 // Don't need the "+" after the last line. 892 for (auto Line : make_range(Lines.begin(), Lines.end() - 1)) 893 EmitLine(Line, " +\n"); 894 EmitLine(Lines.back(), "\n"); 895 896 bumpIndent(-1); 897 OS << Indent << "]\n"; 898 899 dumpEdges(BasicBlock); 900 } 901 902 void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) { 903 OS << Indent << "subgraph " << getUID(Region) << " {\n"; 904 bumpIndent(1); 905 OS << Indent << "fontname=Courier\n" 906 << Indent << "label=\"" 907 << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ") 908 << DOT::EscapeString(Region->getName()) << "\"\n"; 909 // Dump the blocks of the region. 910 assert(Region->getEntry() && "Region contains no inner blocks."); 911 for (const VPBlockBase *Block : depth_first(Region->getEntry())) 912 dumpBlock(Block); 913 bumpIndent(-1); 914 OS << Indent << "}\n"; 915 dumpEdges(Region); 916 } 917 918 void VPlanIngredient::print(raw_ostream &O) const { 919 if (auto *Inst = dyn_cast<Instruction>(V)) { 920 if (!Inst->getType()->isVoidTy()) { 921 Inst->printAsOperand(O, false); 922 O << " = "; 923 } 924 O << Inst->getOpcodeName() << " "; 925 unsigned E = Inst->getNumOperands(); 926 if (E > 0) { 927 Inst->getOperand(0)->printAsOperand(O, false); 928 for (unsigned I = 1; I < E; ++I) 929 Inst->getOperand(I)->printAsOperand(O << ", ", false); 930 } 931 } else // !Inst 932 V->printAsOperand(O, false); 933 } 934 935 void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent, 936 VPSlotTracker &SlotTracker) const { 937 O << Indent << "WIDEN-CALL "; 938 939 auto *CI = cast<CallInst>(getUnderlyingInstr()); 940 if (CI->getType()->isVoidTy()) 941 O << "void "; 942 else { 943 printAsOperand(O, SlotTracker); 944 O << " = "; 945 } 946 947 O << "call @" << CI->getCalledFunction()->getName() << "("; 948 printOperands(O, SlotTracker); 949 O << ")"; 950 } 951 952 void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent, 953 VPSlotTracker &SlotTracker) const { 954 O << Indent << "WIDEN-SELECT "; 955 printAsOperand(O, SlotTracker); 956 O << " = select "; 957 getOperand(0)->printAsOperand(O, SlotTracker); 958 O << ", "; 959 getOperand(1)->printAsOperand(O, SlotTracker); 960 O << ", "; 961 getOperand(2)->printAsOperand(O, SlotTracker); 962 O << (InvariantCond ? " (condition is loop invariant)" : ""); 963 } 964 965 void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent, 966 VPSlotTracker &SlotTracker) const { 967 O << Indent << "WIDEN "; 968 printAsOperand(O, SlotTracker); 969 O << " = " << getUnderlyingInstr()->getOpcodeName() << " "; 970 printOperands(O, SlotTracker); 971 } 972 973 void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent, 974 VPSlotTracker &SlotTracker) const { 975 O << Indent << "WIDEN-INDUCTION"; 976 if (getTruncInst()) { 977 O << "\\l\""; 978 O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\""; 979 O << " +\n" << Indent << "\" "; 980 getVPValue(0)->printAsOperand(O, SlotTracker); 981 } else 982 O << " " << VPlanIngredient(IV); 983 } 984 985 void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent, 986 VPSlotTracker &SlotTracker) const { 987 O << Indent << "WIDEN-GEP "; 988 O << (IsPtrLoopInvariant ? "Inv" : "Var"); 989 size_t IndicesNumber = IsIndexLoopInvariant.size(); 990 for (size_t I = 0; I < IndicesNumber; ++I) 991 O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]"; 992 993 O << " "; 994 printAsOperand(O, SlotTracker); 995 O << " = getelementptr "; 996 printOperands(O, SlotTracker); 997 } 998 999 void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent, 1000 VPSlotTracker &SlotTracker) const { 1001 O << Indent << "WIDEN-PHI " << VPlanIngredient(getUnderlyingValue()); 1002 } 1003 1004 void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent, 1005 VPSlotTracker &SlotTracker) const { 1006 O << Indent << "BLEND "; 1007 Phi->printAsOperand(O, false); 1008 O << " ="; 1009 if (getNumIncomingValues() == 1) { 1010 // Not a User of any mask: not really blending, this is a 1011 // single-predecessor phi. 1012 O << " "; 1013 getIncomingValue(0)->printAsOperand(O, SlotTracker); 1014 } else { 1015 for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) { 1016 O << " "; 1017 getIncomingValue(I)->printAsOperand(O, SlotTracker); 1018 O << "/"; 1019 getMask(I)->printAsOperand(O, SlotTracker); 1020 } 1021 } 1022 } 1023 1024 void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent, 1025 VPSlotTracker &SlotTracker) const { 1026 O << Indent << "REDUCE "; 1027 printAsOperand(O, SlotTracker); 1028 O << " = "; 1029 getChainOp()->printAsOperand(O, SlotTracker); 1030 O << " + reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode()) 1031 << " ("; 1032 getVecOp()->printAsOperand(O, SlotTracker); 1033 if (getCondOp()) { 1034 O << ", "; 1035 getCondOp()->printAsOperand(O, SlotTracker); 1036 } 1037 O << ")"; 1038 } 1039 1040 void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent, 1041 VPSlotTracker &SlotTracker) const { 1042 O << Indent << (IsUniform ? "CLONE " : "REPLICATE "); 1043 1044 if (!getUnderlyingInstr()->getType()->isVoidTy()) { 1045 printAsOperand(O, SlotTracker); 1046 O << " = "; 1047 } 1048 O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " "; 1049 printOperands(O, SlotTracker); 1050 1051 if (AlsoPack) 1052 O << " (S->V)"; 1053 } 1054 1055 void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent, 1056 VPSlotTracker &SlotTracker) const { 1057 O << Indent << "PHI-PREDICATED-INSTRUCTION "; 1058 printAsOperand(O, SlotTracker); 1059 O << " = "; 1060 printOperands(O, SlotTracker); 1061 } 1062 1063 void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent, 1064 VPSlotTracker &SlotTracker) const { 1065 O << Indent << "WIDEN "; 1066 1067 if (!isStore()) { 1068 getVPValue()->printAsOperand(O, SlotTracker); 1069 O << " = "; 1070 } 1071 O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " "; 1072 1073 printOperands(O, SlotTracker); 1074 } 1075 1076 void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) { 1077 Value *CanonicalIV = State.CanonicalIV; 1078 Type *STy = CanonicalIV->getType(); 1079 IRBuilder<> Builder(State.CFG.PrevBB->getTerminator()); 1080 ElementCount VF = State.VF; 1081 assert(!VF.isScalable() && "the code following assumes non scalables ECs"); 1082 Value *VStart = VF.isScalar() 1083 ? CanonicalIV 1084 : Builder.CreateVectorSplat(VF.getKnownMinValue(), 1085 CanonicalIV, "broadcast"); 1086 for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) { 1087 SmallVector<Constant *, 8> Indices; 1088 for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane) 1089 Indices.push_back( 1090 ConstantInt::get(STy, Part * VF.getKnownMinValue() + Lane)); 1091 // If VF == 1, there is only one iteration in the loop above, thus the 1092 // element pushed back into Indices is ConstantInt::get(STy, Part) 1093 Constant *VStep = 1094 VF.isScalar() ? Indices.back() : ConstantVector::get(Indices); 1095 // Add the consecutive indices to the vector value. 1096 Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv"); 1097 State.set(getVPValue(), CanonicalVectorIV, Part); 1098 } 1099 } 1100 1101 void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent, 1102 VPSlotTracker &SlotTracker) const { 1103 O << Indent << "EMIT "; 1104 getVPValue()->printAsOperand(O, SlotTracker); 1105 O << " = WIDEN-CANONICAL-INDUCTION"; 1106 } 1107 1108 template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT); 1109 1110 void VPValue::replaceAllUsesWith(VPValue *New) { 1111 for (unsigned J = 0; J < getNumUsers();) { 1112 VPUser *User = Users[J]; 1113 unsigned NumUsers = getNumUsers(); 1114 for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I) 1115 if (User->getOperand(I) == this) 1116 User->setOperand(I, New); 1117 // If a user got removed after updating the current user, the next user to 1118 // update will be moved to the current position, so we only need to 1119 // increment the index if the number of users did not change. 1120 if (NumUsers == getNumUsers()) 1121 J++; 1122 } 1123 } 1124 1125 void VPValue::printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const { 1126 if (const Value *UV = getUnderlyingValue()) { 1127 OS << "ir<"; 1128 UV->printAsOperand(OS, false); 1129 OS << ">"; 1130 return; 1131 } 1132 1133 unsigned Slot = Tracker.getSlot(this); 1134 if (Slot == unsigned(-1)) 1135 OS << "<badref>"; 1136 else 1137 OS << "vp<%" << Tracker.getSlot(this) << ">"; 1138 } 1139 1140 void VPUser::printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const { 1141 interleaveComma(operands(), O, [&O, &SlotTracker](VPValue *Op) { 1142 Op->printAsOperand(O, SlotTracker); 1143 }); 1144 } 1145 1146 void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region, 1147 Old2NewTy &Old2New, 1148 InterleavedAccessInfo &IAI) { 1149 ReversePostOrderTraversal<VPBlockBase *> RPOT(Region->getEntry()); 1150 for (VPBlockBase *Base : RPOT) { 1151 visitBlock(Base, Old2New, IAI); 1152 } 1153 } 1154 1155 void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New, 1156 InterleavedAccessInfo &IAI) { 1157 if (VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(Block)) { 1158 for (VPRecipeBase &VPI : *VPBB) { 1159 if (isa<VPWidenPHIRecipe>(&VPI)) 1160 continue; 1161 assert(isa<VPInstruction>(&VPI) && "Can only handle VPInstructions"); 1162 auto *VPInst = cast<VPInstruction>(&VPI); 1163 auto *Inst = cast<Instruction>(VPInst->getUnderlyingValue()); 1164 auto *IG = IAI.getInterleaveGroup(Inst); 1165 if (!IG) 1166 continue; 1167 1168 auto NewIGIter = Old2New.find(IG); 1169 if (NewIGIter == Old2New.end()) 1170 Old2New[IG] = new InterleaveGroup<VPInstruction>( 1171 IG->getFactor(), IG->isReverse(), IG->getAlign()); 1172 1173 if (Inst == IG->getInsertPos()) 1174 Old2New[IG]->setInsertPos(VPInst); 1175 1176 InterleaveGroupMap[VPInst] = Old2New[IG]; 1177 InterleaveGroupMap[VPInst]->insertMember( 1178 VPInst, IG->getIndex(Inst), 1179 Align(IG->isReverse() ? (-1) * int(IG->getFactor()) 1180 : IG->getFactor())); 1181 } 1182 } else if (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) 1183 visitRegion(Region, Old2New, IAI); 1184 else 1185 llvm_unreachable("Unsupported kind of VPBlock."); 1186 } 1187 1188 VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan, 1189 InterleavedAccessInfo &IAI) { 1190 Old2NewTy Old2New; 1191 visitRegion(cast<VPRegionBlock>(Plan.getEntry()), Old2New, IAI); 1192 } 1193 1194 void VPSlotTracker::assignSlot(const VPValue *V) { 1195 assert(Slots.find(V) == Slots.end() && "VPValue already has a slot!"); 1196 Slots[V] = NextSlot++; 1197 } 1198 1199 void VPSlotTracker::assignSlots(const VPBlockBase *VPBB) { 1200 if (auto *Region = dyn_cast<VPRegionBlock>(VPBB)) 1201 assignSlots(Region); 1202 else 1203 assignSlots(cast<VPBasicBlock>(VPBB)); 1204 } 1205 1206 void VPSlotTracker::assignSlots(const VPRegionBlock *Region) { 1207 ReversePostOrderTraversal<const VPBlockBase *> RPOT(Region->getEntry()); 1208 for (const VPBlockBase *Block : RPOT) 1209 assignSlots(Block); 1210 } 1211 1212 void VPSlotTracker::assignSlots(const VPBasicBlock *VPBB) { 1213 for (const VPRecipeBase &Recipe : *VPBB) { 1214 for (VPValue *Def : Recipe.definedValues()) 1215 assignSlot(Def); 1216 } 1217 } 1218 1219 void VPSlotTracker::assignSlots(const VPlan &Plan) { 1220 1221 for (const VPValue *V : Plan.VPExternalDefs) 1222 assignSlot(V); 1223 1224 if (Plan.BackedgeTakenCount) 1225 assignSlot(Plan.BackedgeTakenCount); 1226 1227 ReversePostOrderTraversal<const VPBlockBase *> RPOT(Plan.getEntry()); 1228 for (const VPBlockBase *Block : RPOT) 1229 assignSlots(Block); 1230 } 1231