//===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This is the LLVM vectorization plan. It represents a candidate for
/// vectorization, making it possible to plan and optimize how to vectorize a
/// given loop before generating LLVM-IR.
/// The vectorizer uses vectorization plans to estimate the costs of potential
/// candidates and, if deemed profitable, to execute the desired plan,
/// generating vector LLVM-IR code.
///
//===----------------------------------------------------------------------===//

#include "VPlan.h"
#include "VPlanDominatorTree.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GenericDomTreeConstruction.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <cassert>
#include <string>
#include <vector>

using namespace llvm;
extern cl::opt<bool> EnableVPlanNativePath;

#define DEBUG_TYPE "vplan"

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
raw_ostream &llvm::operator<<(raw_ostream &OS, const VPValue &V) {
  const VPInstruction *Instr = dyn_cast<VPInstruction>(&V);
  VPSlotTracker SlotTracker(
      (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
  V.print(OS, SlotTracker);
  return OS;
}
#endif

Value *VPLane::getAsRuntimeExpr(IRBuilderBase &Builder,
                                const ElementCount &VF) const {
  switch (LaneKind) {
  case VPLane::Kind::ScalableLast:
    // Lane = RuntimeVF - VF.getKnownMinValue() + Lane
    return Builder.CreateSub(getRuntimeVF(Builder, Builder.getInt32Ty(), VF),
                             Builder.getInt32(VF.getKnownMinValue() - Lane));
  case VPLane::Kind::First:
    return Builder.getInt32(Lane);
  }
  llvm_unreachable("Unknown lane kind");
}

VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def)
    : SubclassID(SC), UnderlyingVal(UV), Def(Def) {
  if (Def)
    Def->addDefinedValue(this);
}

VPValue::~VPValue() {
  assert(Users.empty() && "trying to delete a VPValue with remaining users");
  if (Def)
    Def->removeDefinedValue(this);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPValue::print(raw_ostream &OS, VPSlotTracker &SlotTracker) const {
  if (const VPRecipeBase *R = dyn_cast_or_null<VPRecipeBase>(Def))
    R->print(OS, "", SlotTracker);
  else
    printAsOperand(OS, SlotTracker);
}

void VPValue::dump() const {
  const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this->Def);
  VPSlotTracker SlotTracker(
      (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
  print(dbgs(), SlotTracker);
  dbgs() << "\n";
}

void VPDef::dump() const {
  const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this);
  VPSlotTracker SlotTracker(
      (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
  print(dbgs(), "", SlotTracker);
  dbgs() << "\n";
}
#endif

// Get the top-most entry block of \p Start. This is the entry block of the
// containing VPlan. This function is templated to support both const and
// non-const blocks.
template <typename T> static T *getPlanEntry(T *Start) {
  T *Next = Start;
  T *Current = Start;
  while ((Next = Next->getParent()))
    Current = Next;

  SmallSetVector<T *, 8> WorkList;
  WorkList.insert(Current);

  for (unsigned i = 0; i < WorkList.size(); i++) {
    T *Current = WorkList[i];
    if (Current->getNumPredecessors() == 0)
      return Current;
    auto &Predecessors = Current->getPredecessors();
    WorkList.insert(Predecessors.begin(), Predecessors.end());
  }

  llvm_unreachable("VPlan has no entry block without predecessors");
}

VPlan *VPBlockBase::getPlan() { return getPlanEntry(this)->Plan; }

const VPlan *VPBlockBase::getPlan() const { return getPlanEntry(this)->Plan; }

/// \return the VPBasicBlock that is the entry of Block, possibly indirectly.
const VPBasicBlock *VPBlockBase::getEntryBasicBlock() const {
  const VPBlockBase *Block = this;
  while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
    Block = Region->getEntry();
  return cast<VPBasicBlock>(Block);
}

VPBasicBlock *VPBlockBase::getEntryBasicBlock() {
  VPBlockBase *Block = this;
  while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
    Block = Region->getEntry();
  return cast<VPBasicBlock>(Block);
}

void VPBlockBase::setPlan(VPlan *ParentPlan) {
  assert(ParentPlan->getEntry() == this &&
         "Can only set plan on its entry block.");
  Plan = ParentPlan;
}

/// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
const VPBasicBlock *VPBlockBase::getExitBasicBlock() const {
  const VPBlockBase *Block = this;
  while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
    Block = Region->getExit();
  return cast<VPBasicBlock>(Block);
}

VPBasicBlock *VPBlockBase::getExitBasicBlock() {
  VPBlockBase *Block = this;
  while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
    Block = Region->getExit();
  return cast<VPBasicBlock>(Block);
}

VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() {
  if (!Successors.empty() || !Parent)
    return this;
  assert(Parent->getExit() == this &&
         "Block w/o successors not the exit of its parent.");
  return Parent->getEnclosingBlockWithSuccessors();
}

VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
  if (!Predecessors.empty() || !Parent)
    return this;
  assert(Parent->getEntry() == this &&
         "Block w/o predecessors not the entry of its parent.");
  return Parent->getEnclosingBlockWithPredecessors();
}

VPValue *VPBlockBase::getCondBit() {
  return CondBitUser.getSingleOperandOrNull();
}

const VPValue *VPBlockBase::getCondBit() const {
  return CondBitUser.getSingleOperandOrNull();
}

void VPBlockBase::setCondBit(VPValue *CV) { CondBitUser.resetSingleOpUser(CV); }

VPValue *VPBlockBase::getPredicate() {
  return PredicateUser.getSingleOperandOrNull();
}

const VPValue *VPBlockBase::getPredicate() const {
  return PredicateUser.getSingleOperandOrNull();
}

void VPBlockBase::setPredicate(VPValue *CV) {
  PredicateUser.resetSingleOpUser(CV);
}

void VPBlockBase::deleteCFG(VPBlockBase *Entry) {
  SmallVector<VPBlockBase *, 8> Blocks(depth_first(Entry));

  for (VPBlockBase *Block : Blocks)
    delete Block;
}

VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
  iterator It = begin();
  while (It != end() && It->isPhi())
    It++;
  return It;
}

Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
  if (!Def->getDef())
    return Def->getLiveInIRValue();

  if (hasScalarValue(Def, Instance)) {
    return Data
        .PerPartScalars[Def][Instance.Part][Instance.Lane.mapToCacheIndex(VF)];
  }

  assert(hasVectorValue(Def, Instance.Part));
  auto *VecPart = Data.PerPartOutput[Def][Instance.Part];
  if (!VecPart->getType()->isVectorTy()) {
    assert(Instance.Lane.isFirstLane() && "cannot get lane > 0 for scalar");
    return VecPart;
  }
  // TODO: Cache created scalar values.
  Value *Lane = Instance.Lane.getAsRuntimeExpr(Builder, VF);
  auto *Extract = Builder.CreateExtractElement(VecPart, Lane);
  // set(Def, Extract, Instance);
  return Extract;
}

BasicBlock *
VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
  // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
  // Pred stands for Predecessor. Prev stands for Previous - last
  // visited/created.
  BasicBlock *PrevBB = CFG.PrevBB;
  BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(),
                                         PrevBB->getParent(), CFG.ExitBB);
  LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');

  // Hook up the new basic block to its predecessors.
  for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
    VPBasicBlock *PredVPBB = PredVPBlock->getExitBasicBlock();
    auto &PredVPSuccessors = PredVPBB->getSuccessors();
    BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];

    // In the outer loop vectorization scenario, the predecessor basic block
    // may not yet have been visited (backedge). Mark the VPBasicBlock for
    // fixup at the end of vectorization. We do not encounter this case in
    // inner loop vectorization as we start out by building a loop skeleton
    // with the vector loop header and latch blocks. As a result, we never
    // enter this function for the header block in the non-VPlan-native path.
    if (!PredBB) {
      assert(EnableVPlanNativePath &&
             "Unexpected null predecessor in non VPlan-native path");
      CFG.VPBBsToFix.push_back(PredVPBB);
      continue;
    }

    assert(PredBB && "Predecessor basic-block not found building successor.");
    auto *PredBBTerminator = PredBB->getTerminator();
    LLVM_DEBUG(dbgs() << "LV: draw edge from " << PredBB->getName() << '\n');
    if (isa<UnreachableInst>(PredBBTerminator)) {
      assert(PredVPSuccessors.size() == 1 &&
             "Predecessor ending w/o branch must have single successor.");
      PredBBTerminator->eraseFromParent();
      BranchInst::Create(NewBB, PredBB);
    } else {
      assert(PredVPSuccessors.size() == 2 &&
             "Predecessor ending with branch must have two successors.");
      unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
      assert(!PredBBTerminator->getSuccessor(idx) &&
             "Trying to reset an existing successor block.");
      PredBBTerminator->setSuccessor(idx, NewBB);
    }
  }
  return NewBB;
}

void VPBasicBlock::execute(VPTransformState *State) {
  bool Replica = State->Instance && !State->Instance->isFirstIteration();
  VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB;
  VPBlockBase *SingleHPred = nullptr;
  BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.

  // 1. Create an IR basic block, or reuse the last one if possible.
  // The last IR basic block is reused, as an optimization, in three cases:
  // A. the first VPBB reuses the loop header BB - when PrevVPBB is null;
  // B. when the current VPBB has a single (hierarchical) predecessor which
  //    is PrevVPBB and the latter has a single (hierarchical) successor; and
  // C. when the current VPBB is an entry of a region replica - where PrevVPBB
  //    is the exit of this region from a previous instance, or the predecessor
  //    of this region.
  if (PrevVPBB && /* A */
      !((SingleHPred = getSingleHierarchicalPredecessor()) &&
        SingleHPred->getExitBasicBlock() == PrevVPBB &&
        PrevVPBB->getSingleHierarchicalSuccessor()) && /* B */
      !(Replica && getPredecessors().empty())) {       /* C */
    NewBB = createEmptyBasicBlock(State->CFG);
    State->Builder.SetInsertPoint(NewBB);
    // Temporarily terminate with unreachable until CFG is rewired.
    UnreachableInst *Terminator = State->Builder.CreateUnreachable();
    // Register NewBB in its loop. In innermost loops it's the same for all BBs.
    State->CurrentVectorLoop->addBasicBlockToLoop(NewBB, *State->LI);
    State->Builder.SetInsertPoint(Terminator);
    State->CFG.PrevBB = NewBB;
  } else {
    // If the current VPBB is re-using the header block from skeleton creation,
    // move it to the new vector loop.
    VPBasicBlock *HeaderVPBB =
        getPlan()->getVectorLoopRegion()->getEntryBasicBlock();
    if (EnableVPlanNativePath)
      HeaderVPBB = cast<VPBasicBlock>(HeaderVPBB->getSingleSuccessor());
    if (this == HeaderVPBB) {
      assert(State->CurrentVectorLoop);
      State->LI->removeBlock(State->CFG.PrevBB);
      State->CurrentVectorLoop->addBasicBlockToLoop(State->CFG.PrevBB,
                                                    *State->LI);
    }
  }

  // 2. Fill the IR basic block with IR instructions.
  LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName()
                    << " in BB:" << NewBB->getName() << '\n');

  State->CFG.VPBB2IRBB[this] = NewBB;
  State->CFG.PrevVPBB = this;

  for (VPRecipeBase &Recipe : Recipes)
    Recipe.execute(*State);

  VPValue *CBV;
  if (EnableVPlanNativePath && (CBV = getCondBit())) {
    assert(CBV->getUnderlyingValue() &&
           "Unexpected null underlying value for condition bit");

    // The condition bit value in a VPBasicBlock is used as the branch
    // selector. In the VPlan-native path case, since all branches are uniform
    // we generate a branch instruction using the condition value from vector
    // lane 0 and dummy successors. The successors are fixed later when the
    // successor blocks are visited.
    Value *NewCond = State->get(CBV, {0, 0});

    // Replace the temporary unreachable terminator with the new conditional
    // branch.
    auto *CurrentTerminator = NewBB->getTerminator();
    assert(isa<UnreachableInst>(CurrentTerminator) &&
           "Expected to replace unreachable terminator with conditional "
           "branch.");
    auto *CondBr = BranchInst::Create(NewBB, nullptr, NewCond);
    CondBr->setSuccessor(0, nullptr);
    ReplaceInstWithInst(CurrentTerminator, CondBr);
  }

  LLVM_DEBUG(dbgs() << "LV: filled BB:" << *NewBB);
}

void VPBasicBlock::dropAllReferences(VPValue *NewValue) {
  for (VPRecipeBase &R : Recipes) {
    for (auto *Def : R.definedValues())
      Def->replaceAllUsesWith(NewValue);

    for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
      R.setOperand(I, NewValue);
  }
}

VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
  assert((SplitAt == end() || SplitAt->getParent() == this) &&
         "can only split at a position in the same block");

  SmallVector<VPBlockBase *, 2> Succs(successors());
  // First, disconnect the current block from its successors.
  for (VPBlockBase *Succ : Succs)
    VPBlockUtils::disconnectBlocks(this, Succ);

  // Create a new empty block after the block to split.
  auto *SplitBlock = new VPBasicBlock(getName() + ".split");
  VPBlockUtils::insertBlockAfter(SplitBlock, this);

  // Add the successors of the block being split to the new block.
  for (VPBlockBase *Succ : Succs)
    VPBlockUtils::connectBlocks(SplitBlock, Succ);

  // Finally, move the recipes starting at SplitAt to the new block.
  for (VPRecipeBase &ToMove :
       make_early_inc_range(make_range(SplitAt, this->end())))
    ToMove.moveBefore(*SplitBlock, SplitBlock->end());

  return SplitBlock;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const {
  if (getSuccessors().empty()) {
    O << Indent << "No successors\n";
  } else {
    O << Indent << "Successor(s): ";
    ListSeparator LS;
    for (auto *Succ : getSuccessors())
      O << LS << Succ->getName();
    O << '\n';
  }
}

void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
                         VPSlotTracker &SlotTracker) const {
  O << Indent << getName() << ":\n";
  if (const VPValue *Pred = getPredicate()) {
    O << Indent << "BlockPredicate:";
    Pred->printAsOperand(O, SlotTracker);
    if (const auto *PredInst = dyn_cast<VPInstruction>(Pred))
      O << " (" << PredInst->getParent()->getName() << ")";
    O << '\n';
  }

  auto RecipeIndent = Indent + "  ";
  for (const VPRecipeBase &Recipe : *this) {
    Recipe.print(O, RecipeIndent, SlotTracker);
    O << '\n';
  }

  printSuccessors(O, Indent);

  if (const VPValue *CBV = getCondBit()) {
    O << Indent << "CondBit: ";
    CBV->printAsOperand(O, SlotTracker);
    if (const auto *CBI = dyn_cast<VPInstruction>(CBV))
      O << " (" << CBI->getParent()->getName() << ")";
    O << '\n';
  }
}
#endif

void VPRegionBlock::dropAllReferences(VPValue *NewValue) {
  for (VPBlockBase *Block : depth_first(Entry))
    // Drop all references in VPBasicBlocks and replace all uses with
    // NewValue.
    Block->dropAllReferences(NewValue);
}

void VPRegionBlock::execute(VPTransformState *State) {
  ReversePostOrderTraversal<VPBlockBase *> RPOT(Entry);

  if (!isReplicator()) {
    // Create and register the new vector loop.
    Loop *PrevLoop = State->CurrentVectorLoop;
    State->CurrentVectorLoop = State->LI->AllocateLoop();
    Loop *ParentLoop = State->LI->getLoopFor(State->CFG.VectorPreHeader);

    // Insert the new loop into the loop nest and register the new basic blocks
    // before calling any utilities such as SCEV that require valid LoopInfo.
    if (ParentLoop)
      ParentLoop->addChildLoop(State->CurrentVectorLoop);
    else
      State->LI->addTopLevelLoop(State->CurrentVectorLoop);

    // Visit the VPBlocks connected to "this", starting from it.
    for (VPBlockBase *Block : RPOT) {
      if (EnableVPlanNativePath) {
        // The inner loop vectorization path does not represent loop preheader
        // and exit blocks as part of the VPlan. In the VPlan-native path, skip
        // vectorizing the loop preheader block. In future, we may replace this
        // check with the check for loop preheader.
        if (Block->getNumPredecessors() == 0)
          continue;

        // Skip vectorizing the loop exit block. In future, we may replace this
        // check with the check for loop exit.
        if (Block->getNumSuccessors() == 0)
          continue;
      }

      LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
      Block->execute(State);
    }

    State->CurrentVectorLoop = PrevLoop;
    return;
  }

  assert(!State->Instance && "Replicating a Region with non-null instance.");

  // Enter replicating mode.
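  // A replicate region is executed once per (Part, Lane) pair: the loops below
  // set State->Instance for each pair so that the recipes inside the region
  // generate scalar code for that single instance.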
  State->Instance = VPIteration(0, 0);

  for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) {
    State->Instance->Part = Part;
    assert(!State->VF.isScalable() && "VF is assumed to be non-scalable.");
    for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
         ++Lane) {
      State->Instance->Lane = VPLane(Lane, VPLane::Kind::First);
      // Visit the VPBlocks connected to \p this, starting from it.
      for (VPBlockBase *Block : RPOT) {
        LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
        Block->execute(State);
      }
    }
  }

  // Exit replicating mode.
  State->Instance.reset();
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {";
  auto NewIndent = Indent + "  ";
  for (auto *BlockBase : depth_first(Entry)) {
    O << '\n';
    BlockBase->print(O, NewIndent, SlotTracker);
  }
  O << Indent << "}\n";

  printSuccessors(O, Indent);
}
#endif

bool VPRecipeBase::mayWriteToMemory() const {
  switch (getVPDefID()) {
  case VPWidenMemoryInstructionSC: {
    return cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
  }
  case VPReplicateSC:
  case VPWidenCallSC:
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayWriteToMemory();
  case VPBranchOnMaskSC:
    return false;
  case VPWidenIntOrFpInductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenPHISC:
  case VPBlendSC:
  case VPWidenSC:
  case VPWidenGEPSC:
  case VPReductionSC:
  case VPWidenSelectSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayWriteToMemory()) &&
           "underlying instruction may write to memory");
    return false;
  }
  default:
    return true;
  }
}

bool VPRecipeBase::mayReadFromMemory() const {
  switch (getVPDefID()) {
  case VPWidenMemoryInstructionSC: {
    return !cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
  }
  case VPReplicateSC:
  case VPWidenCallSC:
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayReadFromMemory();
  case VPBranchOnMaskSC:
    return false;
  case VPWidenIntOrFpInductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenPHISC:
  case VPBlendSC:
  case VPWidenSC:
  case VPWidenGEPSC:
  case VPReductionSC:
  case VPWidenSelectSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayReadFromMemory()) &&
           "underlying instruction may read from memory");
    return false;
  }
  default:
    return true;
  }
}

bool VPRecipeBase::mayHaveSideEffects() const {
  switch (getVPDefID()) {
  case VPBranchOnMaskSC:
    return false;
  case VPWidenIntOrFpInductionSC:
  case VPWidenPointerInductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenPHISC:
  case VPBlendSC:
  case VPWidenSC:
  case VPWidenGEPSC:
  case VPReductionSC:
  case VPWidenSelectSC:
  case VPScalarIVStepsSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayHaveSideEffects()) &&
           "underlying instruction has side-effects");
    return false;
  }
  case VPReplicateSC: {
    auto *R = cast<VPReplicateRecipe>(this);
    return R->getUnderlyingInstr()->mayHaveSideEffects();
  }
  default:
    return true;
  }
}

void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(InsertPos->getParent() &&
         "Insertion position not in any VPBasicBlock");
  Parent = InsertPos->getParent();
  Parent->getRecipeList().insert(InsertPos->getIterator(), this);
}

void VPRecipeBase::insertBefore(VPBasicBlock &BB,
                                iplist<VPRecipeBase>::iterator I) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(I == BB.end() || I->getParent() == &BB);
  Parent = &BB;
  BB.getRecipeList().insert(I, this);
}

void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(InsertPos->getParent() &&
         "Insertion position not in any VPBasicBlock");
  Parent = InsertPos->getParent();
  Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this);
}

void VPRecipeBase::removeFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  getParent()->getRecipeList().remove(getIterator());
  Parent = nullptr;
}

iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  return getParent()->getRecipeList().erase(getIterator());
}

void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
  removeFromParent();
  insertAfter(InsertPos);
}

void VPRecipeBase::moveBefore(VPBasicBlock &BB,
                              iplist<VPRecipeBase>::iterator I) {
  removeFromParent();
  insertBefore(BB, I);
}

void VPInstruction::generateInstruction(VPTransformState &State,
                                        unsigned Part) {
  IRBuilderBase &Builder = State.Builder;
  Builder.SetCurrentDebugLocation(DL);

  if (Instruction::isBinaryOp(getOpcode())) {
    Value *A = State.get(getOperand(0), Part);
    Value *B = State.get(getOperand(1), Part);
    Value *V = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B);
    State.set(this, V, Part);
    return;
  }

  switch (getOpcode()) {
  case VPInstruction::Not: {
    Value *A = State.get(getOperand(0), Part);
    Value *V = Builder.CreateNot(A);
    State.set(this, V, Part);
    break;
  }
  case VPInstruction::ICmpULE: {
    Value *IV = State.get(getOperand(0), Part);
    Value *TC = State.get(getOperand(1), Part);
    Value *V = Builder.CreateICmpULE(IV, TC);
    State.set(this, V, Part);
    break;
  }
  case Instruction::Select: {
    Value *Cond = State.get(getOperand(0), Part);
    Value *Op1 = State.get(getOperand(1), Part);
    Value *Op2 = State.get(getOperand(2), Part);
    Value *V = Builder.CreateSelect(Cond, Op1, Op2);
    State.set(this, V, Part);
    break;
  }
  case VPInstruction::ActiveLaneMask: {
    // Get the first lane of the vector induction variable.
    Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
    // Get the original loop trip count.
    Value *ScalarTC = State.get(getOperand(1), Part);

    auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
    auto *PredTy = VectorType::get(Int1Ty, State.VF);
    Instruction *Call = Builder.CreateIntrinsic(
        Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()},
        {VIVElem0, ScalarTC}, nullptr, "active.lane.mask");
    State.set(this, Call, Part);
    break;
  }
  case VPInstruction::FirstOrderRecurrenceSplice: {
    // Generate code to combine the previous and current values in vector v3.
    //
    //   vector.ph:
    //     v_init = vector(..., ..., ..., a[-1])
    //     br vector.body
    //
    //   vector.body
    //     i = phi [0, vector.ph], [i+4, vector.body]
    //     v1 = phi [v_init, vector.ph], [v2, vector.body]
    //     v2 = a[i, i+1, i+2, i+3];
    //     v3 = vector(v1(3), v2(0, 1, 2))

    // For the first part, use the recurrence phi (v1), otherwise v2.
    auto *V1 = State.get(getOperand(0), 0);
    Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
    if (!PartMinus1->getType()->isVectorTy()) {
      State.set(this, PartMinus1, Part);
    } else {
      Value *V2 = State.get(getOperand(1), Part);
      State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1), Part);
    }
    break;
  }

  case VPInstruction::CanonicalIVIncrement:
  case VPInstruction::CanonicalIVIncrementNUW: {
    Value *Next = nullptr;
    if (Part == 0) {
      bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementNUW;
      auto *Phi = State.get(getOperand(0), 0);
      // The loop step is equal to the vectorization factor (num of SIMD
      // elements) times the unroll factor (num of SIMD instructions).
      Value *Step =
          createStepForVF(Builder, Phi->getType(), State.VF, State.UF);
      Next = Builder.CreateAdd(Phi, Step, "index.next", IsNUW, false);
    } else {
      Next = State.get(this, 0);
    }

    State.set(this, Next, Part);
    break;
  }
  case VPInstruction::BranchOnCount: {
    if (Part != 0)
      break;
    // First create the compare.
    Value *IV = State.get(getOperand(0), Part);
    Value *TC = State.get(getOperand(1), Part);
    Value *Cond = Builder.CreateICmpEQ(IV, TC);

    // Now create the branch.
    auto *Plan = getParent()->getPlan();
    VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
    VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
    if (Header->empty()) {
      assert(EnableVPlanNativePath &&
             "empty entry block only expected in VPlanNativePath");
      Header = cast<VPBasicBlock>(Header->getSingleSuccessor());
    }
    // TODO: Once the exit block is modeled in VPlan, use it instead of going
    // through State.CFG.ExitBB.
    BasicBlock *Exit = State.CFG.ExitBB;

    Builder.CreateCondBr(Cond, Exit, State.CFG.VPBB2IRBB[Header]);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    break;
  }
  default:
    llvm_unreachable("Unsupported opcode for instruction");
  }
}

void VPInstruction::execute(VPTransformState &State) {
  assert(!State.Instance && "VPInstruction executing an Instance");
  IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
  State.Builder.setFastMathFlags(FMF);
  for (unsigned Part = 0; Part < State.UF; ++Part)
    generateInstruction(State, Part);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPInstruction::dump() const {
  VPSlotTracker SlotTracker(getParent()->getPlan());
  print(dbgs(), "", SlotTracker);
}

void VPInstruction::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";

  if (hasResult()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  switch (getOpcode()) {
  case VPInstruction::Not:
    O << "not";
    break;
  case VPInstruction::ICmpULE:
    O << "icmp ule";
    break;
  case VPInstruction::SLPLoad:
    O << "combined load";
    break;
  case VPInstruction::SLPStore:
    O << "combined store";
    break;
  case VPInstruction::ActiveLaneMask:
    O << "active lane mask";
    break;
  case VPInstruction::FirstOrderRecurrenceSplice:
    O << "first-order splice";
    break;
  case VPInstruction::CanonicalIVIncrement:
    O << "VF * UF + ";
    break;
  case VPInstruction::CanonicalIVIncrementNUW:
    O << "VF * UF +(nuw) ";
    break;
  case VPInstruction::BranchOnCount:
    O << "branch-on-count ";
    break;
  default:
    O << Instruction::getOpcodeName(getOpcode());
  }

  O << FMF;

  for (const VPValue *Operand : operands()) {
    O << " ";
    Operand->printAsOperand(O, SlotTracker);
  }

  if (DL) {
    O << ", !dbg ";
    DL.print(O);
  }
}
#endif

void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) {
  // Make sure the VPInstruction is a floating-point operation.
  assert((Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
          Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
          Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
          Opcode == Instruction::FCmp) &&
         "this op can't take fast-math flags");
  FMF = FMFNew;
}

void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
                             Value *CanonicalIVStartValue,
                             VPTransformState &State) {
  // Check if the trip count is needed, and if so build it.
  if (TripCount && TripCount->getNumUsers()) {
    for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
      State.set(TripCount, TripCountV, Part);
  }

  // Check if the backedge taken count is needed, and if so build it.
  if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
    IRBuilder<> Builder(State.CFG.VectorPreHeader->getTerminator());
    auto *TCMO = Builder.CreateSub(TripCountV,
                                   ConstantInt::get(TripCountV->getType(), 1),
                                   "trip.count.minus.1");
    auto VF = State.VF;
    Value *VTCMO =
        VF.isScalar() ? TCMO : Builder.CreateVectorSplat(VF, TCMO, "broadcast");
    for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
      State.set(BackedgeTakenCount, VTCMO, Part);
  }

  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
    State.set(&VectorTripCount, VectorTripCountV, Part);

  // When vectorizing the epilogue loop, the canonical induction start value
  // needs to be changed from zero to the value after the main vector loop.
  if (CanonicalIVStartValue) {
    VPValue *VPV = new VPValue(CanonicalIVStartValue);
    addExternalDef(VPV);
    auto *IV = getCanonicalIV();
    assert(all_of(IV->users(),
                  [](const VPUser *U) {
                    if (isa<VPScalarIVStepsRecipe>(U))
                      return true;
                    auto *VPI = cast<VPInstruction>(U);
                    return VPI->getOpcode() ==
                               VPInstruction::CanonicalIVIncrement ||
                           VPI->getOpcode() ==
                               VPInstruction::CanonicalIVIncrementNUW;
                  }) &&
           "the canonical IV should only be used by its increments or "
           "ScalarIVSteps when resetting the start value");
    IV->setOperand(0, VPV);
  }
}

/// Generate the code inside the body of the vectorized loop. Assumes a single
/// LoopVectorBody basic-block was created for this. Introduce additional
/// basic-blocks as needed, and fill them all.
void VPlan::execute(VPTransformState *State) {
  // Set the reverse mapping from VPValues to Values for code generation.
  for (auto &Entry : Value2VPValue)
    State->VPValue2Value[Entry.second] = Entry.first;

  // Initialize CFG state.
  State->CFG.PrevVPBB = nullptr;
  BasicBlock *VectorHeaderBB = State->CFG.VectorPreHeader->getSingleSuccessor();
  State->CFG.PrevBB = VectorHeaderBB;
  State->CFG.ExitBB = VectorHeaderBB->getSingleSuccessor();
  State->CurrentVectorLoop = State->LI->getLoopFor(VectorHeaderBB);

  // Remove the edge between Header and Latch to allow other connections.
  // Temporarily terminate with unreachable until CFG is rewired.
  // Note: this asserts the generated code's assumption that
  // getFirstInsertionPt() can be dereferenced into an Instruction.
  VectorHeaderBB->getTerminator()->eraseFromParent();
  State->Builder.SetInsertPoint(VectorHeaderBB);
  UnreachableInst *Terminator = State->Builder.CreateUnreachable();
  State->Builder.SetInsertPoint(Terminator);

  // Generate code in the loop body.
  for (VPBlockBase *Block : depth_first(Entry))
    Block->execute(State);

  // Set up branch terminator successors for VPBBs in VPBBsToFix based on
  // VPBB's successors.
  for (auto VPBB : State->CFG.VPBBsToFix) {
    assert(EnableVPlanNativePath &&
           "Unexpected VPBBsToFix in non VPlan-native path");
    BasicBlock *BB = State->CFG.VPBB2IRBB[VPBB];
    assert(BB && "Unexpected null basic block for VPBB");

    unsigned Idx = 0;
    auto *BBTerminator = BB->getTerminator();

    for (VPBlockBase *SuccVPBlock : VPBB->getHierarchicalSuccessors()) {
      VPBasicBlock *SuccVPBB = SuccVPBlock->getEntryBasicBlock();
      BBTerminator->setSuccessor(Idx, State->CFG.VPBB2IRBB[SuccVPBB]);
      ++Idx;
    }
  }

  BasicBlock *VectorLatchBB = State->CFG.PrevBB;

  // Fix the latch value of canonical, reduction and first-order recurrence
  // phis in the vector loop.
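  // The latch IR block only becomes known at this point; patch the backedge
  // (incoming block and/or value) of each header phi accordingly.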
  VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
  if (Header->empty()) {
    assert(EnableVPlanNativePath);
    Header = cast<VPBasicBlock>(Header->getSingleSuccessor());
  }
  for (VPRecipeBase &R : Header->phis()) {
    // Skip phi-like recipes that generate their backedge values themselves.
    if (isa<VPWidenPHIRecipe>(&R))
      continue;

    if (isa<VPWidenPointerInductionRecipe>(&R) ||
        isa<VPWidenIntOrFpInductionRecipe>(&R)) {
      PHINode *Phi = nullptr;
      if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
        Phi = cast<PHINode>(State->get(R.getVPSingleValue(), 0));
      } else {
        auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
        // TODO: Split off the case that all users of a pointer phi are scalar
        // from the VPWidenPointerInductionRecipe.
        if (all_of(WidenPhi->users(), [WidenPhi](const VPUser *U) {
              return cast<VPRecipeBase>(U)->usesScalars(WidenPhi);
            }))
          continue;

        auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi, 0));
        Phi = cast<PHINode>(GEP->getPointerOperand());
      }

      Phi->setIncomingBlock(1, VectorLatchBB);

      // Move the last step to the end of the latch block. This ensures
      // consistent placement of all induction updates.
      Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
      Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode());
      continue;
    }

    auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
    // For canonical IV, first-order recurrences and in-order reduction phis,
    // only a single part is generated, which provides the last part from the
    // previous iteration. For non-ordered reductions all UF parts are
    // generated.
    bool SinglePartNeeded = isa<VPCanonicalIVPHIRecipe>(PhiR) ||
                            isa<VPFirstOrderRecurrencePHIRecipe>(PhiR) ||
                            cast<VPReductionPHIRecipe>(PhiR)->isOrdered();
    unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF;

    for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
      Value *Phi = State->get(PhiR, Part);
      Value *Val = State->get(PhiR->getBackedgeValue(),
                              SinglePartNeeded ? State->UF - 1 : Part);
      cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
    }
  }

  // We do not attempt to preserve DT for outer loop vectorization currently.
  if (!EnableVPlanNativePath)
    updateDominatorTree(State->DT, VectorHeaderBB, VectorLatchBB,
                        State->CFG.ExitBB);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD
void VPlan::print(raw_ostream &O) const {
  VPSlotTracker SlotTracker(this);

  O << "VPlan '" << Name << "' {";

  if (VectorTripCount.getNumUsers() > 0) {
    O << "\nLive-in ";
    VectorTripCount.printAsOperand(O, SlotTracker);
    O << " = vector-trip-count\n";
  }

  if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
    O << "\nLive-in ";
    BackedgeTakenCount->printAsOperand(O, SlotTracker);
    O << " = backedge-taken count\n";
  }

  for (const VPBlockBase *Block : depth_first(getEntry())) {
    O << '\n';
    Block->print(O, "", SlotTracker);
  }
  O << "}\n";
}

LLVM_DUMP_METHOD
void VPlan::printDOT(raw_ostream &O) const {
  VPlanPrinter Printer(O, *this);
  Printer.dump();
}

LLVM_DUMP_METHOD
void VPlan::dump() const { print(dbgs()); }
#endif

void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopHeaderBB,
                                BasicBlock *LoopLatchBB,
                                BasicBlock *LoopExitBB) {
  // The vector body may be more than a single basic-block by this point.
  // Update the dominator tree information inside the vector body by
  // propagating it from header to latch, expecting only triangular
  // control-flow, if any.
  BasicBlock *PostDomSucc = nullptr;
  for (auto *BB = LoopHeaderBB; BB != LoopLatchBB; BB = PostDomSucc) {
    // Get the list of successors of this block.
    std::vector<BasicBlock *> Succs(succ_begin(BB), succ_end(BB));
    assert(Succs.size() <= 2 &&
           "Basic block in vector loop has more than 2 successors.");
    PostDomSucc = Succs[0];
    if (Succs.size() == 1) {
      assert(PostDomSucc->getSinglePredecessor() &&
             "PostDom successor has more than one predecessor.");
      DT->addNewBlock(PostDomSucc, BB);
      continue;
    }
    BasicBlock *InterimSucc = Succs[1];
    if (PostDomSucc->getSingleSuccessor() == InterimSucc) {
      PostDomSucc = Succs[1];
      InterimSucc = Succs[0];
    }
    assert(InterimSucc->getSingleSuccessor() == PostDomSucc &&
           "One successor of a basic block does not lead to the other.");
    assert(InterimSucc->getSinglePredecessor() &&
           "Interim successor has more than one predecessor.");
    assert(PostDomSucc->hasNPredecessors(2) &&
           "PostDom successor has more than two predecessors.");
    DT->addNewBlock(InterimSucc, BB);
    DT->addNewBlock(PostDomSucc, BB);
  }
  // The latch block becomes the new immediate dominator of the loop exit.
  DT->changeImmediateDominator(LoopExitBB, LoopLatchBB);
  assert(DT->verify(DominatorTree::VerificationLevel::Fast));
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
"cluster_N" : "N") + 1112 Twine(getOrCreateBID(Block)); 1113 } 1114 1115 Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) { 1116 const std::string &Name = Block->getName(); 1117 if (!Name.empty()) 1118 return Name; 1119 return "VPB" + Twine(getOrCreateBID(Block)); 1120 } 1121 1122 void VPlanPrinter::dump() { 1123 Depth = 1; 1124 bumpIndent(0); 1125 OS << "digraph VPlan {\n"; 1126 OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan"; 1127 if (!Plan.getName().empty()) 1128 OS << "\\n" << DOT::EscapeString(Plan.getName()); 1129 if (Plan.BackedgeTakenCount) { 1130 OS << ", where:\\n"; 1131 Plan.BackedgeTakenCount->print(OS, SlotTracker); 1132 OS << " := BackedgeTakenCount"; 1133 } 1134 OS << "\"]\n"; 1135 OS << "node [shape=rect, fontname=Courier, fontsize=30]\n"; 1136 OS << "edge [fontname=Courier, fontsize=30]\n"; 1137 OS << "compound=true\n"; 1138 1139 for (const VPBlockBase *Block : depth_first(Plan.getEntry())) 1140 dumpBlock(Block); 1141 1142 OS << "}\n"; 1143 } 1144 1145 void VPlanPrinter::dumpBlock(const VPBlockBase *Block) { 1146 if (const VPBasicBlock *BasicBlock = dyn_cast<VPBasicBlock>(Block)) 1147 dumpBasicBlock(BasicBlock); 1148 else if (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) 1149 dumpRegion(Region); 1150 else 1151 llvm_unreachable("Unsupported kind of VPBlock."); 1152 } 1153 1154 void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To, 1155 bool Hidden, const Twine &Label) { 1156 // Due to "dot" we print an edge between two regions as an edge between the 1157 // exit basic block and the entry basic of the respective regions. 1158 const VPBlockBase *Tail = From->getExitBasicBlock(); 1159 const VPBlockBase *Head = To->getEntryBasicBlock(); 1160 OS << Indent << getUID(Tail) << " -> " << getUID(Head); 1161 OS << " [ label=\"" << Label << '\"'; 1162 if (Tail != From) 1163 OS << " ltail=" << getUID(From); 1164 if (Head != To) 1165 OS << " lhead=" << getUID(To); 1166 if (Hidden) 1167 OS << "; splines=none"; 1168 OS << "]\n"; 1169 } 1170 1171 void VPlanPrinter::dumpEdges(const VPBlockBase *Block) { 1172 auto &Successors = Block->getSuccessors(); 1173 if (Successors.size() == 1) 1174 drawEdge(Block, Successors.front(), false, ""); 1175 else if (Successors.size() == 2) { 1176 drawEdge(Block, Successors.front(), false, "T"); 1177 drawEdge(Block, Successors.back(), false, "F"); 1178 } else { 1179 unsigned SuccessorNumber = 0; 1180 for (auto *Successor : Successors) 1181 drawEdge(Block, Successor, false, Twine(SuccessorNumber++)); 1182 } 1183 } 1184 1185 void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) { 1186 // Implement dot-formatted dump by performing plain-text dump into the 1187 // temporary storage followed by some post-processing. 1188 OS << Indent << getUID(BasicBlock) << " [label =\n"; 1189 bumpIndent(1); 1190 std::string Str; 1191 raw_string_ostream SS(Str); 1192 // Use no indentation as we need to wrap the lines into quotes ourselves. 1193 BasicBlock->print(SS, "", SlotTracker); 1194 1195 // We need to process each line of the output separately, so split 1196 // single-string plain-text dump. 1197 SmallVector<StringRef, 0> Lines; 1198 StringRef(Str).rtrim('\n').split(Lines, "\n"); 1199 1200 auto EmitLine = [&](StringRef Line, StringRef Suffix) { 1201 OS << Indent << '"' << DOT::EscapeString(Line.str()) << "\\l\"" << Suffix; 1202 }; 1203 1204 // Don't need the "+" after the last line. 
  for (auto Line : make_range(Lines.begin(), Lines.end() - 1))
    EmitLine(Line, " +\n");
  EmitLine(Lines.back(), "\n");

  bumpIndent(-1);
  OS << Indent << "]\n";

  dumpEdges(BasicBlock);
}

void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
  OS << Indent << "subgraph " << getUID(Region) << " {\n";
  bumpIndent(1);
  OS << Indent << "fontname=Courier\n"
     << Indent << "label=\""
     << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ")
     << DOT::EscapeString(Region->getName()) << "\"\n";
  // Dump the blocks of the region.
  assert(Region->getEntry() && "Region contains no inner blocks.");
  for (const VPBlockBase *Block : depth_first(Region->getEntry()))
    dumpBlock(Block);
  bumpIndent(-1);
  OS << Indent << "}\n";
  dumpEdges(Region);
}

void VPlanIngredient::print(raw_ostream &O) const {
  if (auto *Inst = dyn_cast<Instruction>(V)) {
    if (!Inst->getType()->isVoidTy()) {
      Inst->printAsOperand(O, false);
      O << " = ";
    }
    O << Inst->getOpcodeName() << " ";
    unsigned E = Inst->getNumOperands();
    if (E > 0) {
      Inst->getOperand(0)->printAsOperand(O, false);
      for (unsigned I = 1; I < E; ++I)
        Inst->getOperand(I)->printAsOperand(O << ", ", false);
    }
  } else // !Inst
    V->printAsOperand(O, false);
}

void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-CALL ";

  auto *CI = cast<CallInst>(getUnderlyingInstr());
  if (CI->getType()->isVoidTy())
    O << "void ";
  else {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  O << "call @" << CI->getCalledFunction()->getName() << "(";
  printOperands(O, SlotTracker);
  O << ")";
}

void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-SELECT ";
  printAsOperand(O, SlotTracker);
  O << " = select ";
  getOperand(0)->printAsOperand(O, SlotTracker);
  O << ", ";
  getOperand(1)->printAsOperand(O, SlotTracker);
  O << ", ";
  getOperand(2)->printAsOperand(O, SlotTracker);
" (condition is loop invariant)" : ""); 1276 } 1277 1278 void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent, 1279 VPSlotTracker &SlotTracker) const { 1280 O << Indent << "WIDEN "; 1281 printAsOperand(O, SlotTracker); 1282 O << " = " << getUnderlyingInstr()->getOpcodeName() << " "; 1283 printOperands(O, SlotTracker); 1284 } 1285 1286 void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent, 1287 VPSlotTracker &SlotTracker) const { 1288 O << Indent << "WIDEN-INDUCTION"; 1289 if (getTruncInst()) { 1290 O << "\\l\""; 1291 O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\""; 1292 O << " +\n" << Indent << "\" "; 1293 getVPValue(0)->printAsOperand(O, SlotTracker); 1294 } else 1295 O << " " << VPlanIngredient(IV); 1296 } 1297 1298 void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent, 1299 VPSlotTracker &SlotTracker) const { 1300 O << Indent << "EMIT "; 1301 printAsOperand(O, SlotTracker); 1302 O << " = WIDEN-POINTER-INDUCTION "; 1303 getStartValue()->printAsOperand(O, SlotTracker); 1304 O << ", " << *IndDesc.getStep(); 1305 } 1306 1307 #endif 1308 1309 bool VPWidenIntOrFpInductionRecipe::isCanonical() const { 1310 auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue()); 1311 auto *StepC = dyn_cast<SCEVConstant>(getInductionDescriptor().getStep()); 1312 return StartC && StartC->isZero() && StepC && StepC->isOne(); 1313 } 1314 1315 VPCanonicalIVPHIRecipe *VPScalarIVStepsRecipe::getCanonicalIV() const { 1316 return cast<VPCanonicalIVPHIRecipe>(getOperand(0)); 1317 } 1318 1319 bool VPScalarIVStepsRecipe::isCanonical() const { 1320 auto *CanIV = getCanonicalIV(); 1321 // The start value of the steps-recipe must match the start value of the 1322 // canonical induction and it must step by 1. 1323 if (CanIV->getStartValue() != getStartValue()) 1324 return false; 1325 auto *StepVPV = getStepValue(); 1326 if (StepVPV->getDef()) 1327 return false; 1328 auto *StepC = dyn_cast_or_null<ConstantInt>(StepVPV->getLiveInIRValue()); 1329 return StepC && StepC->isOne(); 1330 } 1331 1332 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1333 void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent, 1334 VPSlotTracker &SlotTracker) const { 1335 O << Indent; 1336 printAsOperand(O, SlotTracker); 1337 O << Indent << "= SCALAR-STEPS "; 1338 printOperands(O, SlotTracker); 1339 } 1340 1341 void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent, 1342 VPSlotTracker &SlotTracker) const { 1343 O << Indent << "WIDEN-GEP "; 1344 O << (IsPtrLoopInvariant ? "Inv" : "Var"); 1345 size_t IndicesNumber = IsIndexLoopInvariant.size(); 1346 for (size_t I = 0; I < IndicesNumber; ++I) 1347 O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]"; 1348 1349 O << " "; 1350 printAsOperand(O, SlotTracker); 1351 O << " = getelementptr "; 1352 printOperands(O, SlotTracker); 1353 } 1354 1355 void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent, 1356 VPSlotTracker &SlotTracker) const { 1357 O << Indent << "WIDEN-PHI "; 1358 1359 auto *OriginalPhi = cast<PHINode>(getUnderlyingValue()); 1360 // Unless all incoming values are modeled in VPlan print the original PHI 1361 // directly. 1362 // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming 1363 // values as VPValues. 
  if (getNumOperands() != OriginalPhi->getNumOperands()) {
    O << VPlanIngredient(OriginalPhi);
    return;
  }

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}

void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "BLEND ";
  Phi->printAsOperand(O, false);
  O << " =";
  if (getNumIncomingValues() == 1) {
    // Not a User of any mask: not really blending, this is a
    // single-predecessor phi.
    O << " ";
    getIncomingValue(0)->printAsOperand(O, SlotTracker);
  } else {
    for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
      O << " ";
      getIncomingValue(I)->printAsOperand(O, SlotTracker);
      O << "/";
      getMask(I)->printAsOperand(O, SlotTracker);
    }
  }
}

void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "REDUCE ";
  printAsOperand(O, SlotTracker);
  O << " = ";
  getChainOp()->printAsOperand(O, SlotTracker);
  O << " +";
  if (isa<FPMathOperator>(getUnderlyingInstr()))
    O << getUnderlyingInstr()->getFastMathFlags();
  O << " reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode()) << " (";
  getVecOp()->printAsOperand(O, SlotTracker);
  if (getCondOp()) {
    O << ", ";
    getCondOp()->printAsOperand(O, SlotTracker);
  }
  O << ")";
}

void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");

  if (!getUnderlyingInstr()->getType()->isVoidTy()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }
  O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " ";
  printOperands(O, SlotTracker);

  if (AlsoPack)
    O << " (S->V)";
}

void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
  O << Indent << "PHI-PREDICATED-INSTRUCTION ";
  printAsOperand(O, SlotTracker);
  O << " = ";
  printOperands(O, SlotTracker);
}

void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,
                                           VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN ";

  if (!isStore()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }
  O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";

  printOperands(O, SlotTracker);
}
#endif

void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
  Value *Start = getStartValue()->getLiveInIRValue();
  PHINode *EntryPart = PHINode::Create(
      Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt());
  EntryPart->addIncoming(Start, State.CFG.VectorPreHeader);
  EntryPart->setDebugLoc(DL);
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
    State.set(this, EntryPart, Part);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                   VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = CANONICAL-INDUCTION";
}
#endif

void VPExpandSCEVRecipe::execute(VPTransformState &State) {
  assert(!State.Instance && "cannot be used in per-lane");
  const DataLayout &DL =
      State.CFG.VectorPreHeader->getModule()->getDataLayout();
  SCEVExpander Exp(SE, DL, "induction");
  Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
                                 State.CFG.VectorPreHeader->getTerminator());

  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
    State.set(this, Res, Part);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
                               VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  getVPSingleValue()->printAsOperand(O, SlotTracker);
  O << " = EXPAND SCEV " << *Expr;
}
#endif

void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
  Value *CanonicalIV = State.get(getOperand(0), 0);
  Type *STy = CanonicalIV->getType();
  IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
  ElementCount VF = State.VF;
  Value *VStart = VF.isScalar()
                      ? CanonicalIV
                      : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
  for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
    Value *VStep = createStepForVF(Builder, STy, VF, Part);
    if (VF.isVector()) {
      VStep = Builder.CreateVectorSplat(VF, VStep);
      VStep =
          Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
    }
    Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
    State.set(this, CanonicalVectorIV, Part);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = WIDEN-CANONICAL-INDUCTION ";
  printOperands(O, SlotTracker);
}
#endif

void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
  auto &Builder = State.Builder;
  // Create a vector from the initial value.
  auto *VectorInit = getStartValue()->getLiveInIRValue();

  Type *VecTy = State.VF.isScalar()
                    ? VectorInit->getType()
                    : VectorType::get(VectorInit->getType(), State.VF);

  if (State.VF.isVector()) {
    auto *IdxTy = Builder.getInt32Ty();
    auto *One = ConstantInt::get(IdxTy, 1);
    IRBuilder<>::InsertPointGuard Guard(Builder);
    Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator());
    auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
    auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
    VectorInit = Builder.CreateInsertElement(
        PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
  }

  // Create a phi node for the new recurrence.
  PHINode *EntryPart = PHINode::Create(
      VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt());
  EntryPart->addIncoming(VectorInit, State.CFG.VectorPreHeader);
  State.set(this, EntryPart, 0);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                            VPSlotTracker &SlotTracker) const {
  O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif

void VPReductionPHIRecipe::execute(VPTransformState &State) {
  PHINode *PN = cast<PHINode>(getUnderlyingValue());
  auto &Builder = State.Builder;

  // In order to support recurrences we need to be able to vectorize Phi nodes.
  // Phi nodes have cycles, so we need to vectorize them in two stages. This is
  // stage #1: We create a new vector PHI node with no incoming edges. We'll use
  // this value when we vectorize all of the instructions that use the PHI.
  bool ScalarPHI = State.VF.isScalar() || IsInLoop;
  Type *VecTy =
      ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF);

  BasicBlock *HeaderBB = State.CFG.PrevBB;
  assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
         "recipe must be in the vector loop header");
  unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
  for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
    Value *EntryPart =
        PHINode::Create(VecTy, 2, "vec.phi", &*HeaderBB->getFirstInsertionPt());
    State.set(this, EntryPart, Part);
  }

  // Reductions do not have to start at zero. They can start with
  // any loop-invariant value.
  VPValue *StartVPV = getStartValue();
  Value *StartV = StartVPV->getLiveInIRValue();

  Value *Iden = nullptr;
  RecurKind RK = RdxDesc.getRecurrenceKind();
  if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
      RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) {
    // MinMax reductions have the start value as their identity.
    if (ScalarPHI) {
      Iden = StartV;
    } else {
      IRBuilderBase::InsertPointGuard IPBuilder(Builder);
      Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator());
      StartV = Iden =
          Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
    }
  } else {
    Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
                                         RdxDesc.getFastMathFlags());

    if (!ScalarPHI) {
      Iden = Builder.CreateVectorSplat(State.VF, Iden);
      IRBuilderBase::InsertPointGuard IPBuilder(Builder);
      Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator());
      Constant *Zero = Builder.getInt32(0);
      StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
    }
  }

  for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
    Value *EntryPart = State.get(this, Part);
    // Make sure to add the reduction start value only to the
    // first unroll part.
    Value *StartVal = (Part == 0) ? StartV : Iden;
    cast<PHINode>(EntryPart)->addIncoming(StartVal, State.CFG.VectorPreHeader);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                 VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-REDUCTION-PHI ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
#endif

template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);

void VPValue::replaceAllUsesWith(VPValue *New) {
  for (unsigned J = 0; J < getNumUsers();) {
    VPUser *User = Users[J];
    unsigned NumUsers = getNumUsers();
    for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I)
      if (User->getOperand(I) == this)
        User->setOperand(I, New);
    // If a user got removed after updating the current user, the next user to
    // update will be moved to the current position, so we only need to
    // increment the index if the number of users did not change.
    if (NumUsers == getNumUsers())
      J++;
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPValue::printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const {
  if (const Value *UV = getUnderlyingValue()) {
    OS << "ir<";
    UV->printAsOperand(OS, false);
    OS << ">";
    return;
  }

  unsigned Slot = Tracker.getSlot(this);
  if (Slot == unsigned(-1))
    OS << "<badref>";
  else
    OS << "vp<%" << Tracker.getSlot(this) << ">";
}

void VPUser::printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const {
  interleaveComma(operands(), O, [&O, &SlotTracker](VPValue *Op) {
    Op->printAsOperand(O, SlotTracker);
  });
}
#endif

void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region,
                                          Old2NewTy &Old2New,
                                          InterleavedAccessInfo &IAI) {
  ReversePostOrderTraversal<VPBlockBase *> RPOT(Region->getEntry());
  for (VPBlockBase *Base : RPOT) {
    visitBlock(Base, Old2New, IAI);
  }
}

void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
                                         InterleavedAccessInfo &IAI) {
  if (VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(Block)) {
    for (VPRecipeBase &VPI : *VPBB) {
      if (isa<VPHeaderPHIRecipe>(&VPI))
        continue;
      assert(isa<VPInstruction>(&VPI) && "Can only handle VPInstructions");
      auto *VPInst = cast<VPInstruction>(&VPI);
      auto *Inst = cast<Instruction>(VPInst->getUnderlyingValue());
      auto *IG = IAI.getInterleaveGroup(Inst);
      if (!IG)
        continue;

      auto NewIGIter = Old2New.find(IG);
      if (NewIGIter == Old2New.end())
        Old2New[IG] = new InterleaveGroup<VPInstruction>(
            IG->getFactor(), IG->isReverse(), IG->getAlign());

      if (Inst == IG->getInsertPos())
        Old2New[IG]->setInsertPos(VPInst);

      InterleaveGroupMap[VPInst] = Old2New[IG];
      InterleaveGroupMap[VPInst]->insertMember(
          VPInst, IG->getIndex(Inst),
          Align(IG->isReverse() ? (-1) * int(IG->getFactor())
                                : IG->getFactor()));
    }
  } else if (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
    visitRegion(Region, Old2New, IAI);
  else
    llvm_unreachable("Unsupported kind of VPBlock.");
}

VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan,
                                                 InterleavedAccessInfo &IAI) {
  Old2NewTy Old2New;
  visitRegion(Plan.getVectorLoopRegion(), Old2New, IAI);
}

void VPSlotTracker::assignSlot(const VPValue *V) {
  assert(Slots.find(V) == Slots.end() && "VPValue already has a slot!");
  Slots[V] = NextSlot++;
}

void VPSlotTracker::assignSlots(const VPlan &Plan) {

  for (const VPValue *V : Plan.VPExternalDefs)
    assignSlot(V);

  assignSlot(&Plan.VectorTripCount);
  if (Plan.BackedgeTakenCount)
    assignSlot(Plan.BackedgeTakenCount);

  ReversePostOrderTraversal<
      VPBlockRecursiveTraversalWrapper<const VPBlockBase *>>
      RPOT(VPBlockRecursiveTraversalWrapper<const VPBlockBase *>(
          Plan.getEntry()));
  for (const VPBasicBlock *VPBB :
       VPBlockUtils::blocksOnly<const VPBasicBlock>(RPOT))
    for (const VPRecipeBase &Recipe : *VPBB)
      for (VPValue *Def : Recipe.definedValues())
        assignSlot(Def);
}

bool vputils::onlyFirstLaneUsed(VPValue *Def) {
  return all_of(Def->users(), [Def](VPUser *U) {
    return cast<VPRecipeBase>(U)->onlyFirstLaneUsed(Def);
  });
}