1 //===- VPlan.cpp - Vectorizer Plan ----------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This is the LLVM vectorization plan. It represents a candidate for 11 /// vectorization, allowing to plan and optimize how to vectorize a given loop 12 /// before generating LLVM-IR. 13 /// The vectorizer uses vectorization plans to estimate the costs of potential 14 /// candidates and if profitable to execute the desired plan, generating vector 15 /// LLVM-IR code. 16 /// 17 //===----------------------------------------------------------------------===// 18 19 #include "VPlan.h" 20 #include "LoopVectorizationPlanner.h" 21 #include "VPlanCFG.h" 22 #include "VPlanDominatorTree.h" 23 #include "VPlanPatternMatch.h" 24 #include "VPlanTransforms.h" 25 #include "VPlanUtils.h" 26 #include "llvm/ADT/PostOrderIterator.h" 27 #include "llvm/ADT/STLExtras.h" 28 #include "llvm/ADT/SmallVector.h" 29 #include "llvm/ADT/StringExtras.h" 30 #include "llvm/ADT/Twine.h" 31 #include "llvm/Analysis/DomTreeUpdater.h" 32 #include "llvm/Analysis/LoopInfo.h" 33 #include "llvm/IR/BasicBlock.h" 34 #include "llvm/IR/CFG.h" 35 #include "llvm/IR/IRBuilder.h" 36 #include "llvm/IR/Instruction.h" 37 #include "llvm/IR/Instructions.h" 38 #include "llvm/IR/Type.h" 39 #include "llvm/IR/Value.h" 40 #include "llvm/Support/Casting.h" 41 #include "llvm/Support/CommandLine.h" 42 #include "llvm/Support/Debug.h" 43 #include "llvm/Support/GraphWriter.h" 44 #include "llvm/Support/raw_ostream.h" 45 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 46 #include "llvm/Transforms/Utils/LoopVersioning.h" 47 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" 48 #include <cassert> 49 #include <string> 50 #include <vector> 51 52 using namespace llvm; 53 using namespace llvm::VPlanPatternMatch; 54 55 namespace llvm { 56 extern cl::opt<bool> EnableVPlanNativePath; 57 } 58 extern cl::opt<unsigned> ForceTargetInstructionCost; 59 60 static cl::opt<bool> PrintVPlansInDotFormat( 61 "vplan-print-in-dot-format", cl::Hidden, 62 cl::desc("Use dot format instead of plain text when dumping VPlans")); 63 64 #define DEBUG_TYPE "vplan" 65 66 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 67 raw_ostream &llvm::operator<<(raw_ostream &OS, const VPValue &V) { 68 const VPInstruction *Instr = dyn_cast<VPInstruction>(&V); 69 VPSlotTracker SlotTracker( 70 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr); 71 V.print(OS, SlotTracker); 72 return OS; 73 } 74 #endif 75 76 Value *VPLane::getAsRuntimeExpr(IRBuilderBase &Builder, 77 const ElementCount &VF) const { 78 switch (LaneKind) { 79 case VPLane::Kind::ScalableLast: 80 // Lane = RuntimeVF - VF.getKnownMinValue() + Lane 81 return Builder.CreateSub(getRuntimeVF(Builder, Builder.getInt32Ty(), VF), 82 Builder.getInt32(VF.getKnownMinValue() - Lane)); 83 case VPLane::Kind::First: 84 return Builder.getInt32(Lane); 85 } 86 llvm_unreachable("Unknown lane kind"); 87 } 88 89 VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def) 90 : SubclassID(SC), UnderlyingVal(UV), Def(Def) { 91 if (Def) 92 Def->addDefinedValue(this); 93 } 94 95 VPValue::~VPValue() { 96 assert(Users.empty() && "trying to delete a VPValue with remaining users"); 97 if (Def) 98 Def->removeDefinedValue(this); 99 } 100 101 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 102 void VPValue::print(raw_ostream &OS, VPSlotTracker &SlotTracker) const { 103 if (const VPRecipeBase *R = dyn_cast_or_null<VPRecipeBase>(Def)) 104 R->print(OS, "", SlotTracker); 105 else 106 printAsOperand(OS, SlotTracker); 107 } 108 109 void VPValue::dump() const { 110 const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this->Def); 111 VPSlotTracker SlotTracker( 112 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr); 113 print(dbgs(), SlotTracker); 114 dbgs() << "\n"; 115 } 116 117 void VPDef::dump() const { 118 const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this); 119 VPSlotTracker SlotTracker( 120 (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr); 121 print(dbgs(), "", SlotTracker); 122 dbgs() << "\n"; 123 } 124 #endif 125 126 VPRecipeBase *VPValue::getDefiningRecipe() { 127 return cast_or_null<VPRecipeBase>(Def); 128 } 129 130 const VPRecipeBase *VPValue::getDefiningRecipe() const { 131 return cast_or_null<VPRecipeBase>(Def); 132 } 133 134 // Get the top-most entry block of \p Start. This is the entry block of the 135 // containing VPlan. This function is templated to support both const and non-const blocks 136 template <typename T> static T *getPlanEntry(T *Start) { 137 T *Next = Start; 138 T *Current = Start; 139 while ((Next = Next->getParent())) 140 Current = Next; 141 142 SmallSetVector<T *, 8> WorkList; 143 WorkList.insert(Current); 144 145 for (unsigned i = 0; i < WorkList.size(); i++) { 146 T *Current = WorkList[i]; 147 if (Current->getNumPredecessors() == 0) 148 return Current; 149 auto &Predecessors = Current->getPredecessors(); 150 WorkList.insert(Predecessors.begin(), Predecessors.end()); 151 } 152 153 llvm_unreachable("VPlan without any entry node without predecessors"); 154 } 155 156 VPlan *VPBlockBase::getPlan() { return getPlanEntry(this)->Plan; } 157 158 const VPlan *VPBlockBase::getPlan() const { return getPlanEntry(this)->Plan; } 159 160 /// \return the VPBasicBlock that is the entry of Block, possibly indirectly. 161 const VPBasicBlock *VPBlockBase::getEntryBasicBlock() const { 162 const VPBlockBase *Block = this; 163 while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) 164 Block = Region->getEntry(); 165 return cast<VPBasicBlock>(Block); 166 } 167 168 VPBasicBlock *VPBlockBase::getEntryBasicBlock() { 169 VPBlockBase *Block = this; 170 while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) 171 Block = Region->getEntry(); 172 return cast<VPBasicBlock>(Block); 173 } 174 175 void VPBlockBase::setPlan(VPlan *ParentPlan) { 176 assert( 177 (ParentPlan->getEntry() == this || ParentPlan->getPreheader() == this) && 178 "Can only set plan on its entry or preheader block."); 179 Plan = ParentPlan; 180 } 181 182 /// \return the VPBasicBlock that is the exit of Block, possibly indirectly. 183 const VPBasicBlock *VPBlockBase::getExitingBasicBlock() const { 184 const VPBlockBase *Block = this; 185 while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) 186 Block = Region->getExiting(); 187 return cast<VPBasicBlock>(Block); 188 } 189 190 VPBasicBlock *VPBlockBase::getExitingBasicBlock() { 191 VPBlockBase *Block = this; 192 while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) 193 Block = Region->getExiting(); 194 return cast<VPBasicBlock>(Block); 195 } 196 197 VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() { 198 if (!Successors.empty() || !Parent) 199 return this; 200 assert(Parent->getExiting() == this && 201 "Block w/o successors not the exiting block of its parent."); 202 return Parent->getEnclosingBlockWithSuccessors(); 203 } 204 205 VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() { 206 if (!Predecessors.empty() || !Parent) 207 return this; 208 assert(Parent->getEntry() == this && 209 "Block w/o predecessors not the entry of its parent."); 210 return Parent->getEnclosingBlockWithPredecessors(); 211 } 212 213 void VPBlockBase::deleteCFG(VPBlockBase *Entry) { 214 for (VPBlockBase *Block : to_vector(vp_depth_first_shallow(Entry))) 215 delete Block; 216 } 217 218 VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() { 219 iterator It = begin(); 220 while (It != end() && It->isPhi()) 221 It++; 222 return It; 223 } 224 225 VPTransformState::VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI, 226 DominatorTree *DT, IRBuilderBase &Builder, 227 InnerLoopVectorizer *ILV, VPlan *Plan) 228 : VF(VF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan), 229 LVer(nullptr), TypeAnalysis(Plan->getCanonicalIV()->getScalarType()) {} 230 231 Value *VPTransformState::get(VPValue *Def, const VPLane &Lane) { 232 if (Def->isLiveIn()) 233 return Def->getLiveInIRValue(); 234 235 if (hasScalarValue(Def, Lane)) 236 return Data.VPV2Scalars[Def][Lane.mapToCacheIndex(VF)]; 237 238 if (!Lane.isFirstLane() && vputils::isUniformAfterVectorization(Def) && 239 hasScalarValue(Def, VPLane::getFirstLane())) { 240 return Data.VPV2Scalars[Def][0]; 241 } 242 243 assert(hasVectorValue(Def)); 244 auto *VecPart = Data.VPV2Vector[Def]; 245 if (!VecPart->getType()->isVectorTy()) { 246 assert(Lane.isFirstLane() && "cannot get lane > 0 for scalar"); 247 return VecPart; 248 } 249 // TODO: Cache created scalar values. 250 Value *LaneV = Lane.getAsRuntimeExpr(Builder, VF); 251 auto *Extract = Builder.CreateExtractElement(VecPart, LaneV); 252 // set(Def, Extract, Instance); 253 return Extract; 254 } 255 256 Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) { 257 if (NeedsScalar) { 258 assert((VF.isScalar() || Def->isLiveIn() || hasVectorValue(Def) || 259 !vputils::onlyFirstLaneUsed(Def) || 260 (hasScalarValue(Def, VPLane(0)) && 261 Data.VPV2Scalars[Def].size() == 1)) && 262 "Trying to access a single scalar per part but has multiple scalars " 263 "per part."); 264 return get(Def, VPLane(0)); 265 } 266 267 // If Values have been set for this Def return the one relevant for \p Part. 268 if (hasVectorValue(Def)) 269 return Data.VPV2Vector[Def]; 270 271 auto GetBroadcastInstrs = [this, Def](Value *V) { 272 bool SafeToHoist = Def->isDefinedOutsideLoopRegions(); 273 if (VF.isScalar()) 274 return V; 275 // Place the code for broadcasting invariant variables in the new preheader. 276 IRBuilder<>::InsertPointGuard Guard(Builder); 277 if (SafeToHoist) { 278 BasicBlock *LoopVectorPreHeader = 279 CFG.VPBB2IRBB[Plan->getVectorPreheader()]; 280 if (LoopVectorPreHeader) 281 Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator()); 282 } 283 284 // Place the code for broadcasting invariant variables in the new preheader. 285 // Broadcast the scalar into all locations in the vector. 286 Value *Shuf = Builder.CreateVectorSplat(VF, V, "broadcast"); 287 288 return Shuf; 289 }; 290 291 if (!hasScalarValue(Def, {0})) { 292 assert(Def->isLiveIn() && "expected a live-in"); 293 Value *IRV = Def->getLiveInIRValue(); 294 Value *B = GetBroadcastInstrs(IRV); 295 set(Def, B); 296 return B; 297 } 298 299 Value *ScalarValue = get(Def, VPLane(0)); 300 // If we aren't vectorizing, we can just copy the scalar map values over 301 // to the vector map. 302 if (VF.isScalar()) { 303 set(Def, ScalarValue); 304 return ScalarValue; 305 } 306 307 bool IsUniform = vputils::isUniformAfterVectorization(Def); 308 309 VPLane LastLane(IsUniform ? 0 : VF.getKnownMinValue() - 1); 310 // Check if there is a scalar value for the selected lane. 311 if (!hasScalarValue(Def, LastLane)) { 312 // At the moment, VPWidenIntOrFpInductionRecipes, VPScalarIVStepsRecipes and 313 // VPExpandSCEVRecipes can also be uniform. 314 assert((isa<VPWidenIntOrFpInductionRecipe>(Def->getDefiningRecipe()) || 315 isa<VPScalarIVStepsRecipe>(Def->getDefiningRecipe()) || 316 isa<VPExpandSCEVRecipe>(Def->getDefiningRecipe())) && 317 "unexpected recipe found to be invariant"); 318 IsUniform = true; 319 LastLane = 0; 320 } 321 322 auto *LastInst = cast<Instruction>(get(Def, LastLane)); 323 // Set the insert point after the last scalarized instruction or after the 324 // last PHI, if LastInst is a PHI. This ensures the insertelement sequence 325 // will directly follow the scalar definitions. 326 auto OldIP = Builder.saveIP(); 327 auto NewIP = 328 isa<PHINode>(LastInst) 329 ? BasicBlock::iterator(LastInst->getParent()->getFirstNonPHI()) 330 : std::next(BasicBlock::iterator(LastInst)); 331 Builder.SetInsertPoint(&*NewIP); 332 333 // However, if we are vectorizing, we need to construct the vector values. 334 // If the value is known to be uniform after vectorization, we can just 335 // broadcast the scalar value corresponding to lane zero. Otherwise, we 336 // construct the vector values using insertelement instructions. Since the 337 // resulting vectors are stored in State, we will only generate the 338 // insertelements once. 339 Value *VectorValue = nullptr; 340 if (IsUniform) { 341 VectorValue = GetBroadcastInstrs(ScalarValue); 342 set(Def, VectorValue); 343 } else { 344 // Initialize packing with insertelements to start from undef. 345 assert(!VF.isScalable() && "VF is assumed to be non scalable."); 346 Value *Undef = PoisonValue::get(VectorType::get(LastInst->getType(), VF)); 347 set(Def, Undef); 348 for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane) 349 packScalarIntoVectorValue(Def, Lane); 350 VectorValue = get(Def); 351 } 352 Builder.restoreIP(OldIP); 353 return VectorValue; 354 } 355 356 BasicBlock *VPTransformState::CFGState::getPreheaderBBFor(VPRecipeBase *R) { 357 VPRegionBlock *LoopRegion = R->getParent()->getEnclosingLoopRegion(); 358 return VPBB2IRBB[LoopRegion->getPreheaderVPBB()]; 359 } 360 361 void VPTransformState::addNewMetadata(Instruction *To, 362 const Instruction *Orig) { 363 // If the loop was versioned with memchecks, add the corresponding no-alias 364 // metadata. 365 if (LVer && (isa<LoadInst>(Orig) || isa<StoreInst>(Orig))) 366 LVer->annotateInstWithNoAlias(To, Orig); 367 } 368 369 void VPTransformState::addMetadata(Value *To, Instruction *From) { 370 // No source instruction to transfer metadata from? 371 if (!From) 372 return; 373 374 if (Instruction *ToI = dyn_cast<Instruction>(To)) { 375 propagateMetadata(ToI, From); 376 addNewMetadata(ToI, From); 377 } 378 } 379 380 void VPTransformState::setDebugLocFrom(DebugLoc DL) { 381 const DILocation *DIL = DL; 382 // When a FSDiscriminator is enabled, we don't need to add the multiply 383 // factors to the discriminators. 384 if (DIL && 385 Builder.GetInsertBlock() 386 ->getParent() 387 ->shouldEmitDebugInfoForProfiling() && 388 !EnableFSDiscriminator) { 389 // FIXME: For scalable vectors, assume vscale=1. 390 unsigned UF = Plan->getUF(); 391 auto NewDIL = 392 DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue()); 393 if (NewDIL) 394 Builder.SetCurrentDebugLocation(*NewDIL); 395 else 396 LLVM_DEBUG(dbgs() << "Failed to create new discriminator: " 397 << DIL->getFilename() << " Line: " << DIL->getLine()); 398 } else 399 Builder.SetCurrentDebugLocation(DIL); 400 } 401 402 void VPTransformState::packScalarIntoVectorValue(VPValue *Def, 403 const VPLane &Lane) { 404 Value *ScalarInst = get(Def, Lane); 405 Value *VectorValue = get(Def); 406 VectorValue = Builder.CreateInsertElement(VectorValue, ScalarInst, 407 Lane.getAsRuntimeExpr(Builder, VF)); 408 set(Def, VectorValue); 409 } 410 411 BasicBlock * 412 VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) { 413 // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks. 414 // Pred stands for Predessor. Prev stands for Previous - last visited/created. 415 BasicBlock *PrevBB = CFG.PrevBB; 416 BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(), 417 PrevBB->getParent(), CFG.ExitBB); 418 LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n'); 419 420 // Hook up the new basic block to its predecessors. 421 for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) { 422 VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock(); 423 auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors(); 424 BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB]; 425 426 assert(PredBB && "Predecessor basic-block not found building successor."); 427 auto *PredBBTerminator = PredBB->getTerminator(); 428 LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n'); 429 430 auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator); 431 if (isa<UnreachableInst>(PredBBTerminator)) { 432 assert(PredVPSuccessors.size() == 1 && 433 "Predecessor ending w/o branch must have single successor."); 434 DebugLoc DL = PredBBTerminator->getDebugLoc(); 435 PredBBTerminator->eraseFromParent(); 436 auto *Br = BranchInst::Create(NewBB, PredBB); 437 Br->setDebugLoc(DL); 438 } else if (TermBr && !TermBr->isConditional()) { 439 TermBr->setSuccessor(0, NewBB); 440 } else { 441 // Set each forward successor here when it is created, excluding 442 // backedges. A backward successor is set when the branch is created. 443 unsigned idx = PredVPSuccessors.front() == this ? 0 : 1; 444 assert(!TermBr->getSuccessor(idx) && 445 "Trying to reset an existing successor block."); 446 TermBr->setSuccessor(idx, NewBB); 447 } 448 CFG.DTU.applyUpdates({{DominatorTree::Insert, PredBB, NewBB}}); 449 } 450 return NewBB; 451 } 452 453 void VPIRBasicBlock::execute(VPTransformState *State) { 454 assert(getHierarchicalSuccessors().size() <= 2 && 455 "VPIRBasicBlock can have at most two successors at the moment!"); 456 State->Builder.SetInsertPoint(IRBB->getTerminator()); 457 executeRecipes(State, IRBB); 458 // Create a branch instruction to terminate IRBB if one was not created yet 459 // and is needed. 460 if (getSingleSuccessor() && isa<UnreachableInst>(IRBB->getTerminator())) { 461 auto *Br = State->Builder.CreateBr(IRBB); 462 Br->setOperand(0, nullptr); 463 IRBB->getTerminator()->eraseFromParent(); 464 } else { 465 assert( 466 (getNumSuccessors() == 0 || isa<BranchInst>(IRBB->getTerminator())) && 467 "other blocks must be terminated by a branch"); 468 } 469 470 for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) { 471 VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock(); 472 BasicBlock *PredBB = State->CFG.VPBB2IRBB[PredVPBB]; 473 assert(PredBB && "Predecessor basic-block not found building successor."); 474 LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n'); 475 476 auto *PredBBTerminator = PredBB->getTerminator(); 477 auto *TermBr = cast<BranchInst>(PredBBTerminator); 478 // Set each forward successor here when it is created, excluding 479 // backedges. A backward successor is set when the branch is created. 480 const auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors(); 481 unsigned idx = PredVPSuccessors.front() == this ? 0 : 1; 482 assert((!TermBr->getSuccessor(idx) || TermBr->getSuccessor(idx) == IRBB) && 483 "Trying to reset an existing successor block."); 484 TermBr->setSuccessor(idx, IRBB); 485 State->CFG.DTU.applyUpdates({{DominatorTree::Insert, PredBB, IRBB}}); 486 } 487 } 488 489 void VPBasicBlock::execute(VPTransformState *State) { 490 bool Replica = bool(State->Lane); 491 VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB; 492 VPBlockBase *SingleHPred = nullptr; 493 BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible. 494 495 auto IsLoopRegion = [](VPBlockBase *BB) { 496 auto *R = dyn_cast<VPRegionBlock>(BB); 497 return R && !R->isReplicator(); 498 }; 499 500 // 1. Create an IR basic block. 501 if (PrevVPBB && /* A */ 502 !((SingleHPred = getSingleHierarchicalPredecessor()) && 503 SingleHPred->getExitingBasicBlock() == PrevVPBB && 504 PrevVPBB->getSingleHierarchicalSuccessor() && 505 (SingleHPred->getParent() == getEnclosingLoopRegion() && 506 !IsLoopRegion(SingleHPred))) && /* B */ 507 !(Replica && getPredecessors().empty())) { /* C */ 508 // The last IR basic block is reused, as an optimization, in three cases: 509 // A. the first VPBB reuses the loop pre-header BB - when PrevVPBB is null; 510 // B. when the current VPBB has a single (hierarchical) predecessor which 511 // is PrevVPBB and the latter has a single (hierarchical) successor which 512 // both are in the same non-replicator region; and 513 // C. when the current VPBB is an entry of a region replica - where PrevVPBB 514 // is the exiting VPBB of this region from a previous instance, or the 515 // predecessor of this region. 516 517 NewBB = createEmptyBasicBlock(State->CFG); 518 State->Builder.SetInsertPoint(NewBB); 519 // Temporarily terminate with unreachable until CFG is rewired. 520 UnreachableInst *Terminator = State->Builder.CreateUnreachable(); 521 // Register NewBB in its loop. In innermost loops its the same for all 522 // BB's. 523 if (State->CurrentVectorLoop) 524 State->CurrentVectorLoop->addBasicBlockToLoop(NewBB, *State->LI); 525 State->Builder.SetInsertPoint(Terminator); 526 State->CFG.PrevBB = NewBB; 527 } 528 529 // 2. Fill the IR basic block with IR instructions. 530 executeRecipes(State, NewBB); 531 } 532 533 void VPBasicBlock::dropAllReferences(VPValue *NewValue) { 534 for (VPRecipeBase &R : Recipes) { 535 for (auto *Def : R.definedValues()) 536 Def->replaceAllUsesWith(NewValue); 537 538 for (unsigned I = 0, E = R.getNumOperands(); I != E; I++) 539 R.setOperand(I, NewValue); 540 } 541 } 542 543 void VPBasicBlock::executeRecipes(VPTransformState *State, BasicBlock *BB) { 544 LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName() 545 << " in BB:" << BB->getName() << '\n'); 546 547 State->CFG.VPBB2IRBB[this] = BB; 548 State->CFG.PrevVPBB = this; 549 550 for (VPRecipeBase &Recipe : Recipes) 551 Recipe.execute(*State); 552 553 LLVM_DEBUG(dbgs() << "LV: filled BB:" << *BB); 554 } 555 556 VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) { 557 assert((SplitAt == end() || SplitAt->getParent() == this) && 558 "can only split at a position in the same block"); 559 560 SmallVector<VPBlockBase *, 2> Succs(successors()); 561 // First, disconnect the current block from its successors. 562 for (VPBlockBase *Succ : Succs) 563 VPBlockUtils::disconnectBlocks(this, Succ); 564 565 // Create new empty block after the block to split. 566 auto *SplitBlock = new VPBasicBlock(getName() + ".split"); 567 VPBlockUtils::insertBlockAfter(SplitBlock, this); 568 569 // Add successors for block to split to new block. 570 for (VPBlockBase *Succ : Succs) 571 VPBlockUtils::connectBlocks(SplitBlock, Succ); 572 573 // Finally, move the recipes starting at SplitAt to new block. 574 for (VPRecipeBase &ToMove : 575 make_early_inc_range(make_range(SplitAt, this->end()))) 576 ToMove.moveBefore(*SplitBlock, SplitBlock->end()); 577 578 return SplitBlock; 579 } 580 581 /// Return the enclosing loop region for region \p P. The templated version is 582 /// used to support both const and non-const block arguments. 583 template <typename T> static T *getEnclosingLoopRegionForRegion(T *P) { 584 if (P && P->isReplicator()) { 585 P = P->getParent(); 586 assert(!cast<VPRegionBlock>(P)->isReplicator() && 587 "unexpected nested replicate regions"); 588 } 589 return P; 590 } 591 592 VPRegionBlock *VPBasicBlock::getEnclosingLoopRegion() { 593 return getEnclosingLoopRegionForRegion(getParent()); 594 } 595 596 const VPRegionBlock *VPBasicBlock::getEnclosingLoopRegion() const { 597 return getEnclosingLoopRegionForRegion(getParent()); 598 } 599 600 static bool hasConditionalTerminator(const VPBasicBlock *VPBB) { 601 if (VPBB->empty()) { 602 assert( 603 VPBB->getNumSuccessors() < 2 && 604 "block with multiple successors doesn't have a recipe as terminator"); 605 return false; 606 } 607 608 const VPRecipeBase *R = &VPBB->back(); 609 bool IsCondBranch = isa<VPBranchOnMaskRecipe>(R) || 610 match(R, m_BranchOnCond(m_VPValue())) || 611 match(R, m_BranchOnCount(m_VPValue(), m_VPValue())); 612 (void)IsCondBranch; 613 614 if (VPBB->getNumSuccessors() >= 2 || 615 (VPBB->isExiting() && !VPBB->getParent()->isReplicator())) { 616 assert(IsCondBranch && "block with multiple successors not terminated by " 617 "conditional branch recipe"); 618 619 return true; 620 } 621 622 assert( 623 !IsCondBranch && 624 "block with 0 or 1 successors terminated by conditional branch recipe"); 625 return false; 626 } 627 628 VPRecipeBase *VPBasicBlock::getTerminator() { 629 if (hasConditionalTerminator(this)) 630 return &back(); 631 return nullptr; 632 } 633 634 const VPRecipeBase *VPBasicBlock::getTerminator() const { 635 if (hasConditionalTerminator(this)) 636 return &back(); 637 return nullptr; 638 } 639 640 bool VPBasicBlock::isExiting() const { 641 return getParent() && getParent()->getExitingBasicBlock() == this; 642 } 643 644 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 645 void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const { 646 if (getSuccessors().empty()) { 647 O << Indent << "No successors\n"; 648 } else { 649 O << Indent << "Successor(s): "; 650 ListSeparator LS; 651 for (auto *Succ : getSuccessors()) 652 O << LS << Succ->getName(); 653 O << '\n'; 654 } 655 } 656 657 void VPBasicBlock::print(raw_ostream &O, const Twine &Indent, 658 VPSlotTracker &SlotTracker) const { 659 O << Indent << getName() << ":\n"; 660 661 auto RecipeIndent = Indent + " "; 662 for (const VPRecipeBase &Recipe : *this) { 663 Recipe.print(O, RecipeIndent, SlotTracker); 664 O << '\n'; 665 } 666 667 printSuccessors(O, Indent); 668 } 669 #endif 670 671 static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry); 672 673 // Clone the CFG for all nodes reachable from \p Entry, this includes cloning 674 // the blocks and their recipes. Operands of cloned recipes will NOT be updated. 675 // Remapping of operands must be done separately. Returns a pair with the new 676 // entry and exiting blocks of the cloned region. If \p Entry isn't part of a 677 // region, return nullptr for the exiting block. 678 static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) { 679 DenseMap<VPBlockBase *, VPBlockBase *> Old2NewVPBlocks; 680 VPBlockBase *Exiting = nullptr; 681 bool InRegion = Entry->getParent(); 682 // First, clone blocks reachable from Entry. 683 for (VPBlockBase *BB : vp_depth_first_shallow(Entry)) { 684 VPBlockBase *NewBB = BB->clone(); 685 Old2NewVPBlocks[BB] = NewBB; 686 if (InRegion && BB->getNumSuccessors() == 0) { 687 assert(!Exiting && "Multiple exiting blocks?"); 688 Exiting = BB; 689 } 690 } 691 assert((!InRegion || Exiting) && "regions must have a single exiting block"); 692 693 // Second, update the predecessors & successors of the cloned blocks. 694 for (VPBlockBase *BB : vp_depth_first_shallow(Entry)) { 695 VPBlockBase *NewBB = Old2NewVPBlocks[BB]; 696 SmallVector<VPBlockBase *> NewPreds; 697 for (VPBlockBase *Pred : BB->getPredecessors()) { 698 NewPreds.push_back(Old2NewVPBlocks[Pred]); 699 } 700 NewBB->setPredecessors(NewPreds); 701 SmallVector<VPBlockBase *> NewSuccs; 702 for (VPBlockBase *Succ : BB->successors()) { 703 NewSuccs.push_back(Old2NewVPBlocks[Succ]); 704 } 705 NewBB->setSuccessors(NewSuccs); 706 } 707 708 #if !defined(NDEBUG) 709 // Verify that the order of predecessors and successors matches in the cloned 710 // version. 711 for (const auto &[OldBB, NewBB] : 712 zip(vp_depth_first_shallow(Entry), 713 vp_depth_first_shallow(Old2NewVPBlocks[Entry]))) { 714 for (const auto &[OldPred, NewPred] : 715 zip(OldBB->getPredecessors(), NewBB->getPredecessors())) 716 assert(NewPred == Old2NewVPBlocks[OldPred] && "Different predecessors"); 717 718 for (const auto &[OldSucc, NewSucc] : 719 zip(OldBB->successors(), NewBB->successors())) 720 assert(NewSucc == Old2NewVPBlocks[OldSucc] && "Different successors"); 721 } 722 #endif 723 724 return std::make_pair(Old2NewVPBlocks[Entry], 725 Exiting ? Old2NewVPBlocks[Exiting] : nullptr); 726 } 727 728 VPRegionBlock *VPRegionBlock::clone() { 729 const auto &[NewEntry, NewExiting] = cloneFrom(getEntry()); 730 auto *NewRegion = 731 new VPRegionBlock(NewEntry, NewExiting, getName(), isReplicator()); 732 for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry)) 733 Block->setParent(NewRegion); 734 return NewRegion; 735 } 736 737 void VPRegionBlock::dropAllReferences(VPValue *NewValue) { 738 for (VPBlockBase *Block : vp_depth_first_shallow(Entry)) 739 // Drop all references in VPBasicBlocks and replace all uses with 740 // DummyValue. 741 Block->dropAllReferences(NewValue); 742 } 743 744 void VPRegionBlock::execute(VPTransformState *State) { 745 ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> 746 RPOT(Entry); 747 748 if (!isReplicator()) { 749 // Create and register the new vector loop. 750 Loop *PrevLoop = State->CurrentVectorLoop; 751 State->CurrentVectorLoop = State->LI->AllocateLoop(); 752 BasicBlock *VectorPH = State->CFG.VPBB2IRBB[getPreheaderVPBB()]; 753 Loop *ParentLoop = State->LI->getLoopFor(VectorPH); 754 755 // Insert the new loop into the loop nest and register the new basic blocks 756 // before calling any utilities such as SCEV that require valid LoopInfo. 757 if (ParentLoop) 758 ParentLoop->addChildLoop(State->CurrentVectorLoop); 759 else 760 State->LI->addTopLevelLoop(State->CurrentVectorLoop); 761 762 // Visit the VPBlocks connected to "this", starting from it. 763 for (VPBlockBase *Block : RPOT) { 764 LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n'); 765 Block->execute(State); 766 } 767 768 State->CurrentVectorLoop = PrevLoop; 769 return; 770 } 771 772 assert(!State->Lane && "Replicating a Region with non-null instance."); 773 774 // Enter replicating mode. 775 assert(!State->VF.isScalable() && "VF is assumed to be non scalable."); 776 State->Lane = VPLane(0); 777 for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF; 778 ++Lane) { 779 State->Lane = VPLane(Lane, VPLane::Kind::First); 780 // Visit the VPBlocks connected to \p this, starting from it. 781 for (VPBlockBase *Block : RPOT) { 782 LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n'); 783 Block->execute(State); 784 } 785 } 786 787 // Exit replicating mode. 788 State->Lane.reset(); 789 } 790 791 InstructionCost VPBasicBlock::cost(ElementCount VF, VPCostContext &Ctx) { 792 InstructionCost Cost = 0; 793 for (VPRecipeBase &R : Recipes) 794 Cost += R.cost(VF, Ctx); 795 return Cost; 796 } 797 798 InstructionCost VPRegionBlock::cost(ElementCount VF, VPCostContext &Ctx) { 799 if (!isReplicator()) { 800 InstructionCost Cost = 0; 801 for (VPBlockBase *Block : vp_depth_first_shallow(getEntry())) 802 Cost += Block->cost(VF, Ctx); 803 InstructionCost BackedgeCost = 804 ForceTargetInstructionCost.getNumOccurrences() 805 ? InstructionCost(ForceTargetInstructionCost.getNumOccurrences()) 806 : Ctx.TTI.getCFInstrCost(Instruction::Br, TTI::TCK_RecipThroughput); 807 LLVM_DEBUG(dbgs() << "Cost of " << BackedgeCost << " for VF " << VF 808 << ": vector loop backedge\n"); 809 Cost += BackedgeCost; 810 return Cost; 811 } 812 813 // Compute the cost of a replicate region. Replicating isn't supported for 814 // scalable vectors, return an invalid cost for them. 815 // TODO: Discard scalable VPlans with replicate recipes earlier after 816 // construction. 817 if (VF.isScalable()) 818 return InstructionCost::getInvalid(); 819 820 // First compute the cost of the conditionally executed recipes, followed by 821 // account for the branching cost, except if the mask is a header mask or 822 // uniform condition. 823 using namespace llvm::VPlanPatternMatch; 824 VPBasicBlock *Then = cast<VPBasicBlock>(getEntry()->getSuccessors()[0]); 825 InstructionCost ThenCost = Then->cost(VF, Ctx); 826 827 // For the scalar case, we may not always execute the original predicated 828 // block, Thus, scale the block's cost by the probability of executing it. 829 if (VF.isScalar()) 830 return ThenCost / getReciprocalPredBlockProb(); 831 832 return ThenCost; 833 } 834 835 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 836 void VPRegionBlock::print(raw_ostream &O, const Twine &Indent, 837 VPSlotTracker &SlotTracker) const { 838 O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {"; 839 auto NewIndent = Indent + " "; 840 for (auto *BlockBase : vp_depth_first_shallow(Entry)) { 841 O << '\n'; 842 BlockBase->print(O, NewIndent, SlotTracker); 843 } 844 O << Indent << "}\n"; 845 846 printSuccessors(O, Indent); 847 } 848 #endif 849 850 VPlan::~VPlan() { 851 if (Entry) { 852 VPValue DummyValue; 853 for (VPBlockBase *Block : vp_depth_first_shallow(Entry)) 854 Block->dropAllReferences(&DummyValue); 855 856 VPBlockBase::deleteCFG(Entry); 857 858 Preheader->dropAllReferences(&DummyValue); 859 delete Preheader; 860 } 861 for (VPValue *VPV : VPLiveInsToFree) 862 delete VPV; 863 if (BackedgeTakenCount) 864 delete BackedgeTakenCount; 865 } 866 867 VPIRBasicBlock *VPIRBasicBlock::fromBasicBlock(BasicBlock *IRBB) { 868 auto *VPIRBB = new VPIRBasicBlock(IRBB); 869 for (Instruction &I : 870 make_range(IRBB->begin(), IRBB->getTerminator()->getIterator())) 871 VPIRBB->appendRecipe(new VPIRInstruction(I)); 872 return VPIRBB; 873 } 874 875 VPlanPtr VPlan::createInitialVPlan(Type *InductionTy, 876 PredicatedScalarEvolution &PSE, 877 bool RequiresScalarEpilogueCheck, 878 bool TailFolded, Loop *TheLoop) { 879 VPIRBasicBlock *Entry = 880 VPIRBasicBlock::fromBasicBlock(TheLoop->getLoopPreheader()); 881 VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph"); 882 VPIRBasicBlock *ScalarHeader = 883 VPIRBasicBlock::fromBasicBlock(TheLoop->getHeader()); 884 auto Plan = std::make_unique<VPlan>(Entry, VecPreheader, ScalarHeader); 885 886 // Create SCEV and VPValue for the trip count. 887 888 // Currently only loops with countable exits are vectorized, but calling 889 // getSymbolicMaxBackedgeTakenCount allows enablement work for loops with 890 // uncountable exits whilst also ensuring the symbolic maximum and known 891 // back-edge taken count remain identical for loops with countable exits. 892 const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount(); 893 assert((!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) && 894 BackedgeTakenCountSCEV == PSE.getBackedgeTakenCount()) && 895 "Invalid loop count"); 896 ScalarEvolution &SE = *PSE.getSE(); 897 const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV, 898 InductionTy, TheLoop); 899 Plan->TripCount = 900 vputils::getOrCreateVPValueForSCEVExpr(*Plan, TripCount, SE); 901 902 // Create VPRegionBlock, with empty header and latch blocks, to be filled 903 // during processing later. 904 VPBasicBlock *HeaderVPBB = new VPBasicBlock("vector.body"); 905 VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch"); 906 VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB); 907 auto *TopRegion = new VPRegionBlock(HeaderVPBB, LatchVPBB, "vector loop", 908 false /*isReplicator*/); 909 910 VPBlockUtils::insertBlockAfter(TopRegion, VecPreheader); 911 VPBasicBlock *MiddleVPBB = new VPBasicBlock("middle.block"); 912 VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion); 913 914 VPBasicBlock *ScalarPH = new VPBasicBlock("scalar.ph"); 915 VPBlockUtils::connectBlocks(ScalarPH, ScalarHeader); 916 if (!RequiresScalarEpilogueCheck) { 917 VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); 918 return Plan; 919 } 920 921 // If needed, add a check in the middle block to see if we have completed 922 // all of the iterations in the first vector loop. Three cases: 923 // 1) If (N - N%VF) == N, then we *don't* need to run the remainder. 924 // Thus if tail is to be folded, we know we don't need to run the 925 // remainder and we can set the condition to true. 926 // 2) If we require a scalar epilogue, there is no conditional branch as 927 // we unconditionally branch to the scalar preheader. Do nothing. 928 // 3) Otherwise, construct a runtime check. 929 BasicBlock *IRExitBlock = TheLoop->getUniqueExitBlock(); 930 auto *VPExitBlock = VPIRBasicBlock::fromBasicBlock(IRExitBlock); 931 // The connection order corresponds to the operands of the conditional branch. 932 VPBlockUtils::insertBlockAfter(VPExitBlock, MiddleVPBB); 933 VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); 934 935 auto *ScalarLatchTerm = TheLoop->getLoopLatch()->getTerminator(); 936 // Here we use the same DebugLoc as the scalar loop latch terminator instead 937 // of the corresponding compare because they may have ended up with 938 // different line numbers and we want to avoid awkward line stepping while 939 // debugging. Eg. if the compare has got a line number inside the loop. 940 VPBuilder Builder(MiddleVPBB); 941 VPValue *Cmp = 942 TailFolded 943 ? Plan->getOrAddLiveIn(ConstantInt::getTrue( 944 IntegerType::getInt1Ty(TripCount->getType()->getContext()))) 945 : Builder.createICmp(CmpInst::ICMP_EQ, Plan->getTripCount(), 946 &Plan->getVectorTripCount(), 947 ScalarLatchTerm->getDebugLoc(), "cmp.n"); 948 Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp}, 949 ScalarLatchTerm->getDebugLoc()); 950 return Plan; 951 } 952 953 void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV, 954 Value *CanonicalIVStartValue, 955 VPTransformState &State) { 956 Type *TCTy = TripCountV->getType(); 957 // Check if the backedge taken count is needed, and if so build it. 958 if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) { 959 IRBuilder<> Builder(State.CFG.PrevBB->getTerminator()); 960 auto *TCMO = Builder.CreateSub(TripCountV, ConstantInt::get(TCTy, 1), 961 "trip.count.minus.1"); 962 BackedgeTakenCount->setUnderlyingValue(TCMO); 963 } 964 965 VectorTripCount.setUnderlyingValue(VectorTripCountV); 966 967 IRBuilder<> Builder(State.CFG.PrevBB->getTerminator()); 968 // FIXME: Model VF * UF computation completely in VPlan. 969 assert(VFxUF.getNumUsers() && "VFxUF expected to always have users"); 970 unsigned UF = getUF(); 971 if (VF.getNumUsers()) { 972 Value *RuntimeVF = getRuntimeVF(Builder, TCTy, State.VF); 973 VF.setUnderlyingValue(RuntimeVF); 974 VFxUF.setUnderlyingValue( 975 UF > 1 ? Builder.CreateMul(RuntimeVF, ConstantInt::get(TCTy, UF)) 976 : RuntimeVF); 977 } else { 978 VFxUF.setUnderlyingValue(createStepForVF(Builder, TCTy, State.VF, UF)); 979 } 980 981 // When vectorizing the epilogue loop, the canonical induction start value 982 // needs to be changed from zero to the value after the main vector loop. 983 // FIXME: Improve modeling for canonical IV start values in the epilogue loop. 984 if (CanonicalIVStartValue) { 985 VPValue *VPV = getOrAddLiveIn(CanonicalIVStartValue); 986 auto *IV = getCanonicalIV(); 987 assert(all_of(IV->users(), 988 [](const VPUser *U) { 989 return isa<VPScalarIVStepsRecipe>(U) || 990 isa<VPScalarCastRecipe>(U) || 991 isa<VPDerivedIVRecipe>(U) || 992 cast<VPInstruction>(U)->getOpcode() == 993 Instruction::Add; 994 }) && 995 "the canonical IV should only be used by its increment or " 996 "ScalarIVSteps when resetting the start value"); 997 IV->setOperand(0, VPV); 998 } 999 } 1000 1001 /// Replace \p VPBB with a VPIRBasicBlock wrapping \p IRBB. All recipes from \p 1002 /// VPBB are moved to the end of the newly created VPIRBasicBlock. VPBB must 1003 /// have a single predecessor, which is rewired to the new VPIRBasicBlock. All 1004 /// successors of VPBB, if any, are rewired to the new VPIRBasicBlock. 1005 static void replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, BasicBlock *IRBB) { 1006 VPIRBasicBlock *IRVPBB = VPIRBasicBlock::fromBasicBlock(IRBB); 1007 for (auto &R : make_early_inc_range(*VPBB)) { 1008 assert(!R.isPhi() && "Tried to move phi recipe to end of block"); 1009 R.moveBefore(*IRVPBB, IRVPBB->end()); 1010 } 1011 1012 VPBlockUtils::reassociateBlocks(VPBB, IRVPBB); 1013 1014 delete VPBB; 1015 } 1016 1017 /// Generate the code inside the preheader and body of the vectorized loop. 1018 /// Assumes a single pre-header basic-block was created for this. Introduce 1019 /// additional basic-blocks as needed, and fill them all. 1020 void VPlan::execute(VPTransformState *State) { 1021 // Initialize CFG state. 1022 State->CFG.PrevVPBB = nullptr; 1023 State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor(); 1024 BasicBlock *VectorPreHeader = State->CFG.PrevBB; 1025 State->Builder.SetInsertPoint(VectorPreHeader->getTerminator()); 1026 1027 // Disconnect VectorPreHeader from ExitBB in both the CFG and DT. 1028 cast<BranchInst>(VectorPreHeader->getTerminator())->setSuccessor(0, nullptr); 1029 State->CFG.DTU.applyUpdates( 1030 {{DominatorTree::Delete, VectorPreHeader, State->CFG.ExitBB}}); 1031 1032 // Replace regular VPBB's for the middle and scalar preheader blocks with 1033 // VPIRBasicBlocks wrapping their IR blocks. The IR blocks are created during 1034 // skeleton creation, so we can only create the VPIRBasicBlocks now during 1035 // VPlan execution rather than earlier during VPlan construction. 1036 BasicBlock *MiddleBB = State->CFG.ExitBB; 1037 VPBasicBlock *MiddleVPBB = 1038 cast<VPBasicBlock>(getVectorLoopRegion()->getSingleSuccessor()); 1039 BasicBlock *ScalarPh = MiddleBB->getSingleSuccessor(); 1040 replaceVPBBWithIRVPBB(getScalarPreheader(), ScalarPh); 1041 replaceVPBBWithIRVPBB(MiddleVPBB, MiddleBB); 1042 1043 // Disconnect the middle block from its single successor (the scalar loop 1044 // header) in both the CFG and DT. The branch will be recreated during VPlan 1045 // execution. 1046 auto *BrInst = new UnreachableInst(MiddleBB->getContext()); 1047 BrInst->insertBefore(MiddleBB->getTerminator()); 1048 MiddleBB->getTerminator()->eraseFromParent(); 1049 State->CFG.DTU.applyUpdates({{DominatorTree::Delete, MiddleBB, ScalarPh}}); 1050 // Disconnect scalar preheader and scalar header, as the dominator tree edge will be updated as part of VPlan execution. This allows keeping the DTU logic generic during VPlan execution. 1051 State->CFG.DTU.applyUpdates( 1052 {{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}}); 1053 1054 // Generate code in the loop pre-header and body. 1055 for (VPBlockBase *Block : vp_depth_first_shallow(Entry)) 1056 Block->execute(State); 1057 1058 VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock(); 1059 BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB]; 1060 1061 // Fix the latch value of canonical, reduction and first-order recurrences 1062 // phis in the vector loop. 1063 VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock(); 1064 for (VPRecipeBase &R : Header->phis()) { 1065 // Skip phi-like recipes that generate their backedege values themselves. 1066 if (isa<VPWidenPHIRecipe>(&R)) 1067 continue; 1068 1069 if (isa<VPWidenPointerInductionRecipe>(&R) || 1070 isa<VPWidenIntOrFpInductionRecipe>(&R)) { 1071 PHINode *Phi = nullptr; 1072 if (isa<VPWidenIntOrFpInductionRecipe>(&R)) { 1073 Phi = cast<PHINode>(State->get(R.getVPSingleValue())); 1074 } else { 1075 auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R); 1076 assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) && 1077 "recipe generating only scalars should have been replaced"); 1078 auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi)); 1079 Phi = cast<PHINode>(GEP->getPointerOperand()); 1080 } 1081 1082 Phi->setIncomingBlock(1, VectorLatchBB); 1083 1084 // Move the last step to the end of the latch block. This ensures 1085 // consistent placement of all induction updates. 1086 Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1)); 1087 Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode()); 1088 1089 // Use the steps for the last part as backedge value for the induction. 1090 if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) 1091 Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand())); 1092 continue; 1093 } 1094 1095 auto *PhiR = cast<VPHeaderPHIRecipe>(&R); 1096 bool NeedsScalar = 1097 isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) || 1098 (isa<VPReductionPHIRecipe>(PhiR) && 1099 cast<VPReductionPHIRecipe>(PhiR)->isInLoop()); 1100 Value *Phi = State->get(PhiR, NeedsScalar); 1101 Value *Val = State->get(PhiR->getBackedgeValue(), NeedsScalar); 1102 cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB); 1103 } 1104 1105 State->CFG.DTU.flush(); 1106 assert(State->CFG.DTU.getDomTree().verify( 1107 DominatorTree::VerificationLevel::Fast) && 1108 "DT not preserved correctly"); 1109 } 1110 1111 InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx) { 1112 // For now only return the cost of the vector loop region, ignoring any other 1113 // blocks, like the preheader or middle blocks. 1114 return getVectorLoopRegion()->cost(VF, Ctx); 1115 } 1116 1117 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1118 void VPlan::printLiveIns(raw_ostream &O) const { 1119 VPSlotTracker SlotTracker(this); 1120 1121 if (VF.getNumUsers() > 0) { 1122 O << "\nLive-in "; 1123 VF.printAsOperand(O, SlotTracker); 1124 O << " = VF"; 1125 } 1126 1127 if (VFxUF.getNumUsers() > 0) { 1128 O << "\nLive-in "; 1129 VFxUF.printAsOperand(O, SlotTracker); 1130 O << " = VF * UF"; 1131 } 1132 1133 if (VectorTripCount.getNumUsers() > 0) { 1134 O << "\nLive-in "; 1135 VectorTripCount.printAsOperand(O, SlotTracker); 1136 O << " = vector-trip-count"; 1137 } 1138 1139 if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) { 1140 O << "\nLive-in "; 1141 BackedgeTakenCount->printAsOperand(O, SlotTracker); 1142 O << " = backedge-taken count"; 1143 } 1144 1145 O << "\n"; 1146 if (TripCount->isLiveIn()) 1147 O << "Live-in "; 1148 TripCount->printAsOperand(O, SlotTracker); 1149 O << " = original trip-count"; 1150 O << "\n"; 1151 } 1152 1153 LLVM_DUMP_METHOD 1154 void VPlan::print(raw_ostream &O) const { 1155 VPSlotTracker SlotTracker(this); 1156 1157 O << "VPlan '" << getName() << "' {"; 1158 1159 printLiveIns(O); 1160 1161 if (!getPreheader()->empty()) { 1162 O << "\n"; 1163 getPreheader()->print(O, "", SlotTracker); 1164 } 1165 1166 for (const VPBlockBase *Block : vp_depth_first_shallow(getEntry())) { 1167 O << '\n'; 1168 Block->print(O, "", SlotTracker); 1169 } 1170 1171 O << "}\n"; 1172 } 1173 1174 std::string VPlan::getName() const { 1175 std::string Out; 1176 raw_string_ostream RSO(Out); 1177 RSO << Name << " for "; 1178 if (!VFs.empty()) { 1179 RSO << "VF={" << VFs[0]; 1180 for (ElementCount VF : drop_begin(VFs)) 1181 RSO << "," << VF; 1182 RSO << "},"; 1183 } 1184 1185 if (UFs.empty()) { 1186 RSO << "UF>=1"; 1187 } else { 1188 RSO << "UF={" << UFs[0]; 1189 for (unsigned UF : drop_begin(UFs)) 1190 RSO << "," << UF; 1191 RSO << "}"; 1192 } 1193 1194 return Out; 1195 } 1196 1197 LLVM_DUMP_METHOD 1198 void VPlan::printDOT(raw_ostream &O) const { 1199 VPlanPrinter Printer(O, *this); 1200 Printer.dump(); 1201 } 1202 1203 LLVM_DUMP_METHOD 1204 void VPlan::dump() const { print(dbgs()); } 1205 #endif 1206 1207 static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry, 1208 DenseMap<VPValue *, VPValue *> &Old2NewVPValues) { 1209 // Update the operands of all cloned recipes starting at NewEntry. This 1210 // traverses all reachable blocks. This is done in two steps, to handle cycles 1211 // in PHI recipes. 1212 ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> 1213 OldDeepRPOT(Entry); 1214 ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> 1215 NewDeepRPOT(NewEntry); 1216 // First, collect all mappings from old to new VPValues defined by cloned 1217 // recipes. 1218 for (const auto &[OldBB, NewBB] : 1219 zip(VPBlockUtils::blocksOnly<VPBasicBlock>(OldDeepRPOT), 1220 VPBlockUtils::blocksOnly<VPBasicBlock>(NewDeepRPOT))) { 1221 assert(OldBB->getRecipeList().size() == NewBB->getRecipeList().size() && 1222 "blocks must have the same number of recipes"); 1223 for (const auto &[OldR, NewR] : zip(*OldBB, *NewBB)) { 1224 assert(OldR.getNumOperands() == NewR.getNumOperands() && 1225 "recipes must have the same number of operands"); 1226 assert(OldR.getNumDefinedValues() == NewR.getNumDefinedValues() && 1227 "recipes must define the same number of operands"); 1228 for (const auto &[OldV, NewV] : 1229 zip(OldR.definedValues(), NewR.definedValues())) 1230 Old2NewVPValues[OldV] = NewV; 1231 } 1232 } 1233 1234 // Update all operands to use cloned VPValues. 1235 for (VPBasicBlock *NewBB : 1236 VPBlockUtils::blocksOnly<VPBasicBlock>(NewDeepRPOT)) { 1237 for (VPRecipeBase &NewR : *NewBB) 1238 for (unsigned I = 0, E = NewR.getNumOperands(); I != E; ++I) { 1239 VPValue *NewOp = Old2NewVPValues.lookup(NewR.getOperand(I)); 1240 NewR.setOperand(I, NewOp); 1241 } 1242 } 1243 } 1244 1245 VPlan *VPlan::duplicate() { 1246 // Clone blocks. 1247 VPBasicBlock *NewPreheader = Preheader->clone(); 1248 const auto &[NewEntry, __] = cloneFrom(Entry); 1249 1250 BasicBlock *ScalarHeaderIRBB = getScalarHeader()->getIRBasicBlock(); 1251 VPIRBasicBlock *NewScalarHeader = cast<VPIRBasicBlock>(*find_if( 1252 vp_depth_first_shallow(NewEntry), [ScalarHeaderIRBB](VPBlockBase *VPB) { 1253 auto *VPIRBB = dyn_cast<VPIRBasicBlock>(VPB); 1254 return VPIRBB && VPIRBB->getIRBasicBlock() == ScalarHeaderIRBB; 1255 })); 1256 // Create VPlan, clone live-ins and remap operands in the cloned blocks. 1257 auto *NewPlan = 1258 new VPlan(NewPreheader, cast<VPBasicBlock>(NewEntry), NewScalarHeader); 1259 DenseMap<VPValue *, VPValue *> Old2NewVPValues; 1260 for (VPValue *OldLiveIn : VPLiveInsToFree) { 1261 Old2NewVPValues[OldLiveIn] = 1262 NewPlan->getOrAddLiveIn(OldLiveIn->getLiveInIRValue()); 1263 } 1264 Old2NewVPValues[&VectorTripCount] = &NewPlan->VectorTripCount; 1265 Old2NewVPValues[&VF] = &NewPlan->VF; 1266 Old2NewVPValues[&VFxUF] = &NewPlan->VFxUF; 1267 if (BackedgeTakenCount) { 1268 NewPlan->BackedgeTakenCount = new VPValue(); 1269 Old2NewVPValues[BackedgeTakenCount] = NewPlan->BackedgeTakenCount; 1270 } 1271 assert(TripCount && "trip count must be set"); 1272 if (TripCount->isLiveIn()) 1273 Old2NewVPValues[TripCount] = 1274 NewPlan->getOrAddLiveIn(TripCount->getLiveInIRValue()); 1275 // else NewTripCount will be created and inserted into Old2NewVPValues when 1276 // TripCount is cloned. In any case NewPlan->TripCount is updated below. 1277 1278 remapOperands(Preheader, NewPreheader, Old2NewVPValues); 1279 remapOperands(Entry, NewEntry, Old2NewVPValues); 1280 1281 // Initialize remaining fields of cloned VPlan. 1282 NewPlan->VFs = VFs; 1283 NewPlan->UFs = UFs; 1284 // TODO: Adjust names. 1285 NewPlan->Name = Name; 1286 assert(Old2NewVPValues.contains(TripCount) && 1287 "TripCount must have been added to Old2NewVPValues"); 1288 NewPlan->TripCount = Old2NewVPValues[TripCount]; 1289 return NewPlan; 1290 } 1291 1292 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1293 1294 Twine VPlanPrinter::getUID(const VPBlockBase *Block) { 1295 return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") + 1296 Twine(getOrCreateBID(Block)); 1297 } 1298 1299 Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) { 1300 const std::string &Name = Block->getName(); 1301 if (!Name.empty()) 1302 return Name; 1303 return "VPB" + Twine(getOrCreateBID(Block)); 1304 } 1305 1306 void VPlanPrinter::dump() { 1307 Depth = 1; 1308 bumpIndent(0); 1309 OS << "digraph VPlan {\n"; 1310 OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan"; 1311 if (!Plan.getName().empty()) 1312 OS << "\\n" << DOT::EscapeString(Plan.getName()); 1313 1314 { 1315 // Print live-ins. 1316 std::string Str; 1317 raw_string_ostream SS(Str); 1318 Plan.printLiveIns(SS); 1319 SmallVector<StringRef, 0> Lines; 1320 StringRef(Str).rtrim('\n').split(Lines, "\n"); 1321 for (auto Line : Lines) 1322 OS << DOT::EscapeString(Line.str()) << "\\n"; 1323 } 1324 1325 OS << "\"]\n"; 1326 OS << "node [shape=rect, fontname=Courier, fontsize=30]\n"; 1327 OS << "edge [fontname=Courier, fontsize=30]\n"; 1328 OS << "compound=true\n"; 1329 1330 dumpBlock(Plan.getPreheader()); 1331 1332 for (const VPBlockBase *Block : vp_depth_first_shallow(Plan.getEntry())) 1333 dumpBlock(Block); 1334 1335 OS << "}\n"; 1336 } 1337 1338 void VPlanPrinter::dumpBlock(const VPBlockBase *Block) { 1339 if (const VPBasicBlock *BasicBlock = dyn_cast<VPBasicBlock>(Block)) 1340 dumpBasicBlock(BasicBlock); 1341 else if (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) 1342 dumpRegion(Region); 1343 else 1344 llvm_unreachable("Unsupported kind of VPBlock."); 1345 } 1346 1347 void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To, 1348 bool Hidden, const Twine &Label) { 1349 // Due to "dot" we print an edge between two regions as an edge between the 1350 // exiting basic block and the entry basic of the respective regions. 1351 const VPBlockBase *Tail = From->getExitingBasicBlock(); 1352 const VPBlockBase *Head = To->getEntryBasicBlock(); 1353 OS << Indent << getUID(Tail) << " -> " << getUID(Head); 1354 OS << " [ label=\"" << Label << '\"'; 1355 if (Tail != From) 1356 OS << " ltail=" << getUID(From); 1357 if (Head != To) 1358 OS << " lhead=" << getUID(To); 1359 if (Hidden) 1360 OS << "; splines=none"; 1361 OS << "]\n"; 1362 } 1363 1364 void VPlanPrinter::dumpEdges(const VPBlockBase *Block) { 1365 auto &Successors = Block->getSuccessors(); 1366 if (Successors.size() == 1) 1367 drawEdge(Block, Successors.front(), false, ""); 1368 else if (Successors.size() == 2) { 1369 drawEdge(Block, Successors.front(), false, "T"); 1370 drawEdge(Block, Successors.back(), false, "F"); 1371 } else { 1372 unsigned SuccessorNumber = 0; 1373 for (auto *Successor : Successors) 1374 drawEdge(Block, Successor, false, Twine(SuccessorNumber++)); 1375 } 1376 } 1377 1378 void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) { 1379 // Implement dot-formatted dump by performing plain-text dump into the 1380 // temporary storage followed by some post-processing. 1381 OS << Indent << getUID(BasicBlock) << " [label =\n"; 1382 bumpIndent(1); 1383 std::string Str; 1384 raw_string_ostream SS(Str); 1385 // Use no indentation as we need to wrap the lines into quotes ourselves. 1386 BasicBlock->print(SS, "", SlotTracker); 1387 1388 // We need to process each line of the output separately, so split 1389 // single-string plain-text dump. 1390 SmallVector<StringRef, 0> Lines; 1391 StringRef(Str).rtrim('\n').split(Lines, "\n"); 1392 1393 auto EmitLine = [&](StringRef Line, StringRef Suffix) { 1394 OS << Indent << '"' << DOT::EscapeString(Line.str()) << "\\l\"" << Suffix; 1395 }; 1396 1397 // Don't need the "+" after the last line. 1398 for (auto Line : make_range(Lines.begin(), Lines.end() - 1)) 1399 EmitLine(Line, " +\n"); 1400 EmitLine(Lines.back(), "\n"); 1401 1402 bumpIndent(-1); 1403 OS << Indent << "]\n"; 1404 1405 dumpEdges(BasicBlock); 1406 } 1407 1408 void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) { 1409 OS << Indent << "subgraph " << getUID(Region) << " {\n"; 1410 bumpIndent(1); 1411 OS << Indent << "fontname=Courier\n" 1412 << Indent << "label=\"" 1413 << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ") 1414 << DOT::EscapeString(Region->getName()) << "\"\n"; 1415 // Dump the blocks of the region. 1416 assert(Region->getEntry() && "Region contains no inner blocks."); 1417 for (const VPBlockBase *Block : vp_depth_first_shallow(Region->getEntry())) 1418 dumpBlock(Block); 1419 bumpIndent(-1); 1420 OS << Indent << "}\n"; 1421 dumpEdges(Region); 1422 } 1423 1424 void VPlanIngredient::print(raw_ostream &O) const { 1425 if (auto *Inst = dyn_cast<Instruction>(V)) { 1426 if (!Inst->getType()->isVoidTy()) { 1427 Inst->printAsOperand(O, false); 1428 O << " = "; 1429 } 1430 O << Inst->getOpcodeName() << " "; 1431 unsigned E = Inst->getNumOperands(); 1432 if (E > 0) { 1433 Inst->getOperand(0)->printAsOperand(O, false); 1434 for (unsigned I = 1; I < E; ++I) 1435 Inst->getOperand(I)->printAsOperand(O << ", ", false); 1436 } 1437 } else // !Inst 1438 V->printAsOperand(O, false); 1439 } 1440 1441 #endif 1442 1443 bool VPValue::isDefinedOutsideLoopRegions() const { 1444 return !hasDefiningRecipe() || 1445 !getDefiningRecipe()->getParent()->getEnclosingLoopRegion(); 1446 } 1447 1448 void VPValue::replaceAllUsesWith(VPValue *New) { 1449 replaceUsesWithIf(New, [](VPUser &, unsigned) { return true; }); 1450 } 1451 1452 void VPValue::replaceUsesWithIf( 1453 VPValue *New, 1454 llvm::function_ref<bool(VPUser &U, unsigned Idx)> ShouldReplace) { 1455 // Note that this early exit is required for correctness; the implementation 1456 // below relies on the number of users for this VPValue to decrease, which 1457 // isn't the case if this == New. 1458 if (this == New) 1459 return; 1460 1461 for (unsigned J = 0; J < getNumUsers();) { 1462 VPUser *User = Users[J]; 1463 bool RemovedUser = false; 1464 for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I) { 1465 if (User->getOperand(I) != this || !ShouldReplace(*User, I)) 1466 continue; 1467 1468 RemovedUser = true; 1469 User->setOperand(I, New); 1470 } 1471 // If a user got removed after updating the current user, the next user to 1472 // update will be moved to the current position, so we only need to 1473 // increment the index if the number of users did not change. 1474 if (!RemovedUser) 1475 J++; 1476 } 1477 } 1478 1479 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1480 void VPValue::printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const { 1481 OS << Tracker.getOrCreateName(this); 1482 } 1483 1484 void VPUser::printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const { 1485 interleaveComma(operands(), O, [&O, &SlotTracker](VPValue *Op) { 1486 Op->printAsOperand(O, SlotTracker); 1487 }); 1488 } 1489 #endif 1490 1491 void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region, 1492 Old2NewTy &Old2New, 1493 InterleavedAccessInfo &IAI) { 1494 ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> 1495 RPOT(Region->getEntry()); 1496 for (VPBlockBase *Base : RPOT) { 1497 visitBlock(Base, Old2New, IAI); 1498 } 1499 } 1500 1501 void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New, 1502 InterleavedAccessInfo &IAI) { 1503 if (VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(Block)) { 1504 for (VPRecipeBase &VPI : *VPBB) { 1505 if (isa<VPWidenPHIRecipe>(&VPI)) 1506 continue; 1507 assert(isa<VPInstruction>(&VPI) && "Can only handle VPInstructions"); 1508 auto *VPInst = cast<VPInstruction>(&VPI); 1509 1510 auto *Inst = dyn_cast_or_null<Instruction>(VPInst->getUnderlyingValue()); 1511 if (!Inst) 1512 continue; 1513 auto *IG = IAI.getInterleaveGroup(Inst); 1514 if (!IG) 1515 continue; 1516 1517 auto NewIGIter = Old2New.find(IG); 1518 if (NewIGIter == Old2New.end()) 1519 Old2New[IG] = new InterleaveGroup<VPInstruction>( 1520 IG->getFactor(), IG->isReverse(), IG->getAlign()); 1521 1522 if (Inst == IG->getInsertPos()) 1523 Old2New[IG]->setInsertPos(VPInst); 1524 1525 InterleaveGroupMap[VPInst] = Old2New[IG]; 1526 InterleaveGroupMap[VPInst]->insertMember( 1527 VPInst, IG->getIndex(Inst), 1528 Align(IG->isReverse() ? (-1) * int(IG->getFactor()) 1529 : IG->getFactor())); 1530 } 1531 } else if (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) 1532 visitRegion(Region, Old2New, IAI); 1533 else 1534 llvm_unreachable("Unsupported kind of VPBlock."); 1535 } 1536 1537 VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan, 1538 InterleavedAccessInfo &IAI) { 1539 Old2NewTy Old2New; 1540 visitRegion(Plan.getVectorLoopRegion(), Old2New, IAI); 1541 } 1542 1543 void VPSlotTracker::assignName(const VPValue *V) { 1544 assert(!VPValue2Name.contains(V) && "VPValue already has a name!"); 1545 auto *UV = V->getUnderlyingValue(); 1546 auto *VPI = dyn_cast_or_null<VPInstruction>(V->getDefiningRecipe()); 1547 if (!UV && !(VPI && !VPI->getName().empty())) { 1548 VPValue2Name[V] = (Twine("vp<%") + Twine(NextSlot) + ">").str(); 1549 NextSlot++; 1550 return; 1551 } 1552 1553 // Use the name of the underlying Value, wrapped in "ir<>", and versioned by 1554 // appending ".Number" to the name if there are multiple uses. 1555 std::string Name; 1556 if (UV) { 1557 raw_string_ostream S(Name); 1558 UV->printAsOperand(S, false); 1559 } else 1560 Name = VPI->getName(); 1561 1562 assert(!Name.empty() && "Name cannot be empty."); 1563 StringRef Prefix = UV ? "ir<" : "vp<%"; 1564 std::string BaseName = (Twine(Prefix) + Name + Twine(">")).str(); 1565 1566 // First assign the base name for V. 1567 const auto &[A, _] = VPValue2Name.insert({V, BaseName}); 1568 // Integer or FP constants with different types will result in he same string 1569 // due to stripping types. 1570 if (V->isLiveIn() && isa<ConstantInt, ConstantFP>(UV)) 1571 return; 1572 1573 // If it is already used by C > 0 other VPValues, increase the version counter 1574 // C and use it for V. 1575 const auto &[C, UseInserted] = BaseName2Version.insert({BaseName, 0}); 1576 if (!UseInserted) { 1577 C->second++; 1578 A->second = (BaseName + Twine(".") + Twine(C->second)).str(); 1579 } 1580 } 1581 1582 void VPSlotTracker::assignNames(const VPlan &Plan) { 1583 if (Plan.VF.getNumUsers() > 0) 1584 assignName(&Plan.VF); 1585 if (Plan.VFxUF.getNumUsers() > 0) 1586 assignName(&Plan.VFxUF); 1587 assignName(&Plan.VectorTripCount); 1588 if (Plan.BackedgeTakenCount) 1589 assignName(Plan.BackedgeTakenCount); 1590 for (VPValue *LI : Plan.VPLiveInsToFree) 1591 assignName(LI); 1592 assignNames(Plan.getPreheader()); 1593 1594 ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>> 1595 RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry())); 1596 for (const VPBasicBlock *VPBB : 1597 VPBlockUtils::blocksOnly<const VPBasicBlock>(RPOT)) 1598 assignNames(VPBB); 1599 } 1600 1601 void VPSlotTracker::assignNames(const VPBasicBlock *VPBB) { 1602 for (const VPRecipeBase &Recipe : *VPBB) 1603 for (VPValue *Def : Recipe.definedValues()) 1604 assignName(Def); 1605 } 1606 1607 std::string VPSlotTracker::getOrCreateName(const VPValue *V) const { 1608 std::string Name = VPValue2Name.lookup(V); 1609 if (!Name.empty()) 1610 return Name; 1611 1612 // If no name was assigned, no VPlan was provided when creating the slot 1613 // tracker or it is not reachable from the provided VPlan. This can happen, 1614 // e.g. when trying to print a recipe that has not been inserted into a VPlan 1615 // in a debugger. 1616 // TODO: Update VPSlotTracker constructor to assign names to recipes & 1617 // VPValues not associated with a VPlan, instead of constructing names ad-hoc 1618 // here. 1619 const VPRecipeBase *DefR = V->getDefiningRecipe(); 1620 (void)DefR; 1621 assert((!DefR || !DefR->getParent() || !DefR->getParent()->getPlan()) && 1622 "VPValue defined by a recipe in a VPlan?"); 1623 1624 // Use the underlying value's name, if there is one. 1625 if (auto *UV = V->getUnderlyingValue()) { 1626 std::string Name; 1627 raw_string_ostream S(Name); 1628 UV->printAsOperand(S, false); 1629 return (Twine("ir<") + Name + ">").str(); 1630 } 1631 1632 return "<badref>"; 1633 } 1634 1635 bool LoopVectorizationPlanner::getDecisionAndClampRange( 1636 const std::function<bool(ElementCount)> &Predicate, VFRange &Range) { 1637 assert(!Range.isEmpty() && "Trying to test an empty VF range."); 1638 bool PredicateAtRangeStart = Predicate(Range.Start); 1639 1640 for (ElementCount TmpVF : VFRange(Range.Start * 2, Range.End)) 1641 if (Predicate(TmpVF) != PredicateAtRangeStart) { 1642 Range.End = TmpVF; 1643 break; 1644 } 1645 1646 return PredicateAtRangeStart; 1647 } 1648 1649 /// Build VPlans for the full range of feasible VF's = {\p MinVF, 2 * \p MinVF, 1650 /// 4 * \p MinVF, ..., \p MaxVF} by repeatedly building a VPlan for a sub-range 1651 /// of VF's starting at a given VF and extending it as much as possible. Each 1652 /// vectorization decision can potentially shorten this sub-range during 1653 /// buildVPlan(). 1654 void LoopVectorizationPlanner::buildVPlans(ElementCount MinVF, 1655 ElementCount MaxVF) { 1656 auto MaxVFTimes2 = MaxVF * 2; 1657 for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) { 1658 VFRange SubRange = {VF, MaxVFTimes2}; 1659 auto Plan = buildVPlan(SubRange); 1660 VPlanTransforms::optimize(*Plan); 1661 VPlans.push_back(std::move(Plan)); 1662 VF = SubRange.End; 1663 } 1664 } 1665 1666 VPlan &LoopVectorizationPlanner::getPlanFor(ElementCount VF) const { 1667 assert(count_if(VPlans, 1668 [VF](const VPlanPtr &Plan) { return Plan->hasVF(VF); }) == 1669 1 && 1670 "Multiple VPlans for VF."); 1671 1672 for (const VPlanPtr &Plan : VPlans) { 1673 if (Plan->hasVF(VF)) 1674 return *Plan.get(); 1675 } 1676 llvm_unreachable("No plan found!"); 1677 } 1678 1679 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1680 void LoopVectorizationPlanner::printPlans(raw_ostream &O) { 1681 if (VPlans.empty()) { 1682 O << "LV: No VPlans built.\n"; 1683 return; 1684 } 1685 for (const auto &Plan : VPlans) 1686 if (PrintVPlansInDotFormat) 1687 Plan->printDOT(O); 1688 else 1689 Plan->print(O); 1690 } 1691 #endif 1692 1693 TargetTransformInfo::OperandValueInfo 1694 VPCostContext::getOperandInfo(VPValue *V) const { 1695 if (!V->isLiveIn()) 1696 return {}; 1697 1698 return TTI::getOperandInfo(V->getLiveInIRValue()); 1699 } 1700