1 //===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This file contains the declarations of the Vectorization Plan base classes: 11 /// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual 12 /// VPBlockBase, together implementing a Hierarchical CFG; 13 /// 2. Specializations of GraphTraits that allow VPBlockBase graphs to be 14 /// treated as proper graphs for generic algorithms; 15 /// 3. Pure virtual VPRecipeBase serving as the base class for recipes contained 16 /// within VPBasicBlocks; 17 /// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned 18 /// instruction; 19 /// 5. The VPlan class holding a candidate for vectorization; 20 /// 6. The VPlanPrinter class providing a way to print a plan in dot format; 21 /// These are documented in docs/VectorizationPlan.rst. 22 // 23 //===----------------------------------------------------------------------===// 24 25 #ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H 26 #define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H 27 28 #include "VPlanLoopInfo.h" 29 #include "VPlanValue.h" 30 #include "llvm/ADT/DenseMap.h" 31 #include "llvm/ADT/DepthFirstIterator.h" 32 #include "llvm/ADT/GraphTraits.h" 33 #include "llvm/ADT/Optional.h" 34 #include "llvm/ADT/SmallBitVector.h" 35 #include "llvm/ADT/SmallPtrSet.h" 36 #include "llvm/ADT/SmallSet.h" 37 #include "llvm/ADT/SmallVector.h" 38 #include "llvm/ADT/Twine.h" 39 #include "llvm/ADT/ilist.h" 40 #include "llvm/ADT/ilist_node.h" 41 #include "llvm/Analysis/VectorUtils.h" 42 #include "llvm/IR/DebugLoc.h" 43 #include "llvm/IR/IRBuilder.h" 44 #include "llvm/Support/InstructionCost.h" 45 #include <algorithm> 46 #include <cassert> 47 #include <cstddef> 48 #include <map> 49 #include <string> 50 51 namespace llvm { 52 53 class BasicBlock; 54 class DominatorTree; 55 class InductionDescriptor; 56 class InnerLoopVectorizer; 57 class LoopInfo; 58 class raw_ostream; 59 class RecurrenceDescriptor; 60 class Value; 61 class VPBasicBlock; 62 class VPRegionBlock; 63 class VPlan; 64 class VPReplicateRecipe; 65 class VPlanSlp; 66 67 /// Returns a calculation for the total number of elements for a given \p VF. 68 /// For fixed width vectors this value is a constant, whereas for scalable 69 /// vectors it is an expression determined at runtime. 70 Value *getRuntimeVF(IRBuilder<> &B, Type *Ty, ElementCount VF); 71 72 /// A range of powers-of-2 vectorization factors with fixed start and 73 /// adjustable end. The range includes start and excludes end, e.g.,: 74 /// [1, 9) = {1, 2, 4, 8} 75 struct VFRange { 76 // A power of 2. 77 const ElementCount Start; 78 79 // Need not be a power of 2. If End <= Start range is empty. 80 ElementCount End; 81 82 bool isEmpty() const { 83 return End.getKnownMinValue() <= Start.getKnownMinValue(); 84 } 85 86 VFRange(const ElementCount &Start, const ElementCount &End) 87 : Start(Start), End(End) { 88 assert(Start.isScalable() == End.isScalable() && 89 "Both Start and End should have the same scalable flag"); 90 assert(isPowerOf2_32(Start.getKnownMinValue()) && 91 "Expected Start to be a power of 2"); 92 } 93 }; 94 95 using VPlanPtr = std::unique_ptr<VPlan>; 96 97 /// In what follows, the term "input IR" refers to code that is fed into the 98 /// vectorizer whereas the term "output IR" refers to code that is generated by 99 /// the vectorizer. 100 101 /// VPLane provides a way to access lanes in both fixed width and scalable 102 /// vectors, where for the latter the lane index sometimes needs calculating 103 /// as a runtime expression. 104 class VPLane { 105 public: 106 /// Kind describes how to interpret Lane. 107 enum class Kind : uint8_t { 108 /// For First, Lane is the index into the first N elements of a 109 /// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>. 110 First, 111 /// For ScalableLast, Lane is the offset from the start of the last 112 /// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For 113 /// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of 114 /// 1 corresponds to `((vscale - 1) * N) + 1`, etc. 115 ScalableLast 116 }; 117 118 private: 119 /// in [0..VF) 120 unsigned Lane; 121 122 /// Indicates how the Lane should be interpreted, as described above. 123 Kind LaneKind; 124 125 public: 126 VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {} 127 128 static VPLane getFirstLane() { return VPLane(0, VPLane::Kind::First); } 129 130 static VPLane getLastLaneForVF(const ElementCount &VF) { 131 unsigned LaneOffset = VF.getKnownMinValue() - 1; 132 Kind LaneKind; 133 if (VF.isScalable()) 134 // In this case 'LaneOffset' refers to the offset from the start of the 135 // last subvector with VF.getKnownMinValue() elements. 136 LaneKind = VPLane::Kind::ScalableLast; 137 else 138 LaneKind = VPLane::Kind::First; 139 return VPLane(LaneOffset, LaneKind); 140 } 141 142 /// Returns a compile-time known value for the lane index and asserts if the 143 /// lane can only be calculated at runtime. 144 unsigned getKnownLane() const { 145 assert(LaneKind == Kind::First); 146 return Lane; 147 } 148 149 /// Returns an expression describing the lane index that can be used at 150 /// runtime. 151 Value *getAsRuntimeExpr(IRBuilder<> &Builder, const ElementCount &VF) const; 152 153 /// Returns the Kind of lane offset. 154 Kind getKind() const { return LaneKind; } 155 156 /// Returns true if this is the first lane of the whole vector. 157 bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; } 158 159 /// Maps the lane to a cache index based on \p VF. 160 unsigned mapToCacheIndex(const ElementCount &VF) const { 161 switch (LaneKind) { 162 case VPLane::Kind::ScalableLast: 163 assert(VF.isScalable() && Lane < VF.getKnownMinValue()); 164 return VF.getKnownMinValue() + Lane; 165 default: 166 assert(Lane < VF.getKnownMinValue()); 167 return Lane; 168 } 169 } 170 171 /// Returns the maxmimum number of lanes that we are able to consider 172 /// caching for \p VF. 173 static unsigned getNumCachedLanes(const ElementCount &VF) { 174 return VF.getKnownMinValue() * (VF.isScalable() ? 2 : 1); 175 } 176 }; 177 178 /// VPIteration represents a single point in the iteration space of the output 179 /// (vectorized and/or unrolled) IR loop. 180 struct VPIteration { 181 /// in [0..UF) 182 unsigned Part; 183 184 VPLane Lane; 185 186 VPIteration(unsigned Part, unsigned Lane, 187 VPLane::Kind Kind = VPLane::Kind::First) 188 : Part(Part), Lane(Lane, Kind) {} 189 190 VPIteration(unsigned Part, const VPLane &Lane) : Part(Part), Lane(Lane) {} 191 192 bool isFirstIteration() const { return Part == 0 && Lane.isFirstLane(); } 193 }; 194 195 /// VPTransformState holds information passed down when "executing" a VPlan, 196 /// needed for generating the output IR. 197 struct VPTransformState { 198 VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI, 199 DominatorTree *DT, IRBuilder<> &Builder, 200 InnerLoopVectorizer *ILV, VPlan *Plan) 201 : VF(VF), UF(UF), Instance(), LI(LI), DT(DT), Builder(Builder), ILV(ILV), 202 Plan(Plan) {} 203 204 /// The chosen Vectorization and Unroll Factors of the loop being vectorized. 205 ElementCount VF; 206 unsigned UF; 207 208 /// Hold the indices to generate specific scalar instructions. Null indicates 209 /// that all instances are to be generated, using either scalar or vector 210 /// instructions. 211 Optional<VPIteration> Instance; 212 213 struct DataState { 214 /// A type for vectorized values in the new loop. Each value from the 215 /// original loop, when vectorized, is represented by UF vector values in 216 /// the new unrolled loop, where UF is the unroll factor. 217 typedef SmallVector<Value *, 2> PerPartValuesTy; 218 219 DenseMap<VPValue *, PerPartValuesTy> PerPartOutput; 220 221 using ScalarsPerPartValuesTy = SmallVector<SmallVector<Value *, 4>, 2>; 222 DenseMap<VPValue *, ScalarsPerPartValuesTy> PerPartScalars; 223 } Data; 224 225 /// Get the generated Value for a given VPValue and a given Part. Note that 226 /// as some Defs are still created by ILV and managed in its ValueMap, this 227 /// method will delegate the call to ILV in such cases in order to provide 228 /// callers a consistent API. 229 /// \see set. 230 Value *get(VPValue *Def, unsigned Part); 231 232 /// Get the generated Value for a given VPValue and given Part and Lane. 233 Value *get(VPValue *Def, const VPIteration &Instance); 234 235 bool hasVectorValue(VPValue *Def, unsigned Part) { 236 auto I = Data.PerPartOutput.find(Def); 237 return I != Data.PerPartOutput.end() && Part < I->second.size() && 238 I->second[Part]; 239 } 240 241 bool hasAnyVectorValue(VPValue *Def) const { 242 return Data.PerPartOutput.find(Def) != Data.PerPartOutput.end(); 243 } 244 245 bool hasScalarValue(VPValue *Def, VPIteration Instance) { 246 auto I = Data.PerPartScalars.find(Def); 247 if (I == Data.PerPartScalars.end()) 248 return false; 249 unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF); 250 return Instance.Part < I->second.size() && 251 CacheIdx < I->second[Instance.Part].size() && 252 I->second[Instance.Part][CacheIdx]; 253 } 254 255 /// Set the generated Value for a given VPValue and a given Part. 256 void set(VPValue *Def, Value *V, unsigned Part) { 257 if (!Data.PerPartOutput.count(Def)) { 258 DataState::PerPartValuesTy Entry(UF); 259 Data.PerPartOutput[Def] = Entry; 260 } 261 Data.PerPartOutput[Def][Part] = V; 262 } 263 /// Reset an existing vector value for \p Def and a given \p Part. 264 void reset(VPValue *Def, Value *V, unsigned Part) { 265 auto Iter = Data.PerPartOutput.find(Def); 266 assert(Iter != Data.PerPartOutput.end() && 267 "need to overwrite existing value"); 268 Iter->second[Part] = V; 269 } 270 271 /// Set the generated scalar \p V for \p Def and the given \p Instance. 272 void set(VPValue *Def, Value *V, const VPIteration &Instance) { 273 auto Iter = Data.PerPartScalars.insert({Def, {}}); 274 auto &PerPartVec = Iter.first->second; 275 while (PerPartVec.size() <= Instance.Part) 276 PerPartVec.emplace_back(); 277 auto &Scalars = PerPartVec[Instance.Part]; 278 unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF); 279 while (Scalars.size() <= CacheIdx) 280 Scalars.push_back(nullptr); 281 assert(!Scalars[CacheIdx] && "should overwrite existing value"); 282 Scalars[CacheIdx] = V; 283 } 284 285 /// Reset an existing scalar value for \p Def and a given \p Instance. 286 void reset(VPValue *Def, Value *V, const VPIteration &Instance) { 287 auto Iter = Data.PerPartScalars.find(Def); 288 assert(Iter != Data.PerPartScalars.end() && 289 "need to overwrite existing value"); 290 assert(Instance.Part < Iter->second.size() && 291 "need to overwrite existing value"); 292 unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF); 293 assert(CacheIdx < Iter->second[Instance.Part].size() && 294 "need to overwrite existing value"); 295 Iter->second[Instance.Part][CacheIdx] = V; 296 } 297 298 /// Hold state information used when constructing the CFG of the output IR, 299 /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks. 300 struct CFGState { 301 /// The previous VPBasicBlock visited. Initially set to null. 302 VPBasicBlock *PrevVPBB = nullptr; 303 304 /// The previous IR BasicBlock created or used. Initially set to the new 305 /// header BasicBlock. 306 BasicBlock *PrevBB = nullptr; 307 308 /// The last IR BasicBlock in the output IR. Set to the new latch 309 /// BasicBlock, used for placing the newly created BasicBlocks. 310 BasicBlock *LastBB = nullptr; 311 312 /// The IR BasicBlock that is the preheader of the vector loop in the output 313 /// IR. 314 /// FIXME: The vector preheader should also be modeled in VPlan, so any code 315 /// that needs to be added to the preheader gets directly generated by 316 /// VPlan. There should be no need to manage a pointer to the IR BasicBlock. 317 BasicBlock *VectorPreHeader = nullptr; 318 319 /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case 320 /// of replication, maps the BasicBlock of the last replica created. 321 SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB; 322 323 /// Vector of VPBasicBlocks whose terminator instruction needs to be fixed 324 /// up at the end of vector code generation. 325 SmallVector<VPBasicBlock *, 8> VPBBsToFix; 326 327 CFGState() = default; 328 } CFG; 329 330 /// Hold a pointer to LoopInfo to register new basic blocks in the loop. 331 LoopInfo *LI; 332 333 /// Hold a pointer to Dominator Tree to register new basic blocks in the loop. 334 DominatorTree *DT; 335 336 /// Hold a reference to the IRBuilder used to generate output IR code. 337 IRBuilder<> &Builder; 338 339 VPValue2ValueTy VPValue2Value; 340 341 /// Hold the canonical scalar IV of the vector loop (start=0, step=VF*UF). 342 Value *CanonicalIV = nullptr; 343 344 /// Hold the trip count of the scalar loop. 345 Value *TripCount = nullptr; 346 347 /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods. 348 InnerLoopVectorizer *ILV; 349 350 /// Pointer to the VPlan code is generated for. 351 VPlan *Plan; 352 353 /// Holds recipes that may generate a poison value that is used after 354 /// vectorization, even when their operands are not poison. 355 SmallPtrSet<VPRecipeBase *, 16> MayGeneratePoisonRecipes; 356 }; 357 358 /// VPUsers instance used by VPBlockBase to manage CondBit and the block 359 /// predicate. Currently VPBlockUsers are used in VPBlockBase for historical 360 /// reasons, but in the future the only VPUsers should either be recipes or 361 /// live-outs.VPBlockBase uses. 362 struct VPBlockUser : public VPUser { 363 VPBlockUser() : VPUser({}, VPUserID::Block) {} 364 365 VPValue *getSingleOperandOrNull() { 366 if (getNumOperands() == 1) 367 return getOperand(0); 368 369 return nullptr; 370 } 371 const VPValue *getSingleOperandOrNull() const { 372 if (getNumOperands() == 1) 373 return getOperand(0); 374 375 return nullptr; 376 } 377 378 void resetSingleOpUser(VPValue *NewVal) { 379 assert(getNumOperands() <= 1 && "Didn't expect more than one operand!"); 380 if (!NewVal) { 381 if (getNumOperands() == 1) 382 removeLastOperand(); 383 return; 384 } 385 386 if (getNumOperands() == 1) 387 setOperand(0, NewVal); 388 else 389 addOperand(NewVal); 390 } 391 }; 392 393 /// VPBlockBase is the building block of the Hierarchical Control-Flow Graph. 394 /// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock. 395 class VPBlockBase { 396 friend class VPBlockUtils; 397 398 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast). 399 400 /// An optional name for the block. 401 std::string Name; 402 403 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if 404 /// it is a topmost VPBlockBase. 405 VPRegionBlock *Parent = nullptr; 406 407 /// List of predecessor blocks. 408 SmallVector<VPBlockBase *, 1> Predecessors; 409 410 /// List of successor blocks. 411 SmallVector<VPBlockBase *, 1> Successors; 412 413 /// Successor selector managed by a VPUser. For blocks with zero or one 414 /// successors, there is no operand. Otherwise there is exactly one operand 415 /// which is the branch condition. 416 VPBlockUser CondBitUser; 417 418 /// If the block is predicated, its predicate is stored as an operand of this 419 /// VPUser to maintain the def-use relations. Otherwise there is no operand 420 /// here. 421 VPBlockUser PredicateUser; 422 423 /// VPlan containing the block. Can only be set on the entry block of the 424 /// plan. 425 VPlan *Plan = nullptr; 426 427 /// Add \p Successor as the last successor to this block. 428 void appendSuccessor(VPBlockBase *Successor) { 429 assert(Successor && "Cannot add nullptr successor!"); 430 Successors.push_back(Successor); 431 } 432 433 /// Add \p Predecessor as the last predecessor to this block. 434 void appendPredecessor(VPBlockBase *Predecessor) { 435 assert(Predecessor && "Cannot add nullptr predecessor!"); 436 Predecessors.push_back(Predecessor); 437 } 438 439 /// Remove \p Predecessor from the predecessors of this block. 440 void removePredecessor(VPBlockBase *Predecessor) { 441 auto Pos = find(Predecessors, Predecessor); 442 assert(Pos && "Predecessor does not exist"); 443 Predecessors.erase(Pos); 444 } 445 446 /// Remove \p Successor from the successors of this block. 447 void removeSuccessor(VPBlockBase *Successor) { 448 auto Pos = find(Successors, Successor); 449 assert(Pos && "Successor does not exist"); 450 Successors.erase(Pos); 451 } 452 453 protected: 454 VPBlockBase(const unsigned char SC, const std::string &N) 455 : SubclassID(SC), Name(N) {} 456 457 public: 458 /// An enumeration for keeping track of the concrete subclass of VPBlockBase 459 /// that are actually instantiated. Values of this enumeration are kept in the 460 /// SubclassID field of the VPBlockBase objects. They are used for concrete 461 /// type identification. 462 using VPBlockTy = enum { VPBasicBlockSC, VPRegionBlockSC }; 463 464 using VPBlocksTy = SmallVectorImpl<VPBlockBase *>; 465 466 virtual ~VPBlockBase() = default; 467 468 const std::string &getName() const { return Name; } 469 470 void setName(const Twine &newName) { Name = newName.str(); } 471 472 /// \return an ID for the concrete type of this object. 473 /// This is used to implement the classof checks. This should not be used 474 /// for any other purpose, as the values may change as LLVM evolves. 475 unsigned getVPBlockID() const { return SubclassID; } 476 477 VPRegionBlock *getParent() { return Parent; } 478 const VPRegionBlock *getParent() const { return Parent; } 479 480 /// \return A pointer to the plan containing the current block. 481 VPlan *getPlan(); 482 const VPlan *getPlan() const; 483 484 /// Sets the pointer of the plan containing the block. The block must be the 485 /// entry block into the VPlan. 486 void setPlan(VPlan *ParentPlan); 487 488 void setParent(VPRegionBlock *P) { Parent = P; } 489 490 /// \return the VPBasicBlock that is the entry of this VPBlockBase, 491 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this 492 /// VPBlockBase is a VPBasicBlock, it is returned. 493 const VPBasicBlock *getEntryBasicBlock() const; 494 VPBasicBlock *getEntryBasicBlock(); 495 496 /// \return the VPBasicBlock that is the exit of this VPBlockBase, 497 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this 498 /// VPBlockBase is a VPBasicBlock, it is returned. 499 const VPBasicBlock *getExitBasicBlock() const; 500 VPBasicBlock *getExitBasicBlock(); 501 502 const VPBlocksTy &getSuccessors() const { return Successors; } 503 VPBlocksTy &getSuccessors() { return Successors; } 504 505 iterator_range<VPBlockBase **> successors() { return Successors; } 506 507 const VPBlocksTy &getPredecessors() const { return Predecessors; } 508 VPBlocksTy &getPredecessors() { return Predecessors; } 509 510 /// \return the successor of this VPBlockBase if it has a single successor. 511 /// Otherwise return a null pointer. 512 VPBlockBase *getSingleSuccessor() const { 513 return (Successors.size() == 1 ? *Successors.begin() : nullptr); 514 } 515 516 /// \return the predecessor of this VPBlockBase if it has a single 517 /// predecessor. Otherwise return a null pointer. 518 VPBlockBase *getSinglePredecessor() const { 519 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr); 520 } 521 522 size_t getNumSuccessors() const { return Successors.size(); } 523 size_t getNumPredecessors() const { return Predecessors.size(); } 524 525 /// An Enclosing Block of a block B is any block containing B, including B 526 /// itself. \return the closest enclosing block starting from "this", which 527 /// has successors. \return the root enclosing block if all enclosing blocks 528 /// have no successors. 529 VPBlockBase *getEnclosingBlockWithSuccessors(); 530 531 /// \return the closest enclosing block starting from "this", which has 532 /// predecessors. \return the root enclosing block if all enclosing blocks 533 /// have no predecessors. 534 VPBlockBase *getEnclosingBlockWithPredecessors(); 535 536 /// \return the successors either attached directly to this VPBlockBase or, if 537 /// this VPBlockBase is the exit block of a VPRegionBlock and has no 538 /// successors of its own, search recursively for the first enclosing 539 /// VPRegionBlock that has successors and return them. If no such 540 /// VPRegionBlock exists, return the (empty) successors of the topmost 541 /// VPBlockBase reached. 542 const VPBlocksTy &getHierarchicalSuccessors() { 543 return getEnclosingBlockWithSuccessors()->getSuccessors(); 544 } 545 546 /// \return the hierarchical successor of this VPBlockBase if it has a single 547 /// hierarchical successor. Otherwise return a null pointer. 548 VPBlockBase *getSingleHierarchicalSuccessor() { 549 return getEnclosingBlockWithSuccessors()->getSingleSuccessor(); 550 } 551 552 /// \return the predecessors either attached directly to this VPBlockBase or, 553 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no 554 /// predecessors of its own, search recursively for the first enclosing 555 /// VPRegionBlock that has predecessors and return them. If no such 556 /// VPRegionBlock exists, return the (empty) predecessors of the topmost 557 /// VPBlockBase reached. 558 const VPBlocksTy &getHierarchicalPredecessors() { 559 return getEnclosingBlockWithPredecessors()->getPredecessors(); 560 } 561 562 /// \return the hierarchical predecessor of this VPBlockBase if it has a 563 /// single hierarchical predecessor. Otherwise return a null pointer. 564 VPBlockBase *getSingleHierarchicalPredecessor() { 565 return getEnclosingBlockWithPredecessors()->getSinglePredecessor(); 566 } 567 568 /// \return the condition bit selecting the successor. 569 VPValue *getCondBit(); 570 /// \return the condition bit selecting the successor. 571 const VPValue *getCondBit() const; 572 /// Set the condition bit selecting the successor. 573 void setCondBit(VPValue *CV); 574 575 /// \return the block's predicate. 576 VPValue *getPredicate(); 577 /// \return the block's predicate. 578 const VPValue *getPredicate() const; 579 /// Set the block's predicate. 580 void setPredicate(VPValue *Pred); 581 582 /// Set a given VPBlockBase \p Successor as the single successor of this 583 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor. 584 /// This VPBlockBase must have no successors. 585 void setOneSuccessor(VPBlockBase *Successor) { 586 assert(Successors.empty() && "Setting one successor when others exist."); 587 appendSuccessor(Successor); 588 } 589 590 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two 591 /// successors of this VPBlockBase. \p Condition is set as the successor 592 /// selector. This VPBlockBase is not added as predecessor of \p IfTrue or \p 593 /// IfFalse. This VPBlockBase must have no successors. 594 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse, 595 VPValue *Condition) { 596 assert(Successors.empty() && "Setting two successors when others exist."); 597 assert(Condition && "Setting two successors without condition!"); 598 setCondBit(Condition); 599 appendSuccessor(IfTrue); 600 appendSuccessor(IfFalse); 601 } 602 603 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase. 604 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added 605 /// as successor of any VPBasicBlock in \p NewPreds. 606 void setPredecessors(ArrayRef<VPBlockBase *> NewPreds) { 607 assert(Predecessors.empty() && "Block predecessors already set."); 608 for (auto *Pred : NewPreds) 609 appendPredecessor(Pred); 610 } 611 612 /// Remove all the predecessor of this block. 613 void clearPredecessors() { Predecessors.clear(); } 614 615 /// Remove all the successors of this block and set to null its condition bit 616 void clearSuccessors() { 617 Successors.clear(); 618 setCondBit(nullptr); 619 } 620 621 /// The method which generates the output IR that correspond to this 622 /// VPBlockBase, thereby "executing" the VPlan. 623 virtual void execute(struct VPTransformState *State) = 0; 624 625 /// Delete all blocks reachable from a given VPBlockBase, inclusive. 626 static void deleteCFG(VPBlockBase *Entry); 627 628 /// Return true if it is legal to hoist instructions into this block. 629 bool isLegalToHoistInto() { 630 // There are currently no constraints that prevent an instruction to be 631 // hoisted into a VPBlockBase. 632 return true; 633 } 634 635 /// Replace all operands of VPUsers in the block with \p NewValue and also 636 /// replaces all uses of VPValues defined in the block with NewValue. 637 virtual void dropAllReferences(VPValue *NewValue) = 0; 638 639 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 640 void printAsOperand(raw_ostream &OS, bool PrintType) const { 641 OS << getName(); 642 } 643 644 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines 645 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using 646 /// consequtive numbers. 647 /// 648 /// Note that the numbering is applied to the whole VPlan, so printing 649 /// individual blocks is consistent with the whole VPlan printing. 650 virtual void print(raw_ostream &O, const Twine &Indent, 651 VPSlotTracker &SlotTracker) const = 0; 652 653 /// Print plain-text dump of this VPlan to \p O. 654 void print(raw_ostream &O) const { 655 VPSlotTracker SlotTracker(getPlan()); 656 print(O, "", SlotTracker); 657 } 658 659 /// Print the successors of this block to \p O, prefixing all lines with \p 660 /// Indent. 661 void printSuccessors(raw_ostream &O, const Twine &Indent) const; 662 663 /// Dump this VPBlockBase to dbgs(). 664 LLVM_DUMP_METHOD void dump() const { print(dbgs()); } 665 #endif 666 }; 667 668 /// VPRecipeBase is a base class modeling a sequence of one or more output IR 669 /// instructions. VPRecipeBase owns the the VPValues it defines through VPDef 670 /// and is responsible for deleting its defined values. Single-value 671 /// VPRecipeBases that also inherit from VPValue must make sure to inherit from 672 /// VPRecipeBase before VPValue. 673 class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>, 674 public VPDef, 675 public VPUser { 676 friend VPBasicBlock; 677 friend class VPBlockUtils; 678 679 /// Each VPRecipe belongs to a single VPBasicBlock. 680 VPBasicBlock *Parent = nullptr; 681 682 public: 683 VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands) 684 : VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe) {} 685 686 template <typename IterT> 687 VPRecipeBase(const unsigned char SC, iterator_range<IterT> Operands) 688 : VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe) {} 689 virtual ~VPRecipeBase() = default; 690 691 /// \return the VPBasicBlock which this VPRecipe belongs to. 692 VPBasicBlock *getParent() { return Parent; } 693 const VPBasicBlock *getParent() const { return Parent; } 694 695 /// The method which generates the output IR instructions that correspond to 696 /// this VPRecipe, thereby "executing" the VPlan. 697 virtual void execute(struct VPTransformState &State) = 0; 698 699 /// Insert an unlinked recipe into a basic block immediately before 700 /// the specified recipe. 701 void insertBefore(VPRecipeBase *InsertPos); 702 703 /// Insert an unlinked Recipe into a basic block immediately after 704 /// the specified Recipe. 705 void insertAfter(VPRecipeBase *InsertPos); 706 707 /// Unlink this recipe from its current VPBasicBlock and insert it into 708 /// the VPBasicBlock that MovePos lives in, right after MovePos. 709 void moveAfter(VPRecipeBase *MovePos); 710 711 /// Unlink this recipe and insert into BB before I. 712 /// 713 /// \pre I is a valid iterator into BB. 714 void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I); 715 716 /// This method unlinks 'this' from the containing basic block, but does not 717 /// delete it. 718 void removeFromParent(); 719 720 /// This method unlinks 'this' from the containing basic block and deletes it. 721 /// 722 /// \returns an iterator pointing to the element after the erased one 723 iplist<VPRecipeBase>::iterator eraseFromParent(); 724 725 /// Returns the underlying instruction, if the recipe is a VPValue or nullptr 726 /// otherwise. 727 Instruction *getUnderlyingInstr() { 728 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue()); 729 } 730 const Instruction *getUnderlyingInstr() const { 731 return cast<Instruction>(getVPSingleValue()->getUnderlyingValue()); 732 } 733 734 /// Method to support type inquiry through isa, cast, and dyn_cast. 735 static inline bool classof(const VPDef *D) { 736 // All VPDefs are also VPRecipeBases. 737 return true; 738 } 739 740 static inline bool classof(const VPUser *U) { 741 return U->getVPUserID() == VPUser::VPUserID::Recipe; 742 } 743 744 /// Returns true if the recipe may have side-effects. 745 bool mayHaveSideEffects() const; 746 747 /// Returns true for PHI-like recipes. 748 bool isPhi() const { 749 return getVPDefID() >= VPFirstPHISC && getVPDefID() <= VPLastPHISC; 750 } 751 752 /// Returns true if the recipe may read from memory. 753 bool mayReadFromMemory() const; 754 755 /// Returns true if the recipe may write to memory. 756 bool mayWriteToMemory() const; 757 758 /// Returns true if the recipe may read from or write to memory. 759 bool mayReadOrWriteMemory() const { 760 return mayReadFromMemory() || mayWriteToMemory(); 761 } 762 }; 763 764 inline bool VPUser::classof(const VPDef *Def) { 765 return Def->getVPDefID() == VPRecipeBase::VPInstructionSC || 766 Def->getVPDefID() == VPRecipeBase::VPWidenSC || 767 Def->getVPDefID() == VPRecipeBase::VPWidenCallSC || 768 Def->getVPDefID() == VPRecipeBase::VPWidenSelectSC || 769 Def->getVPDefID() == VPRecipeBase::VPWidenGEPSC || 770 Def->getVPDefID() == VPRecipeBase::VPBlendSC || 771 Def->getVPDefID() == VPRecipeBase::VPInterleaveSC || 772 Def->getVPDefID() == VPRecipeBase::VPReplicateSC || 773 Def->getVPDefID() == VPRecipeBase::VPReductionSC || 774 Def->getVPDefID() == VPRecipeBase::VPBranchOnMaskSC || 775 Def->getVPDefID() == VPRecipeBase::VPWidenMemoryInstructionSC; 776 } 777 778 /// This is a concrete Recipe that models a single VPlan-level instruction. 779 /// While as any Recipe it may generate a sequence of IR instructions when 780 /// executed, these instructions would always form a single-def expression as 781 /// the VPInstruction is also a single def-use vertex. 782 class VPInstruction : public VPRecipeBase, public VPValue { 783 friend class VPlanSlp; 784 785 public: 786 /// VPlan opcodes, extending LLVM IR with idiomatics instructions. 787 enum { 788 FirstOrderRecurrenceSplice = 789 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous 790 // values of a first-order recurrence. 791 Not, 792 ICmpULE, 793 SLPLoad, 794 SLPStore, 795 ActiveLaneMask, 796 }; 797 798 private: 799 typedef unsigned char OpcodeTy; 800 OpcodeTy Opcode; 801 FastMathFlags FMF; 802 DebugLoc DL; 803 804 /// Utility method serving execute(): generates a single instance of the 805 /// modeled instruction. 806 void generateInstruction(VPTransformState &State, unsigned Part); 807 808 protected: 809 void setUnderlyingInstr(Instruction *I) { setUnderlyingValue(I); } 810 811 public: 812 VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, DebugLoc DL) 813 : VPRecipeBase(VPRecipeBase::VPInstructionSC, Operands), 814 VPValue(VPValue::VPVInstructionSC, nullptr, this), Opcode(Opcode), 815 DL(DL) {} 816 817 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands, 818 DebugLoc DL = {}) 819 : VPInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL) {} 820 821 /// Method to support type inquiry through isa, cast, and dyn_cast. 822 static inline bool classof(const VPValue *V) { 823 return V->getVPValueID() == VPValue::VPVInstructionSC; 824 } 825 826 VPInstruction *clone() const { 827 SmallVector<VPValue *, 2> Operands(operands()); 828 return new VPInstruction(Opcode, Operands, DL); 829 } 830 831 /// Method to support type inquiry through isa, cast, and dyn_cast. 832 static inline bool classof(const VPDef *R) { 833 return R->getVPDefID() == VPRecipeBase::VPInstructionSC; 834 } 835 836 unsigned getOpcode() const { return Opcode; } 837 838 /// Generate the instruction. 839 /// TODO: We currently execute only per-part unless a specific instance is 840 /// provided. 841 void execute(VPTransformState &State) override; 842 843 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 844 /// Print the VPInstruction to \p O. 845 void print(raw_ostream &O, const Twine &Indent, 846 VPSlotTracker &SlotTracker) const override; 847 848 /// Print the VPInstruction to dbgs() (for debugging). 849 LLVM_DUMP_METHOD void dump() const; 850 #endif 851 852 /// Return true if this instruction may modify memory. 853 bool mayWriteToMemory() const { 854 // TODO: we can use attributes of the called function to rule out memory 855 // modifications. 856 return Opcode == Instruction::Store || Opcode == Instruction::Call || 857 Opcode == Instruction::Invoke || Opcode == SLPStore; 858 } 859 860 bool hasResult() const { 861 // CallInst may or may not have a result, depending on the called function. 862 // Conservatively return calls have results for now. 863 switch (getOpcode()) { 864 case Instruction::Ret: 865 case Instruction::Br: 866 case Instruction::Store: 867 case Instruction::Switch: 868 case Instruction::IndirectBr: 869 case Instruction::Resume: 870 case Instruction::CatchRet: 871 case Instruction::Unreachable: 872 case Instruction::Fence: 873 case Instruction::AtomicRMW: 874 return false; 875 default: 876 return true; 877 } 878 } 879 880 /// Set the fast-math flags. 881 void setFastMathFlags(FastMathFlags FMFNew); 882 }; 883 884 /// VPWidenRecipe is a recipe for producing a copy of vector type its 885 /// ingredient. This recipe covers most of the traditional vectorization cases 886 /// where each ingredient transforms into a vectorized version of itself. 887 class VPWidenRecipe : public VPRecipeBase, public VPValue { 888 public: 889 template <typename IterT> 890 VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands) 891 : VPRecipeBase(VPRecipeBase::VPWidenSC, Operands), 892 VPValue(VPValue::VPVWidenSC, &I, this) {} 893 894 ~VPWidenRecipe() override = default; 895 896 /// Method to support type inquiry through isa, cast, and dyn_cast. 897 static inline bool classof(const VPDef *D) { 898 return D->getVPDefID() == VPRecipeBase::VPWidenSC; 899 } 900 static inline bool classof(const VPValue *V) { 901 return V->getVPValueID() == VPValue::VPVWidenSC; 902 } 903 904 /// Produce widened copies of all Ingredients. 905 void execute(VPTransformState &State) override; 906 907 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 908 /// Print the recipe. 909 void print(raw_ostream &O, const Twine &Indent, 910 VPSlotTracker &SlotTracker) const override; 911 #endif 912 }; 913 914 /// A recipe for widening Call instructions. 915 class VPWidenCallRecipe : public VPRecipeBase, public VPValue { 916 917 public: 918 template <typename IterT> 919 VPWidenCallRecipe(CallInst &I, iterator_range<IterT> CallArguments) 920 : VPRecipeBase(VPRecipeBase::VPWidenCallSC, CallArguments), 921 VPValue(VPValue::VPVWidenCallSC, &I, this) {} 922 923 ~VPWidenCallRecipe() override = default; 924 925 /// Method to support type inquiry through isa, cast, and dyn_cast. 926 static inline bool classof(const VPDef *D) { 927 return D->getVPDefID() == VPRecipeBase::VPWidenCallSC; 928 } 929 930 /// Produce a widened version of the call instruction. 931 void execute(VPTransformState &State) override; 932 933 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 934 /// Print the recipe. 935 void print(raw_ostream &O, const Twine &Indent, 936 VPSlotTracker &SlotTracker) const override; 937 #endif 938 }; 939 940 /// A recipe for widening select instructions. 941 class VPWidenSelectRecipe : public VPRecipeBase, public VPValue { 942 943 /// Is the condition of the select loop invariant? 944 bool InvariantCond; 945 946 public: 947 template <typename IterT> 948 VPWidenSelectRecipe(SelectInst &I, iterator_range<IterT> Operands, 949 bool InvariantCond) 950 : VPRecipeBase(VPRecipeBase::VPWidenSelectSC, Operands), 951 VPValue(VPValue::VPVWidenSelectSC, &I, this), 952 InvariantCond(InvariantCond) {} 953 954 ~VPWidenSelectRecipe() override = default; 955 956 /// Method to support type inquiry through isa, cast, and dyn_cast. 957 static inline bool classof(const VPDef *D) { 958 return D->getVPDefID() == VPRecipeBase::VPWidenSelectSC; 959 } 960 961 /// Produce a widened version of the select instruction. 962 void execute(VPTransformState &State) override; 963 964 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 965 /// Print the recipe. 966 void print(raw_ostream &O, const Twine &Indent, 967 VPSlotTracker &SlotTracker) const override; 968 #endif 969 }; 970 971 /// A recipe for handling GEP instructions. 972 class VPWidenGEPRecipe : public VPRecipeBase, public VPValue { 973 bool IsPtrLoopInvariant; 974 SmallBitVector IsIndexLoopInvariant; 975 976 public: 977 template <typename IterT> 978 VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands) 979 : VPRecipeBase(VPRecipeBase::VPWidenGEPSC, Operands), 980 VPValue(VPWidenGEPSC, GEP, this), 981 IsIndexLoopInvariant(GEP->getNumIndices(), false) {} 982 983 template <typename IterT> 984 VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands, 985 Loop *OrigLoop) 986 : VPRecipeBase(VPRecipeBase::VPWidenGEPSC, Operands), 987 VPValue(VPValue::VPVWidenGEPSC, GEP, this), 988 IsIndexLoopInvariant(GEP->getNumIndices(), false) { 989 IsPtrLoopInvariant = OrigLoop->isLoopInvariant(GEP->getPointerOperand()); 990 for (auto Index : enumerate(GEP->indices())) 991 IsIndexLoopInvariant[Index.index()] = 992 OrigLoop->isLoopInvariant(Index.value().get()); 993 } 994 ~VPWidenGEPRecipe() override = default; 995 996 /// Method to support type inquiry through isa, cast, and dyn_cast. 997 static inline bool classof(const VPDef *D) { 998 return D->getVPDefID() == VPRecipeBase::VPWidenGEPSC; 999 } 1000 1001 /// Generate the gep nodes. 1002 void execute(VPTransformState &State) override; 1003 1004 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1005 /// Print the recipe. 1006 void print(raw_ostream &O, const Twine &Indent, 1007 VPSlotTracker &SlotTracker) const override; 1008 #endif 1009 }; 1010 1011 /// A recipe for handling phi nodes of integer and floating-point inductions, 1012 /// producing their vector and scalar values. 1013 class VPWidenIntOrFpInductionRecipe : public VPRecipeBase, public VPValue { 1014 PHINode *IV; 1015 const InductionDescriptor &IndDesc; 1016 1017 public: 1018 VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, 1019 const InductionDescriptor &IndDesc) 1020 : VPRecipeBase(VPWidenIntOrFpInductionSC, {Start}), VPValue(IV, this), 1021 IV(IV), IndDesc(IndDesc) {} 1022 1023 VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, 1024 const InductionDescriptor &IndDesc, 1025 TruncInst *Trunc) 1026 : VPRecipeBase(VPWidenIntOrFpInductionSC, {Start}), VPValue(Trunc, this), 1027 IV(IV), IndDesc(IndDesc) {} 1028 1029 ~VPWidenIntOrFpInductionRecipe() override = default; 1030 1031 /// Method to support type inquiry through isa, cast, and dyn_cast. 1032 static inline bool classof(const VPDef *D) { 1033 return D->getVPDefID() == VPRecipeBase::VPWidenIntOrFpInductionSC; 1034 } 1035 1036 /// Generate the vectorized and scalarized versions of the phi node as 1037 /// needed by their users. 1038 void execute(VPTransformState &State) override; 1039 1040 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1041 /// Print the recipe. 1042 void print(raw_ostream &O, const Twine &Indent, 1043 VPSlotTracker &SlotTracker) const override; 1044 #endif 1045 1046 /// Returns the start value of the induction. 1047 VPValue *getStartValue() { return getOperand(0); } 1048 1049 /// Returns the first defined value as TruncInst, if it is one or nullptr 1050 /// otherwise. 1051 TruncInst *getTruncInst() { 1052 return dyn_cast_or_null<TruncInst>(getVPValue(0)->getUnderlyingValue()); 1053 } 1054 const TruncInst *getTruncInst() const { 1055 return dyn_cast_or_null<TruncInst>(getVPValue(0)->getUnderlyingValue()); 1056 } 1057 1058 /// Returns the induction descriptor for the recipe. 1059 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; } 1060 }; 1061 1062 /// A recipe for handling first order recurrences and pointer inductions. For 1063 /// first-order recurrences, the start value is the first operand of the recipe 1064 /// and the incoming value from the backedge is the second operand. It also 1065 /// serves as base class for VPReductionPHIRecipe. In the VPlan native path, all 1066 /// incoming VPValues & VPBasicBlock pairs are managed in the recipe directly. 1067 class VPWidenPHIRecipe : public VPRecipeBase, public VPValue { 1068 /// List of incoming blocks. Only used in the VPlan native path. 1069 SmallVector<VPBasicBlock *, 2> IncomingBlocks; 1070 1071 protected: 1072 VPWidenPHIRecipe(unsigned char VPVID, unsigned char VPDefID, PHINode *Phi, 1073 VPValue *Start = nullptr) 1074 : VPRecipeBase(VPDefID, {}), VPValue(VPVID, Phi, this) { 1075 if (Start) 1076 addOperand(Start); 1077 } 1078 1079 public: 1080 /// Create a VPWidenPHIRecipe for \p Phi 1081 VPWidenPHIRecipe(PHINode *Phi) 1082 : VPWidenPHIRecipe(VPVWidenPHISC, VPWidenPHISC, Phi) {} 1083 1084 /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start. 1085 VPWidenPHIRecipe(PHINode *Phi, VPValue &Start) : VPWidenPHIRecipe(Phi) { 1086 addOperand(&Start); 1087 } 1088 1089 ~VPWidenPHIRecipe() override = default; 1090 1091 /// Method to support type inquiry through isa, cast, and dyn_cast. 1092 static inline bool classof(const VPRecipeBase *B) { 1093 return B->getVPDefID() == VPRecipeBase::VPWidenPHISC || 1094 B->getVPDefID() == VPRecipeBase::VPFirstOrderRecurrencePHISC || 1095 B->getVPDefID() == VPRecipeBase::VPReductionPHISC; 1096 } 1097 static inline bool classof(const VPValue *V) { 1098 return V->getVPValueID() == VPValue::VPVWidenPHISC || 1099 V->getVPValueID() == VPValue::VPVFirstOrderRecurrencePHISC || 1100 V->getVPValueID() == VPValue::VPVReductionPHISC; 1101 } 1102 1103 /// Generate the phi/select nodes. 1104 void execute(VPTransformState &State) override; 1105 1106 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1107 /// Print the recipe. 1108 void print(raw_ostream &O, const Twine &Indent, 1109 VPSlotTracker &SlotTracker) const override; 1110 #endif 1111 1112 /// Returns the start value of the phi, if it is a reduction or first-order 1113 /// recurrence. 1114 VPValue *getStartValue() { 1115 return getNumOperands() == 0 ? nullptr : getOperand(0); 1116 } 1117 1118 /// Returns the incoming value from the loop backedge, if it is a reduction or 1119 /// first-order recurrence. 1120 VPValue *getBackedgeValue() { 1121 return getOperand(1); 1122 } 1123 1124 /// Returns the backedge value as a recipe. The backedge value is guaranteed 1125 /// to be a recipe. 1126 VPRecipeBase *getBackedgeRecipe() { 1127 return cast<VPRecipeBase>(getBackedgeValue()->getDef()); 1128 } 1129 1130 /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi. 1131 void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) { 1132 addOperand(IncomingV); 1133 IncomingBlocks.push_back(IncomingBlock); 1134 } 1135 1136 /// Returns the \p I th incoming VPValue. 1137 VPValue *getIncomingValue(unsigned I) { return getOperand(I); } 1138 1139 /// Returns the \p I th incoming VPBasicBlock. 1140 VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; } 1141 }; 1142 1143 /// A recipe for handling first-order recurrence phis. The start value is the 1144 /// first operand of the recipe and the incoming value from the backedge is the 1145 /// second operand. 1146 struct VPFirstOrderRecurrencePHIRecipe : public VPWidenPHIRecipe { 1147 VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start) 1148 : VPWidenPHIRecipe(VPVFirstOrderRecurrencePHISC, 1149 VPFirstOrderRecurrencePHISC, Phi, &Start) {} 1150 1151 /// Method to support type inquiry through isa, cast, and dyn_cast. 1152 static inline bool classof(const VPRecipeBase *R) { 1153 return R->getVPDefID() == VPRecipeBase::VPFirstOrderRecurrencePHISC; 1154 } 1155 static inline bool classof(const VPWidenPHIRecipe *D) { 1156 return D->getVPDefID() == VPRecipeBase::VPFirstOrderRecurrencePHISC; 1157 } 1158 static inline bool classof(const VPValue *V) { 1159 return V->getVPValueID() == VPValue::VPVFirstOrderRecurrencePHISC; 1160 } 1161 1162 void execute(VPTransformState &State) override; 1163 1164 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1165 /// Print the recipe. 1166 void print(raw_ostream &O, const Twine &Indent, 1167 VPSlotTracker &SlotTracker) const override; 1168 #endif 1169 }; 1170 1171 /// A recipe for handling reduction phis. The start value is the first operand 1172 /// of the recipe and the incoming value from the backedge is the second 1173 /// operand. 1174 class VPReductionPHIRecipe : public VPWidenPHIRecipe { 1175 /// Descriptor for the reduction. 1176 const RecurrenceDescriptor &RdxDesc; 1177 1178 /// The phi is part of an in-loop reduction. 1179 bool IsInLoop; 1180 1181 /// The phi is part of an ordered reduction. Requires IsInLoop to be true. 1182 bool IsOrdered; 1183 1184 public: 1185 /// Create a new VPReductionPHIRecipe for the reduction \p Phi described by \p 1186 /// RdxDesc. 1187 VPReductionPHIRecipe(PHINode *Phi, const RecurrenceDescriptor &RdxDesc, 1188 VPValue &Start, bool IsInLoop = false, 1189 bool IsOrdered = false) 1190 : VPWidenPHIRecipe(VPVReductionPHISC, VPReductionPHISC, Phi, &Start), 1191 RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered) { 1192 assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop"); 1193 } 1194 1195 ~VPReductionPHIRecipe() override = default; 1196 1197 /// Method to support type inquiry through isa, cast, and dyn_cast. 1198 static inline bool classof(const VPRecipeBase *R) { 1199 return R->getVPDefID() == VPRecipeBase::VPReductionPHISC; 1200 } 1201 static inline bool classof(const VPValue *V) { 1202 return V->getVPValueID() == VPValue::VPVReductionPHISC; 1203 } 1204 static inline bool classof(const VPWidenPHIRecipe *R) { 1205 return R->getVPDefID() == VPRecipeBase::VPReductionPHISC; 1206 } 1207 1208 /// Generate the phi/select nodes. 1209 void execute(VPTransformState &State) override; 1210 1211 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1212 /// Print the recipe. 1213 void print(raw_ostream &O, const Twine &Indent, 1214 VPSlotTracker &SlotTracker) const override; 1215 #endif 1216 1217 const RecurrenceDescriptor &getRecurrenceDescriptor() const { 1218 return RdxDesc; 1219 } 1220 1221 /// Returns true, if the phi is part of an ordered reduction. 1222 bool isOrdered() const { return IsOrdered; } 1223 1224 /// Returns true, if the phi is part of an in-loop reduction. 1225 bool isInLoop() const { return IsInLoop; } 1226 }; 1227 1228 /// A recipe for vectorizing a phi-node as a sequence of mask-based select 1229 /// instructions. 1230 class VPBlendRecipe : public VPRecipeBase, public VPValue { 1231 PHINode *Phi; 1232 1233 public: 1234 /// The blend operation is a User of the incoming values and of their 1235 /// respective masks, ordered [I0, M0, I1, M1, ...]. Note that a single value 1236 /// might be incoming with a full mask for which there is no VPValue. 1237 VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands) 1238 : VPRecipeBase(VPBlendSC, Operands), 1239 VPValue(VPValue::VPVBlendSC, Phi, this), Phi(Phi) { 1240 assert(Operands.size() > 0 && 1241 ((Operands.size() == 1) || (Operands.size() % 2 == 0)) && 1242 "Expected either a single incoming value or a positive even number " 1243 "of operands"); 1244 } 1245 1246 /// Method to support type inquiry through isa, cast, and dyn_cast. 1247 static inline bool classof(const VPDef *D) { 1248 return D->getVPDefID() == VPRecipeBase::VPBlendSC; 1249 } 1250 1251 /// Return the number of incoming values, taking into account that a single 1252 /// incoming value has no mask. 1253 unsigned getNumIncomingValues() const { return (getNumOperands() + 1) / 2; } 1254 1255 /// Return incoming value number \p Idx. 1256 VPValue *getIncomingValue(unsigned Idx) const { return getOperand(Idx * 2); } 1257 1258 /// Return mask number \p Idx. 1259 VPValue *getMask(unsigned Idx) const { return getOperand(Idx * 2 + 1); } 1260 1261 /// Generate the phi/select nodes. 1262 void execute(VPTransformState &State) override; 1263 1264 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1265 /// Print the recipe. 1266 void print(raw_ostream &O, const Twine &Indent, 1267 VPSlotTracker &SlotTracker) const override; 1268 #endif 1269 }; 1270 1271 /// VPInterleaveRecipe is a recipe for transforming an interleave group of load 1272 /// or stores into one wide load/store and shuffles. The first operand of a 1273 /// VPInterleave recipe is the address, followed by the stored values, followed 1274 /// by an optional mask. 1275 class VPInterleaveRecipe : public VPRecipeBase { 1276 const InterleaveGroup<Instruction> *IG; 1277 1278 bool HasMask = false; 1279 1280 public: 1281 VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr, 1282 ArrayRef<VPValue *> StoredValues, VPValue *Mask) 1283 : VPRecipeBase(VPInterleaveSC, {Addr}), IG(IG) { 1284 for (unsigned i = 0; i < IG->getFactor(); ++i) 1285 if (Instruction *I = IG->getMember(i)) { 1286 if (I->getType()->isVoidTy()) 1287 continue; 1288 new VPValue(I, this); 1289 } 1290 1291 for (auto *SV : StoredValues) 1292 addOperand(SV); 1293 if (Mask) { 1294 HasMask = true; 1295 addOperand(Mask); 1296 } 1297 } 1298 ~VPInterleaveRecipe() override = default; 1299 1300 /// Method to support type inquiry through isa, cast, and dyn_cast. 1301 static inline bool classof(const VPDef *D) { 1302 return D->getVPDefID() == VPRecipeBase::VPInterleaveSC; 1303 } 1304 1305 /// Return the address accessed by this recipe. 1306 VPValue *getAddr() const { 1307 return getOperand(0); // Address is the 1st, mandatory operand. 1308 } 1309 1310 /// Return the mask used by this recipe. Note that a full mask is represented 1311 /// by a nullptr. 1312 VPValue *getMask() const { 1313 // Mask is optional and therefore the last, currently 2nd operand. 1314 return HasMask ? getOperand(getNumOperands() - 1) : nullptr; 1315 } 1316 1317 /// Return the VPValues stored by this interleave group. If it is a load 1318 /// interleave group, return an empty ArrayRef. 1319 ArrayRef<VPValue *> getStoredValues() const { 1320 // The first operand is the address, followed by the stored values, followed 1321 // by an optional mask. 1322 return ArrayRef<VPValue *>(op_begin(), getNumOperands()) 1323 .slice(1, getNumStoreOperands()); 1324 } 1325 1326 /// Generate the wide load or store, and shuffles. 1327 void execute(VPTransformState &State) override; 1328 1329 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1330 /// Print the recipe. 1331 void print(raw_ostream &O, const Twine &Indent, 1332 VPSlotTracker &SlotTracker) const override; 1333 #endif 1334 1335 const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; } 1336 1337 /// Returns the number of stored operands of this interleave group. Returns 0 1338 /// for load interleave groups. 1339 unsigned getNumStoreOperands() const { 1340 return getNumOperands() - (HasMask ? 2 : 1); 1341 } 1342 }; 1343 1344 /// A recipe to represent inloop reduction operations, performing a reduction on 1345 /// a vector operand into a scalar value, and adding the result to a chain. 1346 /// The Operands are {ChainOp, VecOp, [Condition]}. 1347 class VPReductionRecipe : public VPRecipeBase, public VPValue { 1348 /// The recurrence decriptor for the reduction in question. 1349 const RecurrenceDescriptor *RdxDesc; 1350 /// Pointer to the TTI, needed to create the target reduction 1351 const TargetTransformInfo *TTI; 1352 1353 public: 1354 VPReductionRecipe(const RecurrenceDescriptor *R, Instruction *I, 1355 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, 1356 const TargetTransformInfo *TTI) 1357 : VPRecipeBase(VPRecipeBase::VPReductionSC, {ChainOp, VecOp}), 1358 VPValue(VPValue::VPVReductionSC, I, this), RdxDesc(R), TTI(TTI) { 1359 if (CondOp) 1360 addOperand(CondOp); 1361 } 1362 1363 ~VPReductionRecipe() override = default; 1364 1365 /// Method to support type inquiry through isa, cast, and dyn_cast. 1366 static inline bool classof(const VPValue *V) { 1367 return V->getVPValueID() == VPValue::VPVReductionSC; 1368 } 1369 1370 /// Generate the reduction in the loop 1371 void execute(VPTransformState &State) override; 1372 1373 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1374 /// Print the recipe. 1375 void print(raw_ostream &O, const Twine &Indent, 1376 VPSlotTracker &SlotTracker) const override; 1377 #endif 1378 1379 /// The VPValue of the scalar Chain being accumulated. 1380 VPValue *getChainOp() const { return getOperand(0); } 1381 /// The VPValue of the vector value to be reduced. 1382 VPValue *getVecOp() const { return getOperand(1); } 1383 /// The VPValue of the condition for the block. 1384 VPValue *getCondOp() const { 1385 return getNumOperands() > 2 ? getOperand(2) : nullptr; 1386 } 1387 }; 1388 1389 /// VPReplicateRecipe replicates a given instruction producing multiple scalar 1390 /// copies of the original scalar type, one per lane, instead of producing a 1391 /// single copy of widened type for all lanes. If the instruction is known to be 1392 /// uniform only one copy, per lane zero, will be generated. 1393 class VPReplicateRecipe : public VPRecipeBase, public VPValue { 1394 /// Indicator if only a single replica per lane is needed. 1395 bool IsUniform; 1396 1397 /// Indicator if the replicas are also predicated. 1398 bool IsPredicated; 1399 1400 /// Indicator if the scalar values should also be packed into a vector. 1401 bool AlsoPack; 1402 1403 public: 1404 template <typename IterT> 1405 VPReplicateRecipe(Instruction *I, iterator_range<IterT> Operands, 1406 bool IsUniform, bool IsPredicated = false) 1407 : VPRecipeBase(VPReplicateSC, Operands), VPValue(VPVReplicateSC, I, this), 1408 IsUniform(IsUniform), IsPredicated(IsPredicated) { 1409 // Retain the previous behavior of predicateInstructions(), where an 1410 // insert-element of a predicated instruction got hoisted into the 1411 // predicated basic block iff it was its only user. This is achieved by 1412 // having predicated instructions also pack their values into a vector by 1413 // default unless they have a replicated user which uses their scalar value. 1414 AlsoPack = IsPredicated && !I->use_empty(); 1415 } 1416 1417 ~VPReplicateRecipe() override = default; 1418 1419 /// Method to support type inquiry through isa, cast, and dyn_cast. 1420 static inline bool classof(const VPDef *D) { 1421 return D->getVPDefID() == VPRecipeBase::VPReplicateSC; 1422 } 1423 1424 static inline bool classof(const VPValue *V) { 1425 return V->getVPValueID() == VPValue::VPVReplicateSC; 1426 } 1427 1428 /// Generate replicas of the desired Ingredient. Replicas will be generated 1429 /// for all parts and lanes unless a specific part and lane are specified in 1430 /// the \p State. 1431 void execute(VPTransformState &State) override; 1432 1433 void setAlsoPack(bool Pack) { AlsoPack = Pack; } 1434 1435 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1436 /// Print the recipe. 1437 void print(raw_ostream &O, const Twine &Indent, 1438 VPSlotTracker &SlotTracker) const override; 1439 #endif 1440 1441 bool isUniform() const { return IsUniform; } 1442 1443 bool isPacked() const { return AlsoPack; } 1444 1445 bool isPredicated() const { return IsPredicated; } 1446 }; 1447 1448 /// A recipe for generating conditional branches on the bits of a mask. 1449 class VPBranchOnMaskRecipe : public VPRecipeBase { 1450 public: 1451 VPBranchOnMaskRecipe(VPValue *BlockInMask) 1452 : VPRecipeBase(VPBranchOnMaskSC, {}) { 1453 if (BlockInMask) // nullptr means all-one mask. 1454 addOperand(BlockInMask); 1455 } 1456 1457 /// Method to support type inquiry through isa, cast, and dyn_cast. 1458 static inline bool classof(const VPDef *D) { 1459 return D->getVPDefID() == VPRecipeBase::VPBranchOnMaskSC; 1460 } 1461 1462 /// Generate the extraction of the appropriate bit from the block mask and the 1463 /// conditional branch. 1464 void execute(VPTransformState &State) override; 1465 1466 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1467 /// Print the recipe. 1468 void print(raw_ostream &O, const Twine &Indent, 1469 VPSlotTracker &SlotTracker) const override { 1470 O << Indent << "BRANCH-ON-MASK "; 1471 if (VPValue *Mask = getMask()) 1472 Mask->printAsOperand(O, SlotTracker); 1473 else 1474 O << " All-One"; 1475 } 1476 #endif 1477 1478 /// Return the mask used by this recipe. Note that a full mask is represented 1479 /// by a nullptr. 1480 VPValue *getMask() const { 1481 assert(getNumOperands() <= 1 && "should have either 0 or 1 operands"); 1482 // Mask is optional. 1483 return getNumOperands() == 1 ? getOperand(0) : nullptr; 1484 } 1485 }; 1486 1487 /// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when 1488 /// control converges back from a Branch-on-Mask. The phi nodes are needed in 1489 /// order to merge values that are set under such a branch and feed their uses. 1490 /// The phi nodes can be scalar or vector depending on the users of the value. 1491 /// This recipe works in concert with VPBranchOnMaskRecipe. 1492 class VPPredInstPHIRecipe : public VPRecipeBase, public VPValue { 1493 public: 1494 /// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs a phi 1495 /// nodes after merging back from a Branch-on-Mask. 1496 VPPredInstPHIRecipe(VPValue *PredV) 1497 : VPRecipeBase(VPPredInstPHISC, PredV), 1498 VPValue(VPValue::VPVPredInstPHI, nullptr, this) {} 1499 ~VPPredInstPHIRecipe() override = default; 1500 1501 /// Method to support type inquiry through isa, cast, and dyn_cast. 1502 static inline bool classof(const VPDef *D) { 1503 return D->getVPDefID() == VPRecipeBase::VPPredInstPHISC; 1504 } 1505 1506 /// Generates phi nodes for live-outs as needed to retain SSA form. 1507 void execute(VPTransformState &State) override; 1508 1509 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1510 /// Print the recipe. 1511 void print(raw_ostream &O, const Twine &Indent, 1512 VPSlotTracker &SlotTracker) const override; 1513 #endif 1514 }; 1515 1516 /// A Recipe for widening load/store operations. 1517 /// The recipe uses the following VPValues: 1518 /// - For load: Address, optional mask 1519 /// - For store: Address, stored value, optional mask 1520 /// TODO: We currently execute only per-part unless a specific instance is 1521 /// provided. 1522 class VPWidenMemoryInstructionRecipe : public VPRecipeBase, public VPValue { 1523 Instruction &Ingredient; 1524 1525 // Whether the loaded-from / stored-to addresses are consecutive. 1526 bool Consecutive; 1527 1528 // Whether the consecutive loaded/stored addresses are in reverse order. 1529 bool Reverse; 1530 1531 void setMask(VPValue *Mask) { 1532 if (!Mask) 1533 return; 1534 addOperand(Mask); 1535 } 1536 1537 bool isMasked() const { 1538 return isStore() ? getNumOperands() == 3 : getNumOperands() == 2; 1539 } 1540 1541 public: 1542 VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, 1543 bool Consecutive, bool Reverse) 1544 : VPRecipeBase(VPWidenMemoryInstructionSC, {Addr}), 1545 VPValue(VPValue::VPVMemoryInstructionSC, &Load, this), Ingredient(Load), 1546 Consecutive(Consecutive), Reverse(Reverse) { 1547 assert((Consecutive || !Reverse) && "Reverse implies consecutive"); 1548 setMask(Mask); 1549 } 1550 1551 VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr, 1552 VPValue *StoredValue, VPValue *Mask, 1553 bool Consecutive, bool Reverse) 1554 : VPRecipeBase(VPWidenMemoryInstructionSC, {Addr, StoredValue}), 1555 VPValue(VPValue::VPVMemoryInstructionSC, &Store, this), 1556 Ingredient(Store), Consecutive(Consecutive), Reverse(Reverse) { 1557 assert((Consecutive || !Reverse) && "Reverse implies consecutive"); 1558 setMask(Mask); 1559 } 1560 1561 /// Method to support type inquiry through isa, cast, and dyn_cast. 1562 static inline bool classof(const VPDef *D) { 1563 return D->getVPDefID() == VPRecipeBase::VPWidenMemoryInstructionSC; 1564 } 1565 1566 /// Return the address accessed by this recipe. 1567 VPValue *getAddr() const { 1568 return getOperand(0); // Address is the 1st, mandatory operand. 1569 } 1570 1571 /// Return the mask used by this recipe. Note that a full mask is represented 1572 /// by a nullptr. 1573 VPValue *getMask() const { 1574 // Mask is optional and therefore the last operand. 1575 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr; 1576 } 1577 1578 /// Returns true if this recipe is a store. 1579 bool isStore() const { return isa<StoreInst>(Ingredient); } 1580 1581 /// Return the address accessed by this recipe. 1582 VPValue *getStoredValue() const { 1583 assert(isStore() && "Stored value only available for store instructions"); 1584 return getOperand(1); // Stored value is the 2nd, mandatory operand. 1585 } 1586 1587 // Return whether the loaded-from / stored-to addresses are consecutive. 1588 bool isConsecutive() const { return Consecutive; } 1589 1590 // Return whether the consecutive loaded/stored addresses are in reverse 1591 // order. 1592 bool isReverse() const { return Reverse; } 1593 1594 /// Generate the wide load/store. 1595 void execute(VPTransformState &State) override; 1596 1597 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1598 /// Print the recipe. 1599 void print(raw_ostream &O, const Twine &Indent, 1600 VPSlotTracker &SlotTracker) const override; 1601 #endif 1602 }; 1603 1604 /// A Recipe for widening the canonical induction variable of the vector loop. 1605 class VPWidenCanonicalIVRecipe : public VPRecipeBase, public VPValue { 1606 public: 1607 VPWidenCanonicalIVRecipe() 1608 : VPRecipeBase(VPWidenCanonicalIVSC, {}), 1609 VPValue(VPValue::VPVWidenCanonicalIVSC, nullptr, this) {} 1610 1611 ~VPWidenCanonicalIVRecipe() override = default; 1612 1613 /// Method to support type inquiry through isa, cast, and dyn_cast. 1614 static inline bool classof(const VPDef *D) { 1615 return D->getVPDefID() == VPRecipeBase::VPWidenCanonicalIVSC; 1616 } 1617 1618 /// Generate a canonical vector induction variable of the vector loop, with 1619 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and 1620 /// step = <VF*UF, VF*UF, ..., VF*UF>. 1621 void execute(VPTransformState &State) override; 1622 1623 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1624 /// Print the recipe. 1625 void print(raw_ostream &O, const Twine &Indent, 1626 VPSlotTracker &SlotTracker) const override; 1627 #endif 1628 }; 1629 1630 /// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It 1631 /// holds a sequence of zero or more VPRecipe's each representing a sequence of 1632 /// output IR instructions. All PHI-like recipes must come before any non-PHI recipes. 1633 class VPBasicBlock : public VPBlockBase { 1634 public: 1635 using RecipeListTy = iplist<VPRecipeBase>; 1636 1637 private: 1638 /// The VPRecipes held in the order of output instructions to generate. 1639 RecipeListTy Recipes; 1640 1641 public: 1642 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr) 1643 : VPBlockBase(VPBasicBlockSC, Name.str()) { 1644 if (Recipe) 1645 appendRecipe(Recipe); 1646 } 1647 1648 ~VPBasicBlock() override { 1649 while (!Recipes.empty()) 1650 Recipes.pop_back(); 1651 } 1652 1653 /// Instruction iterators... 1654 using iterator = RecipeListTy::iterator; 1655 using const_iterator = RecipeListTy::const_iterator; 1656 using reverse_iterator = RecipeListTy::reverse_iterator; 1657 using const_reverse_iterator = RecipeListTy::const_reverse_iterator; 1658 1659 //===--------------------------------------------------------------------===// 1660 /// Recipe iterator methods 1661 /// 1662 inline iterator begin() { return Recipes.begin(); } 1663 inline const_iterator begin() const { return Recipes.begin(); } 1664 inline iterator end() { return Recipes.end(); } 1665 inline const_iterator end() const { return Recipes.end(); } 1666 1667 inline reverse_iterator rbegin() { return Recipes.rbegin(); } 1668 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); } 1669 inline reverse_iterator rend() { return Recipes.rend(); } 1670 inline const_reverse_iterator rend() const { return Recipes.rend(); } 1671 1672 inline size_t size() const { return Recipes.size(); } 1673 inline bool empty() const { return Recipes.empty(); } 1674 inline const VPRecipeBase &front() const { return Recipes.front(); } 1675 inline VPRecipeBase &front() { return Recipes.front(); } 1676 inline const VPRecipeBase &back() const { return Recipes.back(); } 1677 inline VPRecipeBase &back() { return Recipes.back(); } 1678 1679 /// Returns a reference to the list of recipes. 1680 RecipeListTy &getRecipeList() { return Recipes; } 1681 1682 /// Returns a pointer to a member of the recipe list. 1683 static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) { 1684 return &VPBasicBlock::Recipes; 1685 } 1686 1687 /// Method to support type inquiry through isa, cast, and dyn_cast. 1688 static inline bool classof(const VPBlockBase *V) { 1689 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC; 1690 } 1691 1692 void insert(VPRecipeBase *Recipe, iterator InsertPt) { 1693 assert(Recipe && "No recipe to append."); 1694 assert(!Recipe->Parent && "Recipe already in VPlan"); 1695 Recipe->Parent = this; 1696 Recipes.insert(InsertPt, Recipe); 1697 } 1698 1699 /// Augment the existing recipes of a VPBasicBlock with an additional 1700 /// \p Recipe as the last recipe. 1701 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); } 1702 1703 /// The method which generates the output IR instructions that correspond to 1704 /// this VPBasicBlock, thereby "executing" the VPlan. 1705 void execute(struct VPTransformState *State) override; 1706 1707 /// Return the position of the first non-phi node recipe in the block. 1708 iterator getFirstNonPhi(); 1709 1710 /// Returns an iterator range over the PHI-like recipes in the block. 1711 iterator_range<iterator> phis() { 1712 return make_range(begin(), getFirstNonPhi()); 1713 } 1714 1715 void dropAllReferences(VPValue *NewValue) override; 1716 1717 /// Split current block at \p SplitAt by inserting a new block between the 1718 /// current block and its successors and moving all recipes starting at 1719 /// SplitAt to the new block. Returns the new block. 1720 VPBasicBlock *splitAt(iterator SplitAt); 1721 1722 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1723 /// Print this VPBsicBlock to \p O, prefixing all lines with \p Indent. \p 1724 /// SlotTracker is used to print unnamed VPValue's using consequtive numbers. 1725 /// 1726 /// Note that the numbering is applied to the whole VPlan, so printing 1727 /// individual blocks is consistent with the whole VPlan printing. 1728 void print(raw_ostream &O, const Twine &Indent, 1729 VPSlotTracker &SlotTracker) const override; 1730 using VPBlockBase::print; // Get the print(raw_stream &O) version. 1731 #endif 1732 1733 private: 1734 /// Create an IR BasicBlock to hold the output instructions generated by this 1735 /// VPBasicBlock, and return it. Update the CFGState accordingly. 1736 BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG); 1737 }; 1738 1739 /// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks 1740 /// which form a Single-Entry-Single-Exit subgraph of the output IR CFG. 1741 /// A VPRegionBlock may indicate that its contents are to be replicated several 1742 /// times. This is designed to support predicated scalarization, in which a 1743 /// scalar if-then code structure needs to be generated VF * UF times. Having 1744 /// this replication indicator helps to keep a single model for multiple 1745 /// candidate VF's. The actual replication takes place only once the desired VF 1746 /// and UF have been determined. 1747 class VPRegionBlock : public VPBlockBase { 1748 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock. 1749 VPBlockBase *Entry; 1750 1751 /// Hold the Single Exit of the SESE region modelled by the VPRegionBlock. 1752 VPBlockBase *Exit; 1753 1754 /// An indicator whether this region is to generate multiple replicated 1755 /// instances of output IR corresponding to its VPBlockBases. 1756 bool IsReplicator; 1757 1758 public: 1759 VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exit, 1760 const std::string &Name = "", bool IsReplicator = false) 1761 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exit(Exit), 1762 IsReplicator(IsReplicator) { 1763 assert(Entry->getPredecessors().empty() && "Entry block has predecessors."); 1764 assert(Exit->getSuccessors().empty() && "Exit block has successors."); 1765 Entry->setParent(this); 1766 Exit->setParent(this); 1767 } 1768 VPRegionBlock(const std::string &Name = "", bool IsReplicator = false) 1769 : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exit(nullptr), 1770 IsReplicator(IsReplicator) {} 1771 1772 ~VPRegionBlock() override { 1773 if (Entry) { 1774 VPValue DummyValue; 1775 Entry->dropAllReferences(&DummyValue); 1776 deleteCFG(Entry); 1777 } 1778 } 1779 1780 /// Method to support type inquiry through isa, cast, and dyn_cast. 1781 static inline bool classof(const VPBlockBase *V) { 1782 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC; 1783 } 1784 1785 const VPBlockBase *getEntry() const { return Entry; } 1786 VPBlockBase *getEntry() { return Entry; } 1787 1788 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p 1789 /// EntryBlock must have no predecessors. 1790 void setEntry(VPBlockBase *EntryBlock) { 1791 assert(EntryBlock->getPredecessors().empty() && 1792 "Entry block cannot have predecessors."); 1793 Entry = EntryBlock; 1794 EntryBlock->setParent(this); 1795 } 1796 1797 // FIXME: DominatorTreeBase is doing 'A->getParent()->front()'. 'front' is a 1798 // specific interface of llvm::Function, instead of using 1799 // GraphTraints::getEntryNode. We should add a new template parameter to 1800 // DominatorTreeBase representing the Graph type. 1801 VPBlockBase &front() const { return *Entry; } 1802 1803 const VPBlockBase *getExit() const { return Exit; } 1804 VPBlockBase *getExit() { return Exit; } 1805 1806 /// Set \p ExitBlock as the exit VPBlockBase of this VPRegionBlock. \p 1807 /// ExitBlock must have no successors. 1808 void setExit(VPBlockBase *ExitBlock) { 1809 assert(ExitBlock->getSuccessors().empty() && 1810 "Exit block cannot have successors."); 1811 Exit = ExitBlock; 1812 ExitBlock->setParent(this); 1813 } 1814 1815 /// An indicator whether this region is to generate multiple replicated 1816 /// instances of output IR corresponding to its VPBlockBases. 1817 bool isReplicator() const { return IsReplicator; } 1818 1819 /// The method which generates the output IR instructions that correspond to 1820 /// this VPRegionBlock, thereby "executing" the VPlan. 1821 void execute(struct VPTransformState *State) override; 1822 1823 void dropAllReferences(VPValue *NewValue) override; 1824 1825 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1826 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with 1827 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using 1828 /// consequtive numbers. 1829 /// 1830 /// Note that the numbering is applied to the whole VPlan, so printing 1831 /// individual regions is consistent with the whole VPlan printing. 1832 void print(raw_ostream &O, const Twine &Indent, 1833 VPSlotTracker &SlotTracker) const override; 1834 using VPBlockBase::print; // Get the print(raw_stream &O) version. 1835 #endif 1836 }; 1837 1838 //===----------------------------------------------------------------------===// 1839 // GraphTraits specializations for VPlan Hierarchical Control-Flow Graphs // 1840 //===----------------------------------------------------------------------===// 1841 1842 // The following set of template specializations implement GraphTraits to treat 1843 // any VPBlockBase as a node in a graph of VPBlockBases. It's important to note 1844 // that VPBlockBase traits don't recurse into VPRegioBlocks, i.e., if the 1845 // VPBlockBase is a VPRegionBlock, this specialization provides access to its 1846 // successors/predecessors but not to the blocks inside the region. 1847 1848 template <> struct GraphTraits<VPBlockBase *> { 1849 using NodeRef = VPBlockBase *; 1850 using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::iterator; 1851 1852 static NodeRef getEntryNode(NodeRef N) { return N; } 1853 1854 static inline ChildIteratorType child_begin(NodeRef N) { 1855 return N->getSuccessors().begin(); 1856 } 1857 1858 static inline ChildIteratorType child_end(NodeRef N) { 1859 return N->getSuccessors().end(); 1860 } 1861 }; 1862 1863 template <> struct GraphTraits<const VPBlockBase *> { 1864 using NodeRef = const VPBlockBase *; 1865 using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::const_iterator; 1866 1867 static NodeRef getEntryNode(NodeRef N) { return N; } 1868 1869 static inline ChildIteratorType child_begin(NodeRef N) { 1870 return N->getSuccessors().begin(); 1871 } 1872 1873 static inline ChildIteratorType child_end(NodeRef N) { 1874 return N->getSuccessors().end(); 1875 } 1876 }; 1877 1878 // Inverse order specialization for VPBasicBlocks. Predecessors are used instead 1879 // of successors for the inverse traversal. 1880 template <> struct GraphTraits<Inverse<VPBlockBase *>> { 1881 using NodeRef = VPBlockBase *; 1882 using ChildIteratorType = SmallVectorImpl<VPBlockBase *>::iterator; 1883 1884 static NodeRef getEntryNode(Inverse<NodeRef> B) { return B.Graph; } 1885 1886 static inline ChildIteratorType child_begin(NodeRef N) { 1887 return N->getPredecessors().begin(); 1888 } 1889 1890 static inline ChildIteratorType child_end(NodeRef N) { 1891 return N->getPredecessors().end(); 1892 } 1893 }; 1894 1895 // The following set of template specializations implement GraphTraits to 1896 // treat VPRegionBlock as a graph and recurse inside its nodes. It's important 1897 // to note that the blocks inside the VPRegionBlock are treated as VPBlockBases 1898 // (i.e., no dyn_cast is performed, VPBlockBases specialization is used), so 1899 // there won't be automatic recursion into other VPBlockBases that turn to be 1900 // VPRegionBlocks. 1901 1902 template <> 1903 struct GraphTraits<VPRegionBlock *> : public GraphTraits<VPBlockBase *> { 1904 using GraphRef = VPRegionBlock *; 1905 using nodes_iterator = df_iterator<NodeRef>; 1906 1907 static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); } 1908 1909 static nodes_iterator nodes_begin(GraphRef N) { 1910 return nodes_iterator::begin(N->getEntry()); 1911 } 1912 1913 static nodes_iterator nodes_end(GraphRef N) { 1914 // df_iterator::end() returns an empty iterator so the node used doesn't 1915 // matter. 1916 return nodes_iterator::end(N); 1917 } 1918 }; 1919 1920 template <> 1921 struct GraphTraits<const VPRegionBlock *> 1922 : public GraphTraits<const VPBlockBase *> { 1923 using GraphRef = const VPRegionBlock *; 1924 using nodes_iterator = df_iterator<NodeRef>; 1925 1926 static NodeRef getEntryNode(GraphRef N) { return N->getEntry(); } 1927 1928 static nodes_iterator nodes_begin(GraphRef N) { 1929 return nodes_iterator::begin(N->getEntry()); 1930 } 1931 1932 static nodes_iterator nodes_end(GraphRef N) { 1933 // df_iterator::end() returns an empty iterator so the node used doesn't 1934 // matter. 1935 return nodes_iterator::end(N); 1936 } 1937 }; 1938 1939 template <> 1940 struct GraphTraits<Inverse<VPRegionBlock *>> 1941 : public GraphTraits<Inverse<VPBlockBase *>> { 1942 using GraphRef = VPRegionBlock *; 1943 using nodes_iterator = df_iterator<NodeRef>; 1944 1945 static NodeRef getEntryNode(Inverse<GraphRef> N) { 1946 return N.Graph->getExit(); 1947 } 1948 1949 static nodes_iterator nodes_begin(GraphRef N) { 1950 return nodes_iterator::begin(N->getExit()); 1951 } 1952 1953 static nodes_iterator nodes_end(GraphRef N) { 1954 // df_iterator::end() returns an empty iterator so the node used doesn't 1955 // matter. 1956 return nodes_iterator::end(N); 1957 } 1958 }; 1959 1960 /// Iterator to traverse all successors of a VPBlockBase node. This includes the 1961 /// entry node of VPRegionBlocks. Exit blocks of a region implicitly have their 1962 /// parent region's successors. This ensures all blocks in a region are visited 1963 /// before any blocks in a successor region when doing a reverse post-order 1964 // traversal of the graph. 1965 template <typename BlockPtrTy> 1966 class VPAllSuccessorsIterator 1967 : public iterator_facade_base<VPAllSuccessorsIterator<BlockPtrTy>, 1968 std::forward_iterator_tag, VPBlockBase> { 1969 BlockPtrTy Block; 1970 /// Index of the current successor. For VPBasicBlock nodes, this simply is the 1971 /// index for the successor array. For VPRegionBlock, SuccessorIdx == 0 is 1972 /// used for the region's entry block, and SuccessorIdx - 1 are the indices 1973 /// for the successor array. 1974 size_t SuccessorIdx; 1975 1976 static BlockPtrTy getBlockWithSuccs(BlockPtrTy Current) { 1977 while (Current && Current->getNumSuccessors() == 0) 1978 Current = Current->getParent(); 1979 return Current; 1980 } 1981 1982 /// Templated helper to dereference successor \p SuccIdx of \p Block. Used by 1983 /// both the const and non-const operator* implementations. 1984 template <typename T1> static T1 deref(T1 Block, unsigned SuccIdx) { 1985 if (auto *R = dyn_cast<VPRegionBlock>(Block)) { 1986 if (SuccIdx == 0) 1987 return R->getEntry(); 1988 SuccIdx--; 1989 } 1990 1991 // For exit blocks, use the next parent region with successors. 1992 return getBlockWithSuccs(Block)->getSuccessors()[SuccIdx]; 1993 } 1994 1995 public: 1996 VPAllSuccessorsIterator(BlockPtrTy Block, size_t Idx = 0) 1997 : Block(Block), SuccessorIdx(Idx) {} 1998 VPAllSuccessorsIterator(const VPAllSuccessorsIterator &Other) 1999 : Block(Other.Block), SuccessorIdx(Other.SuccessorIdx) {} 2000 2001 VPAllSuccessorsIterator &operator=(const VPAllSuccessorsIterator &R) { 2002 Block = R.Block; 2003 SuccessorIdx = R.SuccessorIdx; 2004 return *this; 2005 } 2006 2007 static VPAllSuccessorsIterator end(BlockPtrTy Block) { 2008 BlockPtrTy ParentWithSuccs = getBlockWithSuccs(Block); 2009 unsigned NumSuccessors = ParentWithSuccs 2010 ? ParentWithSuccs->getNumSuccessors() 2011 : Block->getNumSuccessors(); 2012 2013 if (auto *R = dyn_cast<VPRegionBlock>(Block)) 2014 return {R, NumSuccessors + 1}; 2015 return {Block, NumSuccessors}; 2016 } 2017 2018 bool operator==(const VPAllSuccessorsIterator &R) const { 2019 return Block == R.Block && SuccessorIdx == R.SuccessorIdx; 2020 } 2021 2022 const VPBlockBase *operator*() const { return deref(Block, SuccessorIdx); } 2023 2024 BlockPtrTy operator*() { return deref(Block, SuccessorIdx); } 2025 2026 VPAllSuccessorsIterator &operator++() { 2027 SuccessorIdx++; 2028 return *this; 2029 } 2030 2031 VPAllSuccessorsIterator operator++(int X) { 2032 VPAllSuccessorsIterator Orig = *this; 2033 SuccessorIdx++; 2034 return Orig; 2035 } 2036 }; 2037 2038 /// Helper for GraphTraits specialization that traverses through VPRegionBlocks. 2039 template <typename BlockTy> class VPBlockRecursiveTraversalWrapper { 2040 BlockTy Entry; 2041 2042 public: 2043 VPBlockRecursiveTraversalWrapper(BlockTy Entry) : Entry(Entry) {} 2044 BlockTy getEntry() { return Entry; } 2045 }; 2046 2047 /// GraphTraits specialization to recursively traverse VPBlockBase nodes, 2048 /// including traversing through VPRegionBlocks. Exit blocks of a region 2049 /// implicitly have their parent region's successors. This ensures all blocks in 2050 /// a region are visited before any blocks in a successor region when doing a 2051 /// reverse post-order traversal of the graph. 2052 template <> 2053 struct GraphTraits<VPBlockRecursiveTraversalWrapper<VPBlockBase *>> { 2054 using NodeRef = VPBlockBase *; 2055 using ChildIteratorType = VPAllSuccessorsIterator<VPBlockBase *>; 2056 2057 static NodeRef 2058 getEntryNode(VPBlockRecursiveTraversalWrapper<VPBlockBase *> N) { 2059 return N.getEntry(); 2060 } 2061 2062 static inline ChildIteratorType child_begin(NodeRef N) { 2063 return ChildIteratorType(N); 2064 } 2065 2066 static inline ChildIteratorType child_end(NodeRef N) { 2067 return ChildIteratorType::end(N); 2068 } 2069 }; 2070 2071 template <> 2072 struct GraphTraits<VPBlockRecursiveTraversalWrapper<const VPBlockBase *>> { 2073 using NodeRef = const VPBlockBase *; 2074 using ChildIteratorType = VPAllSuccessorsIterator<const VPBlockBase *>; 2075 2076 static NodeRef 2077 getEntryNode(VPBlockRecursiveTraversalWrapper<const VPBlockBase *> N) { 2078 return N.getEntry(); 2079 } 2080 2081 static inline ChildIteratorType child_begin(NodeRef N) { 2082 return ChildIteratorType(N); 2083 } 2084 2085 static inline ChildIteratorType child_end(NodeRef N) { 2086 return ChildIteratorType::end(N); 2087 } 2088 }; 2089 2090 /// VPlan models a candidate for vectorization, encoding various decisions take 2091 /// to produce efficient output IR, including which branches, basic-blocks and 2092 /// output IR instructions to generate, and their cost. VPlan holds a 2093 /// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry 2094 /// VPBlock. 2095 class VPlan { 2096 friend class VPlanPrinter; 2097 friend class VPSlotTracker; 2098 2099 /// Hold the single entry to the Hierarchical CFG of the VPlan. 2100 VPBlockBase *Entry; 2101 2102 /// Holds the VFs applicable to this VPlan. 2103 SmallSetVector<ElementCount, 2> VFs; 2104 2105 /// Holds the name of the VPlan, for printing. 2106 std::string Name; 2107 2108 /// Holds all the external definitions created for this VPlan. 2109 // TODO: Introduce a specific representation for external definitions in 2110 // VPlan. External definitions must be immutable and hold a pointer to its 2111 // underlying IR that will be used to implement its structural comparison 2112 // (operators '==' and '<'). 2113 SetVector<VPValue *> VPExternalDefs; 2114 2115 /// Represents the backedge taken count of the original loop, for folding 2116 /// the tail. 2117 VPValue *BackedgeTakenCount = nullptr; 2118 2119 /// Holds a mapping between Values and their corresponding VPValue inside 2120 /// VPlan. 2121 Value2VPValueTy Value2VPValue; 2122 2123 /// Contains all VPValues that been allocated by addVPValue directly and need 2124 /// to be free when the plan's destructor is called. 2125 SmallVector<VPValue *, 16> VPValuesToFree; 2126 2127 /// Holds the VPLoopInfo analysis for this VPlan. 2128 VPLoopInfo VPLInfo; 2129 2130 /// Indicates whether it is safe use the Value2VPValue mapping or if the 2131 /// mapping cannot be used any longer, because it is stale. 2132 bool Value2VPValueEnabled = true; 2133 2134 public: 2135 VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) { 2136 if (Entry) 2137 Entry->setPlan(this); 2138 } 2139 2140 ~VPlan() { 2141 if (Entry) { 2142 VPValue DummyValue; 2143 for (VPBlockBase *Block : depth_first(Entry)) 2144 Block->dropAllReferences(&DummyValue); 2145 2146 VPBlockBase::deleteCFG(Entry); 2147 } 2148 for (VPValue *VPV : VPValuesToFree) 2149 delete VPV; 2150 if (BackedgeTakenCount) 2151 delete BackedgeTakenCount; 2152 for (VPValue *Def : VPExternalDefs) 2153 delete Def; 2154 } 2155 2156 /// Generate the IR code for this VPlan. 2157 void execute(struct VPTransformState *State); 2158 2159 VPBlockBase *getEntry() { return Entry; } 2160 const VPBlockBase *getEntry() const { return Entry; } 2161 2162 VPBlockBase *setEntry(VPBlockBase *Block) { 2163 Entry = Block; 2164 Block->setPlan(this); 2165 return Entry; 2166 } 2167 2168 /// The backedge taken count of the original loop. 2169 VPValue *getOrCreateBackedgeTakenCount() { 2170 if (!BackedgeTakenCount) 2171 BackedgeTakenCount = new VPValue(); 2172 return BackedgeTakenCount; 2173 } 2174 2175 /// Mark the plan to indicate that using Value2VPValue is not safe any 2176 /// longer, because it may be stale. 2177 void disableValue2VPValue() { Value2VPValueEnabled = false; } 2178 2179 void addVF(ElementCount VF) { VFs.insert(VF); } 2180 2181 bool hasVF(ElementCount VF) { return VFs.count(VF); } 2182 2183 const std::string &getName() const { return Name; } 2184 2185 void setName(const Twine &newName) { Name = newName.str(); } 2186 2187 /// Add \p VPVal to the pool of external definitions if it's not already 2188 /// in the pool. 2189 void addExternalDef(VPValue *VPVal) { VPExternalDefs.insert(VPVal); } 2190 2191 void addVPValue(Value *V) { 2192 assert(Value2VPValueEnabled && 2193 "IR value to VPValue mapping may be out of date!"); 2194 assert(V && "Trying to add a null Value to VPlan"); 2195 assert(!Value2VPValue.count(V) && "Value already exists in VPlan"); 2196 VPValue *VPV = new VPValue(V); 2197 Value2VPValue[V] = VPV; 2198 VPValuesToFree.push_back(VPV); 2199 } 2200 2201 void addVPValue(Value *V, VPValue *VPV) { 2202 assert(Value2VPValueEnabled && "Value2VPValue mapping may be out of date!"); 2203 assert(V && "Trying to add a null Value to VPlan"); 2204 assert(!Value2VPValue.count(V) && "Value already exists in VPlan"); 2205 Value2VPValue[V] = VPV; 2206 } 2207 2208 /// Returns the VPValue for \p V. \p OverrideAllowed can be used to disable 2209 /// checking whether it is safe to query VPValues using IR Values. 2210 VPValue *getVPValue(Value *V, bool OverrideAllowed = false) { 2211 assert((OverrideAllowed || isa<Constant>(V) || Value2VPValueEnabled) && 2212 "Value2VPValue mapping may be out of date!"); 2213 assert(V && "Trying to get the VPValue of a null Value"); 2214 assert(Value2VPValue.count(V) && "Value does not exist in VPlan"); 2215 return Value2VPValue[V]; 2216 } 2217 2218 /// Gets the VPValue or adds a new one (if none exists yet) for \p V. \p 2219 /// OverrideAllowed can be used to disable checking whether it is safe to 2220 /// query VPValues using IR Values. 2221 VPValue *getOrAddVPValue(Value *V, bool OverrideAllowed = false) { 2222 assert((OverrideAllowed || isa<Constant>(V) || Value2VPValueEnabled) && 2223 "Value2VPValue mapping may be out of date!"); 2224 assert(V && "Trying to get or add the VPValue of a null Value"); 2225 if (!Value2VPValue.count(V)) 2226 addVPValue(V); 2227 return getVPValue(V); 2228 } 2229 2230 void removeVPValueFor(Value *V) { 2231 assert(Value2VPValueEnabled && 2232 "IR value to VPValue mapping may be out of date!"); 2233 Value2VPValue.erase(V); 2234 } 2235 2236 /// Return the VPLoopInfo analysis for this VPlan. 2237 VPLoopInfo &getVPLoopInfo() { return VPLInfo; } 2238 const VPLoopInfo &getVPLoopInfo() const { return VPLInfo; } 2239 2240 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2241 /// Print this VPlan to \p O. 2242 void print(raw_ostream &O) const; 2243 2244 /// Print this VPlan in DOT format to \p O. 2245 void printDOT(raw_ostream &O) const; 2246 2247 /// Dump the plan to stderr (for debugging). 2248 LLVM_DUMP_METHOD void dump() const; 2249 #endif 2250 2251 /// Returns a range mapping the values the range \p Operands to their 2252 /// corresponding VPValues. 2253 iterator_range<mapped_iterator<Use *, std::function<VPValue *(Value *)>>> 2254 mapToVPValues(User::op_range Operands) { 2255 std::function<VPValue *(Value *)> Fn = [this](Value *Op) { 2256 return getOrAddVPValue(Op); 2257 }; 2258 return map_range(Operands, Fn); 2259 } 2260 2261 /// Returns true if \p VPV is uniform after vectorization. 2262 bool isUniformAfterVectorization(VPValue *VPV) const { 2263 auto RepR = dyn_cast_or_null<VPReplicateRecipe>(VPV->getDef()); 2264 return !VPV->getDef() || (RepR && RepR->isUniform()); 2265 } 2266 2267 private: 2268 /// Add to the given dominator tree the header block and every new basic block 2269 /// that was created between it and the latch block, inclusive. 2270 static void updateDominatorTree(DominatorTree *DT, BasicBlock *LoopLatchBB, 2271 BasicBlock *LoopPreHeaderBB, 2272 BasicBlock *LoopExitBB); 2273 }; 2274 2275 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2276 /// VPlanPrinter prints a given VPlan to a given output stream. The printing is 2277 /// indented and follows the dot format. 2278 class VPlanPrinter { 2279 raw_ostream &OS; 2280 const VPlan &Plan; 2281 unsigned Depth = 0; 2282 unsigned TabWidth = 2; 2283 std::string Indent; 2284 unsigned BID = 0; 2285 SmallDenseMap<const VPBlockBase *, unsigned> BlockID; 2286 2287 VPSlotTracker SlotTracker; 2288 2289 /// Handle indentation. 2290 void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); } 2291 2292 /// Print a given \p Block of the Plan. 2293 void dumpBlock(const VPBlockBase *Block); 2294 2295 /// Print the information related to the CFG edges going out of a given 2296 /// \p Block, followed by printing the successor blocks themselves. 2297 void dumpEdges(const VPBlockBase *Block); 2298 2299 /// Print a given \p BasicBlock, including its VPRecipes, followed by printing 2300 /// its successor blocks. 2301 void dumpBasicBlock(const VPBasicBlock *BasicBlock); 2302 2303 /// Print a given \p Region of the Plan. 2304 void dumpRegion(const VPRegionBlock *Region); 2305 2306 unsigned getOrCreateBID(const VPBlockBase *Block) { 2307 return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++; 2308 } 2309 2310 Twine getOrCreateName(const VPBlockBase *Block); 2311 2312 Twine getUID(const VPBlockBase *Block); 2313 2314 /// Print the information related to a CFG edge between two VPBlockBases. 2315 void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden, 2316 const Twine &Label); 2317 2318 public: 2319 VPlanPrinter(raw_ostream &O, const VPlan &P) 2320 : OS(O), Plan(P), SlotTracker(&P) {} 2321 2322 LLVM_DUMP_METHOD void dump(); 2323 }; 2324 2325 struct VPlanIngredient { 2326 const Value *V; 2327 2328 VPlanIngredient(const Value *V) : V(V) {} 2329 2330 void print(raw_ostream &O) const; 2331 }; 2332 2333 inline raw_ostream &operator<<(raw_ostream &OS, const VPlanIngredient &I) { 2334 I.print(OS); 2335 return OS; 2336 } 2337 2338 inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) { 2339 Plan.print(OS); 2340 return OS; 2341 } 2342 #endif 2343 2344 //===----------------------------------------------------------------------===// 2345 // VPlan Utilities 2346 //===----------------------------------------------------------------------===// 2347 2348 /// Class that provides utilities for VPBlockBases in VPlan. 2349 class VPBlockUtils { 2350 public: 2351 VPBlockUtils() = delete; 2352 2353 /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p 2354 /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p 2355 /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. \p BlockPtr's 2356 /// successors are moved from \p BlockPtr to \p NewBlock and \p BlockPtr's 2357 /// conditional bit is propagated to \p NewBlock. \p NewBlock must have 2358 /// neither successors nor predecessors. 2359 static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) { 2360 assert(NewBlock->getSuccessors().empty() && 2361 NewBlock->getPredecessors().empty() && 2362 "Can't insert new block with predecessors or successors."); 2363 NewBlock->setParent(BlockPtr->getParent()); 2364 SmallVector<VPBlockBase *> Succs(BlockPtr->successors()); 2365 for (VPBlockBase *Succ : Succs) { 2366 disconnectBlocks(BlockPtr, Succ); 2367 connectBlocks(NewBlock, Succ); 2368 } 2369 NewBlock->setCondBit(BlockPtr->getCondBit()); 2370 BlockPtr->setCondBit(nullptr); 2371 connectBlocks(BlockPtr, NewBlock); 2372 } 2373 2374 /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p 2375 /// BlockPtr. Add \p IfTrue and \p IfFalse as succesors of \p BlockPtr and \p 2376 /// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr 2377 /// parent to \p IfTrue and \p IfFalse. \p Condition is set as the successor 2378 /// selector. \p BlockPtr must have no successors and \p IfTrue and \p IfFalse 2379 /// must have neither successors nor predecessors. 2380 static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, 2381 VPValue *Condition, VPBlockBase *BlockPtr) { 2382 assert(IfTrue->getSuccessors().empty() && 2383 "Can't insert IfTrue with successors."); 2384 assert(IfFalse->getSuccessors().empty() && 2385 "Can't insert IfFalse with successors."); 2386 BlockPtr->setTwoSuccessors(IfTrue, IfFalse, Condition); 2387 IfTrue->setPredecessors({BlockPtr}); 2388 IfFalse->setPredecessors({BlockPtr}); 2389 IfTrue->setParent(BlockPtr->getParent()); 2390 IfFalse->setParent(BlockPtr->getParent()); 2391 } 2392 2393 /// Connect VPBlockBases \p From and \p To bi-directionally. Append \p To to 2394 /// the successors of \p From and \p From to the predecessors of \p To. Both 2395 /// VPBlockBases must have the same parent, which can be null. Both 2396 /// VPBlockBases can be already connected to other VPBlockBases. 2397 static void connectBlocks(VPBlockBase *From, VPBlockBase *To) { 2398 assert((From->getParent() == To->getParent()) && 2399 "Can't connect two block with different parents"); 2400 assert(From->getNumSuccessors() < 2 && 2401 "Blocks can't have more than two successors."); 2402 From->appendSuccessor(To); 2403 To->appendPredecessor(From); 2404 } 2405 2406 /// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To 2407 /// from the successors of \p From and \p From from the predecessors of \p To. 2408 static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To) { 2409 assert(To && "Successor to disconnect is null."); 2410 From->removeSuccessor(To); 2411 To->removePredecessor(From); 2412 } 2413 2414 /// Try to merge \p Block into its single predecessor, if \p Block is a 2415 /// VPBasicBlock and its predecessor has a single successor. Returns a pointer 2416 /// to the predecessor \p Block was merged into or nullptr otherwise. 2417 static VPBasicBlock *tryToMergeBlockIntoPredecessor(VPBlockBase *Block) { 2418 auto *VPBB = dyn_cast<VPBasicBlock>(Block); 2419 auto *PredVPBB = 2420 dyn_cast_or_null<VPBasicBlock>(Block->getSinglePredecessor()); 2421 if (!VPBB || !PredVPBB || PredVPBB->getNumSuccessors() != 1) 2422 return nullptr; 2423 2424 for (VPRecipeBase &R : make_early_inc_range(*VPBB)) 2425 R.moveBefore(*PredVPBB, PredVPBB->end()); 2426 VPBlockUtils::disconnectBlocks(PredVPBB, VPBB); 2427 auto *ParentRegion = cast<VPRegionBlock>(Block->getParent()); 2428 if (ParentRegion->getExit() == Block) 2429 ParentRegion->setExit(PredVPBB); 2430 SmallVector<VPBlockBase *> Successors(Block->successors()); 2431 for (auto *Succ : Successors) { 2432 VPBlockUtils::disconnectBlocks(Block, Succ); 2433 VPBlockUtils::connectBlocks(PredVPBB, Succ); 2434 } 2435 delete Block; 2436 return PredVPBB; 2437 } 2438 2439 /// Returns true if the edge \p FromBlock -> \p ToBlock is a back-edge. 2440 static bool isBackEdge(const VPBlockBase *FromBlock, 2441 const VPBlockBase *ToBlock, const VPLoopInfo *VPLI) { 2442 assert(FromBlock->getParent() == ToBlock->getParent() && 2443 FromBlock->getParent() && "Must be in same region"); 2444 const VPLoop *FromLoop = VPLI->getLoopFor(FromBlock); 2445 const VPLoop *ToLoop = VPLI->getLoopFor(ToBlock); 2446 if (!FromLoop || !ToLoop || FromLoop != ToLoop) 2447 return false; 2448 2449 // A back-edge is a branch from the loop latch to its header. 2450 return ToLoop->isLoopLatch(FromBlock) && ToBlock == ToLoop->getHeader(); 2451 } 2452 2453 /// Returns true if \p Block is a loop latch 2454 static bool blockIsLoopLatch(const VPBlockBase *Block, 2455 const VPLoopInfo *VPLInfo) { 2456 if (const VPLoop *ParentVPL = VPLInfo->getLoopFor(Block)) 2457 return ParentVPL->isLoopLatch(Block); 2458 2459 return false; 2460 } 2461 2462 /// Count and return the number of succesors of \p PredBlock excluding any 2463 /// backedges. 2464 static unsigned countSuccessorsNoBE(VPBlockBase *PredBlock, 2465 VPLoopInfo *VPLI) { 2466 unsigned Count = 0; 2467 for (VPBlockBase *SuccBlock : PredBlock->getSuccessors()) { 2468 if (!VPBlockUtils::isBackEdge(PredBlock, SuccBlock, VPLI)) 2469 Count++; 2470 } 2471 return Count; 2472 } 2473 2474 /// Return an iterator range over \p Range which only includes \p BlockTy 2475 /// blocks. The accesses are casted to \p BlockTy. 2476 template <typename BlockTy, typename T> 2477 static auto blocksOnly(const T &Range) { 2478 // Create BaseTy with correct const-ness based on BlockTy. 2479 using BaseTy = 2480 typename std::conditional<std::is_const<BlockTy>::value, 2481 const VPBlockBase, VPBlockBase>::type; 2482 2483 // We need to first create an iterator range over (const) BlocktTy & instead 2484 // of (const) BlockTy * for filter_range to work properly. 2485 auto Mapped = 2486 map_range(Range, [](BaseTy *Block) -> BaseTy & { return *Block; }); 2487 auto Filter = make_filter_range( 2488 Mapped, [](BaseTy &Block) { return isa<BlockTy>(&Block); }); 2489 return map_range(Filter, [](BaseTy &Block) -> BlockTy * { 2490 return cast<BlockTy>(&Block); 2491 }); 2492 } 2493 }; 2494 2495 class VPInterleavedAccessInfo { 2496 DenseMap<VPInstruction *, InterleaveGroup<VPInstruction> *> 2497 InterleaveGroupMap; 2498 2499 /// Type for mapping of instruction based interleave groups to VPInstruction 2500 /// interleave groups 2501 using Old2NewTy = DenseMap<InterleaveGroup<Instruction> *, 2502 InterleaveGroup<VPInstruction> *>; 2503 2504 /// Recursively \p Region and populate VPlan based interleave groups based on 2505 /// \p IAI. 2506 void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New, 2507 InterleavedAccessInfo &IAI); 2508 /// Recursively traverse \p Block and populate VPlan based interleave groups 2509 /// based on \p IAI. 2510 void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New, 2511 InterleavedAccessInfo &IAI); 2512 2513 public: 2514 VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI); 2515 2516 ~VPInterleavedAccessInfo() { 2517 SmallPtrSet<InterleaveGroup<VPInstruction> *, 4> DelSet; 2518 // Avoid releasing a pointer twice. 2519 for (auto &I : InterleaveGroupMap) 2520 DelSet.insert(I.second); 2521 for (auto *Ptr : DelSet) 2522 delete Ptr; 2523 } 2524 2525 /// Get the interleave group that \p Instr belongs to. 2526 /// 2527 /// \returns nullptr if doesn't have such group. 2528 InterleaveGroup<VPInstruction> * 2529 getInterleaveGroup(VPInstruction *Instr) const { 2530 return InterleaveGroupMap.lookup(Instr); 2531 } 2532 }; 2533 2534 /// Class that maps (parts of) an existing VPlan to trees of combined 2535 /// VPInstructions. 2536 class VPlanSlp { 2537 enum class OpMode { Failed, Load, Opcode }; 2538 2539 /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as 2540 /// DenseMap keys. 2541 struct BundleDenseMapInfo { 2542 static SmallVector<VPValue *, 4> getEmptyKey() { 2543 return {reinterpret_cast<VPValue *>(-1)}; 2544 } 2545 2546 static SmallVector<VPValue *, 4> getTombstoneKey() { 2547 return {reinterpret_cast<VPValue *>(-2)}; 2548 } 2549 2550 static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) { 2551 return static_cast<unsigned>(hash_combine_range(V.begin(), V.end())); 2552 } 2553 2554 static bool isEqual(const SmallVector<VPValue *, 4> &LHS, 2555 const SmallVector<VPValue *, 4> &RHS) { 2556 return LHS == RHS; 2557 } 2558 }; 2559 2560 /// Mapping of values in the original VPlan to a combined VPInstruction. 2561 DenseMap<SmallVector<VPValue *, 4>, VPInstruction *, BundleDenseMapInfo> 2562 BundleToCombined; 2563 2564 VPInterleavedAccessInfo &IAI; 2565 2566 /// Basic block to operate on. For now, only instructions in a single BB are 2567 /// considered. 2568 const VPBasicBlock &BB; 2569 2570 /// Indicates whether we managed to combine all visited instructions or not. 2571 bool CompletelySLP = true; 2572 2573 /// Width of the widest combined bundle in bits. 2574 unsigned WidestBundleBits = 0; 2575 2576 using MultiNodeOpTy = 2577 typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>; 2578 2579 // Input operand bundles for the current multi node. Each multi node operand 2580 // bundle contains values not matching the multi node's opcode. They will 2581 // be reordered in reorderMultiNodeOps, once we completed building a 2582 // multi node. 2583 SmallVector<MultiNodeOpTy, 4> MultiNodeOps; 2584 2585 /// Indicates whether we are building a multi node currently. 2586 bool MultiNodeActive = false; 2587 2588 /// Check if we can vectorize Operands together. 2589 bool areVectorizable(ArrayRef<VPValue *> Operands) const; 2590 2591 /// Add combined instruction \p New for the bundle \p Operands. 2592 void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New); 2593 2594 /// Indicate we hit a bundle we failed to combine. Returns nullptr for now. 2595 VPInstruction *markFailed(); 2596 2597 /// Reorder operands in the multi node to maximize sequential memory access 2598 /// and commutative operations. 2599 SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps(); 2600 2601 /// Choose the best candidate to use for the lane after \p Last. The set of 2602 /// candidates to choose from are values with an opcode matching \p Last's 2603 /// or loads consecutive to \p Last. 2604 std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last, 2605 SmallPtrSetImpl<VPValue *> &Candidates, 2606 VPInterleavedAccessInfo &IAI); 2607 2608 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2609 /// Print bundle \p Values to dbgs(). 2610 void dumpBundle(ArrayRef<VPValue *> Values); 2611 #endif 2612 2613 public: 2614 VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {} 2615 2616 ~VPlanSlp() = default; 2617 2618 /// Tries to build an SLP tree rooted at \p Operands and returns a 2619 /// VPInstruction combining \p Operands, if they can be combined. 2620 VPInstruction *buildGraph(ArrayRef<VPValue *> Operands); 2621 2622 /// Return the width of the widest combined bundle in bits. 2623 unsigned getWidestBundleBits() const { return WidestBundleBits; } 2624 2625 /// Return true if all visited instruction can be combined. 2626 bool isCompletelySLP() const { return CompletelySLP; } 2627 }; 2628 } // end namespace llvm 2629 2630 #endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H 2631