1 //===- MachinePipeliner.h - Machine Software Pipeliner Pass -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // An implementation of the Swing Modulo Scheduling (SMS) software pipeliner. 10 // 11 // Software pipelining (SWP) is an instruction scheduling technique for loops 12 // that overlap loop iterations and exploits ILP via a compiler transformation. 13 // 14 // Swing Modulo Scheduling is an implementation of software pipelining 15 // that generates schedules that are near optimal in terms of initiation 16 // interval, register requirements, and stage count. See the papers: 17 // 18 // "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa, 19 // A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Proceedings of the 1996 20 // Conference on Parallel Architectures and Compilation Techiniques. 21 // 22 // "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J. 23 // Llosa, E. Ayguade, A. Gonzalez, M. Valero, and J. Eckhardt. In IEEE 24 // Transactions on Computers, Vol. 50, No. 3, 2001. 25 // 26 // "An Implementation of Swing Modulo Scheduling With Extensions for 27 // Superblocks", by T. Lattner, Master's Thesis, University of Illinois at 28 // Urbana-Champaign, 2005. 29 // 30 // 31 // The SMS algorithm consists of three main steps after computing the minimal 32 // initiation interval (MII). 33 // 1) Analyze the dependence graph and compute information about each 34 // instruction in the graph. 35 // 2) Order the nodes (instructions) by priority based upon the heuristics 36 // described in the algorithm. 37 // 3) Attempt to schedule the nodes in the specified order using the MII. 38 // 39 //===----------------------------------------------------------------------===// 40 #ifndef LLVM_CODEGEN_MACHINEPIPELINER_H 41 #define LLVM_CODEGEN_MACHINEPIPELINER_H 42 43 #include "llvm/ADT/STLExtras.h" 44 #include "llvm/ADT/SetVector.h" 45 #include "llvm/CodeGen/DFAPacketizer.h" 46 #include "llvm/CodeGen/MachineDominators.h" 47 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" 48 #include "llvm/CodeGen/MachineScheduler.h" 49 #include "llvm/CodeGen/RegisterClassInfo.h" 50 #include "llvm/CodeGen/ScheduleDAGInstrs.h" 51 #include "llvm/CodeGen/ScheduleDAGMutation.h" 52 #include "llvm/CodeGen/TargetInstrInfo.h" 53 #include "llvm/CodeGen/WindowScheduler.h" 54 #include "llvm/InitializePasses.h" 55 56 #include <deque> 57 58 namespace llvm { 59 60 class AAResults; 61 class NodeSet; 62 class SMSchedule; 63 64 extern cl::opt<bool> SwpEnableCopyToPhi; 65 extern cl::opt<int> SwpForceIssueWidth; 66 67 /// The main class in the implementation of the target independent 68 /// software pipeliner pass. 69 class MachinePipeliner : public MachineFunctionPass { 70 public: 71 MachineFunction *MF = nullptr; 72 MachineOptimizationRemarkEmitter *ORE = nullptr; 73 const MachineLoopInfo *MLI = nullptr; 74 const MachineDominatorTree *MDT = nullptr; 75 const InstrItineraryData *InstrItins = nullptr; 76 const TargetInstrInfo *TII = nullptr; 77 RegisterClassInfo RegClassInfo; 78 bool disabledByPragma = false; 79 unsigned II_setByPragma = 0; 80 81 #ifndef NDEBUG 82 static int NumTries; 83 #endif 84 85 /// Cache the target analysis information about the loop. 86 struct LoopInfo { 87 MachineBasicBlock *TBB = nullptr; 88 MachineBasicBlock *FBB = nullptr; 89 SmallVector<MachineOperand, 4> BrCond; 90 MachineInstr *LoopInductionVar = nullptr; 91 MachineInstr *LoopCompare = nullptr; 92 std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopPipelinerInfo = 93 nullptr; 94 }; 95 LoopInfo LI; 96 97 static char ID; 98 99 MachinePipeliner() : MachineFunctionPass(ID) { 100 initializeMachinePipelinerPass(*PassRegistry::getPassRegistry()); 101 } 102 103 bool runOnMachineFunction(MachineFunction &MF) override; 104 105 void getAnalysisUsage(AnalysisUsage &AU) const override; 106 107 private: 108 void preprocessPhiNodes(MachineBasicBlock &B); 109 bool canPipelineLoop(MachineLoop &L); 110 bool scheduleLoop(MachineLoop &L); 111 bool swingModuloScheduler(MachineLoop &L); 112 void setPragmaPipelineOptions(MachineLoop &L); 113 bool runWindowScheduler(MachineLoop &L); 114 bool useSwingModuloScheduler(); 115 bool useWindowScheduler(bool Changed); 116 }; 117 118 /// Represents a dependence between two instruction. 119 class SwingSchedulerDDGEdge { 120 SUnit *Dst = nullptr; 121 SDep Pred; 122 unsigned Distance = 0; 123 124 public: 125 /// Creates an edge corresponding to an edge represented by \p PredOrSucc and 126 /// \p Dep in the original DAG. This pair has no information about the 127 /// direction of the edge, so we need to pass an additional argument \p 128 /// IsSucc. 129 SwingSchedulerDDGEdge(SUnit *PredOrSucc, const SDep &Dep, bool IsSucc) 130 : Dst(PredOrSucc), Pred(Dep), Distance(0u) { 131 SUnit *Src = Dep.getSUnit(); 132 133 if (IsSucc) { 134 std::swap(Src, Dst); 135 Pred.setSUnit(Src); 136 } 137 138 // An anti-dependence to PHI means loop-carried dependence. 139 if (Pred.getKind() == SDep::Anti && Src->getInstr()->isPHI()) { 140 Distance = 1; 141 std::swap(Src, Dst); 142 auto Reg = Pred.getReg(); 143 Pred = SDep(Src, SDep::Kind::Data, Reg); 144 } 145 } 146 147 /// Returns the SUnit from which the edge comes (source node). 148 SUnit *getSrc() const { return Pred.getSUnit(); } 149 150 /// Returns the SUnit to which the edge points (destination node). 151 SUnit *getDst() const { return Dst; } 152 153 /// Returns the latency value for the edge. 154 unsigned getLatency() const { return Pred.getLatency(); } 155 156 /// Sets the latency for the edge. 157 void setLatency(unsigned Latency) { Pred.setLatency(Latency); } 158 159 /// Returns the distance value for the edge. 160 unsigned getDistance() const { return Distance; } 161 162 /// Sets the distance value for the edge. 163 void setDistance(unsigned D) { Distance = D; } 164 165 /// Returns the register associated with the edge. 166 Register getReg() const { return Pred.getReg(); } 167 168 /// Returns true if the edge represents anti dependence. 169 bool isAntiDep() const { return Pred.getKind() == SDep::Kind::Anti; } 170 171 /// Returns true if the edge represents output dependence. 172 bool isOutputDep() const { return Pred.getKind() == SDep::Kind::Output; } 173 174 /// Returns true if the edge represents a dependence that is not data, anti or 175 /// output dependence. 176 bool isOrderDep() const { return Pred.getKind() == SDep::Kind::Order; } 177 178 /// Returns true if the edge represents unknown scheduling barrier. 179 bool isBarrier() const { return Pred.isBarrier(); } 180 181 /// Returns true if the edge represents an artificial dependence. 182 bool isArtificial() const { return Pred.isArtificial(); } 183 184 /// Tests if this is a Data dependence that is associated with a register. 185 bool isAssignedRegDep() const { return Pred.isAssignedRegDep(); } 186 187 /// Returns true for DDG nodes that we ignore when computing the cost 188 /// functions. We ignore the back-edge recurrence in order to avoid unbounded 189 /// recursion in the calculation of the ASAP, ALAP, etc functions. 190 bool ignoreDependence(bool IgnoreAnti) const; 191 }; 192 193 /// Represents dependencies between instructions. This class is a wrapper of 194 /// `SUnits` and its dependencies to manipulate back-edges in a natural way. 195 /// Currently it only supports back-edges via PHI, which are expressed as 196 /// anti-dependencies in the original DAG. 197 /// FIXME: Support any other loop-carried dependencies 198 class SwingSchedulerDDG { 199 using EdgesType = SmallVector<SwingSchedulerDDGEdge, 4>; 200 201 struct SwingSchedulerDDGEdges { 202 EdgesType Preds; 203 EdgesType Succs; 204 }; 205 206 void initEdges(SUnit *SU); 207 208 SUnit *EntrySU; 209 SUnit *ExitSU; 210 211 std::vector<SwingSchedulerDDGEdges> EdgesVec; 212 SwingSchedulerDDGEdges EntrySUEdges; 213 SwingSchedulerDDGEdges ExitSUEdges; 214 215 void addEdge(const SUnit *SU, const SwingSchedulerDDGEdge &Edge); 216 217 SwingSchedulerDDGEdges &getEdges(const SUnit *SU); 218 const SwingSchedulerDDGEdges &getEdges(const SUnit *SU) const; 219 220 public: 221 SwingSchedulerDDG(std::vector<SUnit> &SUnits, SUnit *EntrySU, SUnit *ExitSU); 222 223 const EdgesType &getInEdges(const SUnit *SU) const; 224 225 const EdgesType &getOutEdges(const SUnit *SU) const; 226 }; 227 228 /// This class builds the dependence graph for the instructions in a loop, 229 /// and attempts to schedule the instructions using the SMS algorithm. 230 class SwingSchedulerDAG : public ScheduleDAGInstrs { 231 MachinePipeliner &Pass; 232 233 std::unique_ptr<SwingSchedulerDDG> DDG; 234 235 /// The minimum initiation interval between iterations for this schedule. 236 unsigned MII = 0; 237 /// The maximum initiation interval between iterations for this schedule. 238 unsigned MAX_II = 0; 239 /// Set to true if a valid pipelined schedule is found for the loop. 240 bool Scheduled = false; 241 MachineLoop &Loop; 242 LiveIntervals &LIS; 243 const RegisterClassInfo &RegClassInfo; 244 unsigned II_setByPragma = 0; 245 TargetInstrInfo::PipelinerLoopInfo *LoopPipelinerInfo = nullptr; 246 247 /// A topological ordering of the SUnits, which is needed for changing 248 /// dependences and iterating over the SUnits. 249 ScheduleDAGTopologicalSort Topo; 250 251 struct NodeInfo { 252 int ASAP = 0; 253 int ALAP = 0; 254 int ZeroLatencyDepth = 0; 255 int ZeroLatencyHeight = 0; 256 257 NodeInfo() = default; 258 }; 259 /// Computed properties for each node in the graph. 260 std::vector<NodeInfo> ScheduleInfo; 261 262 enum OrderKind { BottomUp = 0, TopDown = 1 }; 263 /// Computed node ordering for scheduling. 264 SetVector<SUnit *> NodeOrder; 265 266 using NodeSetType = SmallVector<NodeSet, 8>; 267 using ValueMapTy = DenseMap<unsigned, unsigned>; 268 using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>; 269 using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>; 270 271 /// Instructions to change when emitting the final schedule. 272 DenseMap<SUnit *, std::pair<unsigned, int64_t>> InstrChanges; 273 274 /// We may create a new instruction, so remember it because it 275 /// must be deleted when the pass is finished. 276 DenseMap<MachineInstr*, MachineInstr *> NewMIs; 277 278 /// Ordered list of DAG postprocessing steps. 279 std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations; 280 281 /// Helper class to implement Johnson's circuit finding algorithm. 282 class Circuits { 283 std::vector<SUnit> &SUnits; 284 SetVector<SUnit *> Stack; 285 BitVector Blocked; 286 SmallVector<SmallPtrSet<SUnit *, 4>, 10> B; 287 SmallVector<SmallVector<int, 4>, 16> AdjK; 288 // Node to Index from ScheduleDAGTopologicalSort 289 std::vector<int> *Node2Idx; 290 unsigned NumPaths = 0u; 291 static unsigned MaxPaths; 292 293 public: 294 Circuits(std::vector<SUnit> &SUs, ScheduleDAGTopologicalSort &Topo) 295 : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) { 296 Node2Idx = new std::vector<int>(SUs.size()); 297 unsigned Idx = 0; 298 for (const auto &NodeNum : Topo) 299 Node2Idx->at(NodeNum) = Idx++; 300 } 301 Circuits &operator=(const Circuits &other) = delete; 302 Circuits(const Circuits &other) = delete; 303 ~Circuits() { delete Node2Idx; } 304 305 /// Reset the data structures used in the circuit algorithm. 306 void reset() { 307 Stack.clear(); 308 Blocked.reset(); 309 B.assign(SUnits.size(), SmallPtrSet<SUnit *, 4>()); 310 NumPaths = 0; 311 } 312 313 void createAdjacencyStructure(SwingSchedulerDAG *DAG); 314 bool circuit(int V, int S, NodeSetType &NodeSets, 315 const SwingSchedulerDAG *DAG, bool HasBackedge = false); 316 void unblock(int U); 317 }; 318 319 struct CopyToPhiMutation : public ScheduleDAGMutation { 320 void apply(ScheduleDAGInstrs *DAG) override; 321 }; 322 323 public: 324 SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis, 325 const RegisterClassInfo &rci, unsigned II, 326 TargetInstrInfo::PipelinerLoopInfo *PLI) 327 : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis), 328 RegClassInfo(rci), II_setByPragma(II), LoopPipelinerInfo(PLI), 329 Topo(SUnits, &ExitSU) { 330 P.MF->getSubtarget().getSMSMutations(Mutations); 331 if (SwpEnableCopyToPhi) 332 Mutations.push_back(std::make_unique<CopyToPhiMutation>()); 333 } 334 335 void schedule() override; 336 void finishBlock() override; 337 338 /// Return true if the loop kernel has been scheduled. 339 bool hasNewSchedule() { return Scheduled; } 340 341 /// Return the earliest time an instruction may be scheduled. 342 int getASAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ASAP; } 343 344 /// Return the latest time an instruction my be scheduled. 345 int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; } 346 347 /// The mobility function, which the number of slots in which 348 /// an instruction may be scheduled. 349 int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); } 350 351 /// The depth, in the dependence graph, for a node. 352 unsigned getDepth(SUnit *Node) { return Node->getDepth(); } 353 354 /// The maximum unweighted length of a path from an arbitrary node to the 355 /// given node in which each edge has latency 0 356 int getZeroLatencyDepth(SUnit *Node) { 357 return ScheduleInfo[Node->NodeNum].ZeroLatencyDepth; 358 } 359 360 /// The height, in the dependence graph, for a node. 361 unsigned getHeight(SUnit *Node) { return Node->getHeight(); } 362 363 /// The maximum unweighted length of a path from the given node to an 364 /// arbitrary node in which each edge has latency 0 365 int getZeroLatencyHeight(SUnit *Node) { 366 return ScheduleInfo[Node->NodeNum].ZeroLatencyHeight; 367 } 368 369 bool isLoopCarriedDep(const SwingSchedulerDDGEdge &Edge) const; 370 371 void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule); 372 373 void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs); 374 375 /// Return the new base register that was stored away for the changed 376 /// instruction. 377 unsigned getInstrBaseReg(SUnit *SU) const { 378 DenseMap<SUnit *, std::pair<unsigned, int64_t>>::const_iterator It = 379 InstrChanges.find(SU); 380 if (It != InstrChanges.end()) 381 return It->second.first; 382 return 0; 383 } 384 385 void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) { 386 Mutations.push_back(std::move(Mutation)); 387 } 388 389 static bool classof(const ScheduleDAGInstrs *DAG) { return true; } 390 391 const SwingSchedulerDDG *getDDG() const { return DDG.get(); } 392 393 private: 394 void addLoopCarriedDependences(AAResults *AA); 395 void updatePhiDependences(); 396 void changeDependences(); 397 unsigned calculateResMII(); 398 unsigned calculateRecMII(NodeSetType &RecNodeSets); 399 void findCircuits(NodeSetType &NodeSets); 400 void fuseRecs(NodeSetType &NodeSets); 401 void removeDuplicateNodes(NodeSetType &NodeSets); 402 void computeNodeFunctions(NodeSetType &NodeSets); 403 void registerPressureFilter(NodeSetType &NodeSets); 404 void colocateNodeSets(NodeSetType &NodeSets); 405 void checkNodeSets(NodeSetType &NodeSets); 406 void groupRemainingNodes(NodeSetType &NodeSets); 407 void addConnectedNodes(SUnit *SU, NodeSet &NewSet, 408 SetVector<SUnit *> &NodesAdded); 409 void computeNodeOrder(NodeSetType &NodeSets); 410 void checkValidNodeOrder(const NodeSetType &Circuits) const; 411 bool schedulePipeline(SMSchedule &Schedule); 412 bool computeDelta(MachineInstr &MI, unsigned &Delta) const; 413 MachineInstr *findDefInLoop(Register Reg); 414 bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos, 415 unsigned &OffsetPos, unsigned &NewBase, 416 int64_t &NewOffset); 417 void postProcessDAG(); 418 /// Set the Minimum Initiation Interval for this schedule attempt. 419 void setMII(unsigned ResMII, unsigned RecMII); 420 /// Set the Maximum Initiation Interval for this schedule attempt. 421 void setMAX_II(); 422 }; 423 424 /// A NodeSet contains a set of SUnit DAG nodes with additional information 425 /// that assigns a priority to the set. 426 class NodeSet { 427 SetVector<SUnit *> Nodes; 428 bool HasRecurrence = false; 429 unsigned RecMII = 0; 430 int MaxMOV = 0; 431 unsigned MaxDepth = 0; 432 unsigned Colocate = 0; 433 SUnit *ExceedPressure = nullptr; 434 unsigned Latency = 0; 435 436 public: 437 using iterator = SetVector<SUnit *>::const_iterator; 438 439 NodeSet() = default; 440 NodeSet(iterator S, iterator E, const SwingSchedulerDAG *DAG) 441 : Nodes(S, E), HasRecurrence(true) { 442 // Calculate the latency of this node set. 443 // Example to demonstrate the calculation: 444 // Given: N0 -> N1 -> N2 -> N0 445 // Edges: 446 // (N0 -> N1, 3) 447 // (N0 -> N1, 5) 448 // (N1 -> N2, 2) 449 // (N2 -> N0, 1) 450 // The total latency which is a lower bound of the recurrence MII is the 451 // longest path from N0 back to N0 given only the edges of this node set. 452 // In this example, the latency is: 5 + 2 + 1 = 8. 453 // 454 // Hold a map from each SUnit in the circle to the maximum distance from the 455 // source node by only considering the nodes. 456 const SwingSchedulerDDG *DDG = DAG->getDDG(); 457 DenseMap<SUnit *, unsigned> SUnitToDistance; 458 for (auto *Node : Nodes) 459 SUnitToDistance[Node] = 0; 460 461 for (unsigned I = 1, E = Nodes.size(); I <= E; ++I) { 462 SUnit *U = Nodes[I - 1]; 463 SUnit *V = Nodes[I % Nodes.size()]; 464 for (const SwingSchedulerDDGEdge &Succ : DDG->getOutEdges(U)) { 465 SUnit *SuccSUnit = Succ.getDst(); 466 if (V != SuccSUnit) 467 continue; 468 if (SUnitToDistance[U] + Succ.getLatency() > SUnitToDistance[V]) { 469 SUnitToDistance[V] = SUnitToDistance[U] + Succ.getLatency(); 470 } 471 } 472 } 473 // Handle a back-edge in loop carried dependencies 474 SUnit *FirstNode = Nodes[0]; 475 SUnit *LastNode = Nodes[Nodes.size() - 1]; 476 477 for (auto &PI : DDG->getInEdges(LastNode)) { 478 // If we have an order dep that is potentially loop carried then a 479 // back-edge exists between the last node and the first node that isn't 480 // modeled in the DAG. Handle it manually by adding 1 to the distance of 481 // the last node. 482 if (PI.getSrc() != FirstNode || !PI.isOrderDep() || 483 !DAG->isLoopCarriedDep(PI)) 484 continue; 485 SUnitToDistance[FirstNode] = 486 std::max(SUnitToDistance[FirstNode], SUnitToDistance[LastNode] + 1); 487 } 488 489 // The latency is the distance from the source node to itself. 490 Latency = SUnitToDistance[Nodes.front()]; 491 } 492 493 bool insert(SUnit *SU) { return Nodes.insert(SU); } 494 495 void insert(iterator S, iterator E) { Nodes.insert(S, E); } 496 497 template <typename UnaryPredicate> bool remove_if(UnaryPredicate P) { 498 return Nodes.remove_if(P); 499 } 500 501 unsigned count(SUnit *SU) const { return Nodes.count(SU); } 502 503 bool hasRecurrence() { return HasRecurrence; }; 504 505 unsigned size() const { return Nodes.size(); } 506 507 bool empty() const { return Nodes.empty(); } 508 509 SUnit *getNode(unsigned i) const { return Nodes[i]; }; 510 511 void setRecMII(unsigned mii) { RecMII = mii; }; 512 513 void setColocate(unsigned c) { Colocate = c; }; 514 515 void setExceedPressure(SUnit *SU) { ExceedPressure = SU; } 516 517 bool isExceedSU(SUnit *SU) { return ExceedPressure == SU; } 518 519 int compareRecMII(NodeSet &RHS) { return RecMII - RHS.RecMII; } 520 521 int getRecMII() { return RecMII; } 522 523 /// Summarize node functions for the entire node set. 524 void computeNodeSetInfo(SwingSchedulerDAG *SSD) { 525 for (SUnit *SU : *this) { 526 MaxMOV = std::max(MaxMOV, SSD->getMOV(SU)); 527 MaxDepth = std::max(MaxDepth, SSD->getDepth(SU)); 528 } 529 } 530 531 unsigned getLatency() { return Latency; } 532 533 unsigned getMaxDepth() { return MaxDepth; } 534 535 void clear() { 536 Nodes.clear(); 537 RecMII = 0; 538 HasRecurrence = false; 539 MaxMOV = 0; 540 MaxDepth = 0; 541 Colocate = 0; 542 ExceedPressure = nullptr; 543 } 544 545 operator SetVector<SUnit *> &() { return Nodes; } 546 547 /// Sort the node sets by importance. First, rank them by recurrence MII, 548 /// then by mobility (least mobile done first), and finally by depth. 549 /// Each node set may contain a colocate value which is used as the first 550 /// tie breaker, if it's set. 551 bool operator>(const NodeSet &RHS) const { 552 if (RecMII == RHS.RecMII) { 553 if (Colocate != 0 && RHS.Colocate != 0 && Colocate != RHS.Colocate) 554 return Colocate < RHS.Colocate; 555 if (MaxMOV == RHS.MaxMOV) 556 return MaxDepth > RHS.MaxDepth; 557 return MaxMOV < RHS.MaxMOV; 558 } 559 return RecMII > RHS.RecMII; 560 } 561 562 bool operator==(const NodeSet &RHS) const { 563 return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV && 564 MaxDepth == RHS.MaxDepth; 565 } 566 567 bool operator!=(const NodeSet &RHS) const { return !operator==(RHS); } 568 569 iterator begin() { return Nodes.begin(); } 570 iterator end() { return Nodes.end(); } 571 void print(raw_ostream &os) const; 572 573 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 574 LLVM_DUMP_METHOD void dump() const; 575 #endif 576 }; 577 578 // 16 was selected based on the number of ProcResource kinds for all 579 // existing Subtargets, so that SmallVector don't need to resize too often. 580 static const int DefaultProcResSize = 16; 581 582 class ResourceManager { 583 private: 584 const MCSubtargetInfo *STI; 585 const MCSchedModel &SM; 586 const TargetSubtargetInfo *ST; 587 const TargetInstrInfo *TII; 588 ScheduleDAGInstrs *DAG; 589 const bool UseDFA; 590 /// DFA resources for each slot 591 llvm::SmallVector<std::unique_ptr<DFAPacketizer>> DFAResources; 592 /// Modulo Reservation Table. When a resource with ID R is consumed in cycle 593 /// C, it is counted in MRT[C mod II][R]. (Used when UseDFA == F) 594 llvm::SmallVector<llvm::SmallVector<uint64_t, DefaultProcResSize>> MRT; 595 /// The number of scheduled micro operations for each slot. Micro operations 596 /// are assumed to be scheduled one per cycle, starting with the cycle in 597 /// which the instruction is scheduled. 598 llvm::SmallVector<int> NumScheduledMops; 599 /// Each processor resource is associated with a so-called processor resource 600 /// mask. This vector allows to correlate processor resource IDs with 601 /// processor resource masks. There is exactly one element per each processor 602 /// resource declared by the scheduling model. 603 llvm::SmallVector<uint64_t, DefaultProcResSize> ProcResourceMasks; 604 int InitiationInterval = 0; 605 /// The number of micro operations that can be scheduled at a cycle. 606 int IssueWidth; 607 608 int calculateResMIIDFA() const; 609 /// Check if MRT is overbooked 610 bool isOverbooked() const; 611 /// Reserve resources on MRT 612 void reserveResources(const MCSchedClassDesc *SCDesc, int Cycle); 613 /// Unreserve resources on MRT 614 void unreserveResources(const MCSchedClassDesc *SCDesc, int Cycle); 615 616 /// Return M satisfying Dividend = Divisor * X + M, 0 < M < Divisor. 617 /// The slot on MRT to reserve a resource for the cycle C is positiveModulo(C, 618 /// II). 619 int positiveModulo(int Dividend, int Divisor) const { 620 assert(Divisor > 0); 621 int R = Dividend % Divisor; 622 if (R < 0) 623 R += Divisor; 624 return R; 625 } 626 627 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 628 LLVM_DUMP_METHOD void dumpMRT() const; 629 #endif 630 631 public: 632 ResourceManager(const TargetSubtargetInfo *ST, ScheduleDAGInstrs *DAG) 633 : STI(ST), SM(ST->getSchedModel()), ST(ST), TII(ST->getInstrInfo()), 634 DAG(DAG), UseDFA(ST->useDFAforSMS()), 635 ProcResourceMasks(SM.getNumProcResourceKinds(), 0), 636 IssueWidth(SM.IssueWidth) { 637 initProcResourceVectors(SM, ProcResourceMasks); 638 if (IssueWidth <= 0) 639 // If IssueWidth is not specified, set a sufficiently large value 640 IssueWidth = 100; 641 if (SwpForceIssueWidth > 0) 642 IssueWidth = SwpForceIssueWidth; 643 } 644 645 void initProcResourceVectors(const MCSchedModel &SM, 646 SmallVectorImpl<uint64_t> &Masks); 647 648 /// Check if the resources occupied by a machine instruction are available 649 /// in the current state. 650 bool canReserveResources(SUnit &SU, int Cycle); 651 652 /// Reserve the resources occupied by a machine instruction and change the 653 /// current state to reflect that change. 654 void reserveResources(SUnit &SU, int Cycle); 655 656 int calculateResMII() const; 657 658 /// Initialize resources with the initiation interval II. 659 void init(int II); 660 }; 661 662 /// This class represents the scheduled code. The main data structure is a 663 /// map from scheduled cycle to instructions. During scheduling, the 664 /// data structure explicitly represents all stages/iterations. When 665 /// the algorithm finshes, the schedule is collapsed into a single stage, 666 /// which represents instructions from different loop iterations. 667 /// 668 /// The SMS algorithm allows negative values for cycles, so the first cycle 669 /// in the schedule is the smallest cycle value. 670 class SMSchedule { 671 private: 672 /// Map from execution cycle to instructions. 673 DenseMap<int, std::deque<SUnit *>> ScheduledInstrs; 674 675 /// Map from instruction to execution cycle. 676 std::map<SUnit *, int> InstrToCycle; 677 678 /// Keep track of the first cycle value in the schedule. It starts 679 /// as zero, but the algorithm allows negative values. 680 int FirstCycle = 0; 681 682 /// Keep track of the last cycle value in the schedule. 683 int LastCycle = 0; 684 685 /// The initiation interval (II) for the schedule. 686 int InitiationInterval = 0; 687 688 /// Target machine information. 689 const TargetSubtargetInfo &ST; 690 691 /// Virtual register information. 692 MachineRegisterInfo &MRI; 693 694 ResourceManager ProcItinResources; 695 696 public: 697 SMSchedule(MachineFunction *mf, SwingSchedulerDAG *DAG) 698 : ST(mf->getSubtarget()), MRI(mf->getRegInfo()), 699 ProcItinResources(&ST, DAG) {} 700 701 void reset() { 702 ScheduledInstrs.clear(); 703 InstrToCycle.clear(); 704 FirstCycle = 0; 705 LastCycle = 0; 706 InitiationInterval = 0; 707 } 708 709 /// Set the initiation interval for this schedule. 710 void setInitiationInterval(int ii) { 711 InitiationInterval = ii; 712 ProcItinResources.init(ii); 713 } 714 715 /// Return the initiation interval for this schedule. 716 int getInitiationInterval() const { return InitiationInterval; } 717 718 /// Return the first cycle in the completed schedule. This 719 /// can be a negative value. 720 int getFirstCycle() const { return FirstCycle; } 721 722 /// Return the last cycle in the finalized schedule. 723 int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; } 724 725 /// Return the cycle of the earliest scheduled instruction in the dependence 726 /// chain. 727 int earliestCycleInChain(const SwingSchedulerDDGEdge &Dep, 728 const SwingSchedulerDDG *DDG); 729 730 /// Return the cycle of the latest scheduled instruction in the dependence 731 /// chain. 732 int latestCycleInChain(const SwingSchedulerDDGEdge &Dep, 733 const SwingSchedulerDDG *DDG); 734 735 void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int II, 736 SwingSchedulerDAG *DAG); 737 bool insert(SUnit *SU, int StartCycle, int EndCycle, int II); 738 739 /// Iterators for the cycle to instruction map. 740 using sched_iterator = DenseMap<int, std::deque<SUnit *>>::iterator; 741 using const_sched_iterator = 742 DenseMap<int, std::deque<SUnit *>>::const_iterator; 743 744 /// Return true if the instruction is scheduled at the specified stage. 745 bool isScheduledAtStage(SUnit *SU, unsigned StageNum) { 746 return (stageScheduled(SU) == (int)StageNum); 747 } 748 749 /// Return the stage for a scheduled instruction. Return -1 if 750 /// the instruction has not been scheduled. 751 int stageScheduled(SUnit *SU) const { 752 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU); 753 if (it == InstrToCycle.end()) 754 return -1; 755 return (it->second - FirstCycle) / InitiationInterval; 756 } 757 758 /// Return the cycle for a scheduled instruction. This function normalizes 759 /// the first cycle to be 0. 760 unsigned cycleScheduled(SUnit *SU) const { 761 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU); 762 assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled."); 763 return (it->second - FirstCycle) % InitiationInterval; 764 } 765 766 /// Return the maximum stage count needed for this schedule. 767 unsigned getMaxStageCount() { 768 return (LastCycle - FirstCycle) / InitiationInterval; 769 } 770 771 /// Return the instructions that are scheduled at the specified cycle. 772 std::deque<SUnit *> &getInstructions(int cycle) { 773 return ScheduledInstrs[cycle]; 774 } 775 776 SmallSet<SUnit *, 8> 777 computeUnpipelineableNodes(SwingSchedulerDAG *SSD, 778 TargetInstrInfo::PipelinerLoopInfo *PLI); 779 780 std::deque<SUnit *> 781 reorderInstructions(const SwingSchedulerDAG *SSD, 782 const std::deque<SUnit *> &Instrs) const; 783 784 bool 785 normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD, 786 TargetInstrInfo::PipelinerLoopInfo *PLI); 787 bool isValidSchedule(SwingSchedulerDAG *SSD); 788 void finalizeSchedule(SwingSchedulerDAG *SSD); 789 void orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU, 790 std::deque<SUnit *> &Insts) const; 791 bool isLoopCarried(const SwingSchedulerDAG *SSD, MachineInstr &Phi) const; 792 bool isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD, MachineInstr *Def, 793 MachineOperand &MO) const; 794 795 bool onlyHasLoopCarriedOutputOrOrderPreds(SUnit *SU, 796 const SwingSchedulerDDG *DDG) const; 797 void print(raw_ostream &os) const; 798 void dump() const; 799 }; 800 801 } // end namespace llvm 802 803 #endif // LLVM_CODEGEN_MACHINEPIPELINER_H 804