1 //=- llvm/CodeGen/DFAPacketizer.cpp - DFA Packetizer for VLIW -*- C++ -*-=====// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // This class implements a deterministic finite automaton (DFA) based 9 // packetizing mechanism for VLIW architectures. It provides APIs to 10 // determine whether there exists a legal mapping of instructions to 11 // functional unit assignments in a packet. The DFA is auto-generated from 12 // the target's Schedule.td file. 13 // 14 // A DFA consists of 3 major elements: states, inputs, and transitions. For 15 // the packetizing mechanism, the input is the set of instruction classes for 16 // a target. The state models all possible combinations of functional unit 17 // consumption for a given set of instructions in a packet. A transition 18 // models the addition of an instruction to a packet. In the DFA constructed 19 // by this class, if an instruction can be added to a packet, then a valid 20 // transition exists from the corresponding state. Invalid transitions 21 // indicate that the instruction cannot be added to the current packet. 22 // 23 //===----------------------------------------------------------------------===// 24 25 #include "llvm/CodeGen/DFAPacketizer.h" 26 #include "llvm/ADT/StringExtras.h" 27 #include "llvm/CodeGen/MachineFunction.h" 28 #include "llvm/CodeGen/MachineInstr.h" 29 #include "llvm/CodeGen/MachineInstrBundle.h" 30 #include "llvm/CodeGen/ScheduleDAG.h" 31 #include "llvm/CodeGen/ScheduleDAGInstrs.h" 32 #include "llvm/CodeGen/TargetInstrInfo.h" 33 #include "llvm/CodeGen/TargetSubtargetInfo.h" 34 #include "llvm/MC/MCInstrDesc.h" 35 #include "llvm/MC/MCInstrItineraries.h" 36 #include "llvm/Support/CommandLine.h" 37 #include "llvm/Support/Debug.h" 38 #include "llvm/Support/raw_ostream.h" 39 #include <algorithm> 40 #include <cassert> 41 #include <iterator> 42 #include <memory> 43 #include <vector> 44 45 using namespace llvm; 46 47 #define DEBUG_TYPE "packets" 48 49 static cl::opt<unsigned> InstrLimit("dfa-instr-limit", cl::Hidden, 50 cl::init(0), cl::desc("If present, stops packetizing after N instructions")); 51 52 static unsigned InstrCount = 0; 53 54 // -------------------------------------------------------------------- 55 // Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp 56 57 static DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) { 58 return (Inp << DFA_MAX_RESOURCES) | FuncUnits; 59 } 60 61 /// Return the DFAInput for an instruction class input vector. 62 /// This function is used in both DFAPacketizer.cpp and in 63 /// DFAPacketizerEmitter.cpp. 64 static DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) { 65 DFAInput InsnInput = 0; 66 assert((InsnClass.size() <= DFA_MAX_RESTERMS) && 67 "Exceeded maximum number of DFA terms"); 68 for (auto U : InsnClass) 69 InsnInput = addDFAFuncUnits(InsnInput, U); 70 return InsnInput; 71 } 72 73 // -------------------------------------------------------------------- 74 75 DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, 76 const DFAStateInput (*SIT)[2], const unsigned *SET, 77 const std::pair<unsigned, unsigned> *RTT, 78 const unsigned *RTET) 79 : InstrItins(I), DFAStateInputTable(SIT), DFAStateEntryTable(SET), 80 DFAResourceTransitionTable(RTT), DFAResourceTransitionEntryTable(RTET) { 81 // Make sure DFA types are large enough for the number of terms & resources. 82 static_assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= 83 (8 * sizeof(DFAInput)), 84 "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput"); 85 static_assert( 86 (DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput)), 87 "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput"); 88 clearResources(); 89 } 90 91 // Read the DFA transition table and update CachedTable. 92 // 93 // Format of the transition tables: 94 // DFAStateInputTable[][2] = pairs of <Input, Transition> for all valid 95 // transitions 96 // DFAStateEntryTable[i] = Index of the first entry in DFAStateInputTable 97 // for the ith state 98 // 99 void DFAPacketizer::ReadTable(unsigned int state) { 100 unsigned ThisStateIdx = DFAStateEntryTable[state]; 101 unsigned NextStateIdxInTable = DFAStateEntryTable[state + 1]; 102 // Early exit in case CachedTable has already contains this 103 // state's transitions. 104 if (CachedTable.count(UnsignPair(state, DFAStateInputTable[ThisStateIdx][0]))) 105 return; 106 107 for (unsigned TransitionIdx = ThisStateIdx; 108 TransitionIdx < NextStateIdxInTable; TransitionIdx++) { 109 auto TransitionPair = 110 UnsignPair(state, DFAStateInputTable[TransitionIdx][0]); 111 CachedTable[TransitionPair] = DFAStateInputTable[TransitionIdx][1]; 112 113 if (TrackResources) { 114 unsigned I = DFAResourceTransitionEntryTable[TransitionIdx]; 115 unsigned E = DFAResourceTransitionEntryTable[TransitionIdx + 1]; 116 CachedResourceTransitions[TransitionPair] = makeArrayRef( 117 &DFAResourceTransitionTable[I], &DFAResourceTransitionTable[E]); 118 } 119 } 120 } 121 122 // Return the DFAInput for an instruction class. 123 DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) { 124 // Note: this logic must match that in DFAPacketizerDefs.h for input vectors. 125 DFAInput InsnInput = 0; 126 unsigned i = 0; 127 (void)i; 128 for (const InstrStage *IS = InstrItins->beginStage(InsnClass), 129 *IE = InstrItins->endStage(InsnClass); IS != IE; ++IS) { 130 InsnInput = addDFAFuncUnits(InsnInput, IS->getUnits()); 131 assert((i++ < DFA_MAX_RESTERMS) && "Exceeded maximum number of DFA inputs"); 132 } 133 return InsnInput; 134 } 135 136 // Return the DFAInput for an instruction class input vector. 137 DFAInput DFAPacketizer::getInsnInput(const std::vector<unsigned> &InsnClass) { 138 return getDFAInsnInput(InsnClass); 139 } 140 141 // Check if the resources occupied by a MCInstrDesc are available in the 142 // current state. 143 bool DFAPacketizer::canReserveResources(const MCInstrDesc *MID) { 144 unsigned InsnClass = MID->getSchedClass(); 145 DFAInput InsnInput = getInsnInput(InsnClass); 146 UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput); 147 ReadTable(CurrentState); 148 return CachedTable.count(StateTrans) != 0; 149 } 150 151 // Reserve the resources occupied by a MCInstrDesc and change the current 152 // state to reflect that change. 153 void DFAPacketizer::reserveResources(const MCInstrDesc *MID) { 154 unsigned InsnClass = MID->getSchedClass(); 155 DFAInput InsnInput = getInsnInput(InsnClass); 156 UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput); 157 ReadTable(CurrentState); 158 159 if (TrackResources) { 160 DenseMap<unsigned, SmallVector<unsigned, 8>> NewResourceStates; 161 for (const auto &KV : CachedResourceTransitions[StateTrans]) { 162 assert(ResourceStates.count(KV.first)); 163 NewResourceStates[KV.second] = ResourceStates[KV.first]; 164 NewResourceStates[KV.second].push_back(KV.second); 165 } 166 ResourceStates = NewResourceStates; 167 } 168 assert(CachedTable.count(StateTrans) != 0); 169 CurrentState = CachedTable[StateTrans]; 170 } 171 172 // Check if the resources occupied by a machine instruction are available 173 // in the current state. 174 bool DFAPacketizer::canReserveResources(MachineInstr &MI) { 175 const MCInstrDesc &MID = MI.getDesc(); 176 return canReserveResources(&MID); 177 } 178 179 // Reserve the resources occupied by a machine instruction and change the 180 // current state to reflect that change. 181 void DFAPacketizer::reserveResources(MachineInstr &MI) { 182 const MCInstrDesc &MID = MI.getDesc(); 183 reserveResources(&MID); 184 } 185 186 unsigned DFAPacketizer::getUsedResources(unsigned InstIdx) { 187 assert(TrackResources && "getUsedResources requires resource tracking!"); 188 // Assert that there is at least one example of a valid bundle format. 189 assert(!ResourceStates.empty() && "Invalid bundle!"); 190 SmallVectorImpl<unsigned> &RS = ResourceStates.begin()->second; 191 192 // RS stores the cumulative resources used up to and including the I'th 193 // instruction. The 0th instruction is the base case. 194 if (InstIdx == 0) 195 return RS[0]; 196 // Return the difference between the cumulative resources used by InstIdx and 197 // its predecessor. 198 return RS[InstIdx] ^ RS[InstIdx - 1]; 199 } 200 201 namespace llvm { 202 203 // This class extends ScheduleDAGInstrs and overrides the schedule method 204 // to build the dependence graph. 205 class DefaultVLIWScheduler : public ScheduleDAGInstrs { 206 private: 207 AliasAnalysis *AA; 208 /// Ordered list of DAG postprocessing steps. 209 std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations; 210 211 public: 212 DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI, 213 AliasAnalysis *AA); 214 215 // Actual scheduling work. 216 void schedule() override; 217 218 /// DefaultVLIWScheduler takes ownership of the Mutation object. 219 void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) { 220 Mutations.push_back(std::move(Mutation)); 221 } 222 223 protected: 224 void postprocessDAG(); 225 }; 226 227 } // end namespace llvm 228 229 DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF, 230 MachineLoopInfo &MLI, 231 AliasAnalysis *AA) 232 : ScheduleDAGInstrs(MF, &MLI), AA(AA) { 233 CanHandleTerminators = true; 234 } 235 236 /// Apply each ScheduleDAGMutation step in order. 237 void DefaultVLIWScheduler::postprocessDAG() { 238 for (auto &M : Mutations) 239 M->apply(this); 240 } 241 242 void DefaultVLIWScheduler::schedule() { 243 // Build the scheduling graph. 244 buildSchedGraph(AA); 245 postprocessDAG(); 246 } 247 248 VLIWPacketizerList::VLIWPacketizerList(MachineFunction &mf, 249 MachineLoopInfo &mli, AliasAnalysis *aa) 250 : MF(mf), TII(mf.getSubtarget().getInstrInfo()), AA(aa) { 251 ResourceTracker = TII->CreateTargetScheduleState(MF.getSubtarget()); 252 ResourceTracker->setTrackResources(true); 253 VLIWScheduler = new DefaultVLIWScheduler(MF, mli, AA); 254 } 255 256 VLIWPacketizerList::~VLIWPacketizerList() { 257 delete VLIWScheduler; 258 delete ResourceTracker; 259 } 260 261 // End the current packet, bundle packet instructions and reset DFA state. 262 void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB, 263 MachineBasicBlock::iterator MI) { 264 LLVM_DEBUG({ 265 if (!CurrentPacketMIs.empty()) { 266 dbgs() << "Finalizing packet:\n"; 267 unsigned Idx = 0; 268 for (MachineInstr *MI : CurrentPacketMIs) { 269 unsigned R = ResourceTracker->getUsedResources(Idx++); 270 dbgs() << " * [res:0x" << utohexstr(R) << "] " << *MI; 271 } 272 } 273 }); 274 if (CurrentPacketMIs.size() > 1) { 275 MachineInstr &MIFirst = *CurrentPacketMIs.front(); 276 finalizeBundle(*MBB, MIFirst.getIterator(), MI.getInstrIterator()); 277 } 278 CurrentPacketMIs.clear(); 279 ResourceTracker->clearResources(); 280 LLVM_DEBUG(dbgs() << "End packet\n"); 281 } 282 283 // Bundle machine instructions into packets. 284 void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, 285 MachineBasicBlock::iterator BeginItr, 286 MachineBasicBlock::iterator EndItr) { 287 assert(VLIWScheduler && "VLIW Scheduler is not initialized!"); 288 VLIWScheduler->startBlock(MBB); 289 VLIWScheduler->enterRegion(MBB, BeginItr, EndItr, 290 std::distance(BeginItr, EndItr)); 291 VLIWScheduler->schedule(); 292 293 LLVM_DEBUG({ 294 dbgs() << "Scheduling DAG of the packetize region\n"; 295 VLIWScheduler->dump(); 296 }); 297 298 // Generate MI -> SU map. 299 MIToSUnit.clear(); 300 for (SUnit &SU : VLIWScheduler->SUnits) 301 MIToSUnit[SU.getInstr()] = &SU; 302 303 bool LimitPresent = InstrLimit.getPosition(); 304 305 // The main packetizer loop. 306 for (; BeginItr != EndItr; ++BeginItr) { 307 if (LimitPresent) { 308 if (InstrCount >= InstrLimit) { 309 EndItr = BeginItr; 310 break; 311 } 312 InstrCount++; 313 } 314 MachineInstr &MI = *BeginItr; 315 initPacketizerState(); 316 317 // End the current packet if needed. 318 if (isSoloInstruction(MI)) { 319 endPacket(MBB, MI); 320 continue; 321 } 322 323 // Ignore pseudo instructions. 324 if (ignorePseudoInstruction(MI, MBB)) 325 continue; 326 327 SUnit *SUI = MIToSUnit[&MI]; 328 assert(SUI && "Missing SUnit Info!"); 329 330 // Ask DFA if machine resource is available for MI. 331 LLVM_DEBUG(dbgs() << "Checking resources for adding MI to packet " << MI); 332 333 bool ResourceAvail = ResourceTracker->canReserveResources(MI); 334 LLVM_DEBUG({ 335 if (ResourceAvail) 336 dbgs() << " Resources are available for adding MI to packet\n"; 337 else 338 dbgs() << " Resources NOT available\n"; 339 }); 340 if (ResourceAvail && shouldAddToPacket(MI)) { 341 // Dependency check for MI with instructions in CurrentPacketMIs. 342 for (auto MJ : CurrentPacketMIs) { 343 SUnit *SUJ = MIToSUnit[MJ]; 344 assert(SUJ && "Missing SUnit Info!"); 345 346 LLVM_DEBUG(dbgs() << " Checking against MJ " << *MJ); 347 // Is it legal to packetize SUI and SUJ together. 348 if (!isLegalToPacketizeTogether(SUI, SUJ)) { 349 LLVM_DEBUG(dbgs() << " Not legal to add MI, try to prune\n"); 350 // Allow packetization if dependency can be pruned. 351 if (!isLegalToPruneDependencies(SUI, SUJ)) { 352 // End the packet if dependency cannot be pruned. 353 LLVM_DEBUG(dbgs() 354 << " Could not prune dependencies for adding MI\n"); 355 endPacket(MBB, MI); 356 break; 357 } 358 LLVM_DEBUG(dbgs() << " Pruned dependence for adding MI\n"); 359 } 360 } 361 } else { 362 LLVM_DEBUG(if (ResourceAvail) dbgs() 363 << "Resources are available, but instruction should not be " 364 "added to packet\n " 365 << MI); 366 // End the packet if resource is not available, or if the instruction 367 // shoud not be added to the current packet. 368 endPacket(MBB, MI); 369 } 370 371 // Add MI to the current packet. 372 LLVM_DEBUG(dbgs() << "* Adding MI to packet " << MI << '\n'); 373 BeginItr = addToPacket(MI); 374 } // For all instructions in the packetization range. 375 376 // End any packet left behind. 377 endPacket(MBB, EndItr); 378 VLIWScheduler->exitRegion(); 379 VLIWScheduler->finishBlock(); 380 } 381 382 bool VLIWPacketizerList::alias(const MachineMemOperand &Op1, 383 const MachineMemOperand &Op2, 384 bool UseTBAA) const { 385 if (!Op1.getValue() || !Op2.getValue()) 386 return true; 387 388 int64_t MinOffset = std::min(Op1.getOffset(), Op2.getOffset()); 389 int64_t Overlapa = Op1.getSize() + Op1.getOffset() - MinOffset; 390 int64_t Overlapb = Op2.getSize() + Op2.getOffset() - MinOffset; 391 392 AliasResult AAResult = 393 AA->alias(MemoryLocation(Op1.getValue(), Overlapa, 394 UseTBAA ? Op1.getAAInfo() : AAMDNodes()), 395 MemoryLocation(Op2.getValue(), Overlapb, 396 UseTBAA ? Op2.getAAInfo() : AAMDNodes())); 397 398 return AAResult != NoAlias; 399 } 400 401 bool VLIWPacketizerList::alias(const MachineInstr &MI1, 402 const MachineInstr &MI2, 403 bool UseTBAA) const { 404 if (MI1.memoperands_empty() || MI2.memoperands_empty()) 405 return true; 406 407 for (const MachineMemOperand *Op1 : MI1.memoperands()) 408 for (const MachineMemOperand *Op2 : MI2.memoperands()) 409 if (alias(*Op1, *Op2, UseTBAA)) 410 return true; 411 return false; 412 } 413 414 // Add a DAG mutation object to the ordered list. 415 void VLIWPacketizerList::addMutation( 416 std::unique_ptr<ScheduleDAGMutation> Mutation) { 417 VLIWScheduler->addMutation(std::move(Mutation)); 418 } 419