1 //===-- RISCVInsertWriteVXRM.cpp - Insert Write of RISC-V VXRM CSR --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass inserts writes to the VXRM CSR as needed by vector instructions. 10 // Each instruction that uses VXRM carries an operand that contains its required 11 // VXRM value. This pass tries to optimize placement to avoid redundant writes 12 // to VXRM. 13 // 14 // This is done using 2 dataflow algorithms. The first is a forward data flow 15 // to calculate where a VXRM value is available. The second is a backwards 16 // dataflow to determine where a VXRM value is anticipated. 17 // 18 // Finally, we use the results of these two dataflows to insert VXRM writes 19 // where a value is anticipated, but not available. 20 // 21 // FIXME: This pass does not split critical edges, so there can still be some 22 // redundancy. 23 // 24 // FIXME: If we are willing to have writes that aren't always needed, we could 25 // reduce the number of VXRM writes in some cases. 26 //===----------------------------------------------------------------------===// 27 28 #include "MCTargetDesc/RISCVBaseInfo.h" 29 #include "RISCV.h" 30 #include "RISCVSubtarget.h" 31 #include "llvm/CodeGen/MachineFunctionPass.h" 32 #include <queue> 33 34 using namespace llvm; 35 36 #define DEBUG_TYPE "riscv-insert-write-vxrm" 37 #define RISCV_INSERT_WRITE_VXRM_NAME "RISC-V Insert Write VXRM Pass" 38 39 namespace { 40 41 class VXRMInfo { 42 uint8_t VXRMImm = 0; 43 44 enum : uint8_t { 45 Uninitialized, 46 Static, 47 Unknown, 48 } State = Uninitialized; 49 50 public: 51 VXRMInfo() {} 52 53 static VXRMInfo getUnknown() { 54 VXRMInfo Info; 55 Info.setUnknown(); 56 return Info; 57 } 58 59 bool isValid() const { return State != Uninitialized; } 60 void setUnknown() { State = Unknown; } 61 bool isUnknown() const { return State == Unknown; } 62 63 bool isStatic() const { return State == Static; } 64 65 void setVXRMImm(unsigned Imm) { 66 assert(Imm <= 3 && "Unexpected VXRM value"); 67 VXRMImm = Imm; 68 State = Static; 69 } 70 unsigned getVXRMImm() const { 71 assert(isStatic() && VXRMImm <= 3 && "Unexpected state"); 72 return VXRMImm; 73 } 74 75 bool operator==(const VXRMInfo &Other) const { 76 // Uninitialized is only equal to another Uninitialized. 77 if (State != Other.State) 78 return false; 79 80 if (isStatic()) 81 return VXRMImm == Other.VXRMImm; 82 83 assert((isValid() || isUnknown()) && "Unexpected state"); 84 return true; 85 } 86 87 bool operator!=(const VXRMInfo &Other) const { return !(*this == Other); } 88 89 // Calculate the VXRMInfo visible to a block assuming this and Other are 90 // both predecessors. 91 VXRMInfo intersect(const VXRMInfo &Other) const { 92 // If the new value isn't valid, ignore it. 93 if (!Other.isValid()) 94 return *this; 95 96 // If this value isn't valid, this must be the first predecessor, use it. 97 if (!isValid()) 98 return Other; 99 100 // If either is unknown, the result is unknown. 101 if (isUnknown() || Other.isUnknown()) 102 return VXRMInfo::getUnknown(); 103 104 // If we have an exact match, return this. 105 if (*this == Other) 106 return *this; 107 108 // Otherwise the result is unknown. 109 return VXRMInfo::getUnknown(); 110 } 111 112 // Calculate the VXRMInfo visible to a block assuming this and Other 113 // are both predecessors. To allow speculatively running WriteVXRM 114 // we will ignore Unknowns if one of this and Other have valid 115 // WriteVXRM. Rationale: WriteVXRM causes a pipeline flush in some 116 // uarchs and moving it outside loops is very important for some 117 // workloads. 118 VXRMInfo intersectAnticipated(const VXRMInfo &Other) const { 119 // If the new value isn't valid, ignore it. 120 if (!Other.isValid()) 121 return *this; 122 123 // If this value isn't valid, this must be the first predecessor, use it. 124 if (!isValid()) 125 return Other; 126 127 // If either is unknown, the result is the other one. 128 if (isUnknown()) 129 return Other; 130 if (Other.isUnknown()) 131 return *this; 132 133 // If we have an exact match, return this. 134 if (*this == Other) 135 return *this; 136 137 // Otherwise the result is unknown. 138 return VXRMInfo::getUnknown(); 139 } 140 141 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 142 /// Support for debugging, callable in GDB: V->dump() 143 LLVM_DUMP_METHOD void dump() const { 144 print(dbgs()); 145 dbgs() << "\n"; 146 } 147 148 void print(raw_ostream &OS) const { 149 OS << '{'; 150 if (!isValid()) 151 OS << "Uninitialized"; 152 else if (isUnknown()) 153 OS << "Unknown"; 154 else 155 OS << getVXRMImm(); 156 OS << '}'; 157 } 158 #endif 159 }; 160 161 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 162 LLVM_ATTRIBUTE_USED 163 inline raw_ostream &operator<<(raw_ostream &OS, const VXRMInfo &V) { 164 V.print(OS); 165 return OS; 166 } 167 #endif 168 169 struct BlockData { 170 // Indicates if the block uses VXRM. Uninitialized means no use. 171 VXRMInfo VXRMUse; 172 173 // Indicates the VXRM output from the block. Unitialized means transparent. 174 VXRMInfo VXRMOut; 175 176 // Keeps track of the available VXRM value at the start of the basic bloc. 177 VXRMInfo AvailableIn; 178 179 // Keeps track of the available VXRM value at the end of the basic block. 180 VXRMInfo AvailableOut; 181 182 // Keeps track of what VXRM is anticipated at the start of the basic block. 183 VXRMInfo AnticipatedIn; 184 185 // Keeps track of what VXRM is anticipated at the end of the basic block. 186 VXRMInfo AnticipatedOut; 187 188 // Keeps track of whether the block is already in the queue. 189 bool InQueue; 190 191 BlockData() = default; 192 }; 193 194 class RISCVInsertWriteVXRM : public MachineFunctionPass { 195 const TargetInstrInfo *TII; 196 197 std::vector<BlockData> BlockInfo; 198 std::queue<const MachineBasicBlock *> WorkList; 199 200 public: 201 static char ID; 202 203 RISCVInsertWriteVXRM() : MachineFunctionPass(ID) {} 204 205 bool runOnMachineFunction(MachineFunction &MF) override; 206 207 void getAnalysisUsage(AnalysisUsage &AU) const override { 208 AU.setPreservesCFG(); 209 MachineFunctionPass::getAnalysisUsage(AU); 210 } 211 212 StringRef getPassName() const override { 213 return RISCV_INSERT_WRITE_VXRM_NAME; 214 } 215 216 private: 217 bool computeVXRMChanges(const MachineBasicBlock &MBB); 218 void computeAvailable(const MachineBasicBlock &MBB); 219 void computeAnticipated(const MachineFunction &MF, const MachineBasicBlock &MBB); 220 void emitWriteVXRM(MachineBasicBlock &MBB); 221 }; 222 223 } // end anonymous namespace 224 225 char RISCVInsertWriteVXRM::ID = 0; 226 227 INITIALIZE_PASS(RISCVInsertWriteVXRM, DEBUG_TYPE, RISCV_INSERT_WRITE_VXRM_NAME, 228 false, false) 229 230 static bool ignoresVXRM(const MachineInstr &MI) { 231 switch (RISCV::getRVVMCOpcode(MI.getOpcode())) { 232 default: 233 return false; 234 case RISCV::VNCLIP_WI: 235 case RISCV::VNCLIPU_WI: 236 return MI.getOperand(3).getImm() == 0; 237 } 238 } 239 240 bool RISCVInsertWriteVXRM::computeVXRMChanges(const MachineBasicBlock &MBB) { 241 BlockData &BBInfo = BlockInfo[MBB.getNumber()]; 242 243 bool NeedVXRMWrite = false; 244 for (const MachineInstr &MI : MBB) { 245 int VXRMIdx = RISCVII::getVXRMOpNum(MI.getDesc()); 246 if (VXRMIdx >= 0 && !ignoresVXRM(MI)) { 247 unsigned NewVXRMImm = MI.getOperand(VXRMIdx).getImm(); 248 249 if (!BBInfo.VXRMUse.isValid()) 250 BBInfo.VXRMUse.setVXRMImm(NewVXRMImm); 251 252 BBInfo.VXRMOut.setVXRMImm(NewVXRMImm); 253 NeedVXRMWrite = true; 254 continue; 255 } 256 257 if (MI.isCall() || MI.isInlineAsm() || 258 MI.modifiesRegister(RISCV::VXRM, /*TRI=*/nullptr)) { 259 if (!BBInfo.VXRMUse.isValid()) 260 BBInfo.VXRMUse.setUnknown(); 261 262 BBInfo.VXRMOut.setUnknown(); 263 } 264 } 265 266 return NeedVXRMWrite; 267 } 268 269 void RISCVInsertWriteVXRM::computeAvailable(const MachineBasicBlock &MBB) { 270 BlockData &BBInfo = BlockInfo[MBB.getNumber()]; 271 272 BBInfo.InQueue = false; 273 274 VXRMInfo Available; 275 if (MBB.pred_empty()) { 276 Available.setUnknown(); 277 } else { 278 for (const MachineBasicBlock *P : MBB.predecessors()) 279 Available = Available.intersect(BlockInfo[P->getNumber()].AvailableOut); 280 } 281 282 // If we don't have any valid available info, wait until we do. 283 if (!Available.isValid()) 284 return; 285 286 if (Available != BBInfo.AvailableIn) { 287 BBInfo.AvailableIn = Available; 288 LLVM_DEBUG(dbgs() << "AvailableIn state of " << printMBBReference(MBB) 289 << " changed to " << BBInfo.AvailableIn << "\n"); 290 } 291 292 if (BBInfo.VXRMOut.isValid()) 293 Available = BBInfo.VXRMOut; 294 295 if (Available == BBInfo.AvailableOut) 296 return; 297 298 BBInfo.AvailableOut = Available; 299 LLVM_DEBUG(dbgs() << "AvailableOut state of " << printMBBReference(MBB) 300 << " changed to " << BBInfo.AvailableOut << "\n"); 301 302 // Add the successors to the work list so that we can propagate. 303 for (MachineBasicBlock *S : MBB.successors()) { 304 if (!BlockInfo[S->getNumber()].InQueue) { 305 BlockInfo[S->getNumber()].InQueue = true; 306 WorkList.push(S); 307 } 308 } 309 } 310 311 void RISCVInsertWriteVXRM::computeAnticipated(const MachineFunction &MF, const MachineBasicBlock &MBB) { 312 BlockData &BBInfo = BlockInfo[MBB.getNumber()]; 313 const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>(); 314 315 BBInfo.InQueue = false; 316 317 VXRMInfo Anticipated; 318 if (MBB.succ_empty()) { 319 Anticipated.setUnknown(); 320 } else { 321 for (const MachineBasicBlock *S : MBB.successors()) 322 if (ST.hasVXRMPipelineFlush()) 323 Anticipated = 324 Anticipated.intersectAnticipated(BlockInfo[S->getNumber()].AnticipatedIn); 325 else 326 Anticipated = 327 Anticipated.intersect(BlockInfo[S->getNumber()].AnticipatedIn); 328 } 329 330 // If we don't have any valid anticipated info, wait until we do. 331 if (!Anticipated.isValid()) 332 return; 333 334 if (Anticipated != BBInfo.AnticipatedOut) { 335 BBInfo.AnticipatedOut = Anticipated; 336 LLVM_DEBUG(dbgs() << "AnticipatedOut state of " << printMBBReference(MBB) 337 << " changed to " << BBInfo.AnticipatedOut << "\n"); 338 } 339 340 // If this block reads VXRM, copy it. 341 if (BBInfo.VXRMUse.isValid()) 342 Anticipated = BBInfo.VXRMUse; 343 344 if (Anticipated == BBInfo.AnticipatedIn) 345 return; 346 347 BBInfo.AnticipatedIn = Anticipated; 348 LLVM_DEBUG(dbgs() << "AnticipatedIn state of " << printMBBReference(MBB) 349 << " changed to " << BBInfo.AnticipatedIn << "\n"); 350 351 // Add the predecessors to the work list so that we can propagate. 352 for (MachineBasicBlock *P : MBB.predecessors()) { 353 if (!BlockInfo[P->getNumber()].InQueue) { 354 BlockInfo[P->getNumber()].InQueue = true; 355 WorkList.push(P); 356 } 357 } 358 } 359 360 void RISCVInsertWriteVXRM::emitWriteVXRM(MachineBasicBlock &MBB) { 361 const BlockData &BBInfo = BlockInfo[MBB.getNumber()]; 362 363 VXRMInfo Info = BBInfo.AvailableIn; 364 365 // Flag to indicates we need to insert a VXRM write. We want to delay it as 366 // late as possible in this block. 367 bool PendingInsert = false; 368 369 // Insert VXRM write if anticipated and not available. 370 if (BBInfo.AnticipatedIn.isStatic()) { 371 // If this is the entry block and the value is anticipated, insert. 372 if (MBB.isEntryBlock()) { 373 PendingInsert = true; 374 } else { 375 // Search for any predecessors that wouldn't satisfy our requirement and 376 // insert a write VXRM if needed. 377 // NOTE: If one predecessor is able to provide the requirement, but 378 // another isn't, it means we have a critical edge. The better placement 379 // would be to split the critical edge. 380 for (MachineBasicBlock *P : MBB.predecessors()) { 381 const BlockData &PInfo = BlockInfo[P->getNumber()]; 382 // If it's available out of the predecessor, then we're ok. 383 if (PInfo.AvailableOut.isStatic() && 384 PInfo.AvailableOut.getVXRMImm() == 385 BBInfo.AnticipatedIn.getVXRMImm()) 386 continue; 387 // If the predecessor anticipates this value for all its succesors, 388 // then a write to VXRM would have already occured before this block is 389 // executed. 390 if (PInfo.AnticipatedOut.isStatic() && 391 PInfo.AnticipatedOut.getVXRMImm() == 392 BBInfo.AnticipatedIn.getVXRMImm()) 393 continue; 394 PendingInsert = true; 395 break; 396 } 397 } 398 399 Info = BBInfo.AnticipatedIn; 400 } 401 402 for (MachineInstr &MI : MBB) { 403 int VXRMIdx = RISCVII::getVXRMOpNum(MI.getDesc()); 404 if (VXRMIdx >= 0 && !ignoresVXRM(MI)) { 405 unsigned NewVXRMImm = MI.getOperand(VXRMIdx).getImm(); 406 407 if (PendingInsert || !Info.isStatic() || 408 Info.getVXRMImm() != NewVXRMImm) { 409 assert((!PendingInsert || 410 (Info.isStatic() && Info.getVXRMImm() == NewVXRMImm)) && 411 "Pending VXRM insertion mismatch"); 412 LLVM_DEBUG(dbgs() << "Inserting before "; MI.print(dbgs())); 413 BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::WriteVXRMImm)) 414 .addImm(NewVXRMImm); 415 PendingInsert = false; 416 } 417 418 MI.addOperand(MachineOperand::CreateReg(RISCV::VXRM, /*IsDef*/ false, 419 /*IsImp*/ true)); 420 Info.setVXRMImm(NewVXRMImm); 421 continue; 422 } 423 424 if (MI.isCall() || MI.isInlineAsm() || 425 MI.modifiesRegister(RISCV::VXRM, /*TRI=*/nullptr)) 426 Info.setUnknown(); 427 } 428 429 // If all our successors anticipate a value, do the insert. 430 // NOTE: It's possible that not all predecessors of our successor provide the 431 // correct value. This can occur on critical edges. If we don't split the 432 // critical edge we'll also have a write vxrm in the succesor that is 433 // redundant with this one. 434 if (PendingInsert || 435 (BBInfo.AnticipatedOut.isStatic() && 436 (!Info.isStatic() || 437 Info.getVXRMImm() != BBInfo.AnticipatedOut.getVXRMImm()))) { 438 assert((!PendingInsert || 439 (Info.isStatic() && BBInfo.AnticipatedOut.isStatic() && 440 Info.getVXRMImm() == BBInfo.AnticipatedOut.getVXRMImm())) && 441 "Pending VXRM insertion mismatch"); 442 LLVM_DEBUG(dbgs() << "Inserting at end of " << printMBBReference(MBB) 443 << " changing to " << BBInfo.AnticipatedOut << "\n"); 444 BuildMI(MBB, MBB.getFirstTerminator(), DebugLoc(), 445 TII->get(RISCV::WriteVXRMImm)) 446 .addImm(BBInfo.AnticipatedOut.getVXRMImm()); 447 } 448 } 449 450 bool RISCVInsertWriteVXRM::runOnMachineFunction(MachineFunction &MF) { 451 // Skip if the vector extension is not enabled. 452 const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>(); 453 if (!ST.hasVInstructions()) 454 return false; 455 456 TII = ST.getInstrInfo(); 457 458 assert(BlockInfo.empty() && "Expect empty block infos"); 459 BlockInfo.resize(MF.getNumBlockIDs()); 460 461 // Phase 1 - collect block information. 462 bool NeedVXRMChange = false; 463 for (const MachineBasicBlock &MBB : MF) 464 NeedVXRMChange |= computeVXRMChanges(MBB); 465 466 if (!NeedVXRMChange) { 467 BlockInfo.clear(); 468 return false; 469 } 470 471 // Phase 2 - Compute available VXRM using a forward walk. 472 for (const MachineBasicBlock &MBB : MF) { 473 WorkList.push(&MBB); 474 BlockInfo[MBB.getNumber()].InQueue = true; 475 } 476 while (!WorkList.empty()) { 477 const MachineBasicBlock &MBB = *WorkList.front(); 478 WorkList.pop(); 479 computeAvailable(MBB); 480 } 481 482 // Phase 3 - Compute anticipated VXRM using a backwards walk. 483 for (const MachineBasicBlock &MBB : llvm::reverse(MF)) { 484 WorkList.push(&MBB); 485 BlockInfo[MBB.getNumber()].InQueue = true; 486 } 487 while (!WorkList.empty()) { 488 const MachineBasicBlock &MBB = *WorkList.front(); 489 WorkList.pop(); 490 computeAnticipated(MF, MBB); 491 } 492 493 // Phase 4 - Emit VXRM writes at the earliest place possible. 494 for (MachineBasicBlock &MBB : MF) 495 emitWriteVXRM(MBB); 496 497 BlockInfo.clear(); 498 499 return true; 500 } 501 502 FunctionPass *llvm::createRISCVInsertWriteVXRMPass() { 503 return new RISCVInsertWriteVXRM(); 504 } 505