//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISCV Insert VSETVLI pass"

static cl::opt<bool> DisableInsertVSETVLPHIOpt(
    "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
    cl::desc("Disable looking through phis when inserting vsetvlis."));

namespace {

class VSETVLIInfo {
  union {
    Register AVLReg;
    unsigned AVLImm;
  };

  enum : uint8_t {
    Uninitialized,
    AVLIsReg,
    AVLIsImm,
    Unknown,
  } State = Uninitialized;
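
  // Illustrative examples of the AVL encoding (instructions hypothetical):
  // for "vsetvli a0, a1, e32, m1" the AVL is the register a1
  // (State == AVLIsReg); for "vsetivli a0, 4, e32, m1" the AVL is the
  // immediate 4 (State == AVLIsImm).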

  // Fields from VTYPE.
  RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
  uint8_t SEW = 0;
  uint8_t TailAgnostic : 1;
  uint8_t MaskAgnostic : 1;
  uint8_t MaskRegOp : 1;
  uint8_t StoreOp : 1;
  uint8_t ScalarMovOp : 1;
  uint8_t SEWLMULRatioOnly : 1;

public:
  VSETVLIInfo()
      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false), MaskRegOp(false),
        StoreOp(false), ScalarMovOp(false), SEWLMULRatioOnly(false) {}

  static VSETVLIInfo getUnknown() {
    VSETVLIInfo Info;
    Info.setUnknown();
    return Info;
  }

  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  void setAVLReg(Register Reg) {
    AVLReg = Reg;
    State = AVLIsReg;
  }

  void setAVLImm(unsigned Imm) {
    AVLImm = Imm;
    State = AVLIsImm;
  }

  bool hasAVLImm() const { return State == AVLIsImm; }
  bool hasAVLReg() const { return State == AVLIsReg; }
  Register getAVLReg() const {
    assert(hasAVLReg());
    return AVLReg;
  }
  unsigned getAVLImm() const {
    assert(hasAVLImm());
    return AVLImm;
  }
  bool hasZeroAVL() const {
    if (hasAVLImm())
      return getAVLImm() == 0;
    return false;
  }
  bool hasNonZeroAVL() const {
    if (hasAVLImm())
      return getAVLImm() > 0;
    // An AVL register of X0 selects VLMAX, which is always non-zero.
    if (hasAVLReg())
      return getAVLReg() == RISCV::X0;
    return false;
  }

  bool hasSameAVL(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare AVL in unknown state");
    if (hasAVLReg() && Other.hasAVLReg())
      return getAVLReg() == Other.getAVLReg();

    if (hasAVLImm() && Other.hasAVLImm())
      return getAVLImm() == Other.getAVLImm();

    return false;
  }

  void setVTYPE(unsigned VType) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = RISCVVType::getVLMUL(VType);
    SEW = RISCVVType::getSEW(VType);
    TailAgnostic = RISCVVType::isTailAgnostic(VType);
    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
  }
  void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA, bool MRO,
                bool IsStore, bool IsScalarMovOp) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = L;
    SEW = S;
    TailAgnostic = TA;
    MaskAgnostic = MA;
    MaskRegOp = MRO;
    StoreOp = IsStore;
    ScalarMovOp = IsScalarMovOp;
  }

  unsigned encodeVTYPE() const {
    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
  }

  bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }

  bool hasSameSEW(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return SEW == Other.SEW;
  }

  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                    Other.MaskAgnostic);
  }

  static unsigned getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul) {
    unsigned LMul;
    bool Fractional;
    std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(VLMul);

    // Convert LMul to a fixed point value with 3 fractional bits.
    LMul = Fractional ? (8 / LMul) : (LMul * 8);

    assert(SEW >= 8 && "Unexpected SEW value");
    return (SEW * 8) / LMul;
  }

  unsigned getSEWLMULRatio() const {
    assert(isValid() && !isUnknown() &&
           "Can't use VTYPE for uninitialized or unknown");
    return getSEWLMULRatio(SEW, VLMul);
  }

  // Check if the VTYPEs for these two VSETVLIInfos produce the same VLMAX.
  bool hasSameVLMAX(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return getSEWLMULRatio() == Other.getSEWLMULRatio();
  }

  bool hasSamePolicy(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return TailAgnostic == Other.TailAgnostic &&
           MaskAgnostic == Other.MaskAgnostic;
  }

  bool hasCompatibleVTYPE(const VSETVLIInfo &InstrInfo, bool Strict) const {
    // Simple case, see if the full VTYPE matches.
    if (hasSameVTYPE(InstrInfo))
      return true;

    if (Strict)
      return false;

    // If this is a mask reg operation, it only cares about VLMAX.
    // FIXME: Mask reg operations are probably ok if "this" VLMAX is larger
    // than "InstrInfo".
    // FIXME: The policy bits can probably be ignored for mask reg operations.
    if (InstrInfo.MaskRegOp && hasSameVLMAX(InstrInfo) &&
        TailAgnostic == InstrInfo.TailAgnostic &&
        MaskAgnostic == InstrInfo.MaskAgnostic)
      return true;

    return false;
  }

  // Determine whether the requirements of the vector instruction represented
  // by InstrInfo are compatible with the previous vsetvli instruction
  // represented by this.
  bool isCompatible(const VSETVLIInfo &InstrInfo, bool Strict) const {
    assert(isValid() && InstrInfo.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!InstrInfo.SEWLMULRatioOnly &&
           "Expected a valid VTYPE for instruction!");
    // Nothing is compatible with Unknown.
    if (isUnknown() || InstrInfo.isUnknown())
      return false;

    // If only our SEW/LMUL ratio is valid, then this isn't compatible.
    if (SEWLMULRatioOnly)
      return false;

    // If the instruction doesn't need an AVLReg and the SEW matches, consider
    // it compatible.
    if (!Strict && InstrInfo.hasAVLReg() &&
        InstrInfo.AVLReg == RISCV::NoRegister) {
      if (SEW == InstrInfo.SEW)
        return true;
    }

    // For vmv.s.x and vfmv.s.f, there are only two behaviors: VL = 0 and
    // VL > 0. So the instruction is compatible whenever we can be sure both
    // VLs fall in the same case.
    if (!Strict && InstrInfo.ScalarMovOp && InstrInfo.hasAVLImm() &&
        ((hasNonZeroAVL() && InstrInfo.hasNonZeroAVL()) ||
         (hasZeroAVL() && InstrInfo.hasZeroAVL())) &&
        hasSameSEW(InstrInfo) && hasSamePolicy(InstrInfo))
      return true;
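
    // Illustrative case for the rule above (values hypothetical): a vmv.s.x
    // with AVL 1 against a prior state with AVL 2 writes element 0 either
    // way, since both AVLs are known non-zero and SEW/policy also match.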

    // The AVL must match.
    if (!hasSameAVL(InstrInfo))
      return false;

    if (hasCompatibleVTYPE(InstrInfo, Strict))
      return true;

    // Strict matches must ensure a full VTYPE match.
    if (Strict)
      return false;

    // Store instructions don't use the policy fields.
    // TODO: Move into hasCompatibleVTYPE?
    if (InstrInfo.StoreOp && VLMul == InstrInfo.VLMul && SEW == InstrInfo.SEW)
      return true;

    // Anything else is not compatible.
    return false;
  }

  bool isCompatibleWithLoadStoreEEW(unsigned EEW,
                                    const VSETVLIInfo &InstrInfo) const {
    assert(isValid() && InstrInfo.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!InstrInfo.SEWLMULRatioOnly &&
           "Expected a valid VTYPE for instruction!");
    assert(EEW == InstrInfo.SEW && "Mismatched EEW/SEW for store");

    if (isUnknown() || hasSEWLMULRatioOnly())
      return false;

    if (!hasSameAVL(InstrInfo))
      return false;

    // Stores can ignore the tail and mask policies.
    if (!InstrInfo.StoreOp && (TailAgnostic != InstrInfo.TailAgnostic ||
                               MaskAgnostic != InstrInfo.MaskAgnostic))
      return false;

    return getSEWLMULRatio() == getSEWLMULRatio(EEW, InstrInfo.VLMul);
  }

  bool operator==(const VSETVLIInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (!isValid())
      return !Other.isValid();
    if (!Other.isValid())
      return !isValid();

    // Unknown is only equal to another Unknown.
    if (isUnknown())
      return Other.isUnknown();
    if (Other.isUnknown())
      return isUnknown();

    if (!hasSameAVL(Other))
      return false;

    // If only the VLMAX is valid, check that it is the same.
    if (SEWLMULRatioOnly && Other.SEWLMULRatioOnly)
      return hasSameVLMAX(Other);

    // If the full VTYPE is valid, check that it is the same.
    if (!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly)
      return hasSameVTYPE(Other);

    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
    return false;
  }

  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
  // both predecessors.
  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
    if (isUnknown() || Other.isUnknown())
      return VSETVLIInfo::getUnknown();

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
    // return an SEW/LMUL ratio only value.
    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
      VSETVLIInfo MergeInfo = *this;
      MergeInfo.SEWLMULRatioOnly = true;
      return MergeInfo;
    }

    // Otherwise the result is unknown.
    return VSETVLIInfo::getUnknown();
  }

  // Calculate the VSETVLIInfo visible at the end of the block assuming this
  // is the predecessor value, and Other is the change for this block.
  VSETVLIInfo merge(const VSETVLIInfo &Other) const {
    assert(isValid() && "Can only merge with a valid VSETVLInfo");

    // Nothing changed from the predecessor, keep it.
    if (!Other.isValid())
      return *this;
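
    // Note: strict compatibility is used for this summary, so the computed
    // exit state never depends on the looser instruction-specific rules that
    // only apply when a concrete vsetvli placement is being decided.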

    // If the change is compatible with the input, we won't create a VSETVLI
    // and should keep the predecessor.
    if (isCompatible(Other, /*Strict*/ true))
      return *this;

    // Otherwise just use whatever is in this block.
    return Other;
  }
};

struct BlockData {
  // The VSETVLIInfo that represents the net changes to the VL/VTYPE registers
  // made by this block. Calculated in Phase 1.
  VSETVLIInfo Change;

  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all
  // predecessor blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() {}
};

class RISCVInsertVSETVLI : public MachineFunctionPass {
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;

  std::vector<BlockData> BlockInfo;
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {
    initializeRISCVInsertVSETVLIPass(*PassRegistry::getPassRegistry());
  }
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  bool needVSETVLI(const VSETVLIInfo &Require, const VSETVLIInfo &CurInfo);
  bool needVSETVLIPHI(const VSETVLIInfo &Require, const MachineBasicBlock &MBB);
  void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB);
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  void emitVSETVLIs(MachineBasicBlock &MBB);
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)

static MachineInstr *elideCopies(MachineInstr *MI,
                                 const MachineRegisterInfo *MRI) {
  while (true) {
    if (!MI->isFullCopy())
      return MI;
    if (!Register::isVirtualRegister(MI->getOperand(1).getReg()))
      return nullptr;
    MI = MRI->getVRegDef(MI->getOperand(1).getReg());
    if (!MI)
      return nullptr;
  }
}

static bool isScalarMoveInstr(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case RISCV::PseudoVMV_S_X_M1:
  case RISCV::PseudoVMV_S_X_M2:
  case RISCV::PseudoVMV_S_X_M4:
  case RISCV::PseudoVMV_S_X_M8:
  case RISCV::PseudoVMV_S_X_MF2:
  case RISCV::PseudoVMV_S_X_MF4:
  case RISCV::PseudoVMV_S_X_MF8:
  case RISCV::PseudoVFMV_S_F16_M1:
  case RISCV::PseudoVFMV_S_F16_M2:
  case RISCV::PseudoVFMV_S_F16_M4:
  case RISCV::PseudoVFMV_S_F16_M8:
  case RISCV::PseudoVFMV_S_F16_MF2:
  case RISCV::PseudoVFMV_S_F16_MF4:
  case RISCV::PseudoVFMV_S_F32_M1:
  case RISCV::PseudoVFMV_S_F32_M2:
  case RISCV::PseudoVFMV_S_F32_M4:
  case RISCV::PseudoVFMV_S_F32_M8:
  case RISCV::PseudoVFMV_S_F32_MF2:
  case RISCV::PseudoVFMV_S_F64_M1:
  case RISCV::PseudoVFMV_S_F64_M2:
  case RISCV::PseudoVFMV_S_F64_M4:
  case RISCV::PseudoVFMV_S_F64_M8:
    return true;
  }
}

static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
                                       const MachineRegisterInfo *MRI) {
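  // The trailing explicit operands of a vector pseudo are expected to be laid
  // out as [..., AVL, log2(SEW)], with an optional policy immediate last; the
  // operand offsets used below rely on that layout.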
  VSETVLIInfo InstrInfo;
  unsigned NumOperands = MI.getNumExplicitOperands();
  bool HasPolicy = RISCVII::hasVecPolicyOp(TSFlags);

  // Default to tail agnostic unless the destination is tied to a source. In
  // that case the user would have some control over the tail values (so use
  // tail undisturbed), unless the source is undef. Some pseudo instructions
  // force a tail agnostic policy despite having a tied def.
  bool ForceTailAgnostic = RISCVII::doesForceTailAgnostic(TSFlags);
  bool TailAgnostic = true;
  // If the instruction has a policy argument, use the argument.
  if (HasPolicy) {
    const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
    TailAgnostic = Op.getImm() & 0x1;
  }

  unsigned UseOpIdx;
  if (!(ForceTailAgnostic || (HasPolicy && TailAgnostic)) &&
      MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
    TailAgnostic = false;
    // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
    const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
    MachineInstr *UseMI = MRI->getVRegDef(UseMO.getReg());
    if (UseMI) {
      UseMI = elideCopies(UseMI, MRI);
      if (UseMI && UseMI->isImplicitDef())
        TailAgnostic = true;
    }
  }

  // Remove the tail policy so we can find the SEW and VL.
  if (HasPolicy)
    --NumOperands;

  RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  unsigned Log2SEW = MI.getOperand(NumOperands - 1).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  bool MaskRegOp = Log2SEW == 0;
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");

  // If there are no explicit defs, this is a store instruction which can
  // ignore the tail and mask policies.
  bool StoreOp = MI.getNumExplicitDefs() == 0;
  bool ScalarMovOp = isScalarMoveInstr(MI);

  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(NumOperands - 2);
    if (VLOp.isImm()) {
      int64_t Imm = VLOp.getImm();
      // Convert the VLMax sentinel to the X0 register.
      if (Imm == RISCV::VLMaxSentinel)
        InstrInfo.setAVLReg(RISCV::X0);
      else
        InstrInfo.setAVLImm(Imm);
    } else {
      InstrInfo.setAVLReg(VLOp.getReg());
    }
  } else
    InstrInfo.setAVLReg(RISCV::NoRegister);
  InstrInfo.setVTYPE(VLMul, SEW, /*TailAgnostic*/ TailAgnostic,
                     /*MaskAgnostic*/ false, MaskRegOp, StoreOp, ScalarMovOp);

  return InstrInfo;
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                                       const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {
  DebugLoc DL = MI.getDebugLoc();

  // Use the X0, X0 form if the AVL is the same and the SEW+LMUL gives the
  // same VLMAX.
  if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
      Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
    BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETVLIX0))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addReg(RISCV::X0, RegState::Kill)
        .addImm(Info.encodeVTYPE())
        .addReg(RISCV::VL, RegState::Implicit);
    return;
  }

  if (Info.hasAVLImm()) {
    BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(Info.getAVLImm())
        .addImm(Info.encodeVTYPE());
    return;
  }

  Register AVLReg = Info.getAVLReg();
  if (AVLReg == RISCV::NoRegister) {
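    // (A vsetvli with rd=x0, rs1=x0 changes VTYPE while keeping the current
    // VL; per the vector spec it is only usable when the new SEW/LMUL ratio
    // leaves VLMAX, and therefore VL, unchanged.)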
    // We can only use x0, x0 if there's no chance of the vtype change causing
    // the previous vl to become invalid.
    if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
        Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }
    // Otherwise use an AVL of 0 to avoid depending on the previous vl.
    BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(0)
        .addImm(Info.encodeVTYPE());
    return;
  }

  if (AVLReg.isVirtual())
    MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);

  // Use X0 as the DestReg unless AVLReg is X0. We also need to change the
  // opcode if the AVLReg is X0 as they have different register classes for
  // the AVL operand.
  Register DestReg = RISCV::X0;
  unsigned Opcode = RISCV::PseudoVSETVLI;
  if (AVLReg == RISCV::X0) {
    DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
    Opcode = RISCV::PseudoVSETVLIX0;
  }
  BuildMI(MBB, MI, DL, TII->get(Opcode))
      .addReg(DestReg, RegState::Define | RegState::Dead)
      .addReg(AVLReg)
      .addImm(Info.encodeVTYPE());
}

// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
    NewInfo.setAVLImm(MI.getOperand(1).getImm());
  } else {
    assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
    Register AVLReg = MI.getOperand(1).getReg();
    assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
           "Can't handle X0, X0 vsetvli yet");
    NewInfo.setAVLReg(AVLReg);
  }
  NewInfo.setVTYPE(MI.getOperand(2).getImm());

  return NewInfo;
}

bool RISCVInsertVSETVLI::needVSETVLI(const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) {
  if (CurInfo.isCompatible(Require, /*Strict*/ false))
    return false;
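
  // Illustrative case the check below catches (names hypothetical):
  //   %avl = PseudoVSETVLI %x, <vtype>   ; defines the AVL and sets VTYPE
  //   ...
  //   <vector op with AVL %avl and the same vtype>
  // The vsetvli defining %avl already established the state the op needs.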

  // We didn't find a compatible value. If our AVL is a virtual register,
  // it might be defined by a VSET(I)VLI. If it has the same VTYPE we need
  // and the last VL/VTYPE we observed is the same, we don't need a
  // VSETVLI here.
  if (!CurInfo.isUnknown() && Require.hasAVLReg() &&
      Require.getAVLReg().isVirtual() && !CurInfo.hasSEWLMULRatioOnly() &&
      CurInfo.hasCompatibleVTYPE(Require, /*Strict*/ false)) {
    if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
      if (DefMI->getOpcode() == RISCV::PseudoVSETVLI ||
          DefMI->getOpcode() == RISCV::PseudoVSETVLIX0 ||
          DefMI->getOpcode() == RISCV::PseudoVSETIVLI) {
        VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
        if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVTYPE(CurInfo))
          return false;
      }
    }
  }

  return true;
}

static bool canSkipVSETVLIForLoadStore(const MachineInstr &MI,
                                       const VSETVLIInfo &Require,
                                       const VSETVLIInfo &CurInfo) {
  unsigned EEW;
  switch (MI.getOpcode()) {
  default:
    return false;
  case RISCV::PseudoVLE8_V_M1:
  case RISCV::PseudoVLE8_V_M1_MASK:
  case RISCV::PseudoVLE8_V_M2:
  case RISCV::PseudoVLE8_V_M2_MASK:
  case RISCV::PseudoVLE8_V_M4:
  case RISCV::PseudoVLE8_V_M4_MASK:
  case RISCV::PseudoVLE8_V_M8:
  case RISCV::PseudoVLE8_V_M8_MASK:
  case RISCV::PseudoVLE8_V_MF2:
  case RISCV::PseudoVLE8_V_MF2_MASK:
  case RISCV::PseudoVLE8_V_MF4:
  case RISCV::PseudoVLE8_V_MF4_MASK:
  case RISCV::PseudoVLE8_V_MF8:
  case RISCV::PseudoVLE8_V_MF8_MASK:
  case RISCV::PseudoVLSE8_V_M1:
  case RISCV::PseudoVLSE8_V_M1_MASK:
  case RISCV::PseudoVLSE8_V_M2:
  case RISCV::PseudoVLSE8_V_M2_MASK:
  case RISCV::PseudoVLSE8_V_M4:
  case RISCV::PseudoVLSE8_V_M4_MASK:
  case RISCV::PseudoVLSE8_V_M8:
  case RISCV::PseudoVLSE8_V_M8_MASK:
  case RISCV::PseudoVLSE8_V_MF2:
  case RISCV::PseudoVLSE8_V_MF2_MASK:
  case RISCV::PseudoVLSE8_V_MF4:
  case RISCV::PseudoVLSE8_V_MF4_MASK:
  case RISCV::PseudoVLSE8_V_MF8:
  case RISCV::PseudoVLSE8_V_MF8_MASK:
  case RISCV::PseudoVSE8_V_M1:
  case RISCV::PseudoVSE8_V_M1_MASK:
  case RISCV::PseudoVSE8_V_M2:
  case RISCV::PseudoVSE8_V_M2_MASK:
  case RISCV::PseudoVSE8_V_M4:
  case RISCV::PseudoVSE8_V_M4_MASK:
  case RISCV::PseudoVSE8_V_M8:
  case RISCV::PseudoVSE8_V_M8_MASK:
  case RISCV::PseudoVSE8_V_MF2:
  case RISCV::PseudoVSE8_V_MF2_MASK:
  case RISCV::PseudoVSE8_V_MF4:
  case RISCV::PseudoVSE8_V_MF4_MASK:
  case RISCV::PseudoVSE8_V_MF8:
  case RISCV::PseudoVSE8_V_MF8_MASK:
  case RISCV::PseudoVSSE8_V_M1:
  case RISCV::PseudoVSSE8_V_M1_MASK:
  case RISCV::PseudoVSSE8_V_M2:
  case RISCV::PseudoVSSE8_V_M2_MASK:
  case RISCV::PseudoVSSE8_V_M4:
  case RISCV::PseudoVSSE8_V_M4_MASK:
  case RISCV::PseudoVSSE8_V_M8:
  case RISCV::PseudoVSSE8_V_M8_MASK:
  case RISCV::PseudoVSSE8_V_MF2:
  case RISCV::PseudoVSSE8_V_MF2_MASK:
  case RISCV::PseudoVSSE8_V_MF4:
  case RISCV::PseudoVSSE8_V_MF4_MASK:
  case RISCV::PseudoVSSE8_V_MF8:
  case RISCV::PseudoVSSE8_V_MF8_MASK:
    EEW = 8;
    break;
  case RISCV::PseudoVLE16_V_M1:
  case RISCV::PseudoVLE16_V_M1_MASK:
  case RISCV::PseudoVLE16_V_M2:
  case RISCV::PseudoVLE16_V_M2_MASK:
  case RISCV::PseudoVLE16_V_M4:
  case RISCV::PseudoVLE16_V_M4_MASK:
  case RISCV::PseudoVLE16_V_M8:
  case RISCV::PseudoVLE16_V_M8_MASK:
  case RISCV::PseudoVLE16_V_MF2:
  case RISCV::PseudoVLE16_V_MF2_MASK:
  case RISCV::PseudoVLE16_V_MF4:
  case RISCV::PseudoVLE16_V_MF4_MASK:
  case RISCV::PseudoVLSE16_V_M1:
  case RISCV::PseudoVLSE16_V_M1_MASK:
  case RISCV::PseudoVLSE16_V_M2:
  case RISCV::PseudoVLSE16_V_M2_MASK:
  case RISCV::PseudoVLSE16_V_M4:
  case RISCV::PseudoVLSE16_V_M4_MASK:
  case RISCV::PseudoVLSE16_V_M8:
  case RISCV::PseudoVLSE16_V_M8_MASK:
  case RISCV::PseudoVLSE16_V_MF2:
  case RISCV::PseudoVLSE16_V_MF2_MASK:
  case RISCV::PseudoVLSE16_V_MF4:
  case RISCV::PseudoVLSE16_V_MF4_MASK:
  case RISCV::PseudoVSE16_V_M1:
  case RISCV::PseudoVSE16_V_M1_MASK:
  case RISCV::PseudoVSE16_V_M2:
  case RISCV::PseudoVSE16_V_M2_MASK:
  case RISCV::PseudoVSE16_V_M4:
  case RISCV::PseudoVSE16_V_M4_MASK:
  case RISCV::PseudoVSE16_V_M8:
  case RISCV::PseudoVSE16_V_M8_MASK:
  case RISCV::PseudoVSE16_V_MF2:
  case RISCV::PseudoVSE16_V_MF2_MASK:
  case RISCV::PseudoVSE16_V_MF4:
  case RISCV::PseudoVSE16_V_MF4_MASK:
  case RISCV::PseudoVSSE16_V_M1:
  case RISCV::PseudoVSSE16_V_M1_MASK:
  case RISCV::PseudoVSSE16_V_M2:
  case RISCV::PseudoVSSE16_V_M2_MASK:
  case RISCV::PseudoVSSE16_V_M4:
  case RISCV::PseudoVSSE16_V_M4_MASK:
  case RISCV::PseudoVSSE16_V_M8:
  case RISCV::PseudoVSSE16_V_M8_MASK:
  case RISCV::PseudoVSSE16_V_MF2:
  case RISCV::PseudoVSSE16_V_MF2_MASK:
  case RISCV::PseudoVSSE16_V_MF4:
  case RISCV::PseudoVSSE16_V_MF4_MASK:
    EEW = 16;
    break;
  case RISCV::PseudoVLE32_V_M1:
  case RISCV::PseudoVLE32_V_M1_MASK:
  case RISCV::PseudoVLE32_V_M2:
  case RISCV::PseudoVLE32_V_M2_MASK:
  case RISCV::PseudoVLE32_V_M4:
  case RISCV::PseudoVLE32_V_M4_MASK:
  case RISCV::PseudoVLE32_V_M8:
  case RISCV::PseudoVLE32_V_M8_MASK:
  case RISCV::PseudoVLE32_V_MF2:
  case RISCV::PseudoVLE32_V_MF2_MASK:
  case RISCV::PseudoVLSE32_V_M1:
  case RISCV::PseudoVLSE32_V_M1_MASK:
  case RISCV::PseudoVLSE32_V_M2:
  case RISCV::PseudoVLSE32_V_M2_MASK:
  case RISCV::PseudoVLSE32_V_M4:
  case RISCV::PseudoVLSE32_V_M4_MASK:
  case RISCV::PseudoVLSE32_V_M8:
  case RISCV::PseudoVLSE32_V_M8_MASK:
  case RISCV::PseudoVLSE32_V_MF2:
  case RISCV::PseudoVLSE32_V_MF2_MASK:
  case RISCV::PseudoVSE32_V_M1:
  case RISCV::PseudoVSE32_V_M1_MASK:
  case RISCV::PseudoVSE32_V_M2:
  case RISCV::PseudoVSE32_V_M2_MASK:
  case RISCV::PseudoVSE32_V_M4:
  case RISCV::PseudoVSE32_V_M4_MASK:
  case RISCV::PseudoVSE32_V_M8:
  case RISCV::PseudoVSE32_V_M8_MASK:
  case RISCV::PseudoVSE32_V_MF2:
  case RISCV::PseudoVSE32_V_MF2_MASK:
  case RISCV::PseudoVSSE32_V_M1:
  case RISCV::PseudoVSSE32_V_M1_MASK:
  case RISCV::PseudoVSSE32_V_M2:
  case RISCV::PseudoVSSE32_V_M2_MASK:
  case RISCV::PseudoVSSE32_V_M4:
  case RISCV::PseudoVSSE32_V_M4_MASK:
  case RISCV::PseudoVSSE32_V_M8:
  case RISCV::PseudoVSSE32_V_M8_MASK:
  case RISCV::PseudoVSSE32_V_MF2:
  case RISCV::PseudoVSSE32_V_MF2_MASK:
    EEW = 32;
    break;
  case RISCV::PseudoVLE64_V_M1:
  case RISCV::PseudoVLE64_V_M1_MASK:
  case RISCV::PseudoVLE64_V_M2:
  case RISCV::PseudoVLE64_V_M2_MASK:
  case RISCV::PseudoVLE64_V_M4:
  case RISCV::PseudoVLE64_V_M4_MASK:
  case RISCV::PseudoVLE64_V_M8:
  case RISCV::PseudoVLE64_V_M8_MASK:
  case RISCV::PseudoVLSE64_V_M1:
  case RISCV::PseudoVLSE64_V_M1_MASK:
  case RISCV::PseudoVLSE64_V_M2:
  case RISCV::PseudoVLSE64_V_M2_MASK:
  case RISCV::PseudoVLSE64_V_M4:
  case RISCV::PseudoVLSE64_V_M4_MASK:
  case RISCV::PseudoVLSE64_V_M8:
  case RISCV::PseudoVLSE64_V_M8_MASK:
  case RISCV::PseudoVSE64_V_M1:
  case RISCV::PseudoVSE64_V_M1_MASK:
  case RISCV::PseudoVSE64_V_M2:
  case RISCV::PseudoVSE64_V_M2_MASK:
  case RISCV::PseudoVSE64_V_M4:
  case RISCV::PseudoVSE64_V_M4_MASK:
  case RISCV::PseudoVSE64_V_M8:
  case RISCV::PseudoVSE64_V_M8_MASK:
  case RISCV::PseudoVSSE64_V_M1:
  case RISCV::PseudoVSSE64_V_M1_MASK:
  case RISCV::PseudoVSSE64_V_M2:
  case RISCV::PseudoVSSE64_V_M2_MASK:
  case RISCV::PseudoVSSE64_V_M4:
  case RISCV::PseudoVSSE64_V_M4_MASK:
  case RISCV::PseudoVSSE64_V_M8:
  case RISCV::PseudoVSSE64_V_M8_MASK:
    EEW = 64;
    break;
  }

  return CurInfo.isCompatibleWithLoadStoreEEW(EEW, Require);
}

bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) {
  bool HadVectorOp = false;

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];
  for (const MachineInstr &MI : MBB) {
    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
        MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
        MI.getOpcode() == RISCV::PseudoVSETIVLI) {
      HadVectorOp = true;
      BBInfo.Change = getInfoForVSETVLI(MI);
      continue;
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      HadVectorOp = true;

      VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);

      if (!BBInfo.Change.isValid()) {
        BBInfo.Change = NewInfo;
      } else {
        // If this instruction isn't compatible with the previous VL/VTYPE
        // we need to insert a VSETVLI.
        // If this is a unit-stride or strided load/store, we may be able to
        // use the EMUL=(EEW/SEW)*LMUL relationship to avoid changing vtype.
        // NOTE: We only do this if the vtype we're comparing against was
        // created in this block. We need the first and third phase to treat
        // the store the same way.
        if (!canSkipVSETVLIForLoadStore(MI, NewInfo, BBInfo.Change) &&
            needVSETVLI(NewInfo, BBInfo.Change))
          BBInfo.Change = NewInfo;
      }
    }

    // If this is something that updates VL/VTYPE that we don't know about,
    // set the state to unknown.
    if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
        MI.modifiesRegister(RISCV::VTYPE)) {
      BBInfo.Change = VSETVLIInfo::getUnknown();
    }
  }

  // Initial exit state is whatever change we found in the block.
  BBInfo.Exit = BBInfo.Change;

  return HadVectorOp;
}

void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {
  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  VSETVLIInfo InInfo;
  if (MBB.pred_empty()) {
    // There are no predecessors, so use the default starting status.
    InInfo.setUnknown();
  } else {
    for (MachineBasicBlock *P : MBB.predecessors())
      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
  }

  // If we don't have any valid predecessor value, wait until we do.
  if (!InInfo.isValid())
    return;

  BBInfo.Pred = InInfo;

  VSETVLIInfo TmpStatus = BBInfo.Pred.merge(BBInfo.Change);

  // If the new exit value matches the old exit value, we don't need to revisit
  // any blocks.
  if (BBInfo.Exit == TmpStatus)
    return;

  BBInfo.Exit = TmpStatus;

  // Add the successors to the work list so we can propagate the changed exit
  // status.
  for (MachineBasicBlock *S : MBB.successors())
    if (!BlockInfo[S->getNumber()].InQueue)
      WorkList.push(S);
}

// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL is a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
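// For example (illustrative, register names hypothetical):
//
//   bb.2:
//     %avl = PHI [ %vl0, %bb.0 ], [ %vl1, %bb.1 ]
//
// where %vl0 and %vl1 are each defined by the last VSET(I)VLI in their
// predecessor with the VTYPE we need: then VL/VTYPE are already correct on
// every incoming edge and no new VSETVLI is required.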
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
                                        const MachineBasicBlock &MBB) {
  if (DisableInsertVSETVLPHIOpt)
    return true;

  if (!Require.hasAVLReg())
    return true;

  Register AVLReg = Require.getAVLReg();
  if (!AVLReg.isVirtual())
    return true;

  // We need the AVL to be produced by a PHI node in this basic block.
  MachineInstr *PHI = MRI->getVRegDef(AVLReg);
  if (!PHI || PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB)
    return true;

  for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
       PHIOp += 2) {
    Register InReg = PHI->getOperand(PHIOp).getReg();
    MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB();
    const BlockData &PBBInfo = BlockInfo[PBB->getNumber()];
    // If the exit from the predecessor has the VTYPE we are looking for
    // we might be able to avoid a VSETVLI.
    if (PBBInfo.Exit.isUnknown() ||
        !PBBInfo.Exit.hasCompatibleVTYPE(Require, /*Strict*/ false))
      return true;

    // We need the PHI input to be the output of a VSET(I)VLI.
    MachineInstr *DefMI = MRI->getVRegDef(InReg);
    if (!DefMI || (DefMI->getOpcode() != RISCV::PseudoVSETVLI &&
                   DefMI->getOpcode() != RISCV::PseudoVSETVLIX0 &&
                   DefMI->getOpcode() != RISCV::PseudoVSETIVLI))
      return true;

    // We found a VSET(I)VLI; make sure it matches the output of the
    // predecessor block.
    VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
    if (!DefInfo.hasSameAVL(PBBInfo.Exit) ||
        !DefInfo.hasSameVTYPE(PBBInfo.Exit))
      return true;
  }

  // If all the incoming values to the PHI checked out, we don't need
  // to insert a VSETVLI.
  return false;
}

void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
  VSETVLIInfo CurInfo;
  // BBLocalInfo tracks the VL/VTYPE state the same way BBInfo.Change was
  // calculated in computeVLVTYPEChanges. We need this to apply
  // canSkipVSETVLIForLoadStore the same way computeVLVTYPEChanges did. We
  // can't include predecessor information in that decision to avoid
  // disagreeing with the global analysis.
  VSETVLIInfo BBLocalInfo;
  // Only set if the current VSETVLIInfo came from an explicit VSET(I)VLI.
  MachineInstr *PrevVSETVLIMI = nullptr;

  for (MachineInstr &MI : MBB) {
    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
        MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
        MI.getOpcode() == RISCV::PseudoVSETIVLI) {
      // Conservatively, mark the VL and VTYPE as live.
      assert(MI.getOperand(3).getReg() == RISCV::VL &&
             MI.getOperand(4).getReg() == RISCV::VTYPE &&
             "Unexpected operands where VL and VTYPE should be");
      MI.getOperand(3).setIsDead(false);
      MI.getOperand(4).setIsDead(false);
      CurInfo = getInfoForVSETVLI(MI);
      BBLocalInfo = getInfoForVSETVLI(MI);
      PrevVSETVLIMI = &MI;
      continue;
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
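      // After this pass runs, vector pseudos carry their dependence on
      // VL/VTYPE as implicit register uses; the explicit AVL register
      // operand is cleared below and the implicit uses are added instead.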
      if (RISCVII::hasVLOp(TSFlags)) {
        unsigned Offset = 2;
        if (RISCVII::hasVecPolicyOp(TSFlags))
          Offset = 3;
        MachineOperand &VLOp =
            MI.getOperand(MI.getNumExplicitOperands() - Offset);
        if (VLOp.isReg()) {
          // Erase the AVL operand from the instruction.
          VLOp.setReg(RISCV::NoRegister);
          VLOp.setIsKill(false);
        }
        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
                                                /*isImp*/ true));
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));

      if (!CurInfo.isValid()) {
        // We haven't found any vector instructions or VL/VTYPE changes yet,
        // use the predecessor information.
        assert(BlockInfo[MBB.getNumber()].Pred.isValid() &&
               "Expected a valid predecessor state.");
        // Don't use predecessor information if there was an earlier
        // instruction in this block that allowed a vsetvli to be skipped for
        // load/store.
        if (!(BBLocalInfo.isValid() &&
              canSkipVSETVLIForLoadStore(MI, NewInfo, BBLocalInfo)) &&
            needVSETVLI(NewInfo, BlockInfo[MBB.getNumber()].Pred) &&
            needVSETVLIPHI(NewInfo, MBB)) {
          insertVSETVLI(MBB, MI, NewInfo, BlockInfo[MBB.getNumber()].Pred);
          CurInfo = NewInfo;
          BBLocalInfo = NewInfo;
        }

        // We must update BBLocalInfo for every vector instruction.
        if (!BBLocalInfo.isValid())
          BBLocalInfo = NewInfo;
      } else {
        assert(BBLocalInfo.isValid());
        // If this instruction isn't compatible with the previous VL/VTYPE
        // we need to insert a VSETVLI.
        // If this is a unit-stride or strided load/store, we may be able to
        // use the EMUL=(EEW/SEW)*LMUL relationship to avoid changing vtype.
        // NOTE: We can't use predecessor information for the store. We must
        // treat it the same as the first phase so that we produce the
        // correct vl/vtype for successor blocks.
        if (!canSkipVSETVLIForLoadStore(MI, NewInfo, CurInfo) &&
            needVSETVLI(NewInfo, CurInfo)) {
          // If the previous VL/VTYPE was set by a VSETVLI that no vector
          // instruction has used yet, update that VSETVLI in place with the
          // new VL/VTYPE instead of inserting another one.
          bool NeedInsertVSETVLI = true;
          if (PrevVSETVLIMI) {
            bool HasSameAVL =
                CurInfo.hasSameAVL(NewInfo) ||
                (NewInfo.hasAVLReg() && NewInfo.getAVLReg().isVirtual() &&
                 NewInfo.getAVLReg() == PrevVSETVLIMI->getOperand(0).getReg());
            // If these two VSETVLIs have the same AVL and the same VLMAX,
            // we can merge them.
            if (HasSameAVL &&
                CurInfo.getSEWLMULRatio() == NewInfo.getSEWLMULRatio()) {
              PrevVSETVLIMI->getOperand(2).setImm(NewInfo.encodeVTYPE());
              NeedInsertVSETVLI = false;
            }
            if (isScalarMoveInstr(MI) &&
                ((CurInfo.hasNonZeroAVL() && NewInfo.hasNonZeroAVL()) ||
                 (CurInfo.hasZeroAVL() && NewInfo.hasZeroAVL())) &&
                NewInfo.hasSameVLMAX(CurInfo)) {
              PrevVSETVLIMI->getOperand(2).setImm(NewInfo.encodeVTYPE());
              NeedInsertVSETVLI = false;
            }
          }
          if (NeedInsertVSETVLI)
            insertVSETVLI(MBB, MI, NewInfo, CurInfo);
          CurInfo = NewInfo;
          BBLocalInfo = NewInfo;
        }
      }
      PrevVSETVLIMI = nullptr;
    }

    // If this is something that updates VL/VTYPE that we don't know about,
    // set the state to unknown.
    if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
        MI.modifiesRegister(RISCV::VTYPE)) {
      CurInfo = VSETVLIInfo::getUnknown();
      BBLocalInfo = VSETVLIInfo::getUnknown();
      PrevVSETVLIMI = nullptr;
    }
  }
}
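
// Illustrative end-to-end example (assumed input): in a diamond CFG where
// bb.0 executes a vsetvli for e32/m1 and neither bb.1 nor bb.2 touches
// VL/VTYPE, phase 1 records the change in bb.0, phase 2 propagates e32/m1 as
// the exit state of bb.1 and bb.2 and hence the predecessor state of bb.3,
// and phase 3 can then omit a vsetvli before an e32/m1 vector op in bb.3.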
bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
  // Skip if the vector extension is not enabled.
  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
  if (!ST.hasVInstructions())
    return false;

  TII = ST.getInstrInfo();
  MRI = &MF.getRegInfo();

  assert(BlockInfo.empty() && "Expect empty block infos");
  BlockInfo.resize(MF.getNumBlockIDs());

  bool HaveVectorOp = false;

  // Phase 1 - determine how VL/VTYPE are affected by each block.
  for (const MachineBasicBlock &MBB : MF)
    HaveVectorOp |= computeVLVTYPEChanges(MBB);

  // If we didn't find any instructions that need VSETVLI, we're done.
  if (HaveVectorOp) {
    // Phase 2 - determine the exit VL/VTYPE from each block. We add all
    // blocks to the list here, but will also add any that need to be
    // revisited during Phase 2 processing.
    for (const MachineBasicBlock &MBB : MF) {
      WorkList.push(&MBB);
      BlockInfo[MBB.getNumber()].InQueue = true;
    }
    while (!WorkList.empty()) {
      const MachineBasicBlock &MBB = *WorkList.front();
      WorkList.pop();
      computeIncomingVLVTYPE(MBB);
    }

    // Phase 3 - add any vsetvli instructions needed in the block. Use the
    // Phase 2 information to avoid adding vsetvlis before the first vector
    // instruction in the block if the VL/VTYPE is satisfied by its
    // predecessors.
    for (MachineBasicBlock &MBB : MF)
      emitVSETVLIs(MBB);
  }

  BlockInfo.clear();

  return HaveVectorOp;
}

/// Returns an instance of the Insert VSETVLI pass.
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
  return new RISCVInsertVSETVLI();
}