//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
// instructions.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"

STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
STATISTIC(NumCoalescedVSETVL, "Number of VSETVL inst coalesced");

static cl::opt<bool> EnsureWholeVectorRegisterMoveValidVTYPE(
    DEBUG_TYPE "-whole-vector-register-move-valid-vtype", cl::Hidden,
    cl::desc("Insert vsetvlis before vmvNr.vs to ensure vtype is valid and "
             "vill is cleared"),
    cl::init(true));

namespace {

/// Given a virtual register \p Reg, return the corresponding VNInfo for it.
/// This will return nullptr if the virtual register is an implicit_def or
/// if LiveIntervals is not available.
static VNInfo *getVNInfoFromReg(Register Reg, const MachineInstr &MI,
                                const LiveIntervals *LIS) {
  assert(Reg.isVirtual());
  if (!LIS)
    return nullptr;
  auto &LI = LIS->getInterval(Reg);
  SlotIndex SI = LIS->getSlotIndexes()->getInstructionIndex(MI);
  return LI.getVNInfoBefore(SI);
}

static unsigned getVLOpNum(const MachineInstr &MI) {
  return RISCVII::getVLOpNum(MI.getDesc());
}

static unsigned getSEWOpNum(const MachineInstr &MI) {
  return RISCVII::getSEWOpNum(MI.getDesc());
}

static bool isVectorConfigInstr(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::PseudoVSETVLI ||
         MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
         MI.getOpcode() == RISCV::PseudoVSETIVLI;
}

/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
/// VL and only sets VTYPE.
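/// For example (illustrative), in assembly this form only rewrites VTYPE and
/// leaves the current VL intact:
///   vsetvli x0, x0, e32, m1, ta, ma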
static bool isVLPreservingConfig(const MachineInstr &MI) {
  if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
    return false;
  assert(RISCV::X0 == MI.getOperand(1).getReg());
  return RISCV::X0 == MI.getOperand(0).getReg();
}

static bool isFloatScalarMoveOrScalarSplatInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VFMV_S_F:
  case RISCV::VFMV_V_F:
    return true;
  }
}

static bool isScalarExtractInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_X_S:
  case RISCV::VFMV_F_S:
    return true;
  }
}

static bool isScalarInsertInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_S_X:
  case RISCV::VFMV_S_F:
    return true;
  }
}

static bool isScalarSplatInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_V_I:
  case RISCV::VMV_V_X:
  case RISCV::VFMV_V_F:
    return true;
  }
}

static bool isVSlideInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VSLIDEDOWN_VX:
  case RISCV::VSLIDEDOWN_VI:
  case RISCV::VSLIDEUP_VX:
  case RISCV::VSLIDEUP_VI:
    return true;
  }
}

/// Get the EEW for a load or store instruction. Return std::nullopt if MI is
/// not a load or store which ignores SEW.
static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return std::nullopt;
  case RISCV::VLE8_V:
  case RISCV::VLSE8_V:
  case RISCV::VSE8_V:
  case RISCV::VSSE8_V:
    return 8;
  case RISCV::VLE16_V:
  case RISCV::VLSE16_V:
  case RISCV::VSE16_V:
  case RISCV::VSSE16_V:
    return 16;
  case RISCV::VLE32_V:
  case RISCV::VLSE32_V:
  case RISCV::VSE32_V:
  case RISCV::VSSE32_V:
    return 32;
  case RISCV::VLE64_V:
  case RISCV::VLSE64_V:
  case RISCV::VSE64_V:
  case RISCV::VSSE64_V:
    return 64;
  }
}

static bool isNonZeroLoadImmediate(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::ADDI && MI.getOperand(1).isReg() &&
         MI.getOperand(2).isImm() && MI.getOperand(1).getReg() == RISCV::X0 &&
         MI.getOperand(2).getImm() != 0;
}

/// Return true if this is an operation on mask registers. Note that
/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
static bool isMaskRegOp(const MachineInstr &MI) {
  if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
    return false;
  const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  return Log2SEW == 0;
}

/// Return true if the inactive elements in the result are entirely undefined.
/// Note that this is different from "agnostic" as defined by the vector
/// specification. Agnostic requires each lane to either be undisturbed, or
/// take the value -1; no other value is allowed.
static bool hasUndefinedPassthru(const MachineInstr &MI) {

  unsigned UseOpIdx;
  if (!MI.isRegTiedToUseOperand(0, &UseOpIdx))
    // If there is no passthrough operand, then the pass through
    // lanes are undefined.
    return true;

  // All undefined passthrus should be $noreg: see
  // RISCVDAGToDAGISel::doPeepholeNoRegPassThru
  const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
  return UseMO.getReg() == RISCV::NoRegister || UseMO.isUndef();
}

/// Return true if \p MI is a copy that will be lowered to one or more
/// vmvNr.vs.
static bool isVectorCopy(const TargetRegisterInfo *TRI,
                         const MachineInstr &MI) {
  return MI.isCopy() && MI.getOperand(0).getReg().isPhysical() &&
         RISCVRegisterInfo::isRVVRegClass(
             TRI->getMinimalPhysRegClass(MI.getOperand(0).getReg()));
}

/// Which subfields of VL or VTYPE have values we need to preserve?
struct DemandedFields {
  // Some unknown property of VL is used. If demanded, must preserve entire
  // value.
  bool VLAny = false;
  // Only zero vs non-zero is used. If demanded, can change non-zero values.
  bool VLZeroness = false;
  // What properties of SEW we need to preserve.
  enum : uint8_t {
    SEWEqual = 3, // The exact value of SEW needs to be preserved.
    SEWGreaterThanOrEqualAndLessThan64 =
        2, // SEW can be changed as long as it's greater
           // than or equal to the original value, but must be less
           // than 64.
    SEWGreaterThanOrEqual = 1, // SEW can be changed as long as it's greater
                               // than or equal to the original value.
    SEWNone = 0 // We don't need to preserve SEW at all.
  } SEW = SEWNone;
  enum : uint8_t {
    LMULEqual = 2, // The exact value of LMUL needs to be preserved.
    LMULLessThanOrEqualToM1 = 1, // We can use any LMUL <= M1.
    LMULNone = 0                 // We don't need to preserve LMUL at all.
  } LMUL = LMULNone;
  bool SEWLMULRatio = false;
  bool TailPolicy = false;
  bool MaskPolicy = false;
  // If this is true, we demand that VTYPE is set to some legal state, i.e.
  // that vill is unset.
  bool VILL = false;

  // Return true if any part of VTYPE was used.
  bool usedVTYPE() const {
    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy || VILL;
  }

  // Return true if any property of VL was used.
  bool usedVL() { return VLAny || VLZeroness; }

  // Mark all VTYPE subfields and properties as demanded.
  void demandVTYPE() {
    SEW = SEWEqual;
    LMUL = LMULEqual;
    SEWLMULRatio = true;
    TailPolicy = true;
    MaskPolicy = true;
    VILL = true;
  }

  // Mark all VL properties as demanded.
  void demandVL() {
    VLAny = true;
    VLZeroness = true;
  }

  static DemandedFields all() {
    DemandedFields DF;
    DF.demandVTYPE();
    DF.demandVL();
    return DF;
  }

  // Make this the result of demanding both the fields in this and B.
  void doUnion(const DemandedFields &B) {
    VLAny |= B.VLAny;
    VLZeroness |= B.VLZeroness;
    SEW = std::max(SEW, B.SEW);
    LMUL = std::max(LMUL, B.LMUL);
    SEWLMULRatio |= B.SEWLMULRatio;
    TailPolicy |= B.TailPolicy;
    MaskPolicy |= B.MaskPolicy;
    VILL |= B.VILL;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
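  /// Example output (illustrative):
  ///   {VLAny=1, VLZeroness=1, SEW=SEWEqual, LMUL=LMULNone, SEWLMULRatio=0,
  ///    TailPolicy=1, MaskPolicy=0, VILL=1}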
  void print(raw_ostream &OS) const {
    OS << "{";
    OS << "VLAny=" << VLAny << ", ";
    OS << "VLZeroness=" << VLZeroness << ", ";
    OS << "SEW=";
    switch (SEW) {
    case SEWEqual:
      OS << "SEWEqual";
      break;
    case SEWGreaterThanOrEqual:
      OS << "SEWGreaterThanOrEqual";
      break;
    case SEWGreaterThanOrEqualAndLessThan64:
      OS << "SEWGreaterThanOrEqualAndLessThan64";
      break;
    case SEWNone:
      OS << "SEWNone";
      break;
    }
    OS << ", ";
    OS << "LMUL=";
    switch (LMUL) {
    case LMULEqual:
      OS << "LMULEqual";
      break;
    case LMULLessThanOrEqualToM1:
      OS << "LMULLessThanOrEqualToM1";
      break;
    case LMULNone:
      OS << "LMULNone";
      break;
    }
    OS << ", ";
    OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
    OS << "TailPolicy=" << TailPolicy << ", ";
    OS << "MaskPolicy=" << MaskPolicy << ", ";
    OS << "VILL=" << VILL;
    OS << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
  DF.print(OS);
  return OS;
}
#endif

static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
  return Fractional || LMul == 1;
}

/// Return true if moving from CurVType to NewVType is
/// indistinguishable from the perspective of an instruction (or set
/// of instructions) which use only the Used subfields and properties.
static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
                                const DemandedFields &Used) {
  switch (Used.SEW) {
  case DemandedFields::SEWNone:
    break;
  case DemandedFields::SEWEqual:
    if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqual:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqualAndLessThan64:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) ||
        RISCVVType::getSEW(NewVType) >= 64)
      return false;
    break;
  }

  switch (Used.LMUL) {
  case DemandedFields::LMULNone:
    break;
  case DemandedFields::LMULEqual:
    if (RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
      return false;
    break;
  case DemandedFields::LMULLessThanOrEqualToM1:
    if (!isLMUL1OrSmaller(RISCVVType::getVLMUL(NewVType)))
      return false;
    break;
  }

  if (Used.SEWLMULRatio) {
    auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType),
                                              RISCVVType::getVLMUL(CurVType));
    auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType),
                                              RISCVVType::getVLMUL(NewVType));
    if (Ratio1 != Ratio2)
      return false;
  }

  if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) !=
                             RISCVVType::isTailAgnostic(NewVType))
    return false;
  if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
                             RISCVVType::isMaskAgnostic(NewVType))
    return false;
  return true;
}

/// Return the fields and properties demanded by the provided instruction.
DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
  // This function works in coalesceVSETVLI too.
  // We can still use the value of a SEW, VL, or Policy operand even though it
  // might not be the exact value in the VL or VTYPE, since we only care about
  // what the instruction originally demanded.

  // Most instructions don't use any of these subfields.
  DemandedFields Res;
  // Start conservative if registers are used.
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.readsRegister(RISCV::VL, /*TRI=*/nullptr))
    Res.demandVL();
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.readsRegister(RISCV::VTYPE, /*TRI=*/nullptr))
    Res.demandVTYPE();
  // Start conservative on the unlowered form too.
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (RISCVII::hasSEWOp(TSFlags)) {
    Res.demandVTYPE();
    if (RISCVII::hasVLOp(TSFlags))
      if (const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
          !VLOp.isReg() || !VLOp.isUndef())
        Res.demandVL();

    // Behavior is independent of mask policy.
    if (!RISCVII::usesMaskPolicy(TSFlags))
      Res.MaskPolicy = false;
  }

  // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
  // They instead demand the ratio of the two which is used in computing
  // EMUL, but which allows us the flexibility to change SEW and LMUL
  // provided we don't change the ratio.
  // Note: We assume that the instruction's initial SEW is the EEW encoded
  // in the opcode. This is asserted when constructing the VSETVLIInfo.
  if (getEEWForLoadStore(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = DemandedFields::LMULNone;
  }

  // Store instructions don't use the policy fields.
  if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  // If this is a mask reg operation, it only cares about VLMAX.
  // TODO: Possible extensions to this logic
  // * Probably ok if available VLMax is larger than demanded
  // * The policy bits can probably be ignored.
  if (isMaskRegOp(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = DemandedFields::LMULNone;
  }

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and
  // VL > 0.
  if (isScalarInsertInstr(MI)) {
    Res.LMUL = DemandedFields::LMULNone;
    Res.SEWLMULRatio = false;
    Res.VLAny = false;
    // For vmv.s.x and vfmv.s.f, if the passthru is *undefined*, we don't
    // need to preserve any other bits and are thus compatible with any larger
    // etype, and can disregard policy bits. Warning: It's tempting to try
    // doing this for any tail agnostic operation, but we can't as TA requires
    // tail lanes to either be the original value or -1. We are writing
    // unknown bits to the lanes here.
    if (hasUndefinedPassthru(MI)) {
      if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
        Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
      else
        Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
      Res.TailPolicy = false;
    }
  }

  // vmv.x.s and vfmv.f.s are unconditional and ignore everything except SEW.
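  // For example (illustrative), 'vmv.x.s a0, v8' reads only element 0, so it
  // gives the same result for any VL (even VL=0) and any LMUL.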
  if (isScalarExtractInstr(MI)) {
    assert(!RISCVII::hasVLOp(TSFlags));
    Res.LMUL = DemandedFields::LMULNone;
    Res.SEWLMULRatio = false;
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  if (RISCVII::hasVLOp(MI.getDesc().TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    // A slidedown/slideup with an *undefined* passthru can freely clobber
    // elements not copied from the source vector (e.g. masked off, tail, or
    // slideup's prefix). Notes:
    // * We can't modify SEW here since the slide amount is in units of SEW.
    // * VL=1 is special only because we have existing support for zero vs
    //   non-zero VL. We could generalize this if we had a VL > C predicate.
    // * The LMUL1 restriction is for machines whose latency may depend on VL.
    // * As above, this is only legal for tail "undefined" not "agnostic".
    if (isVSlideInstr(MI) && VLOp.isImm() && VLOp.getImm() == 1 &&
        hasUndefinedPassthru(MI)) {
      Res.VLAny = false;
      Res.VLZeroness = true;
      Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
      Res.TailPolicy = false;
    }

    // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated
    // semantically the same as vmv.s.x. This is particularly useful since we
    // don't have an immediate form of vmv.s.x, and thus frequently use
    // vmv.v.i in its place. Since a splat is non-constant time in LMUL, we
    // do need to be careful not to increase the number of active vector
    // registers (unlike for vmv.s.x).
    if (isScalarSplatInstr(MI) && VLOp.isImm() && VLOp.getImm() == 1 &&
        hasUndefinedPassthru(MI)) {
      Res.LMUL = DemandedFields::LMULLessThanOrEqualToM1;
      Res.SEWLMULRatio = false;
      Res.VLAny = false;
      if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
        Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
      else
        Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
      Res.TailPolicy = false;
    }
  }

  // In §32.16.6, whole vector register moves have a dependency on SEW. At the
  // MIR level though we don't encode the element type, and it gives the same
  // result whatever the SEW may be.
  //
  // However it does need a valid SEW, i.e. vill must be cleared. The entry to
  // a function, calls and inline assembly may all set it, so make sure we
  // clear it for whole register copies. Do this by leaving VILL demanded.
  if (isVectorCopy(ST->getRegisterInfo(), MI)) {
    Res.LMUL = DemandedFields::LMULNone;
    Res.SEW = DemandedFields::SEWNone;
    Res.SEWLMULRatio = false;
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  return Res;
}

/// Defines the abstract state with which the forward dataflow models the
/// values of the VL and VTYPE registers after insertion.
class VSETVLIInfo {
  struct AVLDef {
    // Every AVLDef should have a VNInfo, unless we're running without
    // LiveIntervals in which case this will be nullptr.
    const VNInfo *ValNo;
    Register DefReg;
  };
  union {
    AVLDef AVLRegDef;
    unsigned AVLImm;
  };

  enum : uint8_t {
    Uninitialized,
    AVLIsReg,
    AVLIsImm,
    AVLIsVLMAX,
    Unknown, // AVL and VTYPE are fully unknown
  } State = Uninitialized;

  // Fields from VTYPE.
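  // (For reference, the vtype CSR layout is vill at bit XLEN-1, reserved
  // bits, then vma[7] | vta[6] | vsew[5:3] | vlmul[2:0].)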
  RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
  uint8_t SEW = 0;
  uint8_t TailAgnostic : 1;
  uint8_t MaskAgnostic : 1;
  uint8_t SEWLMULRatioOnly : 1;

public:
  VSETVLIInfo()
      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
        SEWLMULRatioOnly(false) {}

  static VSETVLIInfo getUnknown() {
    VSETVLIInfo Info;
    Info.setUnknown();
    return Info;
  }

  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  void setAVLRegDef(const VNInfo *VNInfo, Register AVLReg) {
    assert(AVLReg.isVirtual());
    AVLRegDef.ValNo = VNInfo;
    AVLRegDef.DefReg = AVLReg;
    State = AVLIsReg;
  }

  void setAVLImm(unsigned Imm) {
    AVLImm = Imm;
    State = AVLIsImm;
  }

  void setAVLVLMAX() { State = AVLIsVLMAX; }

  bool hasAVLImm() const { return State == AVLIsImm; }
  bool hasAVLReg() const { return State == AVLIsReg; }
  bool hasAVLVLMAX() const { return State == AVLIsVLMAX; }
  Register getAVLReg() const {
    assert(hasAVLReg() && AVLRegDef.DefReg.isVirtual());
    return AVLRegDef.DefReg;
  }
  unsigned getAVLImm() const {
    assert(hasAVLImm());
    return AVLImm;
  }
  const VNInfo *getAVLVNInfo() const {
    assert(hasAVLReg());
    return AVLRegDef.ValNo;
  }
  // Most AVLIsReg infos will have a single defining MachineInstr, unless it
  // was a PHI node. In that case getAVLVNInfo()->def will point to the block
  // boundary slot and this will return nullptr. If LiveIntervals isn't
  // available, nullptr is also returned.
  const MachineInstr *getAVLDefMI(const LiveIntervals *LIS) const {
    assert(hasAVLReg());
    if (!LIS || getAVLVNInfo()->isPHIDef())
      return nullptr;
    auto *MI = LIS->getInstructionFromIndex(getAVLVNInfo()->def);
    assert(MI);
    return MI;
  }

  void setAVL(const VSETVLIInfo &Info) {
    assert(Info.isValid());
    if (Info.isUnknown())
      setUnknown();
    else if (Info.hasAVLReg())
      setAVLRegDef(Info.getAVLVNInfo(), Info.getAVLReg());
    else if (Info.hasAVLVLMAX())
      setAVLVLMAX();
    else {
      assert(Info.hasAVLImm());
      setAVLImm(Info.getAVLImm());
    }
  }

  unsigned getSEW() const { return SEW; }
  RISCVII::VLMUL getVLMUL() const { return VLMul; }
  bool getTailAgnostic() const { return TailAgnostic; }
  bool getMaskAgnostic() const { return MaskAgnostic; }

  bool hasNonZeroAVL(const LiveIntervals *LIS) const {
    if (hasAVLImm())
      return getAVLImm() > 0;
    if (hasAVLReg()) {
      if (auto *DefMI = getAVLDefMI(LIS))
        return isNonZeroLoadImmediate(*DefMI);
    }
    if (hasAVLVLMAX())
      return true;
    return false;
  }

  bool hasEquallyZeroAVL(const VSETVLIInfo &Other,
                         const LiveIntervals *LIS) const {
    if (hasSameAVL(Other))
      return true;
    return (hasNonZeroAVL(LIS) && Other.hasNonZeroAVL(LIS));
  }

  bool hasSameAVLLatticeValue(const VSETVLIInfo &Other) const {
    if (hasAVLReg() && Other.hasAVLReg()) {
      assert(!getAVLVNInfo() == !Other.getAVLVNInfo() &&
             "we either have intervals or we don't");
      if (!getAVLVNInfo())
        return getAVLReg() == Other.getAVLReg();
      return getAVLVNInfo()->id == Other.getAVLVNInfo()->id &&
             getAVLReg() == Other.getAVLReg();
    }

    if (hasAVLImm() && Other.hasAVLImm())
      return getAVLImm() == Other.getAVLImm();

    if (hasAVLVLMAX())
      return Other.hasAVLVLMAX() && hasSameVLMAX(Other);

    return false;
  }

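  // Illustrative example: if virtual register %avl is redefined between two
  // uses, each use sees a different VNInfo, so the lattice values above
  // compare unequal even though the register is the same.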
  // Return true if the two lattice values are guaranteed to have
  // the same AVL value at runtime.
  bool hasSameAVL(const VSETVLIInfo &Other) const {
    // Without LiveIntervals, we don't know which instruction defines a
    // register. Since a register may be redefined, this means all AVLIsReg
    // states must be treated as possibly distinct.
    if (hasAVLReg() && Other.hasAVLReg()) {
      assert(!getAVLVNInfo() == !Other.getAVLVNInfo() &&
             "we either have intervals or we don't");
      if (!getAVLVNInfo())
        return false;
    }
    return hasSameAVLLatticeValue(Other);
  }

  void setVTYPE(unsigned VType) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = RISCVVType::getVLMUL(VType);
    SEW = RISCVVType::getSEW(VType);
    TailAgnostic = RISCVVType::isTailAgnostic(VType);
    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
  }
  void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = L;
    SEW = S;
    TailAgnostic = TA;
    MaskAgnostic = MA;
  }

  void setVLMul(RISCVII::VLMUL VLMul) { this->VLMul = VLMul; }

  unsigned encodeVTYPE() const {
    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
  }

  bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }

  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                    Other.MaskAgnostic);
  }

  unsigned getSEWLMULRatio() const {
    assert(isValid() && !isUnknown() &&
           "Can't use VTYPE for uninitialized or unknown");
    return RISCVVType::getSEWLMULRatio(SEW, VLMul);
  }

  // Check if the VTYPEs for these two VSETVLIInfos produce the same VLMAX.
  // Note that having the same VLMAX ensures that both share the same
  // function from AVL to VL; that is, they must produce the same VL value
  // for any given AVL value.
  bool hasSameVLMAX(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return getSEWLMULRatio() == Other.getSEWLMULRatio();
  }

  bool hasCompatibleVTYPE(const DemandedFields &Used,
                          const VSETVLIInfo &Require) const {
    return areCompatibleVTYPEs(Require.encodeVTYPE(), encodeVTYPE(), Used);
  }

  // Determine whether the vector instruction's requirements represented by
  // Require are compatible with the previous vsetvli instruction represented
  // by this. MI is the instruction whose requirements we're considering.
  bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require,
                    const LiveIntervals *LIS) const {
    assert(isValid() && Require.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    // Nothing is compatible with Unknown.
    if (isUnknown() || Require.isUnknown())
      return false;

    // If only our VLMAX ratio is valid, then this isn't compatible.
    if (SEWLMULRatioOnly || Require.SEWLMULRatioOnly)
      return false;

    if (Used.VLAny && !(hasSameAVL(Require) && hasSameVLMAX(Require)))
      return false;

    if (Used.VLZeroness && !hasEquallyZeroAVL(Require, LIS))
      return false;

    return hasCompatibleVTYPE(Used, Require);
  }

  bool operator==(const VSETVLIInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (!isValid())
      return !Other.isValid();
    if (!Other.isValid())
      return !isValid();

    // Unknown is only equal to another Unknown.
    if (isUnknown())
      return Other.isUnknown();
    if (Other.isUnknown())
      return isUnknown();

    if (!hasSameAVLLatticeValue(Other))
      return false;

    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
    if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
      return false;

    // If only the VLMAX is valid, check that it is the same.
    if (SEWLMULRatioOnly)
      return hasSameVLMAX(Other);

    // If the full VTYPE is valid, check that it is the same.
    return hasSameVTYPE(Other);
  }

  bool operator!=(const VSETVLIInfo &Other) const { return !(*this == Other); }

  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
  // both predecessors.
  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
    if (isUnknown() || Other.isUnknown())
      return VSETVLIInfo::getUnknown();

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
    // return an SEW/LMUL ratio only value.
    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
      VSETVLIInfo MergeInfo = *this;
      MergeInfo.SEWLMULRatioOnly = true;
      return MergeInfo;
    }

    // Otherwise the result is unknown.
    return VSETVLIInfo::getUnknown();
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  /// @{
  void print(raw_ostream &OS) const {
    OS << "{";
    if (!isValid())
      OS << "Uninitialized";
    if (isUnknown())
      OS << "unknown";
    if (hasAVLReg())
      OS << "AVLReg=" << llvm::printReg(getAVLReg());
    if (hasAVLImm())
      OS << "AVLImm=" << (unsigned)AVLImm;
    if (hasAVLVLMAX())
      OS << "AVLVLMAX";
    OS << ", "
       << "VLMul=" << (unsigned)VLMul << ", "
       << "SEW=" << (unsigned)SEW << ", "
       << "TailAgnostic=" << (bool)TailAgnostic << ", "
       << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
       << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
  V.print(OS);
  return OS;
}
#endif

struct BlockData {
  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all
  // predecessor blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};

class RISCVInsertVSETVLI : public MachineFunctionPass {
  const RISCVSubtarget *ST;
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;
  // Possibly null!
  LiveIntervals *LIS;

  std::vector<BlockData> BlockInfo;
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {}
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();

    AU.addUsedIfAvailable<LiveIntervalsWrapperPass>();
    AU.addPreserved<LiveIntervalsWrapperPass>();
    AU.addPreserved<SlotIndexesWrapperPass>();
    AU.addPreserved<LiveDebugVariablesWrapperLegacy>();
    AU.addPreserved<LiveStacksWrapperLegacy>();

    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  bool needVSETVLI(const DemandedFields &Used, const VSETVLIInfo &Require,
                   const VSETVLIInfo &CurInfo) const;
  bool needVSETVLIPHI(const VSETVLIInfo &Require,
                      const MachineBasicBlock &MBB) const;
  void insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const;
  void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const;
  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                             VSETVLIInfo &Info) const;
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  void emitVSETVLIs(MachineBasicBlock &MBB);
  void doPRE(MachineBasicBlock &MBB);
  void insertReadVL(MachineBasicBlock &MBB);

  bool canMutatePriorConfig(const MachineInstr &PrevMI, const MachineInstr &MI,
                            const DemandedFields &Used) const;
  void coalesceVSETVLIs(MachineBasicBlock &MBB) const;

  VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) const;
  VSETVLIInfo computeInfoForInstr(const MachineInstr &MI) const;
  void forwardVSETVLIAVL(VSETVLIInfo &Info) const;
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;
char &llvm::RISCVInsertVSETVLIID = RISCVInsertVSETVLI::ID;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)

// If the AVL is defined by a vsetvli's output vl with the same VLMAX, we can
// replace the AVL operand with the AVL of the defining vsetvli. E.g.
//
//   %vl = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
//   $x0 = PseudoVSETVLI %vl:gpr, SEW=32, LMUL=M1
// ->
//   %vl = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
//   $x0 = PseudoVSETVLI %avl:gpr, SEW=32, LMUL=M1
void RISCVInsertVSETVLI::forwardVSETVLIAVL(VSETVLIInfo &Info) const {
  if (!Info.hasAVLReg())
    return;
  const MachineInstr *DefMI = Info.getAVLDefMI(LIS);
  if (!DefMI || !isVectorConfigInstr(*DefMI))
    return;
  VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(*DefMI);
  if (!DefInstrInfo.hasSameVLMAX(Info))
    return;
  Info.setAVL(DefInstrInfo);
}

// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
VSETVLIInfo
RISCVInsertVSETVLI::getInfoForVSETVLI(const MachineInstr &MI) const {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
    NewInfo.setAVLImm(MI.getOperand(1).getImm());
  } else {
    assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
    Register AVLReg = MI.getOperand(1).getReg();
    assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
           "Can't handle X0, X0 vsetvli yet");
    if (AVLReg == RISCV::X0)
      NewInfo.setAVLVLMAX();
    else if (MI.getOperand(1).isUndef())
      // Otherwise use an AVL of 1 to avoid depending on previous vl.
      NewInfo.setAVLImm(1);
    else {
      VNInfo *VNI = getVNInfoFromReg(AVLReg, MI, LIS);
      NewInfo.setAVLRegDef(VNI, AVLReg);
    }
  }
  NewInfo.setVTYPE(MI.getOperand(2).getImm());

  forwardVSETVLIAVL(NewInfo);

  return NewInfo;
}

static unsigned computeVLMAX(unsigned VLEN, unsigned SEW,
                             RISCVII::VLMUL VLMul) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul);
  if (Fractional)
    VLEN = VLEN / LMul;
  else
    VLEN = VLEN * LMul;
  return VLEN / SEW;
}

VSETVLIInfo
RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const {
  VSETVLIInfo InstrInfo;
  const uint64_t TSFlags = MI.getDesc().TSFlags;

  bool TailAgnostic = true;
  bool MaskAgnostic = true;
  if (!hasUndefinedPassthru(MI)) {
    // Start with undisturbed.
    TailAgnostic = false;
    MaskAgnostic = false;

    // If there is a policy operand, use it.
    if (RISCVII::hasVecPolicyOp(TSFlags)) {
      const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
      uint64_t Policy = Op.getImm();
      assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
             "Invalid Policy Value");
      TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
      MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
    }

    // Some pseudo instructions force a tail agnostic policy despite having a
    // tied def.
    if (RISCVII::doesForceTailAgnostic(TSFlags))
      TailAgnostic = true;

    if (!RISCVII::usesMaskPolicy(TSFlags))
      MaskAgnostic = true;
  }

  RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");

  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    if (VLOp.isImm()) {
      int64_t Imm = VLOp.getImm();
      // Convert the VLMax sentinel to the X0 register.
      if (Imm == RISCV::VLMaxSentinel) {
        // If we know the exact VLEN, see if we can use the constant encoding
        // for the VLMAX instead. This reduces register pressure slightly.
        const unsigned VLMAX = computeVLMAX(ST->getRealMaxVLen(), SEW, VLMul);
        if (ST->getRealMinVLen() == ST->getRealMaxVLen() && VLMAX <= 31)
          InstrInfo.setAVLImm(VLMAX);
        else
          InstrInfo.setAVLVLMAX();
      } else
        InstrInfo.setAVLImm(Imm);
    } else if (VLOp.isUndef()) {
      // Otherwise use an AVL of 1 to avoid depending on previous vl.
      InstrInfo.setAVLImm(1);
    } else {
      VNInfo *VNI = getVNInfoFromReg(VLOp.getReg(), MI, LIS);
      InstrInfo.setAVLRegDef(VNI, VLOp.getReg());
    }
  } else {
    assert(isScalarExtractInstr(MI));
    // Pick a random value for state tracking purposes, will be ignored via
    // the demanded fields mechanism.
    InstrInfo.setAVLImm(1);
  }
#ifndef NDEBUG
  if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {
    assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
  }
#endif
  InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);

  forwardVSETVLIAVL(InstrInfo);

  return InstrInfo;
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator InsertPt,
                                       DebugLoc DL, const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {

  ++NumInsertedVSETVL;
  if (PrevInfo.isValid() && !PrevInfo.isUnknown()) {
    // Use the X0, X0 form if the AVL is the same and the SEW+LMUL gives the
    // same VLMAX.
    if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
      auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                    .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                    .addReg(RISCV::X0, RegState::Kill)
                    .addImm(Info.encodeVTYPE())
                    .addReg(RISCV::VL, RegState::Implicit);
      if (LIS)
        LIS->InsertMachineInstrInMaps(*MI);
      return;
    }

    // If our AVL is a virtual register, it might be defined by a VSET(I)VLI.
    // If it has the same VLMAX we want and the last VL/VTYPE we observed is
    // the same, we can use the X0, X0 form.
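    // e.g. (illustrative, <vtype> as a placeholder):
    //   %vl = PseudoVSETVLI %avl, <vtype>    ; defines the AVL we want
    //   ...                                  ; VL/VTYPE still as above
    //   $x0 = PseudoVSETVLIX0 $x0, <vtype'>  ; same VLMAX, so keeping the
    //                                        ; current VL gives the same value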
    if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg()) {
      if (const MachineInstr *DefMI = Info.getAVLDefMI(LIS);
          DefMI && isVectorConfigInstr(*DefMI)) {
        VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
        if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) {
          auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                        .addReg(RISCV::X0, RegState::Kill)
                        .addImm(Info.encodeVTYPE())
                        .addReg(RISCV::VL, RegState::Implicit);
          if (LIS)
            LIS->InsertMachineInstrInMaps(*MI);
          return;
        }
      }
    }
  }

  if (Info.hasAVLImm()) {
    auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
                  .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                  .addImm(Info.getAVLImm())
                  .addImm(Info.encodeVTYPE());
    if (LIS)
      LIS->InsertMachineInstrInMaps(*MI);
    return;
  }

  if (Info.hasAVLVLMAX()) {
    Register DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
    auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                  .addReg(DestReg, RegState::Define | RegState::Dead)
                  .addReg(RISCV::X0, RegState::Kill)
                  .addImm(Info.encodeVTYPE());
    if (LIS) {
      LIS->InsertMachineInstrInMaps(*MI);
      LIS->createAndComputeVirtRegInterval(DestReg);
    }
    return;
  }

  Register AVLReg = Info.getAVLReg();
  MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);
  auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLI))
                .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                .addReg(AVLReg)
                .addImm(Info.encodeVTYPE());
  if (LIS) {
    LIS->InsertMachineInstrInMaps(*MI);
    LiveInterval &LI = LIS->getInterval(AVLReg);
    SlotIndex SI = LIS->getInstructionIndex(*MI).getRegSlot();
    // If the AVL value isn't live at MI, do a quick check to see if it's
    // easily extendable. Otherwise, we need to copy it.
    if (LI.getVNInfoBefore(SI) != Info.getAVLVNInfo()) {
      if (!LI.liveAt(SI) && LI.containsOneValue())
        LIS->extendToIndices(LI, SI);
      else {
        Register AVLCopyReg =
            MRI->createVirtualRegister(&RISCV::GPRNoX0RegClass);
        MachineBasicBlock::iterator II;
        if (Info.getAVLVNInfo()->isPHIDef())
          II = LIS->getMBBFromIndex(Info.getAVLVNInfo()->def)->getFirstNonPHI();
        else {
          II = LIS->getInstructionFromIndex(Info.getAVLVNInfo()->def);
          II = std::next(II);
        }
        assert(II.isValid());
        auto AVLCopy =
            BuildMI(*II->getParent(), II, DL, TII->get(RISCV::COPY), AVLCopyReg)
                .addReg(AVLReg);
        LIS->InsertMachineInstrInMaps(*AVLCopy);
        MI->getOperand(1).setReg(AVLCopyReg);
        LIS->createAndComputeVirtRegInterval(AVLCopyReg);
      }
    }
  }
}

/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// given a set of DemandedFields \p Used.
bool RISCVInsertVSETVLI::needVSETVLI(const DemandedFields &Used,
                                     const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) const {
  if (!CurInfo.isValid() || CurInfo.isUnknown() ||
      CurInfo.hasSEWLMULRatioOnly())
    return true;

  if (CurInfo.isCompatible(Used, Require, LIS))
    return false;

  return true;
}

// If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
// maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
// places.
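// For example (illustrative): if the previous state was e32/m2 (ratio 16) and
// the incoming instruction only cares about SEW=64, we can choose e64/m4
// (also ratio 16) so VLMAX, and thus VL, stays the same.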
static VSETVLIInfo adjustIncoming(const VSETVLIInfo &PrevInfo,
                                  const VSETVLIInfo &NewInfo,
                                  DemandedFields &Demanded) {
  VSETVLIInfo Info = NewInfo;

  if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() &&
      !PrevInfo.isUnknown()) {
    if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
            PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
      Info.setVLMul(*NewVLMul);
    Demanded.LMUL = DemandedFields::LMULEqual;
  }

  return Info;
}

// Given an incoming state reaching MI, minimally modifies that state so that
// it is compatible with MI. The resulting state is guaranteed to be
// semantically legal for MI, but may not be the state requested by MI.
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
                                        const MachineInstr &MI) const {
  if (isVectorCopy(ST->getRegisterInfo(), MI) &&
      (Info.isUnknown() || !Info.isValid() || Info.hasSEWLMULRatioOnly())) {
    // Use an arbitrary but valid AVL and VTYPE so vill will be cleared. It may
    // be coalesced into another vsetvli since we won't demand any fields.
    VSETVLIInfo NewInfo; // Need a new VSETVLIInfo to clear SEWLMULRatioOnly
    NewInfo.setAVLImm(1);
    NewInfo.setVTYPE(RISCVII::VLMUL::LMUL_1, /*sew*/ 8, /*ta*/ true,
                     /*ma*/ true);
    Info = NewInfo;
    return;
  }

  if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
    return;

  DemandedFields Demanded = getDemanded(MI, ST);

  const VSETVLIInfo NewInfo = computeInfoForInstr(MI);
  assert(NewInfo.isValid() && !NewInfo.isUnknown());
  if (Info.isValid() && !needVSETVLI(Demanded, NewInfo, Info))
    return;

  const VSETVLIInfo PrevInfo = Info;
  if (!Info.isValid() || Info.isUnknown())
    Info = NewInfo;

  const VSETVLIInfo IncomingInfo = adjustIncoming(PrevInfo, NewInfo, Demanded);

  // If MI only demands that VL has the same zeroness, we only need to set the
  // AVL if the zeroness differs. This removes a vsetvli entirely if the types
  // match, or allows use of the cheaper AVL-preserving variant if VLMAX
  // doesn't change. If VLMAX might change, we couldn't use the
  // 'vsetvli x0, x0, vtype' variant, so we avoid the transform to prevent
  // extending the live range of an AVL register operand.
  // TODO: We can probably relax this for immediates.
  bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo, LIS) &&
                     IncomingInfo.hasSameVLMAX(PrevInfo);
  if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero))
    Info.setAVL(IncomingInfo);

  Info.setVTYPE(
      ((Demanded.LMUL || Demanded.SEWLMULRatio) ? IncomingInfo : Info)
          .getVLMUL(),
      ((Demanded.SEW || Demanded.SEWLMULRatio) ? IncomingInfo : Info).getSEW(),
      // Prefer tail/mask agnostic since it can be relaxed to undisturbed
      // later if needed.
      (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() ||
          IncomingInfo.getTailAgnostic(),
      (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
          IncomingInfo.getMaskAgnostic());

  // If we only knew the sew/lmul ratio previously, replace the VTYPE but keep
  // the AVL.
  if (Info.hasSEWLMULRatioOnly()) {
    VSETVLIInfo RatiolessInfo = IncomingInfo;
    RatiolessInfo.setAVL(Info);
    Info = RatiolessInfo;
  }
}

// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different than the state MI requested), modify the state to
// reflect the changes MI might make.
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
                                       const MachineInstr &MI) const {
  if (isVectorConfigInstr(MI)) {
    Info = getInfoForVSETVLI(MI);
    return;
  }

  if (RISCV::isFaultFirstLoad(MI)) {
    // Update AVL to the vl-output of the fault first load.
    assert(MI.getOperand(1).getReg().isVirtual());
    if (LIS) {
      auto &LI = LIS->getInterval(MI.getOperand(1).getReg());
      SlotIndex SI =
          LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot();
      VNInfo *VNI = LI.getVNInfoAt(SI);
      Info.setAVLRegDef(VNI, MI.getOperand(1).getReg());
    } else
      Info.setAVLRegDef(nullptr, MI.getOperand(1).getReg());
    return;
  }

  // If this is something that updates VL/VTYPE that we don't know about, set
  // the state to unknown.
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
      MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
    Info = VSETVLIInfo::getUnknown();
}

bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                                               VSETVLIInfo &Info) const {
  bool HadVectorOp = false;

  Info = BlockInfo[MBB.getNumber()].Pred;
  for (const MachineInstr &MI : MBB) {
    transferBefore(Info, MI);

    if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags) ||
        isVectorCopy(ST->getRegisterInfo(), MI))
      HadVectorOp = true;

    transferAfter(Info, MI);
  }

  return HadVectorOp;
}

void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  // Start with the previous entry so that we keep the most conservative state
  // we have ever found.
  VSETVLIInfo InInfo = BBInfo.Pred;
  if (MBB.pred_empty()) {
    // There are no predecessors, so use the default starting status.
    InInfo.setUnknown();
  } else {
    for (MachineBasicBlock *P : MBB.predecessors())
      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
  }

  // If we don't have any valid predecessor value, wait until we do.
  if (!InInfo.isValid())
    return;

  // If no change, no need to rerun the block.
  if (InInfo == BBInfo.Pred)
    return;

  BBInfo.Pred = InInfo;
  LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Pred << "\n");

  // Note: It's tempting to cache the state changes here, but due to the
  // compatibility checks performed a block's output state can change based on
  // the input state. To cache, we'd have to add logic for finding
  // never-compatible state changes.
  VSETVLIInfo TmpStatus;
  computeVLVTYPEChanges(MBB, TmpStatus);

  // If the new exit value matches the old exit value, we don't need to
  // revisit any blocks.
  if (BBInfo.Exit == TmpStatus)
    return;

  BBInfo.Exit = TmpStatus;
  LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Exit << "\n");

  // Add the successors to the work list so we can propagate the changed exit
  // status.
  for (MachineBasicBlock *S : MBB.successors())
    if (!BlockInfo[S->getNumber()].InQueue) {
      BlockInfo[S->getNumber()].InQueue = true;
      WorkList.push(S);
    }
}

// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL was a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
                                        const MachineBasicBlock &MBB) const {
  if (!Require.hasAVLReg())
    return true;

  if (!LIS)
    return true;

  // We need the AVL to have been produced by a PHI node in this basic block.
  const VNInfo *Valno = Require.getAVLVNInfo();
  if (!Valno->isPHIDef() || LIS->getMBBFromIndex(Valno->def) != &MBB)
    return true;

  const LiveRange &LR = LIS->getInterval(Require.getAVLReg());

  for (auto *PBB : MBB.predecessors()) {
    const VSETVLIInfo &PBBExit = BlockInfo[PBB->getNumber()].Exit;

    // We need the PHI input to be the output of a VSET(I)VLI.
    const VNInfo *Value = LR.getVNInfoBefore(LIS->getMBBEndIdx(PBB));
    if (!Value)
      return true;
    MachineInstr *DefMI = LIS->getInstructionFromIndex(Value->def);
    if (!DefMI || !isVectorConfigInstr(*DefMI))
      return true;

    // We found a VSET(I)VLI; make sure it matches the output of the
    // predecessor block.
    VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
    if (DefInfo != PBBExit)
      return true;

    // Require has the same VL as PBBExit, so if the exit from the
    // predecessor has the VTYPE we are looking for we might be able
    // to avoid a VSETVLI.
    if (PBBExit.isUnknown() || !PBBExit.hasSameVTYPE(Require))
      return true;
  }

  // If all the incoming values to the PHI checked out, we don't need
  // to insert a VSETVLI.
  return false;
}

void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
  VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
  // Track whether the prefix of the block we've scanned is transparent
  // (meaning it has not yet changed the abstract state).
  bool PrefixTransparent = true;
  for (MachineInstr &MI : MBB) {
    const VSETVLIInfo PrevInfo = CurInfo;
    transferBefore(CurInfo, MI);

    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (isVectorConfigInstr(MI)) {
      // Conservatively, mark the VL and VTYPE as live.
      assert(MI.getOperand(3).getReg() == RISCV::VL &&
             MI.getOperand(4).getReg() == RISCV::VTYPE &&
             "Unexpected operands where VL and VTYPE should be");
      MI.getOperand(3).setIsDead(false);
      MI.getOperand(4).setIsDead(false);
      PrefixTransparent = false;
    }

    if (EnsureWholeVectorRegisterMoveValidVTYPE &&
        isVectorCopy(ST->getRegisterInfo(), MI)) {
      if (!PrevInfo.isCompatible(DemandedFields::all(), CurInfo, LIS)) {
        insertVSETVLI(MBB, MI, MI.getDebugLoc(), CurInfo, PrevInfo);
        PrefixTransparent = false;
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (!PrevInfo.isCompatible(DemandedFields::all(), CurInfo, LIS)) {
        // If this is the first implicit state change, and the state change
        // requested can be proven to produce the same register contents, we
        // can skip emitting the actual state change and continue as if we
        // had since we know the GPR result of the implicit state change
        // wouldn't be used and the VL/VTYPE registers are correct. Note that
        // we *do* need to model the state as if it changed as while the
        // register contents are unchanged, the abstract model can change.
        if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
          insertVSETVLI(MBB, MI, MI.getDebugLoc(), CurInfo, PrevInfo);
        PrefixTransparent = false;
      }

      if (RISCVII::hasVLOp(TSFlags)) {
        MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
        if (VLOp.isReg()) {
          Register Reg = VLOp.getReg();

          // Erase the AVL operand from the instruction.
          VLOp.setReg(RISCV::NoRegister);
          VLOp.setIsKill(false);
          if (LIS) {
            LiveInterval &LI = LIS->getInterval(Reg);
            SmallVector<MachineInstr *> DeadMIs;
            LIS->shrinkToUses(&LI, &DeadMIs);
            // We might have separate components that need split due to
            // needVSETVLIPHI causing us to skip inserting a new VL def.
            SmallVector<LiveInterval *> SplitLIs;
            LIS->splitSeparateComponents(LI, SplitLIs);

            // If the AVL was an immediate > 31, then it would have been
            // emitted as an ADDI. However, the ADDI might not have been used
            // in the vsetvli, or a vsetvli might not have been emitted, so it
            // may be dead now.
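            // e.g. (illustrative):
            //   %avl = ADDI $x0, 42   ; AVL > 31 doesn't fit vsetivli's uimm5
            //   ...                   ; the use of %avl was rewritten away
            // The ADDI is deleted below once no uses remain.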
            for (MachineInstr *DeadMI : DeadMIs) {
              if (!TII->isAddImmediate(*DeadMI, Reg))
                continue;
              LIS->RemoveMachineInstrFromMaps(*DeadMI);
              DeadMI->eraseFromParent();
            }
          }
        }
        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
                                                /*isImp*/ true));
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    if (MI.isCall() || MI.isInlineAsm() ||
        MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
        MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
      PrefixTransparent = false;

    transferAfter(CurInfo, MI);
  }

  const auto &Info = BlockInfo[MBB.getNumber()];
  if (CurInfo != Info.Exit) {
    LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
    LLVM_DEBUG(dbgs() << "  begin        state: " << Info.Pred << "\n");
    LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
    LLVM_DEBUG(dbgs() << "  actual   end state: " << CurInfo << "\n");
  }
  assert(CurInfo == Info.Exit && "InsertVSETVLI dataflow invariant violated");
}

/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors. Specifically,
/// this is geared to catch the common case of a fixed length vsetvl in a
/// single block loop when it could execute once in the preheader instead.
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
  if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
    return;

  MachineBasicBlock *UnavailablePred = nullptr;
  VSETVLIInfo AvailableInfo;
  for (MachineBasicBlock *P : MBB.predecessors()) {
    const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
    if (PredInfo.isUnknown()) {
      if (UnavailablePred)
        return;
      UnavailablePred = P;
    } else if (!AvailableInfo.isValid()) {
      AvailableInfo = PredInfo;
    } else if (AvailableInfo != PredInfo) {
      return;
    }
  }

  // Unreachable, single pred, or full redundancy. Note that FRE is handled by
  // phase 3.
  if (!UnavailablePred || !AvailableInfo.isValid())
    return;

  if (!LIS)
    return;

  // If we don't know the exact VTYPE, we can't copy the vsetvli to the exit
  // of the unavailable pred.
  if (AvailableInfo.hasSEWLMULRatioOnly())
    return;

  // Critical edge - TODO: consider splitting?
  if (UnavailablePred->succ_size() != 1)
    return;

  // If the AVL value is a register (other than our VLMAX sentinel),
  // we need to prove the value is available at the point we're going
  // to insert the vsetvli at.
  if (AvailableInfo.hasAVLReg()) {
    SlotIndex SI = AvailableInfo.getAVLVNInfo()->def;
    // This is an inline dominance check which covers the case of
    // UnavailablePred being the preheader of a loop.
    if (LIS->getMBBFromIndex(SI) != UnavailablePred)
      return;
    if (!UnavailablePred->terminators().empty() &&
        SI >= LIS->getInstructionIndex(*UnavailablePred->getFirstTerminator()))
      return;
  }

  // Model the effect of changing the input state of the block MBB to
  // AvailableInfo. We're looking for two issues here; one legality,
  // one profitability.
  // 1) If the block doesn't use some of the fields from VL or VTYPE, we
  //    may hit the end of the block with a different end state.
  // Model the effect of changing the input state of the block MBB to
  // AvailableInfo. We're looking for two issues here: one of legality, one
  // of profitability.
  // 1) If the block doesn't use some of the fields from VL or VTYPE, we may
  //    hit the end of the block with a different end state. We cannot make
  //    this change without reflowing later blocks as well.
  // 2) If we don't actually remove a transition, inserting a vsetvli into
  //    the predecessor block would be correct, but unprofitable.
  VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred;
  VSETVLIInfo CurInfo = AvailableInfo;
  int TransitionsRemoved = 0;
  for (const MachineInstr &MI : MBB) {
    const VSETVLIInfo LastInfo = CurInfo;
    const VSETVLIInfo LastOldInfo = OldInfo;
    transferBefore(CurInfo, MI);
    transferBefore(OldInfo, MI);
    if (CurInfo == LastInfo)
      TransitionsRemoved++;
    if (LastOldInfo == OldInfo)
      TransitionsRemoved--;
    transferAfter(CurInfo, MI);
    transferAfter(OldInfo, MI);
    if (CurInfo == OldInfo)
      // Convergence. All transitions after this must match by construction.
      break;
  }
  if (CurInfo != OldInfo || TransitionsRemoved <= 0)
    // Issues 1 and 2 above.
    return;

  // Finally, update both the dataflow state and insert the actual vsetvli.
  // Doing both keeps the code in sync with the dataflow results, which is
  // critical for the correctness of phase 3.
  auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit;
  LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
                    << UnavailablePred->getName() << " with state "
                    << AvailableInfo << "\n");
  BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
  BlockInfo[MBB.getNumber()].Pred = AvailableInfo;

  // Note there's an implicit assumption here that terminators never use or
  // modify VL or VTYPE. Also, if the block falls through to its successor
  // (no terminator), getFirstInstrTerminator() returns end().
  auto InsertPt = UnavailablePred->getFirstInstrTerminator();
  insertVSETVLI(*UnavailablePred, InsertPt,
                UnavailablePred->findDebugLoc(InsertPt), AvailableInfo,
                OldExit);
}

// Return true if we can mutate PrevMI to match MI without changing any of
// the fields which would be observed.
bool RISCVInsertVSETVLI::canMutatePriorConfig(
    const MachineInstr &PrevMI, const MachineInstr &MI,
    const DemandedFields &Used) const {
  // If the VL values aren't equal, return false if either a) the former is
  // demanded, or b) we can't rewrite the former to be the latter for
  // implementation reasons.
  if (!isVLPreservingConfig(MI)) {
    if (Used.VLAny)
      return false;

    if (Used.VLZeroness) {
      if (isVLPreservingConfig(PrevMI))
        return false;
      if (!getInfoForVSETVLI(PrevMI).hasEquallyZeroAVL(getInfoForVSETVLI(MI),
                                                       LIS))
        return false;
    }

    auto &AVL = MI.getOperand(1);

    // If the AVL is a register, we need to make sure its definition is the
    // same at PrevMI as it was at MI.
    if (AVL.isReg() && AVL.getReg() != RISCV::X0) {
      VNInfo *VNI = getVNInfoFromReg(AVL.getReg(), MI, LIS);
      VNInfo *PrevVNI = getVNInfoFromReg(AVL.getReg(), PrevMI, LIS);
      if (!VNI || !PrevVNI || VNI != PrevVNI)
        return false;
    }
  }

  assert(PrevMI.getOperand(2).isImm() && MI.getOperand(2).isImm());
  auto PriorVType = PrevMI.getOperand(2).getImm();
  auto VType = MI.getOperand(2).getImm();
  return areCompatibleVTYPEs(PriorVType, VType, Used);
}
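// Walk each block backwards, tracking the fields demanded between adjacent
// vector configs, and merge configs where the earlier one can be mutated to
// match the later one. A hedged sketch of one such rewrite (hypothetical
// registers; assumes both AVLs see the same definition of a0 and nothing in
// between demands VL):
//   vsetvli zero, a0, e8, m1, ta, ma
//   vmv1r.v v8, v9                    ; ignores VL, needs only a valid vtype
//   vsetvli zero, a0, e32, m1, ta, ma
// =>
//   vsetvli zero, a0, e32, m1, ta, ma
//   vmv1r.v v8, v9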
void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
  MachineInstr *NextMI = nullptr;
  // We can have arbitrary code in successors, so VL and VTYPE
  // must be considered demanded.
  DemandedFields Used;
  Used.demandVL();
  Used.demandVTYPE();
  SmallVector<MachineInstr *> ToDelete;

  auto dropAVLUse = [&](MachineOperand &MO) {
    if (!MO.isReg() || !MO.getReg().isVirtual())
      return;
    Register OldVLReg = MO.getReg();
    MO.setReg(RISCV::NoRegister);

    if (LIS)
      LIS->shrinkToUses(&LIS->getInterval(OldVLReg));

    MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
    if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
        MRI->use_nodbg_empty(OldVLReg))
      ToDelete.push_back(VLOpDef);
  };

  for (MachineInstr &MI : make_early_inc_range(reverse(MBB))) {

    if (!isVectorConfigInstr(MI)) {
      Used.doUnion(getDemanded(MI, ST));
      if (MI.isCall() || MI.isInlineAsm() ||
          MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
          MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
        NextMI = nullptr;
      continue;
    }

    if (!MI.getOperand(0).isDead())
      Used.demandVL();

    if (NextMI) {
      if (!Used.usedVL() && !Used.usedVTYPE()) {
        dropAVLUse(MI.getOperand(1));
        if (LIS)
          LIS->RemoveMachineInstrFromMaps(MI);
        MI.eraseFromParent();
        NumCoalescedVSETVL++;
        // Leave NextMI unchanged.
        continue;
      }

      if (canMutatePriorConfig(MI, *NextMI, Used)) {
        if (!isVLPreservingConfig(*NextMI)) {
          Register DefReg = NextMI->getOperand(0).getReg();

          MI.getOperand(0).setReg(DefReg);
          MI.getOperand(0).setIsDead(false);

          // The def of DefReg moved to MI, so extend the LiveInterval up to
          // it.
          if (DefReg.isVirtual() && LIS) {
            LiveInterval &DefLI = LIS->getInterval(DefReg);
            SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot();
            VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
            LiveInterval::Segment S(MISlot, DefLI.beginIndex(), DefVNI);
            DefLI.addSegment(S);
            DefVNI->def = MISlot;
            // Mark DefLI as spillable if it was previously unspillable.
            DefLI.setWeight(0);

            // DefReg may have had no uses, in which case we need to shrink
            // the LiveInterval up to MI.
            LIS->shrinkToUses(&DefLI);
          }

          dropAVLUse(MI.getOperand(1));
          if (NextMI->getOperand(1).isImm())
            MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
          else
            MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(),
                                              false);

          MI.setDesc(NextMI->getDesc());
        }
        MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());

        dropAVLUse(NextMI->getOperand(1));
        if (LIS)
          LIS->RemoveMachineInstrFromMaps(*NextMI);
        NextMI->eraseFromParent();
        NumCoalescedVSETVL++;
        // fallthrough
      }
    }
    NextMI = &MI;
    Used = getDemanded(MI, ST);
  }

  // Loop over the dead AVL values, and delete them now. This has to be
  // outside the above loop to avoid invalidating iterators.
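  // For instance (hypothetical register): if "%avl = ADDI $x0, 64" fed only
  // an AVL operand that dropAVLUse cleared above, it now has no non-debug
  // uses and sits in ToDelete.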
  for (auto *MI : ToDelete) {
    if (LIS) {
      LIS->removeInterval(MI->getOperand(0).getReg());
      LIS->RemoveMachineInstrFromMaps(*MI);
    }
    MI->eraseFromParent();
  }
}
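/// Expand the vl outputs of fault-only-first loads into PseudoReadVL reads
/// of the VL CSR. A hedged sketch of the rewrite (hypothetical virtual
/// registers; the FF pseudo defines both the loaded data and the new vl):
///   %vd:vr, %vl:gpr = PseudoVLE32FF_V_M1 ...
/// =>
///   %vd:vr, $x0 = PseudoVLE32FF_V_M1 ...   ; vl output retired to x0
///   %vl:gpr = PseudoReadVL                 ; re-read VL from the CSR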
void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
  for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
    MachineInstr &MI = *I++;
    if (RISCV::isFaultFirstLoad(MI)) {
      Register VLOutput = MI.getOperand(1).getReg();
      assert(VLOutput.isVirtual());
      if (!MI.getOperand(1).isDead()) {
        auto ReadVLMI = BuildMI(MBB, I, MI.getDebugLoc(),
                                TII->get(RISCV::PseudoReadVL), VLOutput);
        // Move the LiveInterval's definition down to PseudoReadVL.
        if (LIS) {
          SlotIndex NewDefSI =
              LIS->InsertMachineInstrInMaps(*ReadVLMI).getRegSlot();
          LiveInterval &DefLI = LIS->getInterval(VLOutput);
          VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
          DefLI.removeSegment(DefLI.beginIndex(), NewDefSI);
          DefVNI->def = NewDefSI;
        }
      }
      // We don't use the vl output of the VLEFF/VLSEGFF anymore.
      MI.getOperand(1).setReg(RISCV::X0);
    }
  }
}

bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
  // Skip if the vector extension is not enabled.
  ST = &MF.getSubtarget<RISCVSubtarget>();
  if (!ST->hasVInstructions())
    return false;

  LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");

  TII = ST->getInstrInfo();
  MRI = &MF.getRegInfo();
  auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
  LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;

  assert(BlockInfo.empty() && "Expect empty block infos");
  BlockInfo.resize(MF.getNumBlockIDs());

  bool HaveVectorOp = false;

  // Phase 1 - determine how VL/VTYPE are affected by each block.
  for (const MachineBasicBlock &MBB : MF) {
    VSETVLIInfo TmpStatus;
    HaveVectorOp |= computeVLVTYPEChanges(MBB, TmpStatus);
    // Initial exit state is whatever change we found in the block.
    BlockData &BBInfo = BlockInfo[MBB.getNumber()];
    BBInfo.Exit = TmpStatus;
    LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
                      << " is " << BBInfo.Exit << "\n");
  }

  // If we didn't find any instructions that need VSETVLI, we're done.
  if (!HaveVectorOp) {
    BlockInfo.clear();
    return false;
  }

  // Phase 2 - determine the exit VL/VTYPE from each block. We add all
  // blocks to the list here, but will also add any that need to be
  // revisited during phase 2 processing.
  for (const MachineBasicBlock &MBB : MF) {
    WorkList.push(&MBB);
    BlockInfo[MBB.getNumber()].InQueue = true;
  }
  while (!WorkList.empty()) {
    const MachineBasicBlock &MBB = *WorkList.front();
    WorkList.pop();
    computeIncomingVLVTYPE(MBB);
  }

  // Perform partial redundancy elimination of vsetvli transitions.
  for (MachineBasicBlock &MBB : MF)
    doPRE(MBB);

  // Phase 3 - add any vsetvli instructions needed in the block. Use the
  // phase 2 information to avoid adding vsetvlis before the first vector
  // instruction in the block if the VL/VTYPE is satisfied by its
  // predecessors.
  for (MachineBasicBlock &MBB : MF)
    emitVSETVLIs(MBB);

  // Now that all vsetvlis are explicit, go through and do block-local DSE
  // and peephole transforms based on demanded fields. Note that this *must*
  // be done outside the main dataflow so long as we allow any cross-block
  // analysis within the dataflow. We can't have both demanded-fields-based
  // mutation and non-local analysis in the dataflow at the same time
  // without introducing inconsistencies.
  for (MachineBasicBlock &MBB : MF)
    coalesceVSETVLIs(MBB);

  // Insert a PseudoReadVL after each VLEFF/VLSEGFF and replace the vl
  // output of the VLEFF/VLSEGFF with it.
  for (MachineBasicBlock &MBB : MF)
    insertReadVL(MBB);

  BlockInfo.clear();
  return HaveVectorOp;
}

/// Returns an instance of the Insert VSETVLI pass.
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
  return new RISCVInsertVSETVLI();
}