//===-- RISCVInstrInfo.cpp - RISC-V Instruction Information -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the RISC-V implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "RISCVInstrInfo.h"
#include "MCTargetDesc/RISCVBaseInfo.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineCombinerPattern.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"

using namespace llvm;

#define GEN_CHECK_COMPRESS_INSTR
#include "RISCVGenCompressInstEmitter.inc"

#define GET_INSTRINFO_CTOR_DTOR
#define GET_INSTRINFO_NAMED_OPS
#include "RISCVGenInstrInfo.inc"

static cl::opt<bool> PreferWholeRegisterMove(
    "riscv-prefer-whole-register-move", cl::init(false), cl::Hidden,
    cl::desc("Prefer whole register move for vector registers."));

static cl::opt<MachineTraceStrategy> ForceMachineCombinerStrategy(
    "riscv-force-machine-combiner-strategy", cl::Hidden,
    cl::desc("Force machine combiner to use a specific strategy for machine "
             "trace metrics evaluation."),
    cl::init(MachineTraceStrategy::TS_NumStrategies),
    cl::values(clEnumValN(MachineTraceStrategy::TS_Local, "local",
                          "Local strategy."),
               clEnumValN(MachineTraceStrategy::TS_MinInstrCount, "min-instr",
                          "MinInstrCount strategy.")));

namespace llvm::RISCVVPseudosTable {

using namespace RISCV;

#define GET_RISCVVPseudosTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace llvm::RISCVVPseudosTable

namespace llvm::RISCV {

#define GET_RISCVMaskedPseudosTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // end namespace llvm::RISCV

RISCVInstrInfo::RISCVInstrInfo(RISCVSubtarget &STI)
    : RISCVGenInstrInfo(RISCV::ADJCALLSTACKDOWN, RISCV::ADJCALLSTACKUP),
      STI(STI) {}

MCInst RISCVInstrInfo::getNop() const {
  if (STI.hasStdExtCOrZca())
    return MCInstBuilder(RISCV::C_NOP);
  return MCInstBuilder(RISCV::ADDI)
      .addReg(RISCV::X0)
      .addReg(RISCV::X0)
      .addImm(0);
}

Register RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                             int &FrameIndex) const {
  unsigned Dummy;
  return isLoadFromStackSlot(MI, FrameIndex, Dummy);
}

Register RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                             int &FrameIndex,
                                             unsigned &MemBytes) const {
  switch (MI.getOpcode()) {
  default:
    return 0;
  case RISCV::LB:
  case RISCV::LBU:
    MemBytes = 1;
    break;
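  // 16-bit loads, including the GPR-held (LH_INX) and FP (FLH) half-width
  // forms.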
  case RISCV::LH:
  case RISCV::LH_INX:
  case RISCV::LHU:
  case RISCV::FLH:
    MemBytes = 2;
    break;
  case RISCV::LW:
  case RISCV::LW_INX:
  case RISCV::FLW:
  case RISCV::LWU:
    MemBytes = 4;
    break;
  case RISCV::LD:
  case RISCV::FLD:
    MemBytes = 8;
    break;
  }

  if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
      MI.getOperand(2).getImm() == 0) {
    FrameIndex = MI.getOperand(1).getIndex();
    return MI.getOperand(0).getReg();
  }

  return 0;
}

Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                            int &FrameIndex) const {
  unsigned Dummy;
  return isStoreToStackSlot(MI, FrameIndex, Dummy);
}

Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                            int &FrameIndex,
                                            unsigned &MemBytes) const {
  switch (MI.getOpcode()) {
  default:
    return 0;
  case RISCV::SB:
    MemBytes = 1;
    break;
  case RISCV::SH:
  case RISCV::SH_INX:
  case RISCV::FSH:
    MemBytes = 2;
    break;
  case RISCV::SW:
  case RISCV::SW_INX:
  case RISCV::FSW:
    MemBytes = 4;
    break;
  case RISCV::SD:
  case RISCV::FSD:
    MemBytes = 8;
    break;
  }

  if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
      MI.getOperand(2).getImm() == 0) {
    FrameIndex = MI.getOperand(1).getIndex();
    return MI.getOperand(0).getReg();
  }

  return 0;
}

bool RISCVInstrInfo::isReallyTriviallyReMaterializable(
    const MachineInstr &MI) const {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  case RISCV::VMV_V_X:
  case RISCV::VFMV_V_F:
  case RISCV::VMV_V_I:
  case RISCV::VMV_S_X:
  case RISCV::VFMV_S_F:
  case RISCV::VID_V:
    return MI.getOperand(1).isUndef();
  default:
    return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
  }
}

static bool forwardCopyWillClobberTuple(unsigned DstReg, unsigned SrcReg,
                                        unsigned NumRegs) {
  return DstReg > SrcReg && (DstReg - SrcReg) < NumRegs;
}

static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI,
                                   const MachineBasicBlock &MBB,
                                   MachineBasicBlock::const_iterator MBBI,
                                   MachineBasicBlock::const_iterator &DefMBBI,
                                   RISCVII::VLMUL LMul) {
  if (PreferWholeRegisterMove)
    return false;

  assert(MBBI->getOpcode() == TargetOpcode::COPY &&
         "Unexpected COPY instruction.");
  Register SrcReg = MBBI->getOperand(1).getReg();
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();

  bool FoundDef = false;
  bool FirstVSetVLI = false;
  unsigned FirstSEW = 0;
  while (MBBI != MBB.begin()) {
    --MBBI;
    if (MBBI->isMetaInstruction())
      continue;

    if (MBBI->getOpcode() == RISCV::PseudoVSETVLI ||
        MBBI->getOpcode() == RISCV::PseudoVSETVLIX0 ||
        MBBI->getOpcode() == RISCV::PseudoVSETIVLI) {
      // There is a vsetvli between COPY and source define instruction.
      // vy = def_vop ... (producing instruction)
      // ...
      // vsetvli
      // ...
      // vx = COPY vy
      if (!FoundDef) {
        if (!FirstVSetVLI) {
          FirstVSetVLI = true;
          unsigned FirstVType = MBBI->getOperand(2).getImm();
          RISCVII::VLMUL FirstLMul = RISCVVType::getVLMUL(FirstVType);
          FirstSEW = RISCVVType::getSEW(FirstVType);
          // The first encountered vsetvli must have the same lmul as the
          // register class of COPY.
          if (FirstLMul != LMul)
            return false;
        }
        // Only permit `vsetvli x0, x0, vtype` between COPY and the source
        // define instruction.
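        // That form preserves the current VL and only changes vtype, so the
        // value produced by the defining instruction is still fully covered
        // by the COPY.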
        if (MBBI->getOperand(0).getReg() != RISCV::X0)
          return false;
        if (MBBI->getOperand(1).isImm())
          return false;
        if (MBBI->getOperand(1).getReg() != RISCV::X0)
          return false;
        continue;
      }

      // MBBI is the first vsetvli before the producing instruction.
      unsigned VType = MBBI->getOperand(2).getImm();
      // If there is a vsetvli between COPY and the producing instruction.
      if (FirstVSetVLI) {
        // If SEW is different, return false.
        if (RISCVVType::getSEW(VType) != FirstSEW)
          return false;
      }

      // If the vsetvli is tail undisturbed, keep the whole register move.
      if (!RISCVVType::isTailAgnostic(VType))
        return false;

      // The checking is conservative. We only have register classes for
      // LMUL = 1/2/4/8. We should be able to convert vmv1r.v to vmv.v.v
      // for fractional LMUL operations. However, we cannot use the vsetvli
      // lmul for widening operations; the result of a widening operation is
      // 2 x LMUL.
      return LMul == RISCVVType::getVLMUL(VType);
    } else if (MBBI->isInlineAsm() || MBBI->isCall()) {
      return false;
    } else if (MBBI->getNumDefs()) {
      // Check all the instructions which will change VL.
      // For example, vleff has implicit def VL.
      if (MBBI->modifiesRegister(RISCV::VL, /*TRI=*/nullptr))
        return false;

      // Only convert whole register copies to vmv.v.v when the defining
      // value appears in the explicit operands.
      for (const MachineOperand &MO : MBBI->explicit_operands()) {
        if (!MO.isReg() || !MO.isDef())
          continue;
        if (!FoundDef && TRI->regsOverlap(MO.getReg(), SrcReg)) {
          // We only permit the source of the COPY to have the same LMUL as the
          // defined operand.
          // There are cases where we need to keep the whole register copy if
          // the LMUL is different.
          // For example,
          // $x0 = PseudoVSETIVLI 4, 73 // vsetivli zero, 4, e16,m2,ta,mu
          // $v28m4 = PseudoVWADD_VV_M2 $v26m2, $v8m2
          // # The COPY may be created by vlmul_trunc intrinsic.
          // $v26m2 = COPY renamable $v28m2, implicit killed $v28m4
          //
          // After widening, the valid value will be 4 x e32 elements. If we
          // convert the COPY to vmv.v.v, it will only copy 4 x e16 elements.
          // FIXME: The COPY of a subregister of a Zvlsseg register will not be
          // able to convert to vmv.v.[v|i] under the constraint.
          if (MO.getReg() != SrcReg)
            return false;

          // For widening reduction instructions with an LMUL_1 input vector,
          // checking the LMUL alone is insufficient because the reduction
          // result is always LMUL_1.
          // For example,
          // $x11 = PseudoVSETIVLI 1, 64 // vsetivli a1, 1, e8, m1, ta, mu
          // $v8m1 = PseudoVWREDSUM_VS_M1 $v26, $v27
          // $v26 = COPY killed renamable $v8
          // After widening, the valid value will be 1 x e16 elements. If we
          // convert the COPY to vmv.v.v, it will only copy 1 x e8 elements.
          uint64_t TSFlags = MBBI->getDesc().TSFlags;
          if (RISCVII::isRVVWideningReduction(TSFlags))
            return false;

          // If the producing instruction does not depend on vsetvli, do not
          // convert COPY to vmv.v.v. For example, VL1R_V or PseudoVRELOAD.
          if (!RISCVII::hasSEWOp(TSFlags) || !RISCVII::hasVLOp(TSFlags))
            return false;

          // Found the definition.
          FoundDef = true;
          DefMBBI = MBBI;
          break;
        }
      }
    }
  }

  return false;
}

void RISCVInstrInfo::copyPhysRegVector(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg, bool KillSrc,
    const TargetRegisterClass *RegClass) const {
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
  RISCVII::VLMUL LMul = RISCVRI::getLMul(RegClass->TSFlags);
  unsigned NF = RISCVRI::getNF(RegClass->TSFlags);

  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
  uint16_t DstEncoding = TRI->getEncodingValue(DstReg);
  auto [LMulVal, Fractional] = RISCVVType::decodeVLMUL(LMul);
  assert(!Fractional && "It is impossible be fractional lmul here.");
  unsigned NumRegs = NF * LMulVal;
  bool ReversedCopy =
      forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NumRegs);
  if (ReversedCopy) {
    // If the src and dest overlap when copying a tuple, we need to copy the
    // registers in reverse.
    SrcEncoding += NumRegs - 1;
    DstEncoding += NumRegs - 1;
  }

  unsigned I = 0;
  auto GetCopyInfo = [&](uint16_t SrcEncoding, uint16_t DstEncoding)
      -> std::tuple<RISCVII::VLMUL, const TargetRegisterClass &, unsigned,
                    unsigned, unsigned> {
    if (ReversedCopy) {
      // For reversed copying, if there are enough aligned registers (8/4/2),
      // we can do a larger copy (LMUL8/4/2).
      // Besides, we already know from forwardCopyWillClobberTuple that
      // DstEncoding is larger than SrcEncoding, so the difference between
      // DstEncoding and SrcEncoding should be >= the LMUL value we try to use
      // to avoid clobbering.
      uint16_t Diff = DstEncoding - SrcEncoding;
      if (I + 8 <= NumRegs && Diff >= 8 && SrcEncoding % 8 == 7 &&
          DstEncoding % 8 == 7)
        return {RISCVII::LMUL_8, RISCV::VRM8RegClass, RISCV::VMV8R_V,
                RISCV::PseudoVMV_V_V_M8, RISCV::PseudoVMV_V_I_M8};
      if (I + 4 <= NumRegs && Diff >= 4 && SrcEncoding % 4 == 3 &&
          DstEncoding % 4 == 3)
        return {RISCVII::LMUL_4, RISCV::VRM4RegClass, RISCV::VMV4R_V,
                RISCV::PseudoVMV_V_V_M4, RISCV::PseudoVMV_V_I_M4};
      if (I + 2 <= NumRegs && Diff >= 2 && SrcEncoding % 2 == 1 &&
          DstEncoding % 2 == 1)
        return {RISCVII::LMUL_2, RISCV::VRM2RegClass, RISCV::VMV2R_V,
                RISCV::PseudoVMV_V_V_M2, RISCV::PseudoVMV_V_I_M2};
      // Otherwise, do an LMUL1 copy.
      return {RISCVII::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V,
              RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1};
    }

    // For forward copying, if the source register encoding and the destination
    // register encoding are aligned to 8/4/2, we can do an LMUL8/4/2 copy.
    if (I + 8 <= NumRegs && SrcEncoding % 8 == 0 && DstEncoding % 8 == 0)
      return {RISCVII::LMUL_8, RISCV::VRM8RegClass, RISCV::VMV8R_V,
              RISCV::PseudoVMV_V_V_M8, RISCV::PseudoVMV_V_I_M8};
    if (I + 4 <= NumRegs && SrcEncoding % 4 == 0 && DstEncoding % 4 == 0)
      return {RISCVII::LMUL_4, RISCV::VRM4RegClass, RISCV::VMV4R_V,
              RISCV::PseudoVMV_V_V_M4, RISCV::PseudoVMV_V_I_M4};
    if (I + 2 <= NumRegs && SrcEncoding % 2 == 0 && DstEncoding % 2 == 0)
      return {RISCVII::LMUL_2, RISCV::VRM2RegClass, RISCV::VMV2R_V,
              RISCV::PseudoVMV_V_V_M2, RISCV::PseudoVMV_V_I_M2};
    // Otherwise, do an LMUL1 copy.
    return {RISCVII::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V,
            RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1};
  };
  auto FindRegWithEncoding = [TRI](const TargetRegisterClass &RegClass,
                                   uint16_t Encoding) {
    MCRegister Reg = RISCV::V0 + Encoding;
    if (RISCVRI::getLMul(RegClass.TSFlags) == RISCVII::LMUL_1)
      return Reg;
    return TRI->getMatchingSuperReg(Reg, RISCV::sub_vrm1_0, &RegClass);
  };
  while (I != NumRegs) {
    // For non-segment copying, we only do this once as the registers are
    // always aligned.
    // For segment copying, we may do this several times. If the registers are
    // aligned to a larger LMUL, we can eliminate some copies.
    auto [LMulCopied, RegClass, Opc, VVOpc, VIOpc] =
        GetCopyInfo(SrcEncoding, DstEncoding);
    auto [NumCopied, _] = RISCVVType::decodeVLMUL(LMulCopied);

    MachineBasicBlock::const_iterator DefMBBI;
    if (LMul == LMulCopied &&
        isConvertibleToVMV_V_V(STI, MBB, MBBI, DefMBBI, LMul)) {
      Opc = VVOpc;
      if (DefMBBI->getOpcode() == VIOpc)
        Opc = VIOpc;
    }

    // Emit the actual copy.
    // For reversed copying, the encoding should be decreased.
    MCRegister ActualSrcReg = FindRegWithEncoding(
        RegClass, ReversedCopy ? (SrcEncoding - NumCopied + 1) : SrcEncoding);
    MCRegister ActualDstReg = FindRegWithEncoding(
        RegClass, ReversedCopy ? (DstEncoding - NumCopied + 1) : DstEncoding);

    auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), ActualDstReg);
    bool UseVMV_V_I = RISCV::getRVVMCOpcode(Opc) == RISCV::VMV_V_I;
    bool UseVMV = UseVMV_V_I || RISCV::getRVVMCOpcode(Opc) == RISCV::VMV_V_V;
    if (UseVMV)
      MIB.addReg(ActualDstReg, RegState::Undef);
    if (UseVMV_V_I)
      MIB = MIB.add(DefMBBI->getOperand(2));
    else
      MIB = MIB.addReg(ActualSrcReg, getKillRegState(KillSrc));
    if (UseVMV) {
      const MCInstrDesc &Desc = DefMBBI->getDesc();
      MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc))); // AVL
      unsigned Log2SEW =
          DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc)).getImm();
      MIB.addImm(Log2SEW ? Log2SEW : 3); // SEW
      MIB.addImm(0);                     // tu, mu
      MIB.addReg(RISCV::VL, RegState::Implicit);
      MIB.addReg(RISCV::VTYPE, RegState::Implicit);
    }

    // If we are copying reversely, we should decrease the encoding.
    SrcEncoding += (ReversedCopy ? -NumCopied : NumCopied);
    DstEncoding += (ReversedCopy ? -NumCopied : NumCopied);
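    // Advance past the registers covered by this copy.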
    I += NumCopied;
  }
}

void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI,
                                 const DebugLoc &DL, MCRegister DstReg,
                                 MCRegister SrcReg, bool KillSrc,
                                 bool RenamableDest, bool RenamableSrc) const {
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();

  if (RISCV::GPRRegClass.contains(DstReg, SrcReg)) {
    BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), DstReg)
        .addReg(SrcReg,
                getKillRegState(KillSrc) | getRenamableRegState(RenamableSrc))
        .addImm(0);
    return;
  }

  if (RISCV::GPRF16RegClass.contains(DstReg, SrcReg)) {
    BuildMI(MBB, MBBI, DL, get(RISCV::PseudoMV_FPR16INX), DstReg)
        .addReg(SrcReg,
                getKillRegState(KillSrc) | getRenamableRegState(RenamableSrc));
    return;
  }

  if (RISCV::GPRF32RegClass.contains(DstReg, SrcReg)) {
    BuildMI(MBB, MBBI, DL, get(RISCV::PseudoMV_FPR32INX), DstReg)
        .addReg(SrcReg,
                getKillRegState(KillSrc) | getRenamableRegState(RenamableSrc));
    return;
  }

  if (RISCV::GPRPairRegClass.contains(DstReg, SrcReg)) {
    // Emit an ADDI for both parts of GPRPair.
    BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),
            TRI->getSubReg(DstReg, RISCV::sub_gpr_even))
        .addReg(TRI->getSubReg(SrcReg, RISCV::sub_gpr_even),
                getKillRegState(KillSrc))
        .addImm(0);
    BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),
            TRI->getSubReg(DstReg, RISCV::sub_gpr_odd))
        .addReg(TRI->getSubReg(SrcReg, RISCV::sub_gpr_odd),
                getKillRegState(KillSrc))
        .addImm(0);
    return;
  }

  // Handle copy from csr
  if (RISCV::VCSRRegClass.contains(SrcReg) &&
      RISCV::GPRRegClass.contains(DstReg)) {
    BuildMI(MBB, MBBI, DL, get(RISCV::CSRRS), DstReg)
        .addImm(RISCVSysReg::lookupSysRegByName(TRI->getName(SrcReg))->Encoding)
        .addReg(RISCV::X0);
    return;
  }

  if (RISCV::FPR16RegClass.contains(DstReg, SrcReg)) {
    unsigned Opc;
    if (STI.hasStdExtZfh()) {
      Opc = RISCV::FSGNJ_H;
    } else {
      assert(STI.hasStdExtF() &&
             (STI.hasStdExtZfhmin() || STI.hasStdExtZfbfmin()) &&
             "Unexpected extensions");
      // Zfhmin/Zfbfmin doesn't have FSGNJ_H, replace FSGNJ_H with FSGNJ_S.
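      // Map the 16-bit registers onto their 32-bit super-registers so the
      // copy can be emitted as FSGNJ_S.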
      DstReg = TRI->getMatchingSuperReg(DstReg, RISCV::sub_16,
                                        &RISCV::FPR32RegClass);
      SrcReg = TRI->getMatchingSuperReg(SrcReg, RISCV::sub_16,
                                        &RISCV::FPR32RegClass);
      Opc = RISCV::FSGNJ_S;
    }
    BuildMI(MBB, MBBI, DL, get(Opc), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RISCV::FPR32RegClass.contains(DstReg, SrcReg)) {
    BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_S), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RISCV::FPR64RegClass.contains(DstReg, SrcReg)) {
    BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_D), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RISCV::FPR32RegClass.contains(DstReg) &&
      RISCV::GPRRegClass.contains(SrcReg)) {
    BuildMI(MBB, MBBI, DL, get(RISCV::FMV_W_X), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RISCV::GPRRegClass.contains(DstReg) &&
      RISCV::FPR32RegClass.contains(SrcReg)) {
    BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_W), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RISCV::FPR64RegClass.contains(DstReg) &&
      RISCV::GPRRegClass.contains(SrcReg)) {
    assert(STI.getXLen() == 64 && "Unexpected GPR size");
    BuildMI(MBB, MBBI, DL, get(RISCV::FMV_D_X), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RISCV::GPRRegClass.contains(DstReg) &&
      RISCV::FPR64RegClass.contains(SrcReg)) {
    assert(STI.getXLen() == 64 && "Unexpected GPR size");
    BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_D), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  // VR->VR copies.
  const TargetRegisterClass *RegClass =
      TRI->getCommonMinimalPhysRegClass(SrcReg, DstReg);
  if (RISCVRegisterInfo::isRVVRegClass(RegClass)) {
    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RegClass);
    return;
  }

  llvm_unreachable("Impossible reg-to-reg copy");
}

void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator I,
                                         Register SrcReg, bool IsKill, int FI,
                                         const TargetRegisterClass *RC,
                                         const TargetRegisterInfo *TRI,
                                         Register VReg,
                                         MachineInstr::MIFlag Flags) const {
  MachineFunction *MF = MBB.getParent();
  MachineFrameInfo &MFI = MF->getFrameInfo();

  unsigned Opcode;
  bool IsScalableVector = true;
  if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
    Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ? RISCV::SW
                                                             : RISCV::SD;
    IsScalableVector = false;
  } else if (RISCV::GPRF16RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::SH_INX;
    IsScalableVector = false;
  } else if (RISCV::GPRF32RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::SW_INX;
    IsScalableVector = false;
  } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::PseudoRV32ZdinxSD;
    IsScalableVector = false;
  } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FSH;
    IsScalableVector = false;
  } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FSW;
    IsScalableVector = false;
  } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FSD;
    IsScalableVector = false;
  } else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VS1R_V;
  } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VS2R_V;
  } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VS4R_V;
  } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VS8R_V;
  } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL2_M1;
  else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL2_M2;
  else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL2_M4;
  else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL3_M1;
  else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL3_M2;
  else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL4_M1;
  else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL4_M2;
  else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL5_M1;
  else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL6_M1;
  else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL7_M1;
  else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL8_M1;
  else
    llvm_unreachable("Can't store this register to stack slot");

  if (IsScalableVector) {
    MachineMemOperand *MMO = MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
        LocationSize::beforeOrAfterPointer(), MFI.getObjectAlign(FI));

    MFI.setStackID(FI, TargetStackID::ScalableVector);
    BuildMI(MBB, I, DebugLoc(), get(Opcode))
        .addReg(SrcReg, getKillRegState(IsKill))
        .addFrameIndex(FI)
        .addMemOperand(MMO)
        .setMIFlag(Flags);
  } else {
    MachineMemOperand *MMO = MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
        MFI.getObjectSize(FI), MFI.getObjectAlign(FI));

    BuildMI(MBB, I, DebugLoc(), get(Opcode))
        .addReg(SrcReg, getKillRegState(IsKill))
        .addFrameIndex(FI)
        .addImm(0)
        .addMemOperand(MMO)
        .setMIFlag(Flags);
  }
}

void RISCVInstrInfo::loadRegFromStackSlot(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DstReg,
    int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
    Register VReg, MachineInstr::MIFlag Flags) const {
  MachineFunction *MF = MBB.getParent();
  MachineFrameInfo &MFI = MF->getFrameInfo();
  DebugLoc DL =
      Flags & MachineInstr::FrameDestroy ? MBB.findDebugLoc(I) : DebugLoc();
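
  // Select the reload opcode from the register class: scalar classes use
  // plain loads, while vector classes use whole-register loads or segment
  // reload pseudos.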
  unsigned Opcode;
  bool IsScalableVector = true;
  if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
    Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ? RISCV::LW
                                                             : RISCV::LD;
    IsScalableVector = false;
  } else if (RISCV::GPRF16RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::LH_INX;
    IsScalableVector = false;
  } else if (RISCV::GPRF32RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::LW_INX;
    IsScalableVector = false;
  } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::PseudoRV32ZdinxLD;
    IsScalableVector = false;
  } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FLH;
    IsScalableVector = false;
  } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FLW;
    IsScalableVector = false;
  } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FLD;
    IsScalableVector = false;
  } else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VL1RE8_V;
  } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VL2RE8_V;
  } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VL4RE8_V;
  } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VL8RE8_V;
  } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD2_M1;
  else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD2_M2;
  else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD2_M4;
  else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD3_M1;
  else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD3_M2;
  else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD4_M1;
  else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD4_M2;
  else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD5_M1;
  else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD6_M1;
  else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD7_M1;
  else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD8_M1;
  else
    llvm_unreachable("Can't load this register from stack slot");

  if (IsScalableVector) {
    MachineMemOperand *MMO = MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
        LocationSize::beforeOrAfterPointer(), MFI.getObjectAlign(FI));

    MFI.setStackID(FI, TargetStackID::ScalableVector);
    BuildMI(MBB, I, DL, get(Opcode), DstReg)
        .addFrameIndex(FI)
        .addMemOperand(MMO)
        .setMIFlag(Flags);
  } else {
    MachineMemOperand *MMO = MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
        MFI.getObjectSize(FI), MFI.getObjectAlign(FI));

    BuildMI(MBB, I, DL, get(Opcode), DstReg)
        .addFrameIndex(FI)
        .addImm(0)
        .addMemOperand(MMO)
        .setMIFlag(Flags);
  }
}

MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
    MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
    MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS,
    VirtRegMap *VRM) const {
  // The below optimizations narrow the load so they are only valid for little
  // endian.
  // TODO: Support big endian by adding an offset into the frame object?
  if (MF.getDataLayout().isBigEndian())
    return nullptr;

  // Fold load from stack followed by sext.b/sext.h/sext.w/zext.b/zext.h/zext.w.
  if (Ops.size() != 1 || Ops[0] != 1)
    return nullptr;

  unsigned LoadOpc;
  switch (MI.getOpcode()) {
  default:
    if (RISCV::isSEXT_W(MI)) {
      LoadOpc = RISCV::LW;
      break;
    }
    if (RISCV::isZEXT_W(MI)) {
      LoadOpc = RISCV::LWU;
      break;
    }
    if (RISCV::isZEXT_B(MI)) {
      LoadOpc = RISCV::LBU;
      break;
    }
    if (RISCV::getRVVMCOpcode(MI.getOpcode()) == RISCV::VMV_X_S) {
      unsigned Log2SEW =
          MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
      if (STI.getXLen() < (1U << Log2SEW))
        return nullptr;
      switch (Log2SEW) {
      case 3:
        LoadOpc = RISCV::LB;
        break;
      case 4:
        LoadOpc = RISCV::LH;
        break;
      case 5:
        LoadOpc = RISCV::LW;
        break;
      case 6:
        LoadOpc = RISCV::LD;
        break;
      default:
        llvm_unreachable("Unexpected SEW");
      }
      break;
    }
    if (RISCV::getRVVMCOpcode(MI.getOpcode()) == RISCV::VFMV_F_S) {
      unsigned Log2SEW =
          MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
      switch (Log2SEW) {
      case 4:
        LoadOpc = RISCV::FLH;
        break;
      case 5:
        LoadOpc = RISCV::FLW;
        break;
      case 6:
        LoadOpc = RISCV::FLD;
        break;
      default:
        llvm_unreachable("Unexpected SEW");
      }
      break;
    }
    return nullptr;
  case RISCV::SEXT_H:
    LoadOpc = RISCV::LH;
    break;
  case RISCV::SEXT_B:
    LoadOpc = RISCV::LB;
    break;
  case RISCV::ZEXT_H_RV32:
  case RISCV::ZEXT_H_RV64:
    LoadOpc = RISCV::LHU;
    break;
  }

  Register DstReg = MI.getOperand(0).getReg();
  return BuildMI(*MI.getParent(), InsertPt, MI.getDebugLoc(), get(LoadOpc),
                 DstReg)
      .addFrameIndex(FrameIndex)
      .addImm(0);
}

void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            const DebugLoc &DL, Register DstReg, uint64_t Val,
                            MachineInstr::MIFlag Flag, bool DstRenamable,
                            bool DstIsDead) const {
  Register SrcReg = RISCV::X0;

  // For RV32, allow a signed or unsigned 32-bit value.
  if (!STI.is64Bit() && !isInt<32>(Val)) {
    // If we have a uimm32, it will still fit in a register so we can allow it.
    if (!isUInt<32>(Val))
      report_fatal_error("Should only materialize 32-bit constants for RV32");

    // Sign extend for generateInstSeq.
    Val = SignExtend64<32>(Val);
  }

  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, STI);
  assert(!Seq.empty());

  bool SrcRenamable = false;
  unsigned Num = 0;

  for (const RISCVMatInt::Inst &Inst : Seq) {
    bool LastItem = ++Num == Seq.size();
    unsigned DstRegState = getDeadRegState(DstIsDead && LastItem) |
                           getRenamableRegState(DstRenamable);
    unsigned SrcRegState = getKillRegState(SrcReg != RISCV::X0) |
                           getRenamableRegState(SrcRenamable);
    switch (Inst.getOpndKind()) {
    case RISCVMatInt::Imm:
      BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
          .addReg(DstReg, RegState::Define | DstRegState)
          .addImm(Inst.getImm())
          .setMIFlag(Flag);
      break;
    case RISCVMatInt::RegX0:
      BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
          .addReg(DstReg, RegState::Define | DstRegState)
          .addReg(SrcReg, SrcRegState)
          .addReg(RISCV::X0)
          .setMIFlag(Flag);
      break;
    case RISCVMatInt::RegReg:
      BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
          .addReg(DstReg, RegState::Define | DstRegState)
          .addReg(SrcReg, SrcRegState)
          .addReg(SrcReg, SrcRegState)
          .setMIFlag(Flag);
      break;
    case RISCVMatInt::RegImm:
      BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
          .addReg(DstReg, RegState::Define | DstRegState)
          .addReg(SrcReg, SrcRegState)
          .addImm(Inst.getImm())
          .setMIFlag(Flag);
      break;
    }

    // Only the first instruction has X0 as its source.
    SrcReg = DstReg;
    SrcRenamable = DstRenamable;
  }
}

static RISCVCC::CondCode getCondFromBranchOpc(unsigned Opc) {
  switch (Opc) {
  default:
    return RISCVCC::COND_INVALID;
  case RISCV::CV_BEQIMM:
    return RISCVCC::COND_EQ;
  case RISCV::CV_BNEIMM:
    return RISCVCC::COND_NE;
  case RISCV::BEQ:
    return RISCVCC::COND_EQ;
  case RISCV::BNE:
    return RISCVCC::COND_NE;
  case RISCV::BLT:
    return RISCVCC::COND_LT;
  case RISCV::BGE:
    return RISCVCC::COND_GE;
  case RISCV::BLTU:
    return RISCVCC::COND_LTU;
  case RISCV::BGEU:
    return RISCVCC::COND_GEU;
  }
}

// The contents of values added to Cond are not examined outside of
// RISCVInstrInfo, giving us flexibility in what to push to it. For RISC-V, we
// push the condition code and the two operands of the comparison.
static void parseCondBranch(MachineInstr &LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.
  assert(LastInst.getDesc().isConditionalBranch() &&
         "Unknown conditional branch");
  Target = LastInst.getOperand(2).getMBB();
  unsigned CC = getCondFromBranchOpc(LastInst.getOpcode());
  Cond.push_back(MachineOperand::CreateImm(CC));
  Cond.push_back(LastInst.getOperand(0));
  Cond.push_back(LastInst.getOperand(1));
}

unsigned RISCVCC::getBrCond(RISCVCC::CondCode CC, bool Imm) {
  switch (CC) {
  default:
    llvm_unreachable("Unknown condition code!");
  case RISCVCC::COND_EQ:
    return Imm ? RISCV::CV_BEQIMM : RISCV::BEQ;
  case RISCVCC::COND_NE:
    return Imm ? RISCV::CV_BNEIMM : RISCV::BNE;
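  // The remaining conditions have no immediate-compare (CV_B*IMM) form, so
  // the Imm flag is ignored for them.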
  case RISCVCC::COND_LT:
    return RISCV::BLT;
  case RISCVCC::COND_GE:
    return RISCV::BGE;
  case RISCVCC::COND_LTU:
    return RISCV::BLTU;
  case RISCVCC::COND_GEU:
    return RISCV::BGEU;
  }
}

const MCInstrDesc &RISCVInstrInfo::getBrCond(RISCVCC::CondCode CC,
                                             bool Imm) const {
  return get(RISCVCC::getBrCond(CC, Imm));
}

RISCVCC::CondCode RISCVCC::getOppositeBranchCondition(RISCVCC::CondCode CC) {
  switch (CC) {
  default:
    llvm_unreachable("Unrecognized conditional branch");
  case RISCVCC::COND_EQ:
    return RISCVCC::COND_NE;
  case RISCVCC::COND_NE:
    return RISCVCC::COND_EQ;
  case RISCVCC::COND_LT:
    return RISCVCC::COND_GE;
  case RISCVCC::COND_GE:
    return RISCVCC::COND_LT;
  case RISCVCC::COND_LTU:
    return RISCVCC::COND_GEU;
  case RISCVCC::COND_GEU:
    return RISCVCC::COND_LTU;
  }
}

bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                   MachineBasicBlock *&TBB,
                                   MachineBasicBlock *&FBB,
                                   SmallVectorImpl<MachineOperand> &Cond,
                                   bool AllowModify) const {
  TBB = FBB = nullptr;
  Cond.clear();

  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end() || !isUnpredicatedTerminator(*I))
    return false;

  // Count the number of terminators and find the first unconditional or
  // indirect branch.
  MachineBasicBlock::iterator FirstUncondOrIndirectBr = MBB.end();
  int NumTerminators = 0;
  for (auto J = I.getReverse(); J != MBB.rend() && isUnpredicatedTerminator(*J);
       J++) {
    NumTerminators++;
    if (J->getDesc().isUnconditionalBranch() ||
        J->getDesc().isIndirectBranch()) {
      FirstUncondOrIndirectBr = J.getReverse();
    }
  }

  // If AllowModify is true, we can erase any terminators after
  // FirstUncondOrIndirectBR.
  if (AllowModify && FirstUncondOrIndirectBr != MBB.end()) {
    while (std::next(FirstUncondOrIndirectBr) != MBB.end()) {
      std::next(FirstUncondOrIndirectBr)->eraseFromParent();
      NumTerminators--;
    }
    I = FirstUncondOrIndirectBr;
  }

  // We can't handle blocks that end in an indirect branch.
  if (I->getDesc().isIndirectBranch())
    return true;

  // We can't handle Generic branch opcodes from Global ISel.
  if (I->isPreISelOpcode())
    return true;

  // We can't handle blocks with more than 2 terminators.
  if (NumTerminators > 2)
    return true;

  // Handle a single unconditional branch.
  if (NumTerminators == 1 && I->getDesc().isUnconditionalBranch()) {
    TBB = getBranchDestBlock(*I);
    return false;
  }

  // Handle a single conditional branch.
  if (NumTerminators == 1 && I->getDesc().isConditionalBranch()) {
    parseCondBranch(*I, TBB, Cond);
    return false;
  }

  // Handle a conditional branch followed by an unconditional branch.
  if (NumTerminators == 2 && std::prev(I)->getDesc().isConditionalBranch() &&
      I->getDesc().isUnconditionalBranch()) {
    parseCondBranch(*std::prev(I), TBB, Cond);
    FBB = getBranchDestBlock(*I);
    return false;
  }

  // Otherwise, we can't handle this.
  return true;
}

unsigned RISCVInstrInfo::removeBranch(MachineBasicBlock &MBB,
                                      int *BytesRemoved) const {
  if (BytesRemoved)
    *BytesRemoved = 0;
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;

  if (!I->getDesc().isUnconditionalBranch() &&
      !I->getDesc().isConditionalBranch())
    return 0;

  // Remove the branch.
  if (BytesRemoved)
    *BytesRemoved += getInstSizeInBytes(*I);
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin())
    return 1;
  --I;
  if (!I->getDesc().isConditionalBranch())
    return 1;

  // Remove the branch.
  if (BytesRemoved)
    *BytesRemoved += getInstSizeInBytes(*I);
  I->eraseFromParent();
  return 2;
}

// Inserts a branch into the end of the specific MachineBasicBlock, returning
// the number of instructions inserted.
unsigned RISCVInstrInfo::insertBranch(
    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
  if (BytesAdded)
    *BytesAdded = 0;

  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 3 || Cond.size() == 0) &&
         "RISC-V branch conditions have two components!");

  // Unconditional branch.
  if (Cond.empty()) {
    MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(TBB);
    if (BytesAdded)
      *BytesAdded += getInstSizeInBytes(MI);
    return 1;
  }

  // Either a one or two-way conditional branch.
  auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
  MachineInstr &CondMI = *BuildMI(&MBB, DL, getBrCond(CC, Cond[2].isImm()))
                              .add(Cond[1])
                              .add(Cond[2])
                              .addMBB(TBB);
  if (BytesAdded)
    *BytesAdded += getInstSizeInBytes(CondMI);

  // One-way conditional branch.
  if (!FBB)
    return 1;

  // Two-way conditional branch.
  MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(FBB);
  if (BytesAdded)
    *BytesAdded += getInstSizeInBytes(MI);
  return 2;
}

void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
                                          MachineBasicBlock &DestBB,
                                          MachineBasicBlock &RestoreBB,
                                          const DebugLoc &DL, int64_t BrOffset,
                                          RegScavenger *RS) const {
  assert(RS && "RegScavenger required for long branching");
  assert(MBB.empty() &&
         "new block should be inserted for expanding unconditional branch");
  assert(MBB.pred_size() == 1);
  assert(RestoreBB.empty() &&
         "restore block should be inserted for restoring clobbered registers");

  MachineFunction *MF = MBB.getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();

  if (!isInt<32>(BrOffset))
    report_fatal_error(
        "Branch offsets outside of the signed 32-bit range not supported");

  // FIXME: A virtual register must be used initially, as the register
  // scavenger won't work with empty blocks (SIInstrInfo::insertIndirectBranch
  // uses the same workaround).
  Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRJALRRegClass);
  auto II = MBB.end();
  // We may also update the jump target to RestoreBB later.
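  // PseudoJump expands to an auipc+jalr pair, which can reach any target
  // within the signed 32-bit offset range checked above.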
  MachineInstr &MI = *BuildMI(MBB, II, DL, get(RISCV::PseudoJump))
                          .addReg(ScratchReg, RegState::Define | RegState::Dead)
                          .addMBB(&DestBB, RISCVII::MO_CALL);

  RS->enterBasicBlockEnd(MBB);
  Register TmpGPR =
      RS->scavengeRegisterBackwards(RISCV::GPRRegClass, MI.getIterator(),
                                    /*RestoreAfter=*/false, /*SpAdj=*/0,
                                    /*AllowSpill=*/false);
  if (TmpGPR != RISCV::NoRegister)
    RS->setRegUsed(TmpGPR);
  else {
    // The case when there is no scavenged register needs special handling.

    // Pick s11 because it doesn't make a difference.
    TmpGPR = RISCV::X27;

    int FrameIndex = RVFI->getBranchRelaxationScratchFrameIndex();
    if (FrameIndex == -1)
      report_fatal_error("underestimated function size");

    storeRegToStackSlot(MBB, MI, TmpGPR, /*IsKill=*/true, FrameIndex,
                        &RISCV::GPRRegClass, TRI, Register());
    TRI->eliminateFrameIndex(std::prev(MI.getIterator()),
                             /*SpAdj=*/0, /*FIOperandNum=*/1);

    MI.getOperand(1).setMBB(&RestoreBB);

    loadRegFromStackSlot(RestoreBB, RestoreBB.end(), TmpGPR, FrameIndex,
                         &RISCV::GPRRegClass, TRI, Register());
    TRI->eliminateFrameIndex(RestoreBB.back(),
                             /*SpAdj=*/0, /*FIOperandNum=*/1);
  }

  MRI.replaceRegWith(ScratchReg, TmpGPR);
  MRI.clearVirtRegs();
}

bool RISCVInstrInfo::reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  assert((Cond.size() == 3) && "Invalid branch condition!");
  auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
  Cond[0].setImm(getOppositeBranchCondition(CC));
  return false;
}

bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const {
  MachineBasicBlock *MBB = MI.getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  MachineBasicBlock *TBB, *FBB;
  SmallVector<MachineOperand, 3> Cond;
  if (analyzeBranch(*MBB, TBB, FBB, Cond, /*AllowModify=*/false))
    return false;

  RISCVCC::CondCode CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
  assert(CC != RISCVCC::COND_INVALID);

  if (CC == RISCVCC::COND_EQ || CC == RISCVCC::COND_NE)
    return false;

  // For two constants C0 and C1 from
  // ```
  // li Y, C0
  // li Z, C1
  // ```
  // 1. if C1 = C0 + 1
  //    we can turn:
  //    (a) blt Y, X -> bge X, Z
  //    (b) bge Y, X -> blt X, Z
  //
  // 2. if C1 = C0 - 1
  //    we can turn:
  //    (a) blt X, Y -> bge Z, X
  //    (b) bge X, Y -> blt Z, X
  //
  // To make sure this optimization is really beneficial, we only
  // optimize for cases where Y had only one use (i.e. only used by the branch).

  // Right now we only care about LI (i.e. ADDI x0, imm)
  auto isLoadImm = [](const MachineInstr *MI, int64_t &Imm) -> bool {
    if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&
        MI->getOperand(1).getReg() == RISCV::X0) {
      Imm = MI->getOperand(2).getImm();
      return true;
    }
    return false;
  };
  // Either a load from immediate instruction or X0.
  auto isFromLoadImm = [&](const MachineOperand &Op, int64_t &Imm) -> bool {
    if (!Op.isReg())
      return false;
    Register Reg = Op.getReg();
    return Reg.isVirtual() && isLoadImm(MRI.getVRegDef(Reg), Imm);
  };

  MachineOperand &LHS = MI.getOperand(0);
  MachineOperand &RHS = MI.getOperand(1);
  // Try to find the register for constant Z; return
  // invalid register otherwise.
  auto searchConst = [&](int64_t C1) -> Register {
    MachineBasicBlock::reverse_iterator II(&MI), E = MBB->rend();
    auto DefC1 = std::find_if(++II, E, [&](const MachineInstr &I) -> bool {
      int64_t Imm;
      return isLoadImm(&I, Imm) && Imm == C1 &&
             I.getOperand(0).getReg().isVirtual();
    });
    if (DefC1 != E)
      return DefC1->getOperand(0).getReg();

    return Register();
  };

  bool Modify = false;
  int64_t C0;
  if (isFromLoadImm(LHS, C0) && MRI.hasOneUse(LHS.getReg())) {
    // Might be case 1.
    // Signed integer overflow is UB. (UINT64_MAX is bigger so we don't need
    // to worry about unsigned overflow here)
    if (C0 < INT64_MAX)
      if (Register RegZ = searchConst(C0 + 1)) {
        reverseBranchCondition(Cond);
        Cond[1] = MachineOperand::CreateReg(RHS.getReg(), /*isDef=*/false);
        Cond[2] = MachineOperand::CreateReg(RegZ, /*isDef=*/false);
        // We might extend the live range of Z, clear its kill flag to
        // account for this.
        MRI.clearKillFlags(RegZ);
        Modify = true;
      }
  } else if (isFromLoadImm(RHS, C0) && MRI.hasOneUse(RHS.getReg())) {
    // Might be case 2.
    // For unsigned cases, we don't want C1 to wrap back to UINT64_MAX
    // when C0 is zero.
    if ((CC == RISCVCC::COND_GE || CC == RISCVCC::COND_LT) || C0)
      if (Register RegZ = searchConst(C0 - 1)) {
        reverseBranchCondition(Cond);
        Cond[1] = MachineOperand::CreateReg(RegZ, /*isDef=*/false);
        Cond[2] = MachineOperand::CreateReg(LHS.getReg(), /*isDef=*/false);
        // We might extend the live range of Z, clear its kill flag to
        // account for this.
        MRI.clearKillFlags(RegZ);
        Modify = true;
      }
  }

  if (!Modify)
    return false;

  // Build the new branch and remove the old one.
  BuildMI(*MBB, MI, MI.getDebugLoc(),
          getBrCond(static_cast<RISCVCC::CondCode>(Cond[0].getImm())))
      .add(Cond[1])
      .add(Cond[2])
      .addMBB(TBB);
  MI.eraseFromParent();

  return true;
}

MachineBasicBlock *
RISCVInstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
  assert(MI.getDesc().isBranch() && "Unexpected opcode!");
  // The branch target is always the last operand.
  int NumOp = MI.getNumExplicitOperands();
  return MI.getOperand(NumOp - 1).getMBB();
}

bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
                                           int64_t BrOffset) const {
  unsigned XLen = STI.getXLen();
  // Ideally we could determine the supported branch offset from the
  // RISCVII::FormMask, but this can't be used for Pseudo instructions like
  // PseudoBR.
  switch (BranchOp) {
  default:
    llvm_unreachable("Unexpected opcode!");
  case RISCV::BEQ:
  case RISCV::BNE:
  case RISCV::BLT:
  case RISCV::BGE:
  case RISCV::BLTU:
  case RISCV::BGEU:
  case RISCV::CV_BEQIMM:
  case RISCV::CV_BNEIMM:
    return isIntN(13, BrOffset);
  case RISCV::JAL:
  case RISCV::PseudoBR:
    return isIntN(21, BrOffset);
  case RISCV::PseudoJump:
    return isIntN(32, SignExtend64(BrOffset + 0x800, XLen));
  }
}

// If the operation has a predicated pseudo instruction, return the pseudo
// instruction opcode. Otherwise, return RISCV::INSTRUCTION_LIST_END.
// TODO: Support more operations.
unsigned getPredicatedOpcode(unsigned Opcode) {
  switch (Opcode) {
  case RISCV::ADD: return RISCV::PseudoCCADD; break;
  case RISCV::SUB: return RISCV::PseudoCCSUB; break;
  case RISCV::SLL: return RISCV::PseudoCCSLL; break;
  case RISCV::SRL: return RISCV::PseudoCCSRL; break;
  case RISCV::SRA: return RISCV::PseudoCCSRA; break;
  case RISCV::AND: return RISCV::PseudoCCAND; break;
  case RISCV::OR: return RISCV::PseudoCCOR; break;
  case RISCV::XOR: return RISCV::PseudoCCXOR; break;

  case RISCV::ADDI: return RISCV::PseudoCCADDI; break;
  case RISCV::SLLI: return RISCV::PseudoCCSLLI; break;
  case RISCV::SRLI: return RISCV::PseudoCCSRLI; break;
  case RISCV::SRAI: return RISCV::PseudoCCSRAI; break;
  case RISCV::ANDI: return RISCV::PseudoCCANDI; break;
  case RISCV::ORI: return RISCV::PseudoCCORI; break;
  case RISCV::XORI: return RISCV::PseudoCCXORI; break;

  case RISCV::ADDW: return RISCV::PseudoCCADDW; break;
  case RISCV::SUBW: return RISCV::PseudoCCSUBW; break;
  case RISCV::SLLW: return RISCV::PseudoCCSLLW; break;
  case RISCV::SRLW: return RISCV::PseudoCCSRLW; break;
  case RISCV::SRAW: return RISCV::PseudoCCSRAW; break;

  case RISCV::ADDIW: return RISCV::PseudoCCADDIW; break;
  case RISCV::SLLIW: return RISCV::PseudoCCSLLIW; break;
  case RISCV::SRLIW: return RISCV::PseudoCCSRLIW; break;
  case RISCV::SRAIW: return RISCV::PseudoCCSRAIW; break;

  case RISCV::ANDN: return RISCV::PseudoCCANDN; break;
  case RISCV::ORN: return RISCV::PseudoCCORN; break;
  case RISCV::XNOR: return RISCV::PseudoCCXNOR; break;
  }

  return RISCV::INSTRUCTION_LIST_END;
}

/// Identify instructions that can be folded into a CCMOV instruction, and
/// return the defining instruction.
static MachineInstr *canFoldAsPredicatedOp(Register Reg,
                                           const MachineRegisterInfo &MRI,
                                           const TargetInstrInfo *TII) {
  if (!Reg.isVirtual())
    return nullptr;
  if (!MRI.hasOneNonDBGUse(Reg))
    return nullptr;
  MachineInstr *MI = MRI.getVRegDef(Reg);
  if (!MI)
    return nullptr;
  // Check if MI can be predicated and folded into the CCMOV.
  if (getPredicatedOpcode(MI->getOpcode()) == RISCV::INSTRUCTION_LIST_END)
    return nullptr;
  // Don't predicate li idiom.
  if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&
      MI->getOperand(1).getReg() == RISCV::X0)
    return nullptr;
  // Check if MI has any other defs or physreg uses.
  for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
    // Reject frame index operands, PEI can't handle the predicated pseudos.
    if (MO.isFI() || MO.isCPI() || MO.isJTI())
      return nullptr;
    if (!MO.isReg())
      continue;
    // MI can't have any tied operands, that would conflict with predication.
    if (MO.isTied())
      return nullptr;
    if (MO.isDef())
      return nullptr;
    // Allow constant physregs.
    if (MO.getReg().isPhysical() && !MRI.isConstantPhysReg(MO.getReg()))
      return nullptr;
  }
  bool DontMoveAcrossStores = true;
  if (!MI->isSafeToMove(DontMoveAcrossStores))
    return nullptr;
  return MI;
}

bool RISCVInstrInfo::analyzeSelect(const MachineInstr &MI,
                                   SmallVectorImpl<MachineOperand> &Cond,
                                   unsigned &TrueOp, unsigned &FalseOp,
                                   bool &Optimizable) const {
  assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&
         "Unknown select instruction");
  // CCMOV operands:
  // 0: Def.
  // 1: LHS of compare.
  // 2: RHS of compare.
  // 3: Condition code.
  // 4: False use.
  // 5: True use.
  TrueOp = 5;
  FalseOp = 4;
  Cond.push_back(MI.getOperand(1));
  Cond.push_back(MI.getOperand(2));
  Cond.push_back(MI.getOperand(3));
  // We can only fold when we support short forward branch opt.
  Optimizable = STI.hasShortForwardBranchOpt();
  return false;
}

MachineInstr *
RISCVInstrInfo::optimizeSelect(MachineInstr &MI,
                               SmallPtrSetImpl<MachineInstr *> &SeenMIs,
                               bool PreferFalse) const {
  assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&
         "Unknown select instruction");
  if (!STI.hasShortForwardBranchOpt())
    return nullptr;

  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  MachineInstr *DefMI =
      canFoldAsPredicatedOp(MI.getOperand(5).getReg(), MRI, this);
  bool Invert = !DefMI;
  if (!DefMI)
    DefMI = canFoldAsPredicatedOp(MI.getOperand(4).getReg(), MRI, this);
  if (!DefMI)
    return nullptr;

  // Find new register class to use.
  MachineOperand FalseReg = MI.getOperand(Invert ? 5 : 4);
  Register DestReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
  if (!MRI.constrainRegClass(DestReg, PreviousClass))
    return nullptr;

  unsigned PredOpc = getPredicatedOpcode(DefMI->getOpcode());
  assert(PredOpc != RISCV::INSTRUCTION_LIST_END && "Unexpected opcode!");

  // Create a new predicated version of DefMI.
  MachineInstrBuilder NewMI =
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(PredOpc), DestReg);

  // Copy the condition portion.
  NewMI.add(MI.getOperand(1));
  NewMI.add(MI.getOperand(2));

  // Add condition code, inverting if necessary.
  auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
  if (Invert)
    CC = RISCVCC::getOppositeBranchCondition(CC);
  NewMI.addImm(CC);

  // Copy the false register.
  NewMI.add(FalseReg);

  // Copy all the DefMI operands.
  const MCInstrDesc &DefDesc = DefMI->getDesc();
  for (unsigned i = 1, e = DefDesc.getNumOperands(); i != e; ++i)
    NewMI.add(DefMI->getOperand(i));

  // Update SeenMIs set: register newly created MI and erase removed DefMI.
  SeenMIs.insert(NewMI);
  SeenMIs.erase(DefMI);

  // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
  // DefMI would be invalid when transferred inside the loop. Checking for a
  // loop is expensive, but at least remove kill flags if they are in different
  // BBs.
  if (DefMI->getParent() != MI.getParent())
    NewMI->clearKillInfo();

  // The caller will erase MI, but not DefMI.
  DefMI->eraseFromParent();
  return NewMI;
}

unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  if (MI.isMetaInstruction())
    return 0;

  unsigned Opcode = MI.getOpcode();

  if (Opcode == TargetOpcode::INLINEASM ||
      Opcode == TargetOpcode::INLINEASM_BR) {
    const MachineFunction &MF = *MI.getParent()->getParent();
    return getInlineAsmLength(MI.getOperand(0).getSymbolName(),
                              *MF.getTarget().getMCAsmInfo());
  }

  if (!MI.memoperands_empty()) {
    MachineMemOperand *MMO = *(MI.memoperands_begin());
    if (STI.hasStdExtZihintntl() && MMO->isNonTemporal()) {
      if (STI.hasStdExtCOrZca() && STI.enableRVCHintInstrs()) {
        if (isCompressibleInst(MI, STI))
          return 4; // c.ntl.all + c.load/c.store
        return 6;   // c.ntl.all + load/store
      }
      return 8; // ntl.all + load/store
    }
  }

  if (Opcode == TargetOpcode::BUNDLE)
    return getInstBundleLength(MI);

  if (MI.getParent() && MI.getParent()->getParent()) {
    if (isCompressibleInst(MI, STI))
      return 2;
  }

  switch (Opcode) {
  case RISCV::PseudoMV_FPR16INX:
  case RISCV::PseudoMV_FPR32INX:
    // MV is always compressible to either c.mv or c.li rd, 0.
    return STI.hasStdExtCOrZca() ? 2 : 4;
  case TargetOpcode::STACKMAP:
    // The upper bound for a stackmap intrinsic is the full length of its shadow
    return StackMapOpers(&MI).getNumPatchBytes();
  case TargetOpcode::PATCHPOINT:
    // The size of the patchpoint intrinsic is the number of bytes requested
    return PatchPointOpers(&MI).getNumPatchBytes();
  case TargetOpcode::STATEPOINT: {
    // The size of the statepoint intrinsic is the number of bytes requested
    unsigned NumBytes = StatepointOpers(&MI).getNumPatchBytes();
    // No patch bytes means at most a PseudoCall is emitted
    return std::max(NumBytes, 8U);
  }
  case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
  case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
  case TargetOpcode::PATCHABLE_TAIL_CALL: {
    const MachineFunction &MF = *MI.getParent()->getParent();
    const Function &F = MF.getFunction();
    if (Opcode == TargetOpcode::PATCHABLE_FUNCTION_ENTER &&
        F.hasFnAttribute("patchable-function-entry")) {
      unsigned Num;
      if (F.getFnAttribute("patchable-function-entry")
              .getValueAsString()
              .getAsInteger(10, Num))
        return get(Opcode).getSize();

      // Number of C.NOP or NOP
      return (STI.hasStdExtCOrZca() ? 2 : 4) * Num;
    }
    // XRay uses C.JAL + 21 or 33 C.NOP for each sled in RV32 and RV64,
    // respectively.
    return STI.is64Bit() ? 68 : 44;
  }
  default:
    return get(Opcode).getSize();
  }
}

unsigned RISCVInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
  unsigned Size = 0;
  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
  while (++I != E && I->isInsideBundle()) {
    assert(!I->isBundle() && "No nested bundle!");
    Size += getInstSizeInBytes(*I);
  }
  return Size;
}

bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
  const unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  default:
    break;
  case RISCV::FSGNJ_D:
  case RISCV::FSGNJ_S:
  case RISCV::FSGNJ_H:
  case RISCV::FSGNJ_D_INX:
  case RISCV::FSGNJ_D_IN32X:
  case RISCV::FSGNJ_S_INX:
  case RISCV::FSGNJ_H_INX:
    // The canonical floating-point move is fsgnj rd, rs, rs.
    return MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
           MI.getOperand(1).getReg() == MI.getOperand(2).getReg();
  case RISCV::ADDI:
  case RISCV::ORI:
  case RISCV::XORI:
    return (MI.getOperand(1).isReg() &&
            MI.getOperand(1).getReg() == RISCV::X0) ||
           (MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0);
  }
  return MI.isAsCheapAsAMove();
}

std::optional<DestSourcePair>
RISCVInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
  if (MI.isMoveReg())
    return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
  switch (MI.getOpcode()) {
  default:
    break;
  case RISCV::ADDI:
    // Operand 1 can be a frameindex but callers expect registers
    if (MI.getOperand(1).isReg() && MI.getOperand(2).isImm() &&
        MI.getOperand(2).getImm() == 0)
      return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
    break;
  case RISCV::FSGNJ_D:
  case RISCV::FSGNJ_S:
  case RISCV::FSGNJ_H:
  case RISCV::FSGNJ_D_INX:
  case RISCV::FSGNJ_D_IN32X:
  case RISCV::FSGNJ_S_INX:
  case RISCV::FSGNJ_H_INX:
    // The canonical floating-point move is fsgnj rd, rs, rs.
    if (MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
        MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
      return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
    break;
  }
  return std::nullopt;
}

MachineTraceStrategy RISCVInstrInfo::getMachineCombinerTraceStrategy() const {
  if (ForceMachineCombinerStrategy.getNumOccurrences() == 0) {
    // The option is unused. Choose the Local strategy only for in-order cores.
    // When the scheduling model is unspecified, use the MinInstrCount strategy
    // as the more generic one.
    const auto &SchedModel = STI.getSchedModel();
    return (!SchedModel.hasInstrSchedModel() || SchedModel.isOutOfOrder())
               ? MachineTraceStrategy::TS_MinInstrCount
               : MachineTraceStrategy::TS_Local;
  }
  // The strategy was forced by the option.
1681 return ForceMachineCombinerStrategy; 1682 } 1683 1684 void RISCVInstrInfo::finalizeInsInstrs( 1685 MachineInstr &Root, unsigned &Pattern, 1686 SmallVectorImpl<MachineInstr *> &InsInstrs) const { 1687 int16_t FrmOpIdx = 1688 RISCV::getNamedOperandIdx(Root.getOpcode(), RISCV::OpName::frm); 1689 if (FrmOpIdx < 0) { 1690 assert(all_of(InsInstrs, 1691 [](MachineInstr *MI) { 1692 return RISCV::getNamedOperandIdx(MI->getOpcode(), 1693 RISCV::OpName::frm) < 0; 1694 }) && 1695 "New instructions require FRM whereas the old one does not have it"); 1696 return; 1697 } 1698 1699 const MachineOperand &FRM = Root.getOperand(FrmOpIdx); 1700 MachineFunction &MF = *Root.getMF(); 1701 1702 for (auto *NewMI : InsInstrs) { 1703 // We'd already added the FRM operand. 1704 if (static_cast<unsigned>(RISCV::getNamedOperandIdx( 1705 NewMI->getOpcode(), RISCV::OpName::frm)) != NewMI->getNumOperands()) 1706 continue; 1707 MachineInstrBuilder MIB(MF, NewMI); 1708 MIB.add(FRM); 1709 if (FRM.getImm() == RISCVFPRndMode::DYN) 1710 MIB.addUse(RISCV::FRM, RegState::Implicit); 1711 } 1712 } 1713 1714 static bool isFADD(unsigned Opc) { 1715 switch (Opc) { 1716 default: 1717 return false; 1718 case RISCV::FADD_H: 1719 case RISCV::FADD_S: 1720 case RISCV::FADD_D: 1721 return true; 1722 } 1723 } 1724 1725 static bool isFSUB(unsigned Opc) { 1726 switch (Opc) { 1727 default: 1728 return false; 1729 case RISCV::FSUB_H: 1730 case RISCV::FSUB_S: 1731 case RISCV::FSUB_D: 1732 return true; 1733 } 1734 } 1735 1736 static bool isFMUL(unsigned Opc) { 1737 switch (Opc) { 1738 default: 1739 return false; 1740 case RISCV::FMUL_H: 1741 case RISCV::FMUL_S: 1742 case RISCV::FMUL_D: 1743 return true; 1744 } 1745 } 1746 1747 bool RISCVInstrInfo::isVectorAssociativeAndCommutative(const MachineInstr &Inst, 1748 bool Invert) const { 1749 #define OPCODE_LMUL_CASE(OPC) \ 1750 case RISCV::OPC##_M1: \ 1751 case RISCV::OPC##_M2: \ 1752 case RISCV::OPC##_M4: \ 1753 case RISCV::OPC##_M8: \ 1754 case RISCV::OPC##_MF2: \ 1755 case RISCV::OPC##_MF4: \ 1756 case RISCV::OPC##_MF8 1757 1758 #define OPCODE_LMUL_MASK_CASE(OPC) \ 1759 case RISCV::OPC##_M1_MASK: \ 1760 case RISCV::OPC##_M2_MASK: \ 1761 case RISCV::OPC##_M4_MASK: \ 1762 case RISCV::OPC##_M8_MASK: \ 1763 case RISCV::OPC##_MF2_MASK: \ 1764 case RISCV::OPC##_MF4_MASK: \ 1765 case RISCV::OPC##_MF8_MASK 1766 1767 unsigned Opcode = Inst.getOpcode(); 1768 if (Invert) { 1769 if (auto InvOpcode = getInverseOpcode(Opcode)) 1770 Opcode = *InvOpcode; 1771 else 1772 return false; 1773 } 1774 1775 // clang-format off 1776 switch (Opcode) { 1777 default: 1778 return false; 1779 OPCODE_LMUL_CASE(PseudoVADD_VV): 1780 OPCODE_LMUL_MASK_CASE(PseudoVADD_VV): 1781 OPCODE_LMUL_CASE(PseudoVMUL_VV): 1782 OPCODE_LMUL_MASK_CASE(PseudoVMUL_VV): 1783 return true; 1784 } 1785 // clang-format on 1786 1787 #undef OPCODE_LMUL_MASK_CASE 1788 #undef OPCODE_LMUL_CASE 1789 } 1790 1791 bool RISCVInstrInfo::areRVVInstsReassociable(const MachineInstr &Root, 1792 const MachineInstr &Prev) const { 1793 if (!areOpcodesEqualOrInverse(Root.getOpcode(), Prev.getOpcode())) 1794 return false; 1795 1796 assert(Root.getMF() == Prev.getMF()); 1797 const MachineRegisterInfo *MRI = &Root.getMF()->getRegInfo(); 1798 const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); 1799 1800 // Make sure vtype operands are also the same. 
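// Illustrative example (virtual register names are hypothetical): two pseudos
// such as
//   %a = PseudoVADD_VV_M1 %pt, %x, %y, %avl, 5 /*SEW=e32*/, 3 /*ta, ma*/
//   %b = PseudoVADD_VV_M1 %pt, %a, %z, %avl, 5 /*SEW=e32*/, 3 /*ta, ma*/
// are only treated as reassociable when the passthru, SEW, reaching V0
// definition (for masked forms), policy, VL and, where present, rounding-mode
// operands checked below all match; a differing AVL or SEW defeats the
// reassociation.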
1801 const MCInstrDesc &Desc = get(Root.getOpcode()); 1802 const uint64_t TSFlags = Desc.TSFlags; 1803 1804 auto checkImmOperand = [&](unsigned OpIdx) { 1805 return Root.getOperand(OpIdx).getImm() == Prev.getOperand(OpIdx).getImm(); 1806 }; 1807 1808 auto checkRegOperand = [&](unsigned OpIdx) { 1809 return Root.getOperand(OpIdx).getReg() == Prev.getOperand(OpIdx).getReg(); 1810 }; 1811 1812 // PassThru 1813 // TODO: Potentially we can loosen the condition to consider Root to be 1814 // associable with Prev if Root has NoReg as passthru. In which case we 1815 // also need to loosen the condition on vector policies between these. 1816 if (!checkRegOperand(1)) 1817 return false; 1818 1819 // SEW 1820 if (RISCVII::hasSEWOp(TSFlags) && 1821 !checkImmOperand(RISCVII::getSEWOpNum(Desc))) 1822 return false; 1823 1824 // Mask 1825 if (RISCVII::usesMaskPolicy(TSFlags)) { 1826 const MachineBasicBlock *MBB = Root.getParent(); 1827 const MachineBasicBlock::const_reverse_iterator It1(&Root); 1828 const MachineBasicBlock::const_reverse_iterator It2(&Prev); 1829 Register MI1VReg; 1830 1831 bool SeenMI2 = false; 1832 for (auto End = MBB->rend(), It = It1; It != End; ++It) { 1833 if (It == It2) { 1834 SeenMI2 = true; 1835 if (!MI1VReg.isValid()) 1836 // There is no V0 def between Root and Prev; they're sharing the 1837 // same V0. 1838 break; 1839 } 1840 1841 if (It->modifiesRegister(RISCV::V0, TRI)) { 1842 Register SrcReg = It->getOperand(1).getReg(); 1843 // If it's not VReg it'll be more difficult to track its defs, so 1844 // bailing out here just to be safe. 1845 if (!SrcReg.isVirtual()) 1846 return false; 1847 1848 if (!MI1VReg.isValid()) { 1849 // This is the V0 def for Root. 1850 MI1VReg = SrcReg; 1851 continue; 1852 } 1853 1854 // Some random mask updates. 1855 if (!SeenMI2) 1856 continue; 1857 1858 // This is the V0 def for Prev; check if it's the same as that of 1859 // Root. 1860 if (MI1VReg != SrcReg) 1861 return false; 1862 else 1863 break; 1864 } 1865 } 1866 1867 // If we haven't encountered Prev, it's likely that this function was 1868 // called in a wrong way (e.g. Root is before Prev). 1869 assert(SeenMI2 && "Prev is expected to appear before Root"); 1870 } 1871 1872 // Tail / Mask policies 1873 if (RISCVII::hasVecPolicyOp(TSFlags) && 1874 !checkImmOperand(RISCVII::getVecPolicyOpNum(Desc))) 1875 return false; 1876 1877 // VL 1878 if (RISCVII::hasVLOp(TSFlags)) { 1879 unsigned OpIdx = RISCVII::getVLOpNum(Desc); 1880 const MachineOperand &Op1 = Root.getOperand(OpIdx); 1881 const MachineOperand &Op2 = Prev.getOperand(OpIdx); 1882 if (Op1.getType() != Op2.getType()) 1883 return false; 1884 switch (Op1.getType()) { 1885 case MachineOperand::MO_Register: 1886 if (Op1.getReg() != Op2.getReg()) 1887 return false; 1888 break; 1889 case MachineOperand::MO_Immediate: 1890 if (Op1.getImm() != Op2.getImm()) 1891 return false; 1892 break; 1893 default: 1894 llvm_unreachable("Unrecognized VL operand type"); 1895 } 1896 } 1897 1898 // Rounding modes 1899 if (RISCVII::hasRoundModeOp(TSFlags) && 1900 !checkImmOperand(RISCVII::getVLOpNum(Desc) - 1)) 1901 return false; 1902 1903 return true; 1904 } 1905 1906 // Most of our RVV pseudos have passthru operand, so the real operands 1907 // start from index = 2. 
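// Below, getOperand(2) and getOperand(3) are therefore the two source
// operands whose defining instructions are examined as reassociation
// candidates.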
1908 bool RISCVInstrInfo::hasReassociableVectorSibling(const MachineInstr &Inst, 1909 bool &Commuted) const { 1910 const MachineBasicBlock *MBB = Inst.getParent(); 1911 const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 1912 assert(RISCVII::isFirstDefTiedToFirstUse(get(Inst.getOpcode())) && 1913 "Expect the present of passthrough operand."); 1914 MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg()); 1915 MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(3).getReg()); 1916 1917 // If only one operand has the same or inverse opcode and it's the second 1918 // source operand, the operands must be commuted. 1919 Commuted = !areRVVInstsReassociable(Inst, *MI1) && 1920 areRVVInstsReassociable(Inst, *MI2); 1921 if (Commuted) 1922 std::swap(MI1, MI2); 1923 1924 return areRVVInstsReassociable(Inst, *MI1) && 1925 (isVectorAssociativeAndCommutative(*MI1) || 1926 isVectorAssociativeAndCommutative(*MI1, /* Invert */ true)) && 1927 hasReassociableOperands(*MI1, MBB) && 1928 MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg()); 1929 } 1930 1931 bool RISCVInstrInfo::hasReassociableOperands( 1932 const MachineInstr &Inst, const MachineBasicBlock *MBB) const { 1933 if (!isVectorAssociativeAndCommutative(Inst) && 1934 !isVectorAssociativeAndCommutative(Inst, /*Invert=*/true)) 1935 return TargetInstrInfo::hasReassociableOperands(Inst, MBB); 1936 1937 const MachineOperand &Op1 = Inst.getOperand(2); 1938 const MachineOperand &Op2 = Inst.getOperand(3); 1939 const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 1940 1941 // We need virtual register definitions for the operands that we will 1942 // reassociate. 1943 MachineInstr *MI1 = nullptr; 1944 MachineInstr *MI2 = nullptr; 1945 if (Op1.isReg() && Op1.getReg().isVirtual()) 1946 MI1 = MRI.getUniqueVRegDef(Op1.getReg()); 1947 if (Op2.isReg() && Op2.getReg().isVirtual()) 1948 MI2 = MRI.getUniqueVRegDef(Op2.getReg()); 1949 1950 // And at least one operand must be defined in MBB. 1951 return MI1 && MI2 && (MI1->getParent() == MBB || MI2->getParent() == MBB); 1952 } 1953 1954 void RISCVInstrInfo::getReassociateOperandIndices( 1955 const MachineInstr &Root, unsigned Pattern, 1956 std::array<unsigned, 5> &OperandIndices) const { 1957 TargetInstrInfo::getReassociateOperandIndices(Root, Pattern, OperandIndices); 1958 if (RISCV::getRVVMCOpcode(Root.getOpcode())) { 1959 // Skip the passthrough operand, so increment all indices by one. 1960 for (unsigned I = 0; I < 5; ++I) 1961 ++OperandIndices[I]; 1962 } 1963 } 1964 1965 bool RISCVInstrInfo::hasReassociableSibling(const MachineInstr &Inst, 1966 bool &Commuted) const { 1967 if (isVectorAssociativeAndCommutative(Inst) || 1968 isVectorAssociativeAndCommutative(Inst, /*Invert=*/true)) 1969 return hasReassociableVectorSibling(Inst, Commuted); 1970 1971 if (!TargetInstrInfo::hasReassociableSibling(Inst, Commuted)) 1972 return false; 1973 1974 const MachineRegisterInfo &MRI = Inst.getMF()->getRegInfo(); 1975 unsigned OperandIdx = Commuted ? 
2 : 1; 1976 const MachineInstr &Sibling = 1977 *MRI.getVRegDef(Inst.getOperand(OperandIdx).getReg()); 1978 1979 int16_t InstFrmOpIdx = 1980 RISCV::getNamedOperandIdx(Inst.getOpcode(), RISCV::OpName::frm); 1981 int16_t SiblingFrmOpIdx = 1982 RISCV::getNamedOperandIdx(Sibling.getOpcode(), RISCV::OpName::frm); 1983 1984 return (InstFrmOpIdx < 0 && SiblingFrmOpIdx < 0) || 1985 RISCV::hasEqualFRM(Inst, Sibling); 1986 } 1987 1988 bool RISCVInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst, 1989 bool Invert) const { 1990 if (isVectorAssociativeAndCommutative(Inst, Invert)) 1991 return true; 1992 1993 unsigned Opc = Inst.getOpcode(); 1994 if (Invert) { 1995 auto InverseOpcode = getInverseOpcode(Opc); 1996 if (!InverseOpcode) 1997 return false; 1998 Opc = *InverseOpcode; 1999 } 2000 2001 if (isFADD(Opc) || isFMUL(Opc)) 2002 return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) && 2003 Inst.getFlag(MachineInstr::MIFlag::FmNsz); 2004 2005 switch (Opc) { 2006 default: 2007 return false; 2008 case RISCV::ADD: 2009 case RISCV::ADDW: 2010 case RISCV::AND: 2011 case RISCV::OR: 2012 case RISCV::XOR: 2013 // From RISC-V ISA spec, if both the high and low bits of the same product 2014 // are required, then the recommended code sequence is: 2015 // 2016 // MULH[[S]U] rdh, rs1, rs2 2017 // MUL rdl, rs1, rs2 2018 // (source register specifiers must be in same order and rdh cannot be the 2019 // same as rs1 or rs2) 2020 // 2021 // Microarchitectures can then fuse these into a single multiply operation 2022 // instead of performing two separate multiplies. 2023 // MachineCombiner may reassociate MUL operands and lose the fusion 2024 // opportunity. 2025 case RISCV::MUL: 2026 case RISCV::MULW: 2027 case RISCV::MIN: 2028 case RISCV::MINU: 2029 case RISCV::MAX: 2030 case RISCV::MAXU: 2031 case RISCV::FMIN_H: 2032 case RISCV::FMIN_S: 2033 case RISCV::FMIN_D: 2034 case RISCV::FMAX_H: 2035 case RISCV::FMAX_S: 2036 case RISCV::FMAX_D: 2037 return true; 2038 } 2039 2040 return false; 2041 } 2042 2043 std::optional<unsigned> 2044 RISCVInstrInfo::getInverseOpcode(unsigned Opcode) const { 2045 #define RVV_OPC_LMUL_CASE(OPC, INV) \ 2046 case RISCV::OPC##_M1: \ 2047 return RISCV::INV##_M1; \ 2048 case RISCV::OPC##_M2: \ 2049 return RISCV::INV##_M2; \ 2050 case RISCV::OPC##_M4: \ 2051 return RISCV::INV##_M4; \ 2052 case RISCV::OPC##_M8: \ 2053 return RISCV::INV##_M8; \ 2054 case RISCV::OPC##_MF2: \ 2055 return RISCV::INV##_MF2; \ 2056 case RISCV::OPC##_MF4: \ 2057 return RISCV::INV##_MF4; \ 2058 case RISCV::OPC##_MF8: \ 2059 return RISCV::INV##_MF8 2060 2061 #define RVV_OPC_LMUL_MASK_CASE(OPC, INV) \ 2062 case RISCV::OPC##_M1_MASK: \ 2063 return RISCV::INV##_M1_MASK; \ 2064 case RISCV::OPC##_M2_MASK: \ 2065 return RISCV::INV##_M2_MASK; \ 2066 case RISCV::OPC##_M4_MASK: \ 2067 return RISCV::INV##_M4_MASK; \ 2068 case RISCV::OPC##_M8_MASK: \ 2069 return RISCV::INV##_M8_MASK; \ 2070 case RISCV::OPC##_MF2_MASK: \ 2071 return RISCV::INV##_MF2_MASK; \ 2072 case RISCV::OPC##_MF4_MASK: \ 2073 return RISCV::INV##_MF4_MASK; \ 2074 case RISCV::OPC##_MF8_MASK: \ 2075 return RISCV::INV##_MF8_MASK 2076 2077 switch (Opcode) { 2078 default: 2079 return std::nullopt; 2080 case RISCV::FADD_H: 2081 return RISCV::FSUB_H; 2082 case RISCV::FADD_S: 2083 return RISCV::FSUB_S; 2084 case RISCV::FADD_D: 2085 return RISCV::FSUB_D; 2086 case RISCV::FSUB_H: 2087 return RISCV::FADD_H; 2088 case RISCV::FSUB_S: 2089 return RISCV::FADD_S; 2090 case RISCV::FSUB_D: 2091 return RISCV::FADD_D; 2092 case RISCV::ADD: 2093 return RISCV::SUB; 2094 case 
RISCV::SUB: 2095 return RISCV::ADD; 2096 case RISCV::ADDW: 2097 return RISCV::SUBW; 2098 case RISCV::SUBW: 2099 return RISCV::ADDW; 2100 // clang-format off 2101 RVV_OPC_LMUL_CASE(PseudoVADD_VV, PseudoVSUB_VV); 2102 RVV_OPC_LMUL_MASK_CASE(PseudoVADD_VV, PseudoVSUB_VV); 2103 RVV_OPC_LMUL_CASE(PseudoVSUB_VV, PseudoVADD_VV); 2104 RVV_OPC_LMUL_MASK_CASE(PseudoVSUB_VV, PseudoVADD_VV); 2105 // clang-format on 2106 } 2107 2108 #undef RVV_OPC_LMUL_MASK_CASE 2109 #undef RVV_OPC_LMUL_CASE 2110 } 2111 2112 static bool canCombineFPFusedMultiply(const MachineInstr &Root, 2113 const MachineOperand &MO, 2114 bool DoRegPressureReduce) { 2115 if (!MO.isReg() || !MO.getReg().isVirtual()) 2116 return false; 2117 const MachineRegisterInfo &MRI = Root.getMF()->getRegInfo(); 2118 MachineInstr *MI = MRI.getVRegDef(MO.getReg()); 2119 if (!MI || !isFMUL(MI->getOpcode())) 2120 return false; 2121 2122 if (!Root.getFlag(MachineInstr::MIFlag::FmContract) || 2123 !MI->getFlag(MachineInstr::MIFlag::FmContract)) 2124 return false; 2125 2126 // Try combining even if fmul has more than one use as it eliminates 2127 // dependency between fadd(fsub) and fmul. However, it can extend liveranges 2128 // for fmul operands, so reject the transformation in register pressure 2129 // reduction mode. 2130 if (DoRegPressureReduce && !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg())) 2131 return false; 2132 2133 // Do not combine instructions from different basic blocks. 2134 if (Root.getParent() != MI->getParent()) 2135 return false; 2136 return RISCV::hasEqualFRM(Root, *MI); 2137 } 2138 2139 static bool getFPFusedMultiplyPatterns(MachineInstr &Root, 2140 SmallVectorImpl<unsigned> &Patterns, 2141 bool DoRegPressureReduce) { 2142 unsigned Opc = Root.getOpcode(); 2143 bool IsFAdd = isFADD(Opc); 2144 if (!IsFAdd && !isFSUB(Opc)) 2145 return false; 2146 bool Added = false; 2147 if (canCombineFPFusedMultiply(Root, Root.getOperand(1), 2148 DoRegPressureReduce)) { 2149 Patterns.push_back(IsFAdd ? RISCVMachineCombinerPattern::FMADD_AX 2150 : RISCVMachineCombinerPattern::FMSUB); 2151 Added = true; 2152 } 2153 if (canCombineFPFusedMultiply(Root, Root.getOperand(2), 2154 DoRegPressureReduce)) { 2155 Patterns.push_back(IsFAdd ? RISCVMachineCombinerPattern::FMADD_XA 2156 : RISCVMachineCombinerPattern::FNMSUB); 2157 Added = true; 2158 } 2159 return Added; 2160 } 2161 2162 static bool getFPPatterns(MachineInstr &Root, 2163 SmallVectorImpl<unsigned> &Patterns, 2164 bool DoRegPressureReduce) { 2165 return getFPFusedMultiplyPatterns(Root, Patterns, DoRegPressureReduce); 2166 } 2167 2168 /// Utility routine that checks if \param MO is defined by an 2169 /// \param CombineOpc instruction in the basic block \param MBB 2170 static const MachineInstr *canCombine(const MachineBasicBlock &MBB, 2171 const MachineOperand &MO, 2172 unsigned CombineOpc) { 2173 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 2174 const MachineInstr *MI = nullptr; 2175 2176 if (MO.isReg() && MO.getReg().isVirtual()) 2177 MI = MRI.getUniqueVRegDef(MO.getReg()); 2178 // And it needs to be in the trace (otherwise, it won't have a depth). 2179 if (!MI || MI->getParent() != &MBB || MI->getOpcode() != CombineOpc) 2180 return nullptr; 2181 // Must only used by the user we combine with. 2182 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg())) 2183 return nullptr; 2184 2185 return MI; 2186 } 2187 2188 /// Utility routine that checks if \param MO is defined by a SLLI in \param 2189 /// MBB that can be combined by splitting across 2 SHXADD instructions. 
The 2190 /// first SHXADD shift amount is given by \param OuterShiftAmt. 2191 static bool canCombineShiftIntoShXAdd(const MachineBasicBlock &MBB, 2192 const MachineOperand &MO, 2193 unsigned OuterShiftAmt) { 2194 const MachineInstr *ShiftMI = canCombine(MBB, MO, RISCV::SLLI); 2195 if (!ShiftMI) 2196 return false; 2197 2198 unsigned InnerShiftAmt = ShiftMI->getOperand(2).getImm(); 2199 if (InnerShiftAmt < OuterShiftAmt || (InnerShiftAmt - OuterShiftAmt) > 3) 2200 return false; 2201 2202 return true; 2203 } 2204 2205 // Returns the shift amount from a SHXADD instruction. Returns 0 if the 2206 // instruction is not a SHXADD. 2207 static unsigned getSHXADDShiftAmount(unsigned Opc) { 2208 switch (Opc) { 2209 default: 2210 return 0; 2211 case RISCV::SH1ADD: 2212 return 1; 2213 case RISCV::SH2ADD: 2214 return 2; 2215 case RISCV::SH3ADD: 2216 return 3; 2217 } 2218 } 2219 2220 // Look for opportunities to combine (sh3add Z, (add X, (slli Y, 5))) into 2221 // (sh3add (sh2add Y, Z), X). 2222 static bool getSHXADDPatterns(const MachineInstr &Root, 2223 SmallVectorImpl<unsigned> &Patterns) { 2224 unsigned ShiftAmt = getSHXADDShiftAmount(Root.getOpcode()); 2225 if (!ShiftAmt) 2226 return false; 2227 2228 const MachineBasicBlock &MBB = *Root.getParent(); 2229 2230 const MachineInstr *AddMI = canCombine(MBB, Root.getOperand(2), RISCV::ADD); 2231 if (!AddMI) 2232 return false; 2233 2234 bool Found = false; 2235 if (canCombineShiftIntoShXAdd(MBB, AddMI->getOperand(1), ShiftAmt)) { 2236 Patterns.push_back(RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP1); 2237 Found = true; 2238 } 2239 if (canCombineShiftIntoShXAdd(MBB, AddMI->getOperand(2), ShiftAmt)) { 2240 Patterns.push_back(RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP2); 2241 Found = true; 2242 } 2243 2244 return Found; 2245 } 2246 2247 CombinerObjective RISCVInstrInfo::getCombinerObjective(unsigned Pattern) const { 2248 switch (Pattern) { 2249 case RISCVMachineCombinerPattern::FMADD_AX: 2250 case RISCVMachineCombinerPattern::FMADD_XA: 2251 case RISCVMachineCombinerPattern::FMSUB: 2252 case RISCVMachineCombinerPattern::FNMSUB: 2253 return CombinerObjective::MustReduceDepth; 2254 default: 2255 return TargetInstrInfo::getCombinerObjective(Pattern); 2256 } 2257 } 2258 2259 bool RISCVInstrInfo::getMachineCombinerPatterns( 2260 MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns, 2261 bool DoRegPressureReduce) const { 2262 2263 if (getFPPatterns(Root, Patterns, DoRegPressureReduce)) 2264 return true; 2265 2266 if (getSHXADDPatterns(Root, Patterns)) 2267 return true; 2268 2269 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns, 2270 DoRegPressureReduce); 2271 } 2272 2273 static unsigned getFPFusedMultiplyOpcode(unsigned RootOpc, unsigned Pattern) { 2274 switch (RootOpc) { 2275 default: 2276 llvm_unreachable("Unexpected opcode"); 2277 case RISCV::FADD_H: 2278 return RISCV::FMADD_H; 2279 case RISCV::FADD_S: 2280 return RISCV::FMADD_S; 2281 case RISCV::FADD_D: 2282 return RISCV::FMADD_D; 2283 case RISCV::FSUB_H: 2284 return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_H 2285 : RISCV::FNMSUB_H; 2286 case RISCV::FSUB_S: 2287 return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_S 2288 : RISCV::FNMSUB_S; 2289 case RISCV::FSUB_D: 2290 return Pattern == RISCVMachineCombinerPattern::FMSUB ? 
RISCV::FMSUB_D 2291 : RISCV::FNMSUB_D; 2292 } 2293 } 2294 2295 static unsigned getAddendOperandIdx(unsigned Pattern) { 2296 switch (Pattern) { 2297 default: 2298 llvm_unreachable("Unexpected pattern"); 2299 case RISCVMachineCombinerPattern::FMADD_AX: 2300 case RISCVMachineCombinerPattern::FMSUB: 2301 return 2; 2302 case RISCVMachineCombinerPattern::FMADD_XA: 2303 case RISCVMachineCombinerPattern::FNMSUB: 2304 return 1; 2305 } 2306 } 2307 2308 static void combineFPFusedMultiply(MachineInstr &Root, MachineInstr &Prev, 2309 unsigned Pattern, 2310 SmallVectorImpl<MachineInstr *> &InsInstrs, 2311 SmallVectorImpl<MachineInstr *> &DelInstrs) { 2312 MachineFunction *MF = Root.getMF(); 2313 MachineRegisterInfo &MRI = MF->getRegInfo(); 2314 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); 2315 2316 MachineOperand &Mul1 = Prev.getOperand(1); 2317 MachineOperand &Mul2 = Prev.getOperand(2); 2318 MachineOperand &Dst = Root.getOperand(0); 2319 MachineOperand &Addend = Root.getOperand(getAddendOperandIdx(Pattern)); 2320 2321 Register DstReg = Dst.getReg(); 2322 unsigned FusedOpc = getFPFusedMultiplyOpcode(Root.getOpcode(), Pattern); 2323 uint32_t IntersectedFlags = Root.getFlags() & Prev.getFlags(); 2324 DebugLoc MergedLoc = 2325 DILocation::getMergedLocation(Root.getDebugLoc(), Prev.getDebugLoc()); 2326 2327 bool Mul1IsKill = Mul1.isKill(); 2328 bool Mul2IsKill = Mul2.isKill(); 2329 bool AddendIsKill = Addend.isKill(); 2330 2331 // We need to clear kill flags since we may be extending the live range past 2332 // a kill. If the mul had kill flags, we can preserve those since we know 2333 // where the previous range stopped. 2334 MRI.clearKillFlags(Mul1.getReg()); 2335 MRI.clearKillFlags(Mul2.getReg()); 2336 2337 MachineInstrBuilder MIB = 2338 BuildMI(*MF, MergedLoc, TII->get(FusedOpc), DstReg) 2339 .addReg(Mul1.getReg(), getKillRegState(Mul1IsKill)) 2340 .addReg(Mul2.getReg(), getKillRegState(Mul2IsKill)) 2341 .addReg(Addend.getReg(), getKillRegState(AddendIsKill)) 2342 .setMIFlags(IntersectedFlags); 2343 2344 InsInstrs.push_back(MIB); 2345 if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg())) 2346 DelInstrs.push_back(&Prev); 2347 DelInstrs.push_back(&Root); 2348 } 2349 2350 // Combine patterns like (sh3add Z, (add X, (slli Y, 5))) to 2351 // (sh3add (sh2add Y, Z), X) if the shift amount can be split across two 2352 // shXadd instructions. The outer shXadd keeps its original opcode. 
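// For example (virtual register names are hypothetical), with an outer sh3add
// and an inner slli of 5, the difference 5 - 3 = 2 selects sh2add as the new
// inner instruction:
//   %t = SLLI %y, 5
//   %u = ADD %x, %t
//   %d = SH3ADD %z, %u
// becomes
//   %n = SH2ADD %y, %z
//   %d = SH3ADD %n, %x
// Both forms compute (%y << 5) + (%z << 3) + %x.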
2353 static void 2354 genShXAddAddShift(MachineInstr &Root, unsigned AddOpIdx, 2355 SmallVectorImpl<MachineInstr *> &InsInstrs, 2356 SmallVectorImpl<MachineInstr *> &DelInstrs, 2357 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) { 2358 MachineFunction *MF = Root.getMF(); 2359 MachineRegisterInfo &MRI = MF->getRegInfo(); 2360 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); 2361 2362 unsigned OuterShiftAmt = getSHXADDShiftAmount(Root.getOpcode()); 2363 assert(OuterShiftAmt != 0 && "Unexpected opcode"); 2364 2365 MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg()); 2366 MachineInstr *ShiftMI = 2367 MRI.getUniqueVRegDef(AddMI->getOperand(AddOpIdx).getReg()); 2368 2369 unsigned InnerShiftAmt = ShiftMI->getOperand(2).getImm(); 2370 assert(InnerShiftAmt >= OuterShiftAmt && "Unexpected shift amount"); 2371 2372 unsigned InnerOpc; 2373 switch (InnerShiftAmt - OuterShiftAmt) { 2374 default: 2375 llvm_unreachable("Unexpected shift amount"); 2376 case 0: 2377 InnerOpc = RISCV::ADD; 2378 break; 2379 case 1: 2380 InnerOpc = RISCV::SH1ADD; 2381 break; 2382 case 2: 2383 InnerOpc = RISCV::SH2ADD; 2384 break; 2385 case 3: 2386 InnerOpc = RISCV::SH3ADD; 2387 break; 2388 } 2389 2390 const MachineOperand &X = AddMI->getOperand(3 - AddOpIdx); 2391 const MachineOperand &Y = ShiftMI->getOperand(1); 2392 const MachineOperand &Z = Root.getOperand(1); 2393 2394 Register NewVR = MRI.createVirtualRegister(&RISCV::GPRRegClass); 2395 2396 auto MIB1 = BuildMI(*MF, MIMetadata(Root), TII->get(InnerOpc), NewVR) 2397 .addReg(Y.getReg(), getKillRegState(Y.isKill())) 2398 .addReg(Z.getReg(), getKillRegState(Z.isKill())); 2399 auto MIB2 = BuildMI(*MF, MIMetadata(Root), TII->get(Root.getOpcode()), 2400 Root.getOperand(0).getReg()) 2401 .addReg(NewVR, RegState::Kill) 2402 .addReg(X.getReg(), getKillRegState(X.isKill())); 2403 2404 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); 2405 InsInstrs.push_back(MIB1); 2406 InsInstrs.push_back(MIB2); 2407 DelInstrs.push_back(ShiftMI); 2408 DelInstrs.push_back(AddMI); 2409 DelInstrs.push_back(&Root); 2410 } 2411 2412 void RISCVInstrInfo::genAlternativeCodeSequence( 2413 MachineInstr &Root, unsigned Pattern, 2414 SmallVectorImpl<MachineInstr *> &InsInstrs, 2415 SmallVectorImpl<MachineInstr *> &DelInstrs, 2416 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const { 2417 MachineRegisterInfo &MRI = Root.getMF()->getRegInfo(); 2418 switch (Pattern) { 2419 default: 2420 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs, 2421 DelInstrs, InstrIdxForVirtReg); 2422 return; 2423 case RISCVMachineCombinerPattern::FMADD_AX: 2424 case RISCVMachineCombinerPattern::FMSUB: { 2425 MachineInstr &Prev = *MRI.getVRegDef(Root.getOperand(1).getReg()); 2426 combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs); 2427 return; 2428 } 2429 case RISCVMachineCombinerPattern::FMADD_XA: 2430 case RISCVMachineCombinerPattern::FNMSUB: { 2431 MachineInstr &Prev = *MRI.getVRegDef(Root.getOperand(2).getReg()); 2432 combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs); 2433 return; 2434 } 2435 case RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP1: 2436 genShXAddAddShift(Root, 1, InsInstrs, DelInstrs, InstrIdxForVirtReg); 2437 return; 2438 case RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP2: 2439 genShXAddAddShift(Root, 2, InsInstrs, DelInstrs, InstrIdxForVirtReg); 2440 return; 2441 } 2442 } 2443 2444 bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI, 2445 StringRef &ErrInfo) const { 2446 MCInstrDesc const &Desc = 
MI.getDesc(); 2447 2448 for (const auto &[Index, Operand] : enumerate(Desc.operands())) { 2449 unsigned OpType = Operand.OperandType; 2450 if (OpType >= RISCVOp::OPERAND_FIRST_RISCV_IMM && 2451 OpType <= RISCVOp::OPERAND_LAST_RISCV_IMM) { 2452 const MachineOperand &MO = MI.getOperand(Index); 2453 if (MO.isReg()) { 2454 ErrInfo = "Expected a non-register operand."; 2455 return false; 2456 } 2457 if (MO.isImm()) { 2458 int64_t Imm = MO.getImm(); 2459 bool Ok; 2460 switch (OpType) { 2461 default: 2462 llvm_unreachable("Unexpected operand type"); 2463 2464 // clang-format off 2465 #define CASE_OPERAND_UIMM(NUM) \ 2466 case RISCVOp::OPERAND_UIMM##NUM: \ 2467 Ok = isUInt<NUM>(Imm); \ 2468 break; 2469 #define CASE_OPERAND_SIMM(NUM) \ 2470 case RISCVOp::OPERAND_SIMM##NUM: \ 2471 Ok = isInt<NUM>(Imm); \ 2472 break; 2473 CASE_OPERAND_UIMM(1) 2474 CASE_OPERAND_UIMM(2) 2475 CASE_OPERAND_UIMM(3) 2476 CASE_OPERAND_UIMM(4) 2477 CASE_OPERAND_UIMM(5) 2478 CASE_OPERAND_UIMM(6) 2479 CASE_OPERAND_UIMM(7) 2480 CASE_OPERAND_UIMM(8) 2481 CASE_OPERAND_UIMM(10) 2482 CASE_OPERAND_UIMM(12) 2483 CASE_OPERAND_UIMM(20) 2484 // clang-format on 2485 case RISCVOp::OPERAND_UIMM2_LSB0: 2486 Ok = isShiftedUInt<1, 1>(Imm); 2487 break; 2488 case RISCVOp::OPERAND_UIMM5_LSB0: 2489 Ok = isShiftedUInt<4, 1>(Imm); 2490 break; 2491 case RISCVOp::OPERAND_UIMM6_LSB0: 2492 Ok = isShiftedUInt<5, 1>(Imm); 2493 break; 2494 case RISCVOp::OPERAND_UIMM7_LSB00: 2495 Ok = isShiftedUInt<5, 2>(Imm); 2496 break; 2497 case RISCVOp::OPERAND_UIMM7_LSB000: 2498 Ok = isShiftedUInt<4, 3>(Imm); 2499 break; 2500 case RISCVOp::OPERAND_UIMM8_LSB00: 2501 Ok = isShiftedUInt<6, 2>(Imm); 2502 break; 2503 case RISCVOp::OPERAND_UIMM8_LSB000: 2504 Ok = isShiftedUInt<5, 3>(Imm); 2505 break; 2506 case RISCVOp::OPERAND_UIMM8_GE32: 2507 Ok = isUInt<8>(Imm) && Imm >= 32; 2508 break; 2509 case RISCVOp::OPERAND_UIMM9_LSB000: 2510 Ok = isShiftedUInt<6, 3>(Imm); 2511 break; 2512 case RISCVOp::OPERAND_SIMM10_LSB0000_NONZERO: 2513 Ok = isShiftedInt<6, 4>(Imm) && (Imm != 0); 2514 break; 2515 case RISCVOp::OPERAND_UIMM10_LSB00_NONZERO: 2516 Ok = isShiftedUInt<8, 2>(Imm) && (Imm != 0); 2517 break; 2518 case RISCVOp::OPERAND_ZERO: 2519 Ok = Imm == 0; 2520 break; 2521 // clang-format off 2522 CASE_OPERAND_SIMM(5) 2523 CASE_OPERAND_SIMM(6) 2524 CASE_OPERAND_SIMM(12) 2525 CASE_OPERAND_SIMM(26) 2526 // clang-format on 2527 case RISCVOp::OPERAND_SIMM5_PLUS1: 2528 Ok = (isInt<5>(Imm) && Imm != -16) || Imm == 16; 2529 break; 2530 case RISCVOp::OPERAND_SIMM6_NONZERO: 2531 Ok = Imm != 0 && isInt<6>(Imm); 2532 break; 2533 case RISCVOp::OPERAND_VTYPEI10: 2534 Ok = isUInt<10>(Imm); 2535 break; 2536 case RISCVOp::OPERAND_VTYPEI11: 2537 Ok = isUInt<11>(Imm); 2538 break; 2539 case RISCVOp::OPERAND_SIMM12_LSB00000: 2540 Ok = isShiftedInt<7, 5>(Imm); 2541 break; 2542 case RISCVOp::OPERAND_UIMMLOG2XLEN: 2543 Ok = STI.is64Bit() ? isUInt<6>(Imm) : isUInt<5>(Imm); 2544 break; 2545 case RISCVOp::OPERAND_UIMMLOG2XLEN_NONZERO: 2546 Ok = STI.is64Bit() ? 
isUInt<6>(Imm) : isUInt<5>(Imm); 2547 Ok = Ok && Imm != 0; 2548 break; 2549 case RISCVOp::OPERAND_CLUI_IMM: 2550 Ok = (isUInt<5>(Imm) && Imm != 0) || 2551 (Imm >= 0xfffe0 && Imm <= 0xfffff); 2552 break; 2553 case RISCVOp::OPERAND_RVKRNUM: 2554 Ok = Imm >= 0 && Imm <= 10; 2555 break; 2556 case RISCVOp::OPERAND_RVKRNUM_0_7: 2557 Ok = Imm >= 0 && Imm <= 7; 2558 break; 2559 case RISCVOp::OPERAND_RVKRNUM_1_10: 2560 Ok = Imm >= 1 && Imm <= 10; 2561 break; 2562 case RISCVOp::OPERAND_RVKRNUM_2_14: 2563 Ok = Imm >= 2 && Imm <= 14; 2564 break; 2565 case RISCVOp::OPERAND_SPIMM: 2566 Ok = (Imm & 0xf) == 0; 2567 break; 2568 case RISCVOp::OPERAND_FRMARG: 2569 Ok = RISCVFPRndMode::isValidRoundingMode(Imm); 2570 break; 2571 case RISCVOp::OPERAND_RTZARG: 2572 Ok = Imm == RISCVFPRndMode::RTZ; 2573 break; 2574 case RISCVOp::OPERAND_COND_CODE: 2575 Ok = Imm >= 0 && Imm < RISCVCC::COND_INVALID; 2576 break; 2577 case RISCVOp::OPERAND_VEC_POLICY: 2578 Ok = (Imm & (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC)) == Imm; 2579 break; 2580 case RISCVOp::OPERAND_SEW: 2581 Ok = (isUInt<5>(Imm) && RISCVVType::isValidSEW(1 << Imm)); 2582 break; 2583 case RISCVOp::OPERAND_SEW_MASK: 2584 Ok = Imm == 0; 2585 break; 2586 case RISCVOp::OPERAND_VEC_RM: 2587 assert(RISCVII::hasRoundModeOp(Desc.TSFlags)); 2588 if (RISCVII::usesVXRM(Desc.TSFlags)) 2589 Ok = isUInt<2>(Imm); 2590 else 2591 Ok = RISCVFPRndMode::isValidRoundingMode(Imm); 2592 break; 2593 } 2594 if (!Ok) { 2595 ErrInfo = "Invalid immediate"; 2596 return false; 2597 } 2598 } 2599 } 2600 } 2601 2602 const uint64_t TSFlags = Desc.TSFlags; 2603 if (RISCVII::hasVLOp(TSFlags)) { 2604 const MachineOperand &Op = MI.getOperand(RISCVII::getVLOpNum(Desc)); 2605 if (!Op.isImm() && !Op.isReg()) { 2606 ErrInfo = "Invalid operand type for VL operand"; 2607 return false; 2608 } 2609 if (Op.isReg() && Op.getReg() != RISCV::NoRegister) { 2610 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 2611 auto *RC = MRI.getRegClass(Op.getReg()); 2612 if (!RISCV::GPRRegClass.hasSubClassEq(RC)) { 2613 ErrInfo = "Invalid register class for VL operand"; 2614 return false; 2615 } 2616 } 2617 if (!RISCVII::hasSEWOp(TSFlags)) { 2618 ErrInfo = "VL operand w/o SEW operand?"; 2619 return false; 2620 } 2621 } 2622 if (RISCVII::hasSEWOp(TSFlags)) { 2623 unsigned OpIdx = RISCVII::getSEWOpNum(Desc); 2624 if (!MI.getOperand(OpIdx).isImm()) { 2625 ErrInfo = "SEW value expected to be an immediate"; 2626 return false; 2627 } 2628 uint64_t Log2SEW = MI.getOperand(OpIdx).getImm(); 2629 if (Log2SEW > 31) { 2630 ErrInfo = "Unexpected SEW value"; 2631 return false; 2632 } 2633 unsigned SEW = Log2SEW ? 1 << Log2SEW : 8; 2634 if (!RISCVVType::isValidSEW(SEW)) { 2635 ErrInfo = "Unexpected SEW value"; 2636 return false; 2637 } 2638 } 2639 if (RISCVII::hasVecPolicyOp(TSFlags)) { 2640 unsigned OpIdx = RISCVII::getVecPolicyOpNum(Desc); 2641 if (!MI.getOperand(OpIdx).isImm()) { 2642 ErrInfo = "Policy operand expected to be an immediate"; 2643 return false; 2644 } 2645 uint64_t Policy = MI.getOperand(OpIdx).getImm(); 2646 if (Policy > (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC)) { 2647 ErrInfo = "Invalid Policy Value"; 2648 return false; 2649 } 2650 if (!RISCVII::hasVLOp(TSFlags)) { 2651 ErrInfo = "policy operand w/o VL operand?"; 2652 return false; 2653 } 2654 2655 // VecPolicy operands can only exist on instructions with passthru/merge 2656 // arguments. Note that not all arguments with passthru have vec policy 2657 // operands- some instructions have implicit policies. 
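// For reference, the policy immediate is a two-bit mask (bit 0 =
// TAIL_AGNOSTIC, bit 1 = MASK_AGNOSTIC), so the only legal values are
// 0 ("tu, mu") through 3 ("ta, ma"); that is what the range check above
// enforces.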
2658 unsigned UseOpIdx; 2659 if (!MI.isRegTiedToUseOperand(0, &UseOpIdx)) { 2660 ErrInfo = "policy operand w/o tied operand?"; 2661 return false; 2662 } 2663 } 2664 2665 if (int Idx = RISCVII::getFRMOpNum(Desc); 2666 Idx >= 0 && MI.getOperand(Idx).getImm() == RISCVFPRndMode::DYN && 2667 !MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr)) { 2668 ErrInfo = "dynamic rounding mode should read FRM"; 2669 return false; 2670 } 2671 2672 return true; 2673 } 2674 2675 bool RISCVInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, 2676 const MachineInstr &AddrI, 2677 ExtAddrMode &AM) const { 2678 switch (MemI.getOpcode()) { 2679 default: 2680 return false; 2681 case RISCV::LB: 2682 case RISCV::LBU: 2683 case RISCV::LH: 2684 case RISCV::LH_INX: 2685 case RISCV::LHU: 2686 case RISCV::LW: 2687 case RISCV::LW_INX: 2688 case RISCV::LWU: 2689 case RISCV::LD: 2690 case RISCV::FLH: 2691 case RISCV::FLW: 2692 case RISCV::FLD: 2693 case RISCV::SB: 2694 case RISCV::SH: 2695 case RISCV::SH_INX: 2696 case RISCV::SW: 2697 case RISCV::SW_INX: 2698 case RISCV::SD: 2699 case RISCV::FSH: 2700 case RISCV::FSW: 2701 case RISCV::FSD: 2702 break; 2703 } 2704 2705 if (MemI.getOperand(0).getReg() == Reg) 2706 return false; 2707 2708 if (AddrI.getOpcode() != RISCV::ADDI || !AddrI.getOperand(1).isReg() || 2709 !AddrI.getOperand(2).isImm()) 2710 return false; 2711 2712 int64_t OldOffset = MemI.getOperand(2).getImm(); 2713 int64_t Disp = AddrI.getOperand(2).getImm(); 2714 int64_t NewOffset = OldOffset + Disp; 2715 if (!STI.is64Bit()) 2716 NewOffset = SignExtend64<32>(NewOffset); 2717 2718 if (!isInt<12>(NewOffset)) 2719 return false; 2720 2721 AM.BaseReg = AddrI.getOperand(1).getReg(); 2722 AM.ScaledReg = 0; 2723 AM.Scale = 0; 2724 AM.Displacement = NewOffset; 2725 AM.Form = ExtAddrMode::Formula::Basic; 2726 return true; 2727 } 2728 2729 MachineInstr *RISCVInstrInfo::emitLdStWithAddr(MachineInstr &MemI, 2730 const ExtAddrMode &AM) const { 2731 2732 const DebugLoc &DL = MemI.getDebugLoc(); 2733 MachineBasicBlock &MBB = *MemI.getParent(); 2734 2735 assert(AM.ScaledReg == 0 && AM.Scale == 0 && 2736 "Addressing mode not supported for folding"); 2737 2738 return BuildMI(MBB, MemI, DL, get(MemI.getOpcode())) 2739 .addReg(MemI.getOperand(0).getReg(), 2740 MemI.mayLoad() ? RegState::Define : 0) 2741 .addReg(AM.BaseReg) 2742 .addImm(AM.Displacement) 2743 .setMemRefs(MemI.memoperands()) 2744 .setMIFlags(MemI.getFlags()); 2745 } 2746 2747 bool RISCVInstrInfo::getMemOperandsWithOffsetWidth( 2748 const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps, 2749 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, 2750 const TargetRegisterInfo *TRI) const { 2751 if (!LdSt.mayLoadOrStore()) 2752 return false; 2753 2754 // Conservatively, only handle scalar loads/stores for now. 
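// (RVV loads and stores, for example, are left to the default answer of
// "false" here, since their access size generally depends on VL/VLEN rather
// than being a fixed width.)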
2755 switch (LdSt.getOpcode()) { 2756 case RISCV::LB: 2757 case RISCV::LBU: 2758 case RISCV::SB: 2759 case RISCV::LH: 2760 case RISCV::LH_INX: 2761 case RISCV::LHU: 2762 case RISCV::FLH: 2763 case RISCV::SH: 2764 case RISCV::SH_INX: 2765 case RISCV::FSH: 2766 case RISCV::LW: 2767 case RISCV::LW_INX: 2768 case RISCV::LWU: 2769 case RISCV::FLW: 2770 case RISCV::SW: 2771 case RISCV::SW_INX: 2772 case RISCV::FSW: 2773 case RISCV::LD: 2774 case RISCV::FLD: 2775 case RISCV::SD: 2776 case RISCV::FSD: 2777 break; 2778 default: 2779 return false; 2780 } 2781 const MachineOperand *BaseOp; 2782 OffsetIsScalable = false; 2783 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI)) 2784 return false; 2785 BaseOps.push_back(BaseOp); 2786 return true; 2787 } 2788 2789 // TODO: This was copied from SIInstrInfo. Could it be lifted to a common 2790 // helper? 2791 static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, 2792 ArrayRef<const MachineOperand *> BaseOps1, 2793 const MachineInstr &MI2, 2794 ArrayRef<const MachineOperand *> BaseOps2) { 2795 // Only examine the first "base" operand of each instruction, on the 2796 // assumption that it represents the real base address of the memory access. 2797 // Other operands are typically offsets or indices from this base address. 2798 if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front())) 2799 return true; 2800 2801 if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand()) 2802 return false; 2803 2804 auto MO1 = *MI1.memoperands_begin(); 2805 auto MO2 = *MI2.memoperands_begin(); 2806 if (MO1->getAddrSpace() != MO2->getAddrSpace()) 2807 return false; 2808 2809 auto Base1 = MO1->getValue(); 2810 auto Base2 = MO2->getValue(); 2811 if (!Base1 || !Base2) 2812 return false; 2813 Base1 = getUnderlyingObject(Base1); 2814 Base2 = getUnderlyingObject(Base2); 2815 2816 if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2)) 2817 return false; 2818 2819 return Base1 == Base2; 2820 } 2821 2822 bool RISCVInstrInfo::shouldClusterMemOps( 2823 ArrayRef<const MachineOperand *> BaseOps1, int64_t Offset1, 2824 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2, 2825 int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, 2826 unsigned NumBytes) const { 2827 // If the mem ops (to be clustered) do not have the same base ptr, then they 2828 // should not be clustered 2829 if (!BaseOps1.empty() && !BaseOps2.empty()) { 2830 const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent(); 2831 const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent(); 2832 if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOps1, SecondLdSt, BaseOps2)) 2833 return false; 2834 } else if (!BaseOps1.empty() || !BaseOps2.empty()) { 2835 // If only one base op is empty, they do not have the same base ptr 2836 return false; 2837 } 2838 2839 unsigned CacheLineSize = 2840 BaseOps1.front()->getParent()->getMF()->getSubtarget().getCacheLineSize(); 2841 // Assume a cache line size of 64 bytes if no size is set in RISCVSubtarget. 2842 CacheLineSize = CacheLineSize ? CacheLineSize : 64; 2843 // Cluster if the memory operations are on the same or a neighbouring cache 2844 // line, but limit the maximum ClusterSize to avoid creating too much 2845 // additional register pressure. 2846 return ClusterSize <= 4 && std::abs(Offset1 - Offset2) < CacheLineSize; 2847 } 2848 2849 // Set BaseReg (the base register operand), Offset (the byte offset being 2850 // accessed) and the access Width of the passed instruction that reads/writes 2851 // memory. 
Returns false if the instruction does not read/write memory or the 2852 // BaseReg/Offset/Width can't be determined. Is not guaranteed to always 2853 // recognise base operands and offsets in all cases. 2854 // TODO: Add an IsScalable bool ref argument (like the equivalent AArch64 2855 // function) and set it as appropriate. 2856 bool RISCVInstrInfo::getMemOperandWithOffsetWidth( 2857 const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset, 2858 LocationSize &Width, const TargetRegisterInfo *TRI) const { 2859 if (!LdSt.mayLoadOrStore()) 2860 return false; 2861 2862 // Here we assume the standard RISC-V ISA, which uses a base+offset 2863 // addressing mode. You'll need to relax these conditions to support custom 2864 // load/store instructions. 2865 if (LdSt.getNumExplicitOperands() != 3) 2866 return false; 2867 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) || 2868 !LdSt.getOperand(2).isImm()) 2869 return false; 2870 2871 if (!LdSt.hasOneMemOperand()) 2872 return false; 2873 2874 Width = (*LdSt.memoperands_begin())->getSize(); 2875 BaseReg = &LdSt.getOperand(1); 2876 Offset = LdSt.getOperand(2).getImm(); 2877 return true; 2878 } 2879 2880 bool RISCVInstrInfo::areMemAccessesTriviallyDisjoint( 2881 const MachineInstr &MIa, const MachineInstr &MIb) const { 2882 assert(MIa.mayLoadOrStore() && "MIa must be a load or store."); 2883 assert(MIb.mayLoadOrStore() && "MIb must be a load or store."); 2884 2885 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() || 2886 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef()) 2887 return false; 2888 2889 // Retrieve the base register, offset from the base register and width. Width 2890 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4). If 2891 // base registers are identical, and the offset of a lower memory access + 2892 // the width doesn't overlap the offset of a higher memory access, 2893 // then the memory accesses are different. 2894 const TargetRegisterInfo *TRI = STI.getRegisterInfo(); 2895 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr; 2896 int64_t OffsetA = 0, OffsetB = 0; 2897 LocationSize WidthA = 0, WidthB = 0; 2898 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) && 2899 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) { 2900 if (BaseOpA->isIdenticalTo(*BaseOpB)) { 2901 int LowOffset = std::min(OffsetA, OffsetB); 2902 int HighOffset = std::max(OffsetA, OffsetB); 2903 LocationSize LowWidth = (LowOffset == OffsetA) ? 
WidthA : WidthB; 2904 if (LowWidth.hasValue() && 2905 LowOffset + (int)LowWidth.getValue() <= HighOffset) 2906 return true; 2907 } 2908 } 2909 return false; 2910 } 2911 2912 std::pair<unsigned, unsigned> 2913 RISCVInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { 2914 const unsigned Mask = RISCVII::MO_DIRECT_FLAG_MASK; 2915 return std::make_pair(TF & Mask, TF & ~Mask); 2916 } 2917 2918 ArrayRef<std::pair<unsigned, const char *>> 2919 RISCVInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { 2920 using namespace RISCVII; 2921 static const std::pair<unsigned, const char *> TargetFlags[] = { 2922 {MO_CALL, "riscv-call"}, 2923 {MO_LO, "riscv-lo"}, 2924 {MO_HI, "riscv-hi"}, 2925 {MO_PCREL_LO, "riscv-pcrel-lo"}, 2926 {MO_PCREL_HI, "riscv-pcrel-hi"}, 2927 {MO_GOT_HI, "riscv-got-hi"}, 2928 {MO_TPREL_LO, "riscv-tprel-lo"}, 2929 {MO_TPREL_HI, "riscv-tprel-hi"}, 2930 {MO_TPREL_ADD, "riscv-tprel-add"}, 2931 {MO_TLS_GOT_HI, "riscv-tls-got-hi"}, 2932 {MO_TLS_GD_HI, "riscv-tls-gd-hi"}, 2933 {MO_TLSDESC_HI, "riscv-tlsdesc-hi"}, 2934 {MO_TLSDESC_LOAD_LO, "riscv-tlsdesc-load-lo"}, 2935 {MO_TLSDESC_ADD_LO, "riscv-tlsdesc-add-lo"}, 2936 {MO_TLSDESC_CALL, "riscv-tlsdesc-call"}}; 2937 return ArrayRef(TargetFlags); 2938 } 2939 bool RISCVInstrInfo::isFunctionSafeToOutlineFrom( 2940 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const { 2941 const Function &F = MF.getFunction(); 2942 2943 // Can F be deduplicated by the linker? If it can, don't outline from it. 2944 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage()) 2945 return false; 2946 2947 // Don't outline from functions with section markings; the program could 2948 // expect that all the code is in the named section. 2949 if (F.hasSection()) 2950 return false; 2951 2952 // It's safe to outline from MF. 2953 return true; 2954 } 2955 2956 bool RISCVInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, 2957 unsigned &Flags) const { 2958 // More accurate safety checking is done in getOutliningCandidateInfo. 2959 return TargetInstrInfo::isMBBSafeToOutlineFrom(MBB, Flags); 2960 } 2961 2962 // Enum values indicating how an outlined call should be constructed. 2963 enum MachineOutlinerConstructionID { 2964 MachineOutlinerTailCall, 2965 MachineOutlinerDefault 2966 }; 2967 2968 bool RISCVInstrInfo::shouldOutlineFromFunctionByDefault( 2969 MachineFunction &MF) const { 2970 return MF.getFunction().hasMinSize(); 2971 } 2972 2973 static bool isCandidatePatchable(const MachineBasicBlock &MBB) { 2974 const MachineFunction *MF = MBB.getParent(); 2975 const Function &F = MF->getFunction(); 2976 return F.getFnAttribute("fentry-call").getValueAsBool() || 2977 F.hasFnAttribute("patchable-function-entry"); 2978 } 2979 2980 static bool isMIReadsReg(const MachineInstr &MI, const TargetRegisterInfo *TRI, 2981 unsigned RegNo) { 2982 return MI.readsRegister(RegNo, TRI) || 2983 MI.getDesc().hasImplicitUseOfPhysReg(RegNo); 2984 } 2985 2986 static bool isMIModifiesReg(const MachineInstr &MI, 2987 const TargetRegisterInfo *TRI, unsigned RegNo) { 2988 return MI.modifiesRegister(RegNo, TRI) || 2989 MI.getDesc().hasImplicitDefOfPhysReg(RegNo); 2990 } 2991 2992 static bool cannotInsertTailCall(const MachineBasicBlock &MBB) { 2993 if (!MBB.back().isReturn()) 2994 return true; 2995 if (isCandidatePatchable(MBB)) 2996 return true; 2997 2998 // If the candidate reads the pre-set register 2999 // that can be used for expanding PseudoTAIL instruction, 3000 // then we cannot insert tail call. 
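// (PseudoTAIL is expanded to an auipc+jalr sequence through a scratch
// register, so a candidate that still reads that register would observe a
// clobbered value if it were reached via a tail call. The register in
// question is obtained below via getTailExpandUseRegNo.)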
3001 const TargetSubtargetInfo &STI = MBB.getParent()->getSubtarget(); 3002 unsigned TailExpandUseRegNo = 3003 RISCVII::getTailExpandUseRegNo(STI.getFeatureBits()); 3004 for (const MachineInstr &MI : MBB) { 3005 if (isMIReadsReg(MI, STI.getRegisterInfo(), TailExpandUseRegNo)) 3006 return true; 3007 if (isMIModifiesReg(MI, STI.getRegisterInfo(), TailExpandUseRegNo)) 3008 break; 3009 } 3010 return false; 3011 } 3012 3013 static std::optional<MachineOutlinerConstructionID> 3014 analyzeCandidate(outliner::Candidate &C) { 3015 // If last instruction is return then we can rely on 3016 // the verification already performed in the getOutliningTypeImpl. 3017 if (C.back().isReturn()) { 3018 assert(!cannotInsertTailCall(*C.getMBB()) && 3019 "The candidate who uses return instruction must be outlined " 3020 "using tail call"); 3021 return MachineOutlinerTailCall; 3022 } 3023 3024 auto CandidateUsesX5 = [](outliner::Candidate &C) { 3025 const TargetRegisterInfo *TRI = C.getMF()->getSubtarget().getRegisterInfo(); 3026 if (std::any_of(C.begin(), C.end(), [TRI](const MachineInstr &MI) { 3027 return isMIModifiesReg(MI, TRI, RISCV::X5); 3028 })) 3029 return true; 3030 return !C.isAvailableAcrossAndOutOfSeq(RISCV::X5, *TRI); 3031 }; 3032 3033 if (!CandidateUsesX5(C)) 3034 return MachineOutlinerDefault; 3035 3036 return std::nullopt; 3037 } 3038 3039 std::optional<std::unique_ptr<outliner::OutlinedFunction>> 3040 RISCVInstrInfo::getOutliningCandidateInfo( 3041 const MachineModuleInfo &MMI, 3042 std::vector<outliner::Candidate> &RepeatedSequenceLocs, 3043 unsigned MinRepeats) const { 3044 3045 // Each RepeatedSequenceLoc is identical. 3046 outliner::Candidate &Candidate = RepeatedSequenceLocs[0]; 3047 auto CandidateInfo = analyzeCandidate(Candidate); 3048 if (!CandidateInfo) 3049 RepeatedSequenceLocs.clear(); 3050 3051 // If the sequence doesn't have enough candidates left, then we're done. 3052 if (RepeatedSequenceLocs.size() < MinRepeats) 3053 return std::nullopt; 3054 3055 unsigned InstrSizeCExt = 3056 Candidate.getMF()->getSubtarget<RISCVSubtarget>().hasStdExtCOrZca() ? 2 3057 : 4; 3058 unsigned CallOverhead = 0, FrameOverhead = 0; 3059 3060 MachineOutlinerConstructionID MOCI = CandidateInfo.value(); 3061 switch (MOCI) { 3062 case MachineOutlinerDefault: 3063 // call t0, function = 8 bytes. 3064 CallOverhead = 8; 3065 // jr t0 = 4 bytes, 2 bytes if compressed instructions are enabled. 3066 FrameOverhead = InstrSizeCExt; 3067 break; 3068 case MachineOutlinerTailCall: 3069 // tail call = auipc + jalr in the worst case without linker relaxation. 3070 CallOverhead = 4 + InstrSizeCExt; 3071 // Using tail call we move ret instruction from caller to callee. 3072 FrameOverhead = 0; 3073 break; 3074 } 3075 3076 for (auto &C : RepeatedSequenceLocs) 3077 C.setCallInfo(MOCI, CallOverhead); 3078 3079 unsigned SequenceSize = 0; 3080 for (auto &MI : Candidate) 3081 SequenceSize += getInstSizeInBytes(MI); 3082 3083 return std::make_unique<outliner::OutlinedFunction>( 3084 RepeatedSequenceLocs, SequenceSize, FrameOverhead, MOCI); 3085 } 3086 3087 outliner::InstrType 3088 RISCVInstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI, 3089 MachineBasicBlock::iterator &MBBI, 3090 unsigned Flags) const { 3091 MachineInstr &MI = *MBBI; 3092 MachineBasicBlock *MBB = MI.getParent(); 3093 const TargetRegisterInfo *TRI = 3094 MBB->getParent()->getSubtarget().getRegisterInfo(); 3095 const auto &F = MI.getMF()->getFunction(); 3096 3097 // We can manually strip out CFI instructions later. 
3098 if (MI.isCFIInstruction()) 3099 // If current function has exception handling code, we can't outline & 3100 // strip these CFI instructions since it may break .eh_frame section 3101 // needed in unwinding. 3102 return F.needsUnwindTableEntry() ? outliner::InstrType::Illegal 3103 : outliner::InstrType::Invisible; 3104 3105 if (cannotInsertTailCall(*MBB) && 3106 (MI.isReturn() || isMIModifiesReg(MI, TRI, RISCV::X5))) 3107 return outliner::InstrType::Illegal; 3108 3109 // Make sure the operands don't reference something unsafe. 3110 for (const auto &MO : MI.operands()) { 3111 3112 // pcrel-hi and pcrel-lo can't put in separate sections, filter that out 3113 // if any possible. 3114 if (MO.getTargetFlags() == RISCVII::MO_PCREL_LO && 3115 (MI.getMF()->getTarget().getFunctionSections() || F.hasComdat() || 3116 F.hasSection() || F.getSectionPrefix())) 3117 return outliner::InstrType::Illegal; 3118 } 3119 3120 return outliner::InstrType::Legal; 3121 } 3122 3123 void RISCVInstrInfo::buildOutlinedFrame( 3124 MachineBasicBlock &MBB, MachineFunction &MF, 3125 const outliner::OutlinedFunction &OF) const { 3126 3127 // Strip out any CFI instructions 3128 bool Changed = true; 3129 while (Changed) { 3130 Changed = false; 3131 auto I = MBB.begin(); 3132 auto E = MBB.end(); 3133 for (; I != E; ++I) { 3134 if (I->isCFIInstruction()) { 3135 I->removeFromParent(); 3136 Changed = true; 3137 break; 3138 } 3139 } 3140 } 3141 3142 if (OF.FrameConstructionID == MachineOutlinerTailCall) 3143 return; 3144 3145 MBB.addLiveIn(RISCV::X5); 3146 3147 // Add in a return instruction to the end of the outlined frame. 3148 MBB.insert(MBB.end(), BuildMI(MF, DebugLoc(), get(RISCV::JALR)) 3149 .addReg(RISCV::X0, RegState::Define) 3150 .addReg(RISCV::X5) 3151 .addImm(0)); 3152 } 3153 3154 MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall( 3155 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, 3156 MachineFunction &MF, outliner::Candidate &C) const { 3157 3158 if (C.CallConstructionID == MachineOutlinerTailCall) { 3159 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(RISCV::PseudoTAIL)) 3160 .addGlobalAddress(M.getNamedValue(MF.getName()), 3161 /*Offset=*/0, RISCVII::MO_CALL)); 3162 return It; 3163 } 3164 3165 // Add in a call instruction to the outlined function at the given location. 3166 It = MBB.insert(It, 3167 BuildMI(MF, DebugLoc(), get(RISCV::PseudoCALLReg), RISCV::X5) 3168 .addGlobalAddress(M.getNamedValue(MF.getName()), 0, 3169 RISCVII::MO_CALL)); 3170 return It; 3171 } 3172 3173 std::optional<RegImmPair> RISCVInstrInfo::isAddImmediate(const MachineInstr &MI, 3174 Register Reg) const { 3175 // TODO: Handle cases where Reg is a super- or sub-register of the 3176 // destination register. 3177 const MachineOperand &Op0 = MI.getOperand(0); 3178 if (!Op0.isReg() || Reg != Op0.getReg()) 3179 return std::nullopt; 3180 3181 // Don't consider ADDIW as a candidate because the caller may not be aware 3182 // of its sign extension behaviour. 3183 if (MI.getOpcode() == RISCV::ADDI && MI.getOperand(1).isReg() && 3184 MI.getOperand(2).isImm()) 3185 return RegImmPair{MI.getOperand(1).getReg(), MI.getOperand(2).getImm()}; 3186 3187 return std::nullopt; 3188 } 3189 3190 // MIR printer helper function to annotate Operands with a comment. 3191 std::string RISCVInstrInfo::createMIROperandComment( 3192 const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx, 3193 const TargetRegisterInfo *TRI) const { 3194 // Print a generic comment for this operand if there is one. 
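// (For the RVV-specific operand types handled below this yields comments such
// as "e32, m1, ta, ma" for a vtype immediate, "e32" for a SEW operand, or
// "ta, ma" for a policy operand; the concrete values are only illustrative.)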
3195 std::string GenericComment = 3196 TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI); 3197 if (!GenericComment.empty()) 3198 return GenericComment; 3199 3200 // If not, we must have an immediate operand. 3201 if (!Op.isImm()) 3202 return std::string(); 3203 3204 const MCInstrDesc &Desc = MI.getDesc(); 3205 if (OpIdx >= Desc.getNumOperands()) 3206 return std::string(); 3207 3208 std::string Comment; 3209 raw_string_ostream OS(Comment); 3210 3211 const MCOperandInfo &OpInfo = Desc.operands()[OpIdx]; 3212 3213 // Print the full VType operand of vsetvli/vsetivli instructions, and the SEW 3214 // operand of vector codegen pseudos. 3215 switch (OpInfo.OperandType) { 3216 case RISCVOp::OPERAND_VTYPEI10: 3217 case RISCVOp::OPERAND_VTYPEI11: { 3218 unsigned Imm = Op.getImm(); 3219 RISCVVType::printVType(Imm, OS); 3220 break; 3221 } 3222 case RISCVOp::OPERAND_SEW: 3223 case RISCVOp::OPERAND_SEW_MASK: { 3224 unsigned Log2SEW = Op.getImm(); 3225 unsigned SEW = Log2SEW ? 1 << Log2SEW : 8; 3226 assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW"); 3227 OS << "e" << SEW; 3228 break; 3229 } 3230 case RISCVOp::OPERAND_VEC_POLICY: 3231 unsigned Policy = Op.getImm(); 3232 assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) && 3233 "Invalid Policy Value"); 3234 OS << (Policy & RISCVII::TAIL_AGNOSTIC ? "ta" : "tu") << ", " 3235 << (Policy & RISCVII::MASK_AGNOSTIC ? "ma" : "mu"); 3236 break; 3237 } 3238 3239 return Comment; 3240 } 3241 3242 // clang-format off 3243 #define CASE_RVV_OPCODE_UNMASK_LMUL(OP, LMUL) \ 3244 RISCV::Pseudo##OP##_##LMUL 3245 3246 #define CASE_RVV_OPCODE_MASK_LMUL(OP, LMUL) \ 3247 RISCV::Pseudo##OP##_##LMUL##_MASK 3248 3249 #define CASE_RVV_OPCODE_LMUL(OP, LMUL) \ 3250 CASE_RVV_OPCODE_UNMASK_LMUL(OP, LMUL): \ 3251 case CASE_RVV_OPCODE_MASK_LMUL(OP, LMUL) 3252 3253 #define CASE_RVV_OPCODE_UNMASK_WIDEN(OP) \ 3254 CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF8): \ 3255 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF4): \ 3256 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF2): \ 3257 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M1): \ 3258 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M2): \ 3259 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M4) 3260 3261 #define CASE_RVV_OPCODE_UNMASK(OP) \ 3262 CASE_RVV_OPCODE_UNMASK_WIDEN(OP): \ 3263 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M8) 3264 3265 #define CASE_RVV_OPCODE_MASK_WIDEN(OP) \ 3266 CASE_RVV_OPCODE_MASK_LMUL(OP, MF8): \ 3267 case CASE_RVV_OPCODE_MASK_LMUL(OP, MF4): \ 3268 case CASE_RVV_OPCODE_MASK_LMUL(OP, MF2): \ 3269 case CASE_RVV_OPCODE_MASK_LMUL(OP, M1): \ 3270 case CASE_RVV_OPCODE_MASK_LMUL(OP, M2): \ 3271 case CASE_RVV_OPCODE_MASK_LMUL(OP, M4) 3272 3273 #define CASE_RVV_OPCODE_MASK(OP) \ 3274 CASE_RVV_OPCODE_MASK_WIDEN(OP): \ 3275 case CASE_RVV_OPCODE_MASK_LMUL(OP, M8) 3276 3277 #define CASE_RVV_OPCODE_WIDEN(OP) \ 3278 CASE_RVV_OPCODE_UNMASK_WIDEN(OP): \ 3279 case CASE_RVV_OPCODE_MASK_WIDEN(OP) 3280 3281 #define CASE_RVV_OPCODE(OP) \ 3282 CASE_RVV_OPCODE_UNMASK(OP): \ 3283 case CASE_RVV_OPCODE_MASK(OP) 3284 // clang-format on 3285 3286 // clang-format off 3287 #define CASE_VMA_OPCODE_COMMON(OP, TYPE, LMUL) \ 3288 RISCV::PseudoV##OP##_##TYPE##_##LMUL 3289 3290 #define CASE_VMA_OPCODE_LMULS_M1(OP, TYPE) \ 3291 CASE_VMA_OPCODE_COMMON(OP, TYPE, M1): \ 3292 case CASE_VMA_OPCODE_COMMON(OP, TYPE, M2): \ 3293 case CASE_VMA_OPCODE_COMMON(OP, TYPE, M4): \ 3294 case CASE_VMA_OPCODE_COMMON(OP, TYPE, M8) 3295 3296 #define CASE_VMA_OPCODE_LMULS_MF2(OP, TYPE) \ 3297 CASE_VMA_OPCODE_COMMON(OP, TYPE, MF2): \ 3298 case CASE_VMA_OPCODE_LMULS_M1(OP, TYPE) 3299 3300 #define 
CASE_VMA_OPCODE_LMULS_MF4(OP, TYPE) \ 3301 CASE_VMA_OPCODE_COMMON(OP, TYPE, MF4): \ 3302 case CASE_VMA_OPCODE_LMULS_MF2(OP, TYPE) 3303 3304 #define CASE_VMA_OPCODE_LMULS(OP, TYPE) \ 3305 CASE_VMA_OPCODE_COMMON(OP, TYPE, MF8): \ 3306 case CASE_VMA_OPCODE_LMULS_MF4(OP, TYPE) 3307 3308 // VFMA instructions are SEW specific. 3309 #define CASE_VFMA_OPCODE_COMMON(OP, TYPE, LMUL, SEW) \ 3310 RISCV::PseudoV##OP##_##TYPE##_##LMUL##_##SEW 3311 3312 #define CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE, SEW) \ 3313 CASE_VFMA_OPCODE_COMMON(OP, TYPE, M1, SEW): \ 3314 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M2, SEW): \ 3315 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M4, SEW): \ 3316 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M8, SEW) 3317 3318 #define CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE, SEW) \ 3319 CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF2, SEW): \ 3320 case CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE, SEW) 3321 3322 #define CASE_VFMA_OPCODE_LMULS_MF4(OP, TYPE, SEW) \ 3323 CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF4, SEW): \ 3324 case CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE, SEW) 3325 3326 #define CASE_VFMA_OPCODE_VV(OP) \ 3327 CASE_VFMA_OPCODE_LMULS_MF4(OP, VV, E16): \ 3328 case CASE_VFMA_OPCODE_LMULS_MF2(OP, VV, E32): \ 3329 case CASE_VFMA_OPCODE_LMULS_M1(OP, VV, E64) 3330 3331 #define CASE_VFMA_SPLATS(OP) \ 3332 CASE_VFMA_OPCODE_LMULS_MF4(OP, VFPR16, E16): \ 3333 case CASE_VFMA_OPCODE_LMULS_MF2(OP, VFPR32, E32): \ 3334 case CASE_VFMA_OPCODE_LMULS_M1(OP, VFPR64, E64) 3335 // clang-format on 3336 3337 bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI, 3338 unsigned &SrcOpIdx1, 3339 unsigned &SrcOpIdx2) const { 3340 const MCInstrDesc &Desc = MI.getDesc(); 3341 if (!Desc.isCommutable()) 3342 return false; 3343 3344 switch (MI.getOpcode()) { 3345 case RISCV::TH_MVEQZ: 3346 case RISCV::TH_MVNEZ: 3347 // We can't commute operands if operand 2 (i.e., rs1 in 3348 // mveqz/mvnez rd,rs1,rs2) is the zero-register (as it is 3349 // not valid as the in/out-operand 1). 3350 if (MI.getOperand(2).getReg() == RISCV::X0) 3351 return false; 3352 // Operands 1 and 2 are commutable, if we switch the opcode. 3353 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1, 2); 3354 case RISCV::TH_MULA: 3355 case RISCV::TH_MULAW: 3356 case RISCV::TH_MULAH: 3357 case RISCV::TH_MULS: 3358 case RISCV::TH_MULSW: 3359 case RISCV::TH_MULSH: 3360 // Operands 2 and 3 are commutable. 3361 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3); 3362 case RISCV::PseudoCCMOVGPRNoX0: 3363 case RISCV::PseudoCCMOVGPR: 3364 // Operands 4 and 5 are commutable. 
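// (These are the two data operands the conditional move selects between; the
// comparison operands and the condition code are not touched here.)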
3365 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 4, 5); 3366 case CASE_RVV_OPCODE(VADD_VV): 3367 case CASE_RVV_OPCODE(VAND_VV): 3368 case CASE_RVV_OPCODE(VOR_VV): 3369 case CASE_RVV_OPCODE(VXOR_VV): 3370 case CASE_RVV_OPCODE_MASK(VMSEQ_VV): 3371 case CASE_RVV_OPCODE_MASK(VMSNE_VV): 3372 case CASE_RVV_OPCODE(VMIN_VV): 3373 case CASE_RVV_OPCODE(VMINU_VV): 3374 case CASE_RVV_OPCODE(VMAX_VV): 3375 case CASE_RVV_OPCODE(VMAXU_VV): 3376 case CASE_RVV_OPCODE(VMUL_VV): 3377 case CASE_RVV_OPCODE(VMULH_VV): 3378 case CASE_RVV_OPCODE(VMULHU_VV): 3379 case CASE_RVV_OPCODE_WIDEN(VWADD_VV): 3380 case CASE_RVV_OPCODE_WIDEN(VWADDU_VV): 3381 case CASE_RVV_OPCODE_WIDEN(VWMUL_VV): 3382 case CASE_RVV_OPCODE_WIDEN(VWMULU_VV): 3383 case CASE_RVV_OPCODE_WIDEN(VWMACC_VV): 3384 case CASE_RVV_OPCODE_WIDEN(VWMACCU_VV): 3385 case CASE_RVV_OPCODE_UNMASK(VADC_VVM): 3386 case CASE_RVV_OPCODE(VSADD_VV): 3387 case CASE_RVV_OPCODE(VSADDU_VV): 3388 case CASE_RVV_OPCODE(VAADD_VV): 3389 case CASE_RVV_OPCODE(VAADDU_VV): 3390 case CASE_RVV_OPCODE(VSMUL_VV): 3391 // Operands 2 and 3 are commutable. 3392 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3); 3393 case CASE_VFMA_SPLATS(FMADD): 3394 case CASE_VFMA_SPLATS(FMSUB): 3395 case CASE_VFMA_SPLATS(FMACC): 3396 case CASE_VFMA_SPLATS(FMSAC): 3397 case CASE_VFMA_SPLATS(FNMADD): 3398 case CASE_VFMA_SPLATS(FNMSUB): 3399 case CASE_VFMA_SPLATS(FNMACC): 3400 case CASE_VFMA_SPLATS(FNMSAC): 3401 case CASE_VFMA_OPCODE_VV(FMACC): 3402 case CASE_VFMA_OPCODE_VV(FMSAC): 3403 case CASE_VFMA_OPCODE_VV(FNMACC): 3404 case CASE_VFMA_OPCODE_VV(FNMSAC): 3405 case CASE_VMA_OPCODE_LMULS(MADD, VX): 3406 case CASE_VMA_OPCODE_LMULS(NMSUB, VX): 3407 case CASE_VMA_OPCODE_LMULS(MACC, VX): 3408 case CASE_VMA_OPCODE_LMULS(NMSAC, VX): 3409 case CASE_VMA_OPCODE_LMULS(MACC, VV): 3410 case CASE_VMA_OPCODE_LMULS(NMSAC, VV): { 3411 // If the tail policy is undisturbed we can't commute. 3412 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags)); 3413 if ((MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 1) == 0) 3414 return false; 3415 3416 // For these instructions we can only swap operand 1 and operand 3 by 3417 // changing the opcode. 3418 unsigned CommutableOpIdx1 = 1; 3419 unsigned CommutableOpIdx2 = 3; 3420 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1, 3421 CommutableOpIdx2)) 3422 return false; 3423 return true; 3424 } 3425 case CASE_VFMA_OPCODE_VV(FMADD): 3426 case CASE_VFMA_OPCODE_VV(FMSUB): 3427 case CASE_VFMA_OPCODE_VV(FNMADD): 3428 case CASE_VFMA_OPCODE_VV(FNMSUB): 3429 case CASE_VMA_OPCODE_LMULS(MADD, VV): 3430 case CASE_VMA_OPCODE_LMULS(NMSUB, VV): { 3431 // If the tail policy is undisturbed we can't commute. 3432 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags)); 3433 if ((MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 1) == 0) 3434 return false; 3435 3436 // For these instructions we have more freedom. We can commute with the 3437 // other multiplicand or with the addend/subtrahend/minuend. 3438 3439 // Any fixed operand must be from source 1, 2 or 3. 3440 if (SrcOpIdx1 != CommuteAnyOperandIndex && SrcOpIdx1 > 3) 3441 return false; 3442 if (SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx2 > 3) 3443 return false; 3444 3445 // If both ops are fixed, one must be the tied source. 3446 if (SrcOpIdx1 != CommuteAnyOperandIndex && 3447 SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx1 != 1 && SrcOpIdx2 != 1) 3448 return false; 3449 3450 // Look for two different register operands assumed to be commutable 3451 // regardless of the FMA opcode.
The FMA opcode is adjusted later if 3452 // needed. 3453 if (SrcOpIdx1 == CommuteAnyOperandIndex || 3454 SrcOpIdx2 == CommuteAnyOperandIndex) { 3455 // At least one of the operands to be commuted is not specified and 3456 // this method is free to choose appropriate commutable operands. 3457 unsigned CommutableOpIdx1 = SrcOpIdx1; 3458 if (SrcOpIdx1 == SrcOpIdx2) { 3459 // Neither of the operands is fixed. Set one of the commutable 3460 // operands to the tied source. 3461 CommutableOpIdx1 = 1; 3462 } else if (SrcOpIdx1 == CommuteAnyOperandIndex) { 3463 // Only one of the operands is not fixed. 3464 CommutableOpIdx1 = SrcOpIdx2; 3465 } 3466 3467 // CommutableOpIdx1 is well defined now. Let's choose another commutable 3468 // operand and assign its index to CommutableOpIdx2. 3469 unsigned CommutableOpIdx2; 3470 if (CommutableOpIdx1 != 1) { 3471 // If we haven't already used the tied source, we must use it now. 3472 CommutableOpIdx2 = 1; 3473 } else { 3474 Register Op1Reg = MI.getOperand(CommutableOpIdx1).getReg(); 3475 3476 // The commuted operands should have different registers. 3477 // Otherwise, the commute transformation does not change anything and 3478 // is useless. We use this as a hint to make our decision. 3479 if (Op1Reg != MI.getOperand(2).getReg()) 3480 CommutableOpIdx2 = 2; 3481 else 3482 CommutableOpIdx2 = 3; 3483 } 3484 3485 // Assign the found pair of commutable indices to SrcOpIdx1 and 3486 // SrcOpIdx2 to return those values. 3487 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1, 3488 CommutableOpIdx2)) 3489 return false; 3490 } 3491 3492 return true; 3493 } 3494 } 3495 3496 return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); 3497 } 3498 3499 // clang-format off 3500 #define CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL) \ 3501 case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL: \ 3502 Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL; \ 3503 break; 3504 3505 #define CASE_VMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE) \ 3506 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1) \ 3507 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2) \ 3508 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4) \ 3509 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8) 3510 3511 #define CASE_VMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE) \ 3512 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2) \ 3513 CASE_VMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE) 3514 3515 #define CASE_VMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE) \ 3516 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4) \ 3517 CASE_VMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE) 3518 3519 #define CASE_VMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE) \ 3520 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8) \ 3521 CASE_VMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE) 3522 3523 #define CASE_VMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \ 3524 CASE_VMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16) \ 3525 CASE_VMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32) \ 3526 CASE_VMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64) 3527 3528 // VFMA depends on SEW.
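// For example, CASE_VFMA_CHANGE_OPCODE_COMMON(FMADD, FMACC, VV, M1, E32) expands to a case that rewrites RISCV::PseudoVFMADD_VV_M1_E32 into RISCV::PseudoVFMACC_VV_M1_E32.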
3529 #define CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL, SEW) \ 3530 case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL##_##SEW: \ 3531 Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL##_##SEW; \ 3532 break; 3533 3534 #define CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE, SEW) \ 3535 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1, SEW) \ 3536 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2, SEW) \ 3537 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4, SEW) \ 3538 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8, SEW) 3539 3540 #define CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE, SEW) \ 3541 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2, SEW) \ 3542 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE, SEW) 3543 3544 #define CASE_VFMA_CHANGE_OPCODE_VV(OLDOP, NEWOP) \ 3545 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VV, E16) \ 3546 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VV, E32) \ 3547 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VV, E64) 3548 3549 #define CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE, SEW) \ 3550 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4, SEW) \ 3551 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE, SEW) 3552 3553 #define CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE, SEW) \ 3554 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8, SEW) \ 3555 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE, SEW) 3556 3557 #define CASE_VFMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \ 3558 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16, E16) \ 3559 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32, E32) \ 3560 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64, E64) 3561 3562 MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI, 3563 bool NewMI, 3564 unsigned OpIdx1, 3565 unsigned OpIdx2) const { 3566 auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & { 3567 if (NewMI) 3568 return *MI.getParent()->getParent()->CloneMachineInstr(&MI); 3569 return MI; 3570 }; 3571 3572 switch (MI.getOpcode()) { 3573 case RISCV::TH_MVEQZ: 3574 case RISCV::TH_MVNEZ: { 3575 auto &WorkingMI = cloneIfNew(MI); 3576 WorkingMI.setDesc(get(MI.getOpcode() == RISCV::TH_MVEQZ ? RISCV::TH_MVNEZ 3577 : RISCV::TH_MVEQZ)); 3578 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, false, OpIdx1, 3579 OpIdx2); 3580 } 3581 case RISCV::PseudoCCMOVGPRNoX0: 3582 case RISCV::PseudoCCMOVGPR: { 3583 // CCMOV can be commuted by inverting the condition. 
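// Swapping the two data operands yields the same select as long as the condition code stored in operand 3 is replaced by its opposite, which is what happens below.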
3584 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm()); 3585 CC = RISCVCC::getOppositeBranchCondition(CC); 3586 auto &WorkingMI = cloneIfNew(MI); 3587 WorkingMI.getOperand(3).setImm(CC); 3588 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI*/ false, 3589 OpIdx1, OpIdx2); 3590 } 3591 case CASE_VFMA_SPLATS(FMACC): 3592 case CASE_VFMA_SPLATS(FMADD): 3593 case CASE_VFMA_SPLATS(FMSAC): 3594 case CASE_VFMA_SPLATS(FMSUB): 3595 case CASE_VFMA_SPLATS(FNMACC): 3596 case CASE_VFMA_SPLATS(FNMADD): 3597 case CASE_VFMA_SPLATS(FNMSAC): 3598 case CASE_VFMA_SPLATS(FNMSUB): 3599 case CASE_VFMA_OPCODE_VV(FMACC): 3600 case CASE_VFMA_OPCODE_VV(FMSAC): 3601 case CASE_VFMA_OPCODE_VV(FNMACC): 3602 case CASE_VFMA_OPCODE_VV(FNMSAC): 3603 case CASE_VMA_OPCODE_LMULS(MADD, VX): 3604 case CASE_VMA_OPCODE_LMULS(NMSUB, VX): 3605 case CASE_VMA_OPCODE_LMULS(MACC, VX): 3606 case CASE_VMA_OPCODE_LMULS(NMSAC, VX): 3607 case CASE_VMA_OPCODE_LMULS(MACC, VV): 3608 case CASE_VMA_OPCODE_LMULS(NMSAC, VV): { 3609 // It only makes sense to toggle these between clobbering the 3610 // addend/subtrahend/minuend and one of the multiplicands. 3611 assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index"); 3612 assert((OpIdx1 == 3 || OpIdx2 == 3) && "Unexpected opcode index"); 3613 unsigned Opc; 3614 switch (MI.getOpcode()) { 3615 default: 3616 llvm_unreachable("Unexpected opcode"); 3617 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMACC, FMADD) 3618 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMADD, FMACC) 3619 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSAC, FMSUB) 3620 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSUB, FMSAC) 3621 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMACC, FNMADD) 3622 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMADD, FNMACC) 3623 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSAC, FNMSUB) 3624 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSUB, FNMSAC) 3625 CASE_VFMA_CHANGE_OPCODE_VV(FMACC, FMADD) 3626 CASE_VFMA_CHANGE_OPCODE_VV(FMSAC, FMSUB) 3627 CASE_VFMA_CHANGE_OPCODE_VV(FNMACC, FNMADD) 3628 CASE_VFMA_CHANGE_OPCODE_VV(FNMSAC, FNMSUB) 3629 CASE_VMA_CHANGE_OPCODE_LMULS(MACC, MADD, VX) 3630 CASE_VMA_CHANGE_OPCODE_LMULS(MADD, MACC, VX) 3631 CASE_VMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VX) 3632 CASE_VMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VX) 3633 CASE_VMA_CHANGE_OPCODE_LMULS(MACC, MADD, VV) 3634 CASE_VMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VV) 3635 } 3636 3637 auto &WorkingMI = cloneIfNew(MI); 3638 WorkingMI.setDesc(get(Opc)); 3639 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, 3640 OpIdx1, OpIdx2); 3641 } 3642 case CASE_VFMA_OPCODE_VV(FMADD): 3643 case CASE_VFMA_OPCODE_VV(FMSUB): 3644 case CASE_VFMA_OPCODE_VV(FNMADD): 3645 case CASE_VFMA_OPCODE_VV(FNMSUB): 3646 case CASE_VMA_OPCODE_LMULS(MADD, VV): 3647 case CASE_VMA_OPCODE_LMULS(NMSUB, VV): { 3648 assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index"); 3649 // If one of the operands is the addend, we need to change the opcode. 3650 // Otherwise we're just swapping two of the multiplicands.
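// For example, vmacc.vv vd, vs1, vs2 computes (vs1 * vs2) + vd (vd is the addend), while vmadd.vv vd, vs1, vs2 computes (vd * vs1) + vs2 (vs2 is the addend), so exchanging vd with the addend switches between the MACC and MADD forms.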
3651 if (OpIdx1 == 3 || OpIdx2 == 3) { 3652 unsigned Opc; 3653 switch (MI.getOpcode()) { 3654 default: 3655 llvm_unreachable("Unexpected opcode"); 3656 CASE_VFMA_CHANGE_OPCODE_VV(FMADD, FMACC) 3657 CASE_VFMA_CHANGE_OPCODE_VV(FMSUB, FMSAC) 3658 CASE_VFMA_CHANGE_OPCODE_VV(FNMADD, FNMACC) 3659 CASE_VFMA_CHANGE_OPCODE_VV(FNMSUB, FNMSAC) 3660 CASE_VMA_CHANGE_OPCODE_LMULS(MADD, MACC, VV) 3661 CASE_VMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VV) 3662 } 3663 3664 auto &WorkingMI = cloneIfNew(MI); 3665 WorkingMI.setDesc(get(Opc)); 3666 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, 3667 OpIdx1, OpIdx2); 3668 } 3669 // Let the default code handle it. 3670 break; 3671 } 3672 } 3673 3674 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); 3675 } 3676 3677 #undef CASE_RVV_OPCODE_UNMASK_LMUL 3678 #undef CASE_RVV_OPCODE_MASK_LMUL 3679 #undef CASE_RVV_OPCODE_LMUL 3680 #undef CASE_RVV_OPCODE_UNMASK_WIDEN 3681 #undef CASE_RVV_OPCODE_UNMASK 3682 #undef CASE_RVV_OPCODE_MASK_WIDEN 3683 #undef CASE_RVV_OPCODE_MASK 3684 #undef CASE_RVV_OPCODE_WIDEN 3685 #undef CASE_RVV_OPCODE 3686 3687 #undef CASE_VMA_OPCODE_COMMON 3688 #undef CASE_VMA_OPCODE_LMULS_M1 3689 #undef CASE_VMA_OPCODE_LMULS_MF2 3690 #undef CASE_VMA_OPCODE_LMULS_MF4 3691 #undef CASE_VMA_OPCODE_LMULS 3692 #undef CASE_VFMA_OPCODE_COMMON 3693 #undef CASE_VFMA_OPCODE_LMULS_M1 3694 #undef CASE_VFMA_OPCODE_LMULS_MF2 3695 #undef CASE_VFMA_OPCODE_LMULS_MF4 3696 #undef CASE_VFMA_OPCODE_VV 3697 #undef CASE_VFMA_SPLATS 3698 3699 // clang-format off 3700 #define CASE_WIDEOP_OPCODE_COMMON(OP, LMUL) \ 3701 RISCV::PseudoV##OP##_##LMUL##_TIED 3702 3703 #define CASE_WIDEOP_OPCODE_LMULS_MF4(OP) \ 3704 CASE_WIDEOP_OPCODE_COMMON(OP, MF4): \ 3705 case CASE_WIDEOP_OPCODE_COMMON(OP, MF2): \ 3706 case CASE_WIDEOP_OPCODE_COMMON(OP, M1): \ 3707 case CASE_WIDEOP_OPCODE_COMMON(OP, M2): \ 3708 case CASE_WIDEOP_OPCODE_COMMON(OP, M4) 3709 3710 #define CASE_WIDEOP_OPCODE_LMULS(OP) \ 3711 CASE_WIDEOP_OPCODE_COMMON(OP, MF8): \ 3712 case CASE_WIDEOP_OPCODE_LMULS_MF4(OP) 3713 3714 #define CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL) \ 3715 case RISCV::PseudoV##OP##_##LMUL##_TIED: \ 3716 NewOpc = RISCV::PseudoV##OP##_##LMUL; \ 3717 break; 3718 3719 #define CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP) \ 3720 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4) \ 3721 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2) \ 3722 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1) \ 3723 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2) \ 3724 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4) 3725 3726 #define CASE_WIDEOP_CHANGE_OPCODE_LMULS(OP) \ 3727 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF8) \ 3728 CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP) 3729 3730 // FP widening ops may be SEW aware. Create SEW-aware cases for them.
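// For example, CASE_FP_WIDEOP_OPCODE_COMMON(FWADD_WV, M1, E16) expands to RISCV::PseudoVFWADD_WV_M1_E16_TIED.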
3731 #define CASE_FP_WIDEOP_OPCODE_COMMON(OP, LMUL, SEW) \ 3732 RISCV::PseudoV##OP##_##LMUL##_##SEW##_TIED 3733 3734 #define CASE_FP_WIDEOP_OPCODE_LMULS_MF4(OP) \ 3735 CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF4, E16): \ 3736 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF2, E16): \ 3737 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF2, E32): \ 3738 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M1, E16): \ 3739 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M1, E32): \ 3740 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M2, E16): \ 3741 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M2, E32): \ 3742 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M4, E16): \ 3743 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M4, E32) \ 3744 3745 #define CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL, SEW) \ 3746 case RISCV::PseudoV##OP##_##LMUL##_##SEW##_TIED: \ 3747 NewOpc = RISCV::PseudoV##OP##_##LMUL##_##SEW; \ 3748 break; 3749 3750 #define CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP) \ 3751 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4, E16) \ 3752 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2, E16) \ 3753 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2, E32) \ 3754 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1, E16) \ 3755 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1, E32) \ 3756 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E16) \ 3757 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E32) \ 3758 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E16) \ 3759 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E32) \ 3760 3761 #define CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS(OP) \ 3762 CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP) 3763 // clang-format on 3764 3765 MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI, 3766 LiveVariables *LV, 3767 LiveIntervals *LIS) const { 3768 MachineInstrBuilder MIB; 3769 switch (MI.getOpcode()) { 3770 default: 3771 return nullptr; 3772 case CASE_FP_WIDEOP_OPCODE_LMULS_MF4(FWADD_WV): 3773 case CASE_FP_WIDEOP_OPCODE_LMULS_MF4(FWSUB_WV): { 3774 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) && 3775 MI.getNumExplicitOperands() == 7 && 3776 "Expect 7 explicit operands rd, rs2, rs1, rm, vl, sew, policy"); 3777 // If the tail policy is undisturbed we can't convert. 3778 if ((MI.getOperand(RISCVII::getVecPolicyOpNum(MI.getDesc())).getImm() & 3779 1) == 0) 3780 return nullptr; 3781 // clang-format off 3782 unsigned NewOpc; 3783 switch (MI.getOpcode()) { 3784 default: 3785 llvm_unreachable("Unexpected opcode"); 3786 CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWADD_WV) 3787 CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWSUB_WV) 3788 } 3789 // clang-format on 3790 3791 MachineBasicBlock &MBB = *MI.getParent(); 3792 MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc)) 3793 .add(MI.getOperand(0)) 3794 .addReg(MI.getOperand(0).getReg(), RegState::Undef) 3795 .add(MI.getOperand(1)) 3796 .add(MI.getOperand(2)) 3797 .add(MI.getOperand(3)) 3798 .add(MI.getOperand(4)) 3799 .add(MI.getOperand(5)) 3800 .add(MI.getOperand(6)); 3801 break; 3802 } 3803 case CASE_WIDEOP_OPCODE_LMULS(WADD_WV): 3804 case CASE_WIDEOP_OPCODE_LMULS(WADDU_WV): 3805 case CASE_WIDEOP_OPCODE_LMULS(WSUB_WV): 3806 case CASE_WIDEOP_OPCODE_LMULS(WSUBU_WV): { 3807 // If the tail policy is undisturbed we can't convert. 
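// The untied form built below passes an undef passthru operand, so tail elements are not preserved; that is only legal when the tail policy bit (bit 0 of the policy operand) is agnostic.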
3808 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) && 3809 MI.getNumExplicitOperands() == 6); 3810 if ((MI.getOperand(5).getImm() & 1) == 0) 3811 return nullptr; 3812 3813 // clang-format off 3814 unsigned NewOpc; 3815 switch (MI.getOpcode()) { 3816 default: 3817 llvm_unreachable("Unexpected opcode"); 3818 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADD_WV) 3819 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADDU_WV) 3820 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUB_WV) 3821 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUBU_WV) 3822 } 3823 // clang-format on 3824 3825 MachineBasicBlock &MBB = *MI.getParent(); 3826 MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc)) 3827 .add(MI.getOperand(0)) 3828 .addReg(MI.getOperand(0).getReg(), RegState::Undef) 3829 .add(MI.getOperand(1)) 3830 .add(MI.getOperand(2)) 3831 .add(MI.getOperand(3)) 3832 .add(MI.getOperand(4)) 3833 .add(MI.getOperand(5)); 3834 break; 3835 } 3836 } 3837 MIB.copyImplicitOps(MI); 3838 3839 if (LV) { 3840 unsigned NumOps = MI.getNumOperands(); 3841 for (unsigned I = 1; I < NumOps; ++I) { 3842 MachineOperand &Op = MI.getOperand(I); 3843 if (Op.isReg() && Op.isKill()) 3844 LV->replaceKillInstruction(Op.getReg(), MI, *MIB); 3845 } 3846 } 3847 3848 if (LIS) { 3849 SlotIndex Idx = LIS->ReplaceMachineInstrInMaps(MI, *MIB); 3850 3851 if (MI.getOperand(0).isEarlyClobber()) { 3852 // The source in operand 1 was tied to the early-clobber def (operand 0), so 3853 // its live interval could have ended at an early-clobber slot. Now that they 3854 // are no longer tied, we need to update it to the normal register slot. 3855 LiveInterval &LI = LIS->getInterval(MI.getOperand(1).getReg()); 3856 LiveRange::Segment *S = LI.getSegmentContaining(Idx); 3857 if (S->end == Idx.getRegSlot(true)) 3858 S->end = Idx.getRegSlot(); 3859 } 3860 } 3861 3862 return MIB; 3863 } 3864 3865 #undef CASE_WIDEOP_OPCODE_COMMON 3866 #undef CASE_WIDEOP_OPCODE_LMULS_MF4 3867 #undef CASE_WIDEOP_OPCODE_LMULS 3868 #undef CASE_WIDEOP_CHANGE_OPCODE_COMMON 3869 #undef CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4 3870 #undef CASE_WIDEOP_CHANGE_OPCODE_LMULS 3871 #undef CASE_FP_WIDEOP_OPCODE_COMMON 3872 #undef CASE_FP_WIDEOP_OPCODE_LMULS_MF4 3873 #undef CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON 3874 #undef CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4 3875 #undef CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS 3876 3877 void RISCVInstrInfo::mulImm(MachineFunction &MF, MachineBasicBlock &MBB, 3878 MachineBasicBlock::iterator II, const DebugLoc &DL, 3879 Register DestReg, uint32_t Amount, 3880 MachineInstr::MIFlag Flag) const { 3881 MachineRegisterInfo &MRI = MF.getRegInfo(); 3882 if (llvm::has_single_bit<uint32_t>(Amount)) { 3883 uint32_t ShiftAmount = Log2_32(Amount); 3884 if (ShiftAmount == 0) 3885 return; 3886 BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg) 3887 .addReg(DestReg, RegState::Kill) 3888 .addImm(ShiftAmount) 3889 .setMIFlag(Flag); 3890 } else if (STI.hasStdExtZba() && 3891 ((Amount % 3 == 0 && isPowerOf2_64(Amount / 3)) || 3892 (Amount % 5 == 0 && isPowerOf2_64(Amount / 5)) || 3893 (Amount % 9 == 0 && isPowerOf2_64(Amount / 9)))) { 3894 // We can use Zba SHXADD+SLLI instructions for multiply in some cases.
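// For example, Amount == 20 == 5 * 4 (writing x for the original value of DestReg): ShiftAmount is Log2_64(20 / 5) == 2, so the SLLI below first scales DestReg to 4*x, and SH2ADD then computes (4*x << 2) + 4*x == 20*x.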
3895 unsigned Opc; 3896 uint32_t ShiftAmount; 3897 if (Amount % 9 == 0) { 3898 Opc = RISCV::SH3ADD; 3899 ShiftAmount = Log2_64(Amount / 9); 3900 } else if (Amount % 5 == 0) { 3901 Opc = RISCV::SH2ADD; 3902 ShiftAmount = Log2_64(Amount / 5); 3903 } else if (Amount % 3 == 0) { 3904 Opc = RISCV::SH1ADD; 3905 ShiftAmount = Log2_64(Amount / 3); 3906 } else { 3907 llvm_unreachable("implied by if-clause"); 3908 } 3909 if (ShiftAmount) 3910 BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg) 3911 .addReg(DestReg, RegState::Kill) 3912 .addImm(ShiftAmount) 3913 .setMIFlag(Flag); 3914 BuildMI(MBB, II, DL, get(Opc), DestReg) 3915 .addReg(DestReg, RegState::Kill) 3916 .addReg(DestReg) 3917 .setMIFlag(Flag); 3918 } else if (llvm::has_single_bit<uint32_t>(Amount - 1)) { 3919 Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass); 3920 uint32_t ShiftAmount = Log2_32(Amount - 1); 3921 BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister) 3922 .addReg(DestReg) 3923 .addImm(ShiftAmount) 3924 .setMIFlag(Flag); 3925 BuildMI(MBB, II, DL, get(RISCV::ADD), DestReg) 3926 .addReg(ScaledRegister, RegState::Kill) 3927 .addReg(DestReg, RegState::Kill) 3928 .setMIFlag(Flag); 3929 } else if (llvm::has_single_bit<uint32_t>(Amount + 1)) { 3930 Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass); 3931 uint32_t ShiftAmount = Log2_32(Amount + 1); 3932 BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister) 3933 .addReg(DestReg) 3934 .addImm(ShiftAmount) 3935 .setMIFlag(Flag); 3936 BuildMI(MBB, II, DL, get(RISCV::SUB), DestReg) 3937 .addReg(ScaledRegister, RegState::Kill) 3938 .addReg(DestReg, RegState::Kill) 3939 .setMIFlag(Flag); 3940 } else if (STI.hasStdExtZmmul()) { 3941 Register N = MRI.createVirtualRegister(&RISCV::GPRRegClass); 3942 movImm(MBB, II, DL, N, Amount, Flag); 3943 BuildMI(MBB, II, DL, get(RISCV::MUL), DestReg) 3944 .addReg(DestReg, RegState::Kill) 3945 .addReg(N, RegState::Kill) 3946 .setMIFlag(Flag); 3947 } else { 3948 Register Acc; 3949 uint32_t PrevShiftAmount = 0; 3950 for (uint32_t ShiftAmount = 0; Amount >> ShiftAmount; ShiftAmount++) { 3951 if (Amount & (1U << ShiftAmount)) { 3952 if (ShiftAmount) 3953 BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg) 3954 .addReg(DestReg, RegState::Kill) 3955 .addImm(ShiftAmount - PrevShiftAmount) 3956 .setMIFlag(Flag); 3957 if (Amount >> (ShiftAmount + 1)) { 3958 // If we don't have an accumulator yet, create it and copy DestReg.
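// The copy captures the current shifted value so that DestReg can keep being shifted in place for the remaining set bits of Amount.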
3959 if (!Acc) { 3960 Acc = MRI.createVirtualRegister(&RISCV::GPRRegClass); 3961 BuildMI(MBB, II, DL, get(TargetOpcode::COPY), Acc) 3962 .addReg(DestReg) 3963 .setMIFlag(Flag); 3964 } else { 3965 BuildMI(MBB, II, DL, get(RISCV::ADD), Acc) 3966 .addReg(Acc, RegState::Kill) 3967 .addReg(DestReg) 3968 .setMIFlag(Flag); 3969 } 3970 } 3971 PrevShiftAmount = ShiftAmount; 3972 } 3973 } 3974 assert(Acc && "Expected valid accumulator"); 3975 BuildMI(MBB, II, DL, get(RISCV::ADD), DestReg) 3976 .addReg(DestReg, RegState::Kill) 3977 .addReg(Acc, RegState::Kill) 3978 .setMIFlag(Flag); 3979 } 3980 } 3981 3982 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>> 3983 RISCVInstrInfo::getSerializableMachineMemOperandTargetFlags() const { 3984 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] = 3985 {{MONontemporalBit0, "riscv-nontemporal-domain-bit-0"}, 3986 {MONontemporalBit1, "riscv-nontemporal-domain-bit-1"}}; 3987 return ArrayRef(TargetFlags); 3988 } 3989 3990 unsigned RISCVInstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const { 3991 return OptLevel >= CodeGenOptLevel::Aggressive 3992 ? STI.getTailDupAggressiveThreshold() 3993 : 2; 3994 } 3995 3996 // Returns true if this is the sext.w pattern, addiw rd, rs1, 0. 3997 bool RISCV::isSEXT_W(const MachineInstr &MI) { 3998 return MI.getOpcode() == RISCV::ADDIW && MI.getOperand(1).isReg() && 3999 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0; 4000 } 4001 4002 // Returns true if this is the zext.w pattern, adduw rd, rs1, x0. 4003 bool RISCV::isZEXT_W(const MachineInstr &MI) { 4004 return MI.getOpcode() == RISCV::ADD_UW && MI.getOperand(1).isReg() && 4005 MI.getOperand(2).isReg() && MI.getOperand(2).getReg() == RISCV::X0; 4006 } 4007 4008 // Returns true if this is the zext.b pattern, andi rd, rs1, 255. 4009 bool RISCV::isZEXT_B(const MachineInstr &MI) { 4010 return MI.getOpcode() == RISCV::ANDI && MI.getOperand(1).isReg() && 4011 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 255; 4012 } 4013 4014 static bool isRVVWholeLoadStore(unsigned Opcode) { 4015 switch (Opcode) { 4016 default: 4017 return false; 4018 case RISCV::VS1R_V: 4019 case RISCV::VS2R_V: 4020 case RISCV::VS4R_V: 4021 case RISCV::VS8R_V: 4022 case RISCV::VL1RE8_V: 4023 case RISCV::VL2RE8_V: 4024 case RISCV::VL4RE8_V: 4025 case RISCV::VL8RE8_V: 4026 case RISCV::VL1RE16_V: 4027 case RISCV::VL2RE16_V: 4028 case RISCV::VL4RE16_V: 4029 case RISCV::VL8RE16_V: 4030 case RISCV::VL1RE32_V: 4031 case RISCV::VL2RE32_V: 4032 case RISCV::VL4RE32_V: 4033 case RISCV::VL8RE32_V: 4034 case RISCV::VL1RE64_V: 4035 case RISCV::VL2RE64_V: 4036 case RISCV::VL4RE64_V: 4037 case RISCV::VL8RE64_V: 4038 return true; 4039 } 4040 } 4041 4042 bool RISCV::isRVVSpill(const MachineInstr &MI) { 4043 // RVV lacks any support for immediate addressing for stack addresses, so be 4044 // conservative. 
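// Anything matched below (an RVV pseudo, a whole-register vector load/store, or a zvlsseg spill/reload pseudo) is conservatively treated as an RVV stack access.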
4045 unsigned Opcode = MI.getOpcode(); 4046 if (!RISCVVPseudosTable::getPseudoInfo(Opcode) && 4047 !isRVVWholeLoadStore(Opcode) && !isRVVSpillForZvlsseg(Opcode)) 4048 return false; 4049 return true; 4050 } 4051 4052 std::optional<std::pair<unsigned, unsigned>> 4053 RISCV::isRVVSpillForZvlsseg(unsigned Opcode) { 4054 switch (Opcode) { 4055 default: 4056 return std::nullopt; 4057 case RISCV::PseudoVSPILL2_M1: 4058 case RISCV::PseudoVRELOAD2_M1: 4059 return std::make_pair(2u, 1u); 4060 case RISCV::PseudoVSPILL2_M2: 4061 case RISCV::PseudoVRELOAD2_M2: 4062 return std::make_pair(2u, 2u); 4063 case RISCV::PseudoVSPILL2_M4: 4064 case RISCV::PseudoVRELOAD2_M4: 4065 return std::make_pair(2u, 4u); 4066 case RISCV::PseudoVSPILL3_M1: 4067 case RISCV::PseudoVRELOAD3_M1: 4068 return std::make_pair(3u, 1u); 4069 case RISCV::PseudoVSPILL3_M2: 4070 case RISCV::PseudoVRELOAD3_M2: 4071 return std::make_pair(3u, 2u); 4072 case RISCV::PseudoVSPILL4_M1: 4073 case RISCV::PseudoVRELOAD4_M1: 4074 return std::make_pair(4u, 1u); 4075 case RISCV::PseudoVSPILL4_M2: 4076 case RISCV::PseudoVRELOAD4_M2: 4077 return std::make_pair(4u, 2u); 4078 case RISCV::PseudoVSPILL5_M1: 4079 case RISCV::PseudoVRELOAD5_M1: 4080 return std::make_pair(5u, 1u); 4081 case RISCV::PseudoVSPILL6_M1: 4082 case RISCV::PseudoVRELOAD6_M1: 4083 return std::make_pair(6u, 1u); 4084 case RISCV::PseudoVSPILL7_M1: 4085 case RISCV::PseudoVRELOAD7_M1: 4086 return std::make_pair(7u, 1u); 4087 case RISCV::PseudoVSPILL8_M1: 4088 case RISCV::PseudoVRELOAD8_M1: 4089 return std::make_pair(8u, 1u); 4090 } 4091 } 4092 4093 bool RISCV::isFaultFirstLoad(const MachineInstr &MI) { 4094 return MI.getNumExplicitDefs() == 2 && 4095 MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) && !MI.isInlineAsm(); 4096 } 4097 4098 bool RISCV::hasEqualFRM(const MachineInstr &MI1, const MachineInstr &MI2) { 4099 int16_t MI1FrmOpIdx = 4100 RISCV::getNamedOperandIdx(MI1.getOpcode(), RISCV::OpName::frm); 4101 int16_t MI2FrmOpIdx = 4102 RISCV::getNamedOperandIdx(MI2.getOpcode(), RISCV::OpName::frm); 4103 if (MI1FrmOpIdx < 0 || MI2FrmOpIdx < 0) 4104 return false; 4105 MachineOperand FrmOp1 = MI1.getOperand(MI1FrmOpIdx); 4106 MachineOperand FrmOp2 = MI2.getOperand(MI2FrmOpIdx); 4107 return FrmOp1.getImm() == FrmOp2.getImm(); 4108 } 4109 4110 std::optional<unsigned> 4111 RISCV::getVectorLowDemandedScalarBits(uint16_t Opcode, unsigned Log2SEW) { 4112 // TODO: Handle Zvbb instructions 4113 switch (Opcode) { 4114 default: 4115 return std::nullopt; 4116 4117 // 11.6. Vector Single-Width Shift Instructions 4118 case RISCV::VSLL_VX: 4119 case RISCV::VSRL_VX: 4120 case RISCV::VSRA_VX: 4121 // 12.4. Vector Single-Width Scaling Shift Instructions 4122 case RISCV::VSSRL_VX: 4123 case RISCV::VSSRA_VX: 4124 // Only the low lg2(SEW) bits of the shift-amount value are used. 4125 return Log2SEW; 4126 4127 // 11.7 Vector Narrowing Integer Right Shift Instructions 4128 case RISCV::VNSRL_WX: 4129 case RISCV::VNSRA_WX: 4130 // 12.5. Vector Narrowing Fixed-Point Clip Instructions 4131 case RISCV::VNCLIPU_WX: 4132 case RISCV::VNCLIP_WX: 4133 // Only the low lg2(2*SEW) bits of the shift-amount value are used. 4134 return Log2SEW + 1; 4135 4136 // 11.1. Vector Single-Width Integer Add and Subtract 4137 case RISCV::VADD_VX: 4138 case RISCV::VSUB_VX: 4139 case RISCV::VRSUB_VX: 4140 // 11.2. 
Vector Widening Integer Add/Subtract 4141 case RISCV::VWADDU_VX: 4142 case RISCV::VWSUBU_VX: 4143 case RISCV::VWADD_VX: 4144 case RISCV::VWSUB_VX: 4145 case RISCV::VWADDU_WX: 4146 case RISCV::VWSUBU_WX: 4147 case RISCV::VWADD_WX: 4148 case RISCV::VWSUB_WX: 4149 // 11.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions 4150 case RISCV::VADC_VXM: 4151 case RISCV::VADC_VIM: 4152 case RISCV::VMADC_VXM: 4153 case RISCV::VMADC_VIM: 4154 case RISCV::VMADC_VX: 4155 case RISCV::VSBC_VXM: 4156 case RISCV::VMSBC_VXM: 4157 case RISCV::VMSBC_VX: 4158 // 11.5 Vector Bitwise Logical Instructions 4159 case RISCV::VAND_VX: 4160 case RISCV::VOR_VX: 4161 case RISCV::VXOR_VX: 4162 // 11.8. Vector Integer Compare Instructions 4163 case RISCV::VMSEQ_VX: 4164 case RISCV::VMSNE_VX: 4165 case RISCV::VMSLTU_VX: 4166 case RISCV::VMSLT_VX: 4167 case RISCV::VMSLEU_VX: 4168 case RISCV::VMSLE_VX: 4169 case RISCV::VMSGTU_VX: 4170 case RISCV::VMSGT_VX: 4171 // 11.9. Vector Integer Min/Max Instructions 4172 case RISCV::VMINU_VX: 4173 case RISCV::VMIN_VX: 4174 case RISCV::VMAXU_VX: 4175 case RISCV::VMAX_VX: 4176 // 11.10. Vector Single-Width Integer Multiply Instructions 4177 case RISCV::VMUL_VX: 4178 case RISCV::VMULH_VX: 4179 case RISCV::VMULHU_VX: 4180 case RISCV::VMULHSU_VX: 4181 // 11.11. Vector Integer Divide Instructions 4182 case RISCV::VDIVU_VX: 4183 case RISCV::VDIV_VX: 4184 case RISCV::VREMU_VX: 4185 case RISCV::VREM_VX: 4186 // 11.12. Vector Widening Integer Multiply Instructions 4187 case RISCV::VWMUL_VX: 4188 case RISCV::VWMULU_VX: 4189 case RISCV::VWMULSU_VX: 4190 // 11.13. Vector Single-Width Integer Multiply-Add Instructions 4191 case RISCV::VMACC_VX: 4192 case RISCV::VNMSAC_VX: 4193 case RISCV::VMADD_VX: 4194 case RISCV::VNMSUB_VX: 4195 // 11.14. Vector Widening Integer Multiply-Add Instructions 4196 case RISCV::VWMACCU_VX: 4197 case RISCV::VWMACC_VX: 4198 case RISCV::VWMACCSU_VX: 4199 case RISCV::VWMACCUS_VX: 4200 // 11.15. Vector Integer Merge Instructions 4201 case RISCV::VMERGE_VXM: 4202 // 11.16. Vector Integer Move Instructions 4203 case RISCV::VMV_V_X: 4204 // 12.1. Vector Single-Width Saturating Add and Subtract 4205 case RISCV::VSADDU_VX: 4206 case RISCV::VSADD_VX: 4207 case RISCV::VSSUBU_VX: 4208 case RISCV::VSSUB_VX: 4209 // 12.2. Vector Single-Width Averaging Add and Subtract 4210 case RISCV::VAADDU_VX: 4211 case RISCV::VAADD_VX: 4212 case RISCV::VASUBU_VX: 4213 case RISCV::VASUB_VX: 4214 // 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation 4215 case RISCV::VSMUL_VX: 4216 // 16.1. Integer Scalar Move Instructions 4217 case RISCV::VMV_S_X: 4218 return 1U << Log2SEW; 4219 } 4220 } 4221 4222 unsigned RISCV::getRVVMCOpcode(unsigned RVVPseudoOpcode) { 4223 const RISCVVPseudosTable::PseudoInfo *RVV = 4224 RISCVVPseudosTable::getPseudoInfo(RVVPseudoOpcode); 4225 if (!RVV) 4226 return 0; 4227 return RVV->BaseInstr; 4228 } 4229 4230 unsigned RISCV::getDestLog2EEW(const MCInstrDesc &Desc, unsigned Log2SEW) { 4231 unsigned DestEEW = 4232 (Desc.TSFlags & RISCVII::DestEEWMask) >> RISCVII::DestEEWShift; 4233 // EEW = 1 4234 if (DestEEW == 0) 4235 return 0; 4236 // EEW = SEW * n 4237 unsigned Scaled = Log2SEW + (DestEEW - 1); 4238 assert(Scaled >= 3 && Scaled <= 6); 4239 return Scaled; 4240 } 4241 4242 /// Given two VL operands, do we know that LHS <= RHS? 
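/// This holds when both operands are the same virtual register, when RHS is
/// the VLMAX sentinel immediate, or when both are immediates with LHS <= RHS;
/// in all other cases the relationship is unknown and false is returned.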
4243 bool RISCV::isVLKnownLE(const MachineOperand &LHS, const MachineOperand &RHS) { 4244 if (LHS.isReg() && RHS.isReg() && LHS.getReg().isVirtual() && 4245 LHS.getReg() == RHS.getReg()) 4246 return true; 4247 if (RHS.isImm() && RHS.getImm() == RISCV::VLMaxSentinel) 4248 return true; 4249 if (LHS.isImm() && LHS.getImm() == RISCV::VLMaxSentinel) 4250 return false; 4251 if (!LHS.isImm() || !RHS.isImm()) 4252 return false; 4253 return LHS.getImm() <= RHS.getImm(); 4254 } 4255 4256 namespace { 4257 class RISCVPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo { 4258 const MachineInstr *LHS; 4259 const MachineInstr *RHS; 4260 SmallVector<MachineOperand, 3> Cond; 4261 4262 public: 4263 RISCVPipelinerLoopInfo(const MachineInstr *LHS, const MachineInstr *RHS, 4264 const SmallVectorImpl<MachineOperand> &Cond) 4265 : LHS(LHS), RHS(RHS), Cond(Cond.begin(), Cond.end()) {} 4266 4267 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override { 4268 // Make the instructions for loop control be placed in stage 0. 4269 // The predecessors of LHS/RHS are considered by the caller. 4270 if (LHS && MI == LHS) 4271 return true; 4272 if (RHS && MI == RHS) 4273 return true; 4274 return false; 4275 } 4276 4277 std::optional<bool> createTripCountGreaterCondition( 4278 int TC, MachineBasicBlock &MBB, 4279 SmallVectorImpl<MachineOperand> &CondParam) override { 4280 // A branch instruction will be inserted as "if (Cond) goto epilogue". 4281 // Cond is normalized for such use. 4282 // The predecessors of the branch are assumed to have already been inserted. 4283 CondParam = Cond; 4284 return {}; 4285 } 4286 4287 void setPreheader(MachineBasicBlock *NewPreheader) override {} 4288 4289 void adjustTripCount(int TripCountAdjust) override {} 4290 }; 4291 } // namespace 4292 4293 std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> 4294 RISCVInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const { 4295 MachineBasicBlock *TBB = nullptr, *FBB = nullptr; 4296 SmallVector<MachineOperand, 4> Cond; 4297 if (analyzeBranch(*LoopBB, TBB, FBB, Cond, /*AllowModify=*/false)) 4298 return nullptr; 4299 4300 // Infinite loops are not supported 4301 if (TBB == LoopBB && FBB == LoopBB) 4302 return nullptr; 4303 4304 // Must be conditional branch 4305 if (FBB == nullptr) 4306 return nullptr; 4307 4308 assert((TBB == LoopBB || FBB == LoopBB) && 4309 "The Loop must be a single-basic-block loop"); 4310 4311 // Normalization for createTripCountGreaterCondition() 4312 if (TBB == LoopBB) 4313 reverseBranchCondition(Cond); 4314 4315 const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo(); 4316 auto FindRegDef = [&MRI](MachineOperand &Op) -> const MachineInstr * { 4317 if (!Op.isReg()) 4318 return nullptr; 4319 Register Reg = Op.getReg(); 4320 if (!Reg.isVirtual()) 4321 return nullptr; 4322 return MRI.getVRegDef(Reg); 4323 }; 4324 4325 const MachineInstr *LHS = FindRegDef(Cond[1]); 4326 const MachineInstr *RHS = FindRegDef(Cond[2]); 4327 if (LHS && LHS->isPHI()) 4328 return nullptr; 4329 if (RHS && RHS->isPHI()) 4330 return nullptr; 4331 4332 return std::make_unique<RISCVPipelinerLoopInfo>(LHS, RHS, Cond); 4333 } 4334