//===-- RISCVInstrInfo.cpp - RISC-V Instruction Information -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the RISC-V implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "RISCVInstrInfo.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineCombinerPattern.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"

using namespace llvm;

#define GEN_CHECK_COMPRESS_INSTR
#include "RISCVGenCompressInstEmitter.inc"

#define GET_INSTRINFO_CTOR_DTOR
#define GET_INSTRINFO_NAMED_OPS
#include "RISCVGenInstrInfo.inc"

static cl::opt<bool> PreferWholeRegisterMove(
    "riscv-prefer-whole-register-move", cl::init(false), cl::Hidden,
    cl::desc("Prefer whole register move for vector registers."));

static cl::opt<MachineTraceStrategy> ForceMachineCombinerStrategy(
    "riscv-force-machine-combiner-strategy", cl::Hidden,
    cl::desc("Force machine combiner to use a specific strategy for machine "
             "trace metrics evaluation."),
    cl::init(MachineTraceStrategy::TS_NumStrategies),
    cl::values(clEnumValN(MachineTraceStrategy::TS_Local, "local",
                          "Local strategy."),
               clEnumValN(MachineTraceStrategy::TS_MinInstrCount, "min-instr",
                          "MinInstrCount strategy.")));

namespace llvm::RISCVVPseudosTable {

using namespace RISCV;

#define GET_RISCVVPseudosTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace llvm::RISCVVPseudosTable

RISCVInstrInfo::RISCVInstrInfo(RISCVSubtarget &STI)
    : RISCVGenInstrInfo(RISCV::ADJCALLSTACKDOWN, RISCV::ADJCALLSTACKUP),
      STI(STI) {}

MCInst RISCVInstrInfo::getNop() const {
  if (STI.hasStdExtCOrZca())
    return MCInstBuilder(RISCV::C_NOP);
  return MCInstBuilder(RISCV::ADDI)
      .addReg(RISCV::X0)
      .addReg(RISCV::X0)
      .addImm(0);
}

unsigned RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                             int &FrameIndex) const {
  unsigned Dummy;
  return isLoadFromStackSlot(MI, FrameIndex, Dummy);
}

unsigned RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                             int &FrameIndex,
                                             unsigned &MemBytes) const {
  switch (MI.getOpcode()) {
  default:
    return 0;
  case RISCV::LB:
  case RISCV::LBU:
    MemBytes = 1;
    break;
  case RISCV::LH:
  case RISCV::LHU:
  case RISCV::FLH:
    MemBytes = 2;
    break;
  case RISCV::LW:
  case RISCV::FLW:
  case RISCV::LWU:
    MemBytes = 4;
    break;
  case RISCV::LD:
  case RISCV::FLD:
    MemBytes = 8;
    break;
  }

  if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
      MI.getOperand(2).getImm() == 0) {
    FrameIndex = MI.getOperand(1).getIndex();
    return MI.getOperand(0).getReg();
  }

  return 0;
}

unsigned RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                            int &FrameIndex) const {
  unsigned Dummy;
  return isStoreToStackSlot(MI, FrameIndex, Dummy);
}

unsigned RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                            int &FrameIndex,
                                            unsigned &MemBytes) const {
  switch (MI.getOpcode()) {
  default:
    return 0;
  case RISCV::SB:
    MemBytes = 1;
    break;
  case RISCV::SH:
  case RISCV::FSH:
    MemBytes = 2;
    break;
  case RISCV::SW:
  case RISCV::FSW:
    MemBytes = 4;
    break;
  case RISCV::SD:
  case RISCV::FSD:
    MemBytes = 8;
    break;
  }

  if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
      MI.getOperand(2).getImm() == 0) {
    FrameIndex = MI.getOperand(1).getIndex();
    return MI.getOperand(0).getReg();
  }

  return 0;
}

static bool forwardCopyWillClobberTuple(unsigned DstReg, unsigned SrcReg,
                                        unsigned NumRegs) {
  return DstReg > SrcReg && (DstReg - SrcReg) < NumRegs;
}

static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI,
                                   const MachineBasicBlock &MBB,
                                   MachineBasicBlock::const_iterator MBBI,
                                   MachineBasicBlock::const_iterator &DefMBBI,
                                   RISCVII::VLMUL LMul) {
  if (PreferWholeRegisterMove)
    return false;

  assert(MBBI->getOpcode() == TargetOpcode::COPY &&
         "Unexpected COPY instruction.");
  Register SrcReg = MBBI->getOperand(1).getReg();
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();

  bool FoundDef = false;
  bool FirstVSetVLI = false;
  unsigned FirstSEW = 0;
  while (MBBI != MBB.begin()) {
    --MBBI;
    if (MBBI->isMetaInstruction())
      continue;

    if (MBBI->getOpcode() == RISCV::PseudoVSETVLI ||
        MBBI->getOpcode() == RISCV::PseudoVSETVLIX0 ||
        MBBI->getOpcode() == RISCV::PseudoVSETIVLI) {
      // There is a vsetvli between COPY and source define instruction.
      // vy = def_vop ...  (producing instruction)
      // ...
      // vsetvli
      // ...
      // vx = COPY vy
      if (!FoundDef) {
        if (!FirstVSetVLI) {
          FirstVSetVLI = true;
          unsigned FirstVType = MBBI->getOperand(2).getImm();
          RISCVII::VLMUL FirstLMul = RISCVVType::getVLMUL(FirstVType);
          FirstSEW = RISCVVType::getSEW(FirstVType);
          // The first encountered vsetvli must have the same lmul as the
          // register class of COPY.
          if (FirstLMul != LMul)
            return false;
        }
        // Only permit `vsetvli x0, x0, vtype` between COPY and the source
        // define instruction.
        if (MBBI->getOperand(0).getReg() != RISCV::X0)
          return false;
        if (MBBI->getOperand(1).isImm())
          return false;
        if (MBBI->getOperand(1).getReg() != RISCV::X0)
          return false;
        continue;
      }

      // MBBI is the first vsetvli before the producing instruction.
      unsigned VType = MBBI->getOperand(2).getImm();
      // If there is a vsetvli between COPY and the producing instruction.
      if (FirstVSetVLI) {
        // If SEW is different, return false.
        if (RISCVVType::getSEW(VType) != FirstSEW)
          return false;
      }

      // If the vsetvli is tail undisturbed, keep the whole register move.
      if (!RISCVVType::isTailAgnostic(VType))
        return false;

      // The checking is conservative. We only have register classes for
      // LMUL = 1/2/4/8. We should be able to convert vmv1r.v to vmv.v.v
      // for fractional LMUL operations. However, we could not use the vsetvli
      // lmul for widening operations. The result of a widening operation is
      // 2 x LMUL.
      return LMul == RISCVVType::getVLMUL(VType);
    } else if (MBBI->isInlineAsm() || MBBI->isCall()) {
      return false;
    } else if (MBBI->getNumDefs()) {
      // Check all the instructions which will change VL.
      // For example, vleff has implicit def VL.
      if (MBBI->modifiesRegister(RISCV::VL))
        return false;

      // Only convert whole register copies to vmv.v.v when the defining
      // value appears in the explicit operands.
      for (const MachineOperand &MO : MBBI->explicit_operands()) {
        if (!MO.isReg() || !MO.isDef())
          continue;
        if (!FoundDef && TRI->regsOverlap(MO.getReg(), SrcReg)) {
          // We only permit the source of the COPY to have the same LMUL as the
          // defined operand.
          // There are cases where we need to keep the whole register copy if
          // the LMUL is different.
          // For example,
          // $x0 = PseudoVSETIVLI 4, 73   // vsetivli zero, 4, e16,m2,ta,m
          // $v28m4 = PseudoVWADD_VV_M2 $v26m2, $v8m2
          // # The COPY may be created by vlmul_trunc intrinsic.
          // $v26m2 = COPY renamable $v28m2, implicit killed $v28m4
          //
          // After widening, the valid value will be 4 x e32 elements. If we
          // convert the COPY to vmv.v.v, it will only copy 4 x e16 elements.
          // FIXME: The COPY of a subregister of a Zvlsseg register will not be
          // able to convert to vmv.v.[v|i] under the constraint.
          if (MO.getReg() != SrcReg)
            return false;

          // In the case of widening reduction instructions with an LMUL_1
          // input vector, checking only the LMUL is insufficient because the
          // reduction result is always LMUL_1.
          // For example,
          // $x11 = PseudoVSETIVLI 1, 64   // vsetivli a1, 1, e8, m1, ta, mu
          // $v8m1 = PseudoVWREDSUM_VS_M1 $v26, $v27
          // $v26 = COPY killed renamable $v8
          // After widening, the valid value will be 1 x e16 elements. If we
          // convert the COPY to vmv.v.v, it will only copy 1 x e8 elements.
          uint64_t TSFlags = MBBI->getDesc().TSFlags;
          if (RISCVII::isRVVWideningReduction(TSFlags))
            return false;

          // If the producing instruction does not depend on vsetvli, do not
          // convert COPY to vmv.v.v. For example, VL1R_V or PseudoVRELOAD.
          if (!RISCVII::hasSEWOp(TSFlags) || !RISCVII::hasVLOp(TSFlags))
            return false;

          // Found the definition.
          FoundDef = true;
          DefMBBI = MBBI;
          break;
        }
      }
    }
  }

  return false;
}

void RISCVInstrInfo::copyPhysRegVector(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       const DebugLoc &DL, MCRegister DstReg,
                                       MCRegister SrcReg, bool KillSrc,
                                       unsigned Opc, unsigned NF) const {
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();

  RISCVII::VLMUL LMul;
  unsigned SubRegIdx;
  unsigned VVOpc, VIOpc;
  switch (Opc) {
  default:
    llvm_unreachable("Impossible LMUL for vector register copy.");
  case RISCV::VMV1R_V:
    LMul = RISCVII::LMUL_1;
    SubRegIdx = RISCV::sub_vrm1_0;
    VVOpc = RISCV::PseudoVMV_V_V_M1;
    VIOpc = RISCV::PseudoVMV_V_I_M1;
    break;
  case RISCV::VMV2R_V:
    LMul = RISCVII::LMUL_2;
    SubRegIdx = RISCV::sub_vrm2_0;
    VVOpc = RISCV::PseudoVMV_V_V_M2;
    VIOpc = RISCV::PseudoVMV_V_I_M2;
    break;
  case RISCV::VMV4R_V:
    LMul = RISCVII::LMUL_4;
    SubRegIdx = RISCV::sub_vrm4_0;
    VVOpc = RISCV::PseudoVMV_V_V_M4;
    VIOpc = RISCV::PseudoVMV_V_I_M4;
    break;
  case RISCV::VMV8R_V:
    assert(NF == 1);
    LMul = RISCVII::LMUL_8;
    SubRegIdx = RISCV::sub_vrm1_0; // There is no sub_vrm8_0.
    VVOpc = RISCV::PseudoVMV_V_V_M8;
    VIOpc = RISCV::PseudoVMV_V_I_M8;
    break;
  }

  bool UseVMV_V_V = false;
  bool UseVMV_V_I = false;
  MachineBasicBlock::const_iterator DefMBBI;
  if (isConvertibleToVMV_V_V(STI, MBB, MBBI, DefMBBI, LMul)) {
    UseVMV_V_V = true;
    Opc = VVOpc;

    if (DefMBBI->getOpcode() == VIOpc) {
      UseVMV_V_I = true;
      Opc = VIOpc;
    }
  }

  if (NF == 1) {
    auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), DstReg);
    if (UseVMV_V_V)
      MIB.addReg(DstReg, RegState::Undef);
    if (UseVMV_V_I)
      MIB = MIB.add(DefMBBI->getOperand(2));
    else
      MIB = MIB.addReg(SrcReg, getKillRegState(KillSrc));
    if (UseVMV_V_V) {
      const MCInstrDesc &Desc = DefMBBI->getDesc();
      MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc)));  // AVL
      MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW
      MIB.addImm(0);                                            // tu, mu
      MIB.addReg(RISCV::VL, RegState::Implicit);
      MIB.addReg(RISCV::VTYPE, RegState::Implicit);
    }
    return;
  }

  int I = 0, End = NF, Incr = 1;
  unsigned SrcEncoding = TRI->getEncodingValue(SrcReg);
  unsigned DstEncoding = TRI->getEncodingValue(DstReg);
  unsigned LMulVal;
  bool Fractional;
  std::tie(LMulVal, Fractional) = RISCVVType::decodeVLMUL(LMul);
  assert(!Fractional && "It is impossible be fractional lmul here.");
  if (forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NF * LMulVal)) {
    I = NF - 1;
    End = -1;
    Incr = -1;
  }

  for (; I != End; I += Incr) {
    auto MIB =
        BuildMI(MBB, MBBI, DL, get(Opc), TRI->getSubReg(DstReg, SubRegIdx + I));
    if (UseVMV_V_V)
      MIB.addReg(TRI->getSubReg(DstReg, SubRegIdx + I), RegState::Undef);
    if (UseVMV_V_I)
      MIB = MIB.add(DefMBBI->getOperand(2));
    else
      MIB = MIB.addReg(TRI->getSubReg(SrcReg, SubRegIdx + I),
                       getKillRegState(KillSrc));
    if (UseVMV_V_V) {
      const MCInstrDesc &Desc = DefMBBI->getDesc();
      MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc)));  // AVL
      MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW
      MIB.addImm(0);                                            // tu, mu
      MIB.addReg(RISCV::VL, RegState::Implicit);
      MIB.addReg(RISCV::VTYPE, RegState::Implicit);
    }
  }
}

void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI,
                                 const DebugLoc &DL, MCRegister DstReg,
                                 MCRegister SrcReg, bool KillSrc) const {
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();

  if (RISCV::GPRRegClass.contains(DstReg, SrcReg)) {
    BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .addImm(0);
    return;
  }

  if (RISCV::GPRPF64RegClass.contains(DstReg, SrcReg)) {
    // Emit an ADDI for both parts of GPRPF64.
    BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),
            TRI->getSubReg(DstReg, RISCV::sub_32))
        .addReg(TRI->getSubReg(SrcReg, RISCV::sub_32), getKillRegState(KillSrc))
        .addImm(0);
    BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),
            TRI->getSubReg(DstReg, RISCV::sub_32_hi))
        .addReg(TRI->getSubReg(SrcReg, RISCV::sub_32_hi),
                getKillRegState(KillSrc))
        .addImm(0);
    return;
  }

  // Handle copy from csr
  if (RISCV::VCSRRegClass.contains(SrcReg) &&
      RISCV::GPRRegClass.contains(DstReg)) {
    BuildMI(MBB, MBBI, DL, get(RISCV::CSRRS), DstReg)
        .addImm(RISCVSysReg::lookupSysRegByName(TRI->getName(SrcReg))->Encoding)
        .addReg(RISCV::X0);
    return;
  }

  if (RISCV::FPR16RegClass.contains(DstReg, SrcReg)) {
    unsigned Opc;
    if (STI.hasStdExtZfh()) {
      Opc = RISCV::FSGNJ_H;
    } else {
      assert(STI.hasStdExtF() &&
             (STI.hasStdExtZfhmin() || STI.hasStdExtZfbfmin()) &&
             "Unexpected extensions");
      // Zfhmin/Zfbfmin doesn't have FSGNJ_H, replace FSGNJ_H with FSGNJ_S.
      DstReg = TRI->getMatchingSuperReg(DstReg, RISCV::sub_16,
                                        &RISCV::FPR32RegClass);
      SrcReg = TRI->getMatchingSuperReg(SrcReg, RISCV::sub_16,
                                        &RISCV::FPR32RegClass);
      Opc = RISCV::FSGNJ_S;
    }
    BuildMI(MBB, MBBI, DL, get(Opc), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RISCV::FPR32RegClass.contains(DstReg, SrcReg)) {
    BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_S), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RISCV::FPR64RegClass.contains(DstReg, SrcReg)) {
    BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_D), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RISCV::FPR32RegClass.contains(DstReg) &&
      RISCV::GPRRegClass.contains(SrcReg)) {
    BuildMI(MBB, MBBI, DL, get(RISCV::FMV_W_X), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RISCV::GPRRegClass.contains(DstReg) &&
      RISCV::FPR32RegClass.contains(SrcReg)) {
    BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_W), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RISCV::FPR64RegClass.contains(DstReg) &&
      RISCV::GPRRegClass.contains(SrcReg)) {
    assert(STI.getXLen() == 64 && "Unexpected GPR size");
    BuildMI(MBB, MBBI, DL, get(RISCV::FMV_D_X), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  if (RISCV::GPRRegClass.contains(DstReg) &&
      RISCV::FPR64RegClass.contains(SrcReg)) {
    assert(STI.getXLen() == 64 && "Unexpected GPR size");
    BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_D), DstReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  // VR->VR copies.
  if (RISCV::VRRegClass.contains(DstReg, SrcReg)) {
    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V);
    return;
  }

  if (RISCV::VRM2RegClass.contains(DstReg, SrcReg)) {
    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV2R_V);
    return;
  }

  if (RISCV::VRM4RegClass.contains(DstReg, SrcReg)) {
    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV4R_V);
    return;
  }

  if (RISCV::VRM8RegClass.contains(DstReg, SrcReg)) {
    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV8R_V);
    return;
  }

  if (RISCV::VRN2M1RegClass.contains(DstReg, SrcReg)) {
    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V,
                      /*NF=*/2);
    return;
  }

  if (RISCV::VRN2M2RegClass.contains(DstReg, SrcReg)) {
    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV2R_V,
                      /*NF=*/2);
    return;
  }

  if (RISCV::VRN2M4RegClass.contains(DstReg, SrcReg)) {
    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV4R_V,
                      /*NF=*/2);
    return;
  }

  if (RISCV::VRN3M1RegClass.contains(DstReg, SrcReg)) {
    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V,
                      /*NF=*/3);
    return;
  }

  if (RISCV::VRN3M2RegClass.contains(DstReg, SrcReg)) {
    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV2R_V,
                      /*NF=*/3);
    return;
  }

  if (RISCV::VRN4M1RegClass.contains(DstReg, SrcReg)) {
    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V,
                      /*NF=*/4);
    return;
  }

  if (RISCV::VRN4M2RegClass.contains(DstReg, SrcReg)) {
    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV2R_V,
                      /*NF=*/4);
    return;
  }

  if (RISCV::VRN5M1RegClass.contains(DstReg, SrcReg)) {
    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V,
                      /*NF=*/5);
    return;
  }

  if (RISCV::VRN6M1RegClass.contains(DstReg, SrcReg)) {
    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V,
                      /*NF=*/6);
    return;
  }

  if (RISCV::VRN7M1RegClass.contains(DstReg, SrcReg)) {
    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V,
                      /*NF=*/7);
    return;
  }

  if (RISCV::VRN8M1RegClass.contains(DstReg, SrcReg)) {
    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCV::VMV1R_V,
                      /*NF=*/8);
    return;
  }

  llvm_unreachable("Impossible reg-to-reg copy");
}

void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator I,
                                         Register SrcReg, bool IsKill, int FI,
                                         const TargetRegisterClass *RC,
                                         const TargetRegisterInfo *TRI,
                                         Register VReg) const {
  MachineFunction *MF = MBB.getParent();
  MachineFrameInfo &MFI = MF->getFrameInfo();

  unsigned Opcode;
  bool IsScalableVector = true;
  if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
    Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
                 RISCV::SW : RISCV::SD;
    IsScalableVector = false;
  } else if (RISCV::GPRPF64RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::PseudoRV32ZdinxSD;
    IsScalableVector = false;
  } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FSH;
    IsScalableVector = false;
  } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FSW;
    IsScalableVector = false;
  } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FSD;
    IsScalableVector = false;
  } else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VS1R_V;
  } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VS2R_V;
  } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VS4R_V;
  } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VS8R_V;
  } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL2_M1;
  else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL2_M2;
  else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL2_M4;
  else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL3_M1;
  else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL3_M2;
  else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL4_M1;
  else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL4_M2;
  else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL5_M1;
  else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL6_M1;
  else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL7_M1;
  else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVSPILL8_M1;
  else
    llvm_unreachable("Can't store this register to stack slot");

  if (IsScalableVector) {
    MachineMemOperand *MMO = MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
        MemoryLocation::UnknownSize, MFI.getObjectAlign(FI));

    MFI.setStackID(FI, TargetStackID::ScalableVector);
    BuildMI(MBB, I, DebugLoc(), get(Opcode))
        .addReg(SrcReg, getKillRegState(IsKill))
        .addFrameIndex(FI)
        .addMemOperand(MMO);
  } else {
    MachineMemOperand *MMO = MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
        MFI.getObjectSize(FI), MFI.getObjectAlign(FI));

    BuildMI(MBB, I, DebugLoc(), get(Opcode))
        .addReg(SrcReg, getKillRegState(IsKill))
        .addFrameIndex(FI)
        .addImm(0)
        .addMemOperand(MMO);
  }
}

void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator I,
                                          Register DstReg, int FI,
                                          const TargetRegisterClass *RC,
                                          const TargetRegisterInfo *TRI,
                                          Register VReg) const {
  MachineFunction *MF = MBB.getParent();
  MachineFrameInfo &MFI = MF->getFrameInfo();

  unsigned Opcode;
  bool IsScalableVector = true;
  if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
    Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
                 RISCV::LW : RISCV::LD;
    IsScalableVector = false;
  } else if (RISCV::GPRPF64RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::PseudoRV32ZdinxLD;
    IsScalableVector = false;
  } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FLH;
    IsScalableVector = false;
  } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FLW;
    IsScalableVector = false;
  } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::FLD;
    IsScalableVector = false;
  } else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VL1RE8_V;
  } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VL2RE8_V;
  } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VL4RE8_V;
  } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
    Opcode = RISCV::VL8RE8_V;
  } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD2_M1;
  else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD2_M2;
  else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD2_M4;
  else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD3_M1;
  else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD3_M2;
  else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD4_M1;
  else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD4_M2;
  else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD5_M1;
  else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD6_M1;
  else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD7_M1;
  else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
    Opcode = RISCV::PseudoVRELOAD8_M1;
  else
    llvm_unreachable("Can't load this register from stack slot");

  if (IsScalableVector) {
    MachineMemOperand *MMO = MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
        MemoryLocation::UnknownSize, MFI.getObjectAlign(FI));

    MFI.setStackID(FI, TargetStackID::ScalableVector);
    BuildMI(MBB, I, DebugLoc(), get(Opcode), DstReg)
        .addFrameIndex(FI)
        .addMemOperand(MMO);
  } else {
    MachineMemOperand *MMO = MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
        MFI.getObjectSize(FI), MFI.getObjectAlign(FI));

    BuildMI(MBB, I, DebugLoc(), get(Opcode), DstReg)
        .addFrameIndex(FI)
        .addImm(0)
        .addMemOperand(MMO);
  }
}

MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
    MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
    MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS,
    VirtRegMap *VRM) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // The below optimizations narrow the load so they are only valid for little
  // endian.
  // TODO: Support big endian by adding an offset into the frame object?
  if (MF.getDataLayout().isBigEndian())
    return nullptr;

  // Fold load from stack followed by sext.b/sext.h/sext.w/zext.b/zext.h/zext.w.
  if (Ops.size() != 1 || Ops[0] != 1)
    return nullptr;

  unsigned LoadOpc;
  switch (MI.getOpcode()) {
  default:
    if (RISCV::isSEXT_W(MI)) {
      LoadOpc = RISCV::LW;
      break;
    }
    if (RISCV::isZEXT_W(MI)) {
      LoadOpc = RISCV::LWU;
      break;
    }
    if (RISCV::isZEXT_B(MI)) {
      LoadOpc = RISCV::LBU;
      break;
    }
    return nullptr;
  case RISCV::SEXT_H:
    LoadOpc = RISCV::LH;
    break;
  case RISCV::SEXT_B:
    LoadOpc = RISCV::LB;
    break;
  case RISCV::ZEXT_H_RV32:
  case RISCV::ZEXT_H_RV64:
    LoadOpc = RISCV::LHU;
    break;
  }

  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FrameIndex),
      MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex),
      MFI.getObjectAlign(FrameIndex));

  Register DstReg = MI.getOperand(0).getReg();
  return BuildMI(*MI.getParent(), InsertPt, MI.getDebugLoc(), get(LoadOpc),
                 DstReg)
      .addFrameIndex(FrameIndex)
      .addImm(0)
      .addMemOperand(MMO);
}

void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            const DebugLoc &DL, Register DstReg, uint64_t Val,
                            MachineInstr::MIFlag Flag, bool DstRenamable,
                            bool DstIsDead) const {
  Register SrcReg = RISCV::X0;

  if (!STI.is64Bit() && !isInt<32>(Val))
    report_fatal_error("Should only materialize 32-bit constants for RV32");

  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, STI);
  assert(!Seq.empty());

  bool SrcRenamable = false;
  unsigned Num = 0;

  for (const RISCVMatInt::Inst &Inst : Seq) {
    bool LastItem = ++Num == Seq.size();
    unsigned DstRegState = getDeadRegState(DstIsDead && LastItem) |
                           getRenamableRegState(DstRenamable);
    unsigned SrcRegState = getKillRegState(SrcReg != RISCV::X0) |
                           getRenamableRegState(SrcRenamable);
    switch (Inst.getOpndKind()) {
    case RISCVMatInt::Imm:
      BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
          .addReg(DstReg, RegState::Define | DstRegState)
          .addImm(Inst.getImm())
          .setMIFlag(Flag);
      break;
    case RISCVMatInt::RegX0:
      BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
          .addReg(DstReg, RegState::Define | DstRegState)
          .addReg(SrcReg, SrcRegState)
          .addReg(RISCV::X0)
          .setMIFlag(Flag);
      break;
    case RISCVMatInt::RegReg:
      BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
          .addReg(DstReg, RegState::Define | DstRegState)
          .addReg(SrcReg, SrcRegState)
          .addReg(SrcReg, SrcRegState)
          .setMIFlag(Flag);
      break;
    case RISCVMatInt::RegImm:
      BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
          .addReg(DstReg, RegState::Define | DstRegState)
          .addReg(SrcReg, SrcRegState)
          .addImm(Inst.getImm())
          .setMIFlag(Flag);
      break;
    }

    // Only the first instruction has X0 as its source.
    SrcReg = DstReg;
    SrcRenamable = DstRenamable;
  }
}

static RISCVCC::CondCode getCondFromBranchOpc(unsigned Opc) {
  switch (Opc) {
  default:
    return RISCVCC::COND_INVALID;
  case RISCV::BEQ:
    return RISCVCC::COND_EQ;
  case RISCV::BNE:
    return RISCVCC::COND_NE;
  case RISCV::BLT:
    return RISCVCC::COND_LT;
  case RISCV::BGE:
    return RISCVCC::COND_GE;
  case RISCV::BLTU:
    return RISCVCC::COND_LTU;
  case RISCV::BGEU:
    return RISCVCC::COND_GEU;
  }
}

// The contents of values added to Cond are not examined outside of
// RISCVInstrInfo, giving us flexibility in what to push to it. For RISCV, we
// push BranchOpcode, Reg1, Reg2.
static void parseCondBranch(MachineInstr &LastInst, MachineBasicBlock *&Target,
                            SmallVectorImpl<MachineOperand> &Cond) {
  // Block ends with fall-through condbranch.
  assert(LastInst.getDesc().isConditionalBranch() &&
         "Unknown conditional branch");
  Target = LastInst.getOperand(2).getMBB();
  unsigned CC = getCondFromBranchOpc(LastInst.getOpcode());
  Cond.push_back(MachineOperand::CreateImm(CC));
  Cond.push_back(LastInst.getOperand(0));
  Cond.push_back(LastInst.getOperand(1));
}

unsigned RISCVCC::getBrCond(RISCVCC::CondCode CC) {
  switch (CC) {
  default:
    llvm_unreachable("Unknown condition code!");
  case RISCVCC::COND_EQ:
    return RISCV::BEQ;
  case RISCVCC::COND_NE:
    return RISCV::BNE;
  case RISCVCC::COND_LT:
    return RISCV::BLT;
  case RISCVCC::COND_GE:
    return RISCV::BGE;
  case RISCVCC::COND_LTU:
    return RISCV::BLTU;
  case RISCVCC::COND_GEU:
    return RISCV::BGEU;
  }
}

const MCInstrDesc &RISCVInstrInfo::getBrCond(RISCVCC::CondCode CC) const {
  return get(RISCVCC::getBrCond(CC));
}

RISCVCC::CondCode RISCVCC::getOppositeBranchCondition(RISCVCC::CondCode CC) {
  switch (CC) {
  default:
    llvm_unreachable("Unrecognized conditional branch");
  case RISCVCC::COND_EQ:
    return RISCVCC::COND_NE;
  case RISCVCC::COND_NE:
    return RISCVCC::COND_EQ;
  case RISCVCC::COND_LT:
    return RISCVCC::COND_GE;
  case RISCVCC::COND_GE:
    return RISCVCC::COND_LT;
  case RISCVCC::COND_LTU:
    return RISCVCC::COND_GEU;
  case RISCVCC::COND_GEU:
    return RISCVCC::COND_LTU;
  }
}

bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                   MachineBasicBlock *&TBB,
                                   MachineBasicBlock *&FBB,
                                   SmallVectorImpl<MachineOperand> &Cond,
                                   bool AllowModify) const {
  TBB = FBB = nullptr;
  Cond.clear();

  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end() || !isUnpredicatedTerminator(*I))
    return false;

  // Count the number of terminators and find the first unconditional or
  // indirect branch.
  MachineBasicBlock::iterator FirstUncondOrIndirectBr = MBB.end();
  int NumTerminators = 0;
  for (auto J = I.getReverse(); J != MBB.rend() && isUnpredicatedTerminator(*J);
       J++) {
    NumTerminators++;
    if (J->getDesc().isUnconditionalBranch() ||
        J->getDesc().isIndirectBranch()) {
      FirstUncondOrIndirectBr = J.getReverse();
    }
  }

  // If AllowModify is true, we can erase any terminators after
  // FirstUncondOrIndirectBR.
  if (AllowModify && FirstUncondOrIndirectBr != MBB.end()) {
    while (std::next(FirstUncondOrIndirectBr) != MBB.end()) {
      std::next(FirstUncondOrIndirectBr)->eraseFromParent();
      NumTerminators--;
    }
    I = FirstUncondOrIndirectBr;
  }

  // We can't handle blocks that end in an indirect branch.
  if (I->getDesc().isIndirectBranch())
    return true;

  // We can't handle Generic branch opcodes from Global ISel.
  if (I->isPreISelOpcode())
    return true;

  // We can't handle blocks with more than 2 terminators.
  if (NumTerminators > 2)
    return true;

  // Handle a single unconditional branch.
  if (NumTerminators == 1 && I->getDesc().isUnconditionalBranch()) {
    TBB = getBranchDestBlock(*I);
    return false;
  }

  // Handle a single conditional branch.
  if (NumTerminators == 1 && I->getDesc().isConditionalBranch()) {
    parseCondBranch(*I, TBB, Cond);
    return false;
  }

  // Handle a conditional branch followed by an unconditional branch.
  if (NumTerminators == 2 && std::prev(I)->getDesc().isConditionalBranch() &&
      I->getDesc().isUnconditionalBranch()) {
    parseCondBranch(*std::prev(I), TBB, Cond);
    FBB = getBranchDestBlock(*I);
    return false;
  }

  // Otherwise, we can't handle this.
  return true;
}

unsigned RISCVInstrInfo::removeBranch(MachineBasicBlock &MBB,
                                      int *BytesRemoved) const {
  if (BytesRemoved)
    *BytesRemoved = 0;
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;

  if (!I->getDesc().isUnconditionalBranch() &&
      !I->getDesc().isConditionalBranch())
    return 0;

  // Remove the branch.
  if (BytesRemoved)
    *BytesRemoved += getInstSizeInBytes(*I);
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin())
    return 1;
  --I;
  if (!I->getDesc().isConditionalBranch())
    return 1;

  // Remove the branch.
  if (BytesRemoved)
    *BytesRemoved += getInstSizeInBytes(*I);
  I->eraseFromParent();
  return 2;
}

// Inserts a branch into the end of the specific MachineBasicBlock, returning
// the number of instructions inserted.
unsigned RISCVInstrInfo::insertBranch(
    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
  if (BytesAdded)
    *BytesAdded = 0;

  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 3 || Cond.size() == 0) &&
         "RISC-V branch conditions have two components!");

  // Unconditional branch.
  if (Cond.empty()) {
    MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(TBB);
    if (BytesAdded)
      *BytesAdded += getInstSizeInBytes(MI);
    return 1;
  }

  // Either a one or two-way conditional branch.
  auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
  MachineInstr &CondMI =
      *BuildMI(&MBB, DL, getBrCond(CC)).add(Cond[1]).add(Cond[2]).addMBB(TBB);
  if (BytesAdded)
    *BytesAdded += getInstSizeInBytes(CondMI);

  // One-way conditional branch.
  if (!FBB)
    return 1;

  // Two-way conditional branch.
  MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(FBB);
  if (BytesAdded)
    *BytesAdded += getInstSizeInBytes(MI);
  return 2;
}

void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
                                          MachineBasicBlock &DestBB,
                                          MachineBasicBlock &RestoreBB,
                                          const DebugLoc &DL, int64_t BrOffset,
                                          RegScavenger *RS) const {
  assert(RS && "RegScavenger required for long branching");
  assert(MBB.empty() &&
         "new block should be inserted for expanding unconditional branch");
  assert(MBB.pred_size() == 1);
  assert(RestoreBB.empty() &&
         "restore block should be inserted for restoring clobbered registers");

  MachineFunction *MF = MBB.getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();

  if (!isInt<32>(BrOffset))
    report_fatal_error(
        "Branch offsets outside of the signed 32-bit range not supported");

  // FIXME: A virtual register must be used initially, as the register
  // scavenger won't work with empty blocks (SIInstrInfo::insertIndirectBranch
  // uses the same workaround).
  Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
  auto II = MBB.end();
  // We may also update the jump target to RestoreBB later.
  MachineInstr &MI = *BuildMI(MBB, II, DL, get(RISCV::PseudoJump))
                          .addReg(ScratchReg, RegState::Define | RegState::Dead)
                          .addMBB(&DestBB, RISCVII::MO_CALL);

  RS->enterBasicBlockEnd(MBB);
  Register TmpGPR =
      RS->scavengeRegisterBackwards(RISCV::GPRRegClass, MI.getIterator(),
                                    /*RestoreAfter=*/false, /*SpAdj=*/0,
                                    /*AllowSpill=*/false);
  if (TmpGPR != RISCV::NoRegister)
    RS->setRegUsed(TmpGPR);
  else {
    // The case when there is no scavenged register needs special handling.

    // Pick s11 because it doesn't make a difference.
    TmpGPR = RISCV::X27;

    int FrameIndex = RVFI->getBranchRelaxationScratchFrameIndex();
    if (FrameIndex == -1)
      report_fatal_error("underestimated function size");

    storeRegToStackSlot(MBB, MI, TmpGPR, /*IsKill=*/true, FrameIndex,
                        &RISCV::GPRRegClass, TRI, Register());
    TRI->eliminateFrameIndex(std::prev(MI.getIterator()),
                             /*SpAdj=*/0, /*FIOperandNum=*/1);

    MI.getOperand(1).setMBB(&RestoreBB);

    loadRegFromStackSlot(RestoreBB, RestoreBB.end(), TmpGPR, FrameIndex,
                         &RISCV::GPRRegClass, TRI, Register());
    TRI->eliminateFrameIndex(RestoreBB.back(),
                             /*SpAdj=*/0, /*FIOperandNum=*/1);
  }

  MRI.replaceRegWith(ScratchReg, TmpGPR);
  MRI.clearVirtRegs();
}

bool RISCVInstrInfo::reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  assert((Cond.size() == 3) && "Invalid branch condition!");
  auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
  Cond[0].setImm(getOppositeBranchCondition(CC));
  return false;
}

bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const {
  MachineBasicBlock *MBB = MI.getParent();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();

  MachineBasicBlock *TBB, *FBB;
  SmallVector<MachineOperand, 3> Cond;
  if (analyzeBranch(*MBB, TBB, FBB, Cond, /*AllowModify=*/false))
    return false;
  (void)FBB;

  RISCVCC::CondCode CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
  assert(CC != RISCVCC::COND_INVALID);

  if (CC == RISCVCC::COND_EQ || CC == RISCVCC::COND_NE)
    return false;

  // For two constants C0 and C1 from
  // ```
  // li Y, C0
  // li Z, C1
  // ```
  // 1. if C1 = C0 + 1
  //    we can turn:
  //    (a) blt Y, X -> bge X, Z
  //    (b) bge Y, X -> blt X, Z
  //
  // 2. if C1 = C0 - 1
  //    we can turn:
  //    (a) blt X, Y -> bge Z, X
  //    (b) bge X, Y -> blt Z, X
  //
  // To make sure this optimization is really beneficial, we only
  // optimize for cases where Y had only one use (i.e. only used by the branch).

  // Right now we only care about LI (i.e. ADDI x0, imm)
  auto isLoadImm = [](const MachineInstr *MI, int64_t &Imm) -> bool {
    if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&
        MI->getOperand(1).getReg() == RISCV::X0) {
      Imm = MI->getOperand(2).getImm();
      return true;
    }
    return false;
  };
  // Either a load from immediate instruction or X0.
  auto isFromLoadImm = [&](const MachineOperand &Op, int64_t &Imm) -> bool {
    if (!Op.isReg())
      return false;
    Register Reg = Op.getReg();
    if (Reg == RISCV::X0) {
      Imm = 0;
      return true;
    }
    if (!Reg.isVirtual())
      return false;
    return isLoadImm(MRI.getVRegDef(Op.getReg()), Imm);
  };

  MachineOperand &LHS = MI.getOperand(0);
  MachineOperand &RHS = MI.getOperand(1);
  // Try to find the register for constant Z; return
  // invalid register otherwise.
  auto searchConst = [&](int64_t C1) -> Register {
    MachineBasicBlock::reverse_iterator II(&MI), E = MBB->rend();
    auto DefC1 = std::find_if(++II, E, [&](const MachineInstr &I) -> bool {
      int64_t Imm;
      return isLoadImm(&I, Imm) && Imm == C1;
    });
    if (DefC1 != E)
      return DefC1->getOperand(0).getReg();

    return Register();
  };

  bool Modify = false;
  int64_t C0;
  if (isFromLoadImm(LHS, C0) && MRI.hasOneUse(LHS.getReg())) {
    // Might be case 1.
    // Signed integer overflow is UB. (UINT64_MAX is bigger so we don't need
    // to worry about unsigned overflow here)
    if (C0 < INT64_MAX)
      if (Register RegZ = searchConst(C0 + 1)) {
        reverseBranchCondition(Cond);
        Cond[1] = MachineOperand::CreateReg(RHS.getReg(), /*isDef=*/false);
        Cond[2] = MachineOperand::CreateReg(RegZ, /*isDef=*/false);
        // We might extend the live range of Z, clear its kill flag to
        // account for this.
        MRI.clearKillFlags(RegZ);
        Modify = true;
      }
  } else if (isFromLoadImm(RHS, C0) && MRI.hasOneUse(RHS.getReg())) {
    // Might be case 2.
    // For unsigned cases, we don't want C1 to wrap back to UINT64_MAX
    // when C0 is zero.
    if ((CC == RISCVCC::COND_GE || CC == RISCVCC::COND_LT) || C0)
      if (Register RegZ = searchConst(C0 - 1)) {
        reverseBranchCondition(Cond);
        Cond[1] = MachineOperand::CreateReg(RegZ, /*isDef=*/false);
        Cond[2] = MachineOperand::CreateReg(LHS.getReg(), /*isDef=*/false);
        // We might extend the live range of Z, clear its kill flag to
        // account for this.
        MRI.clearKillFlags(RegZ);
        Modify = true;
      }
  }

  if (!Modify)
    return false;

  // Build the new branch and remove the old one.
  BuildMI(*MBB, MI, MI.getDebugLoc(),
          getBrCond(static_cast<RISCVCC::CondCode>(Cond[0].getImm())))
      .add(Cond[1])
      .add(Cond[2])
      .addMBB(TBB);
  MI.eraseFromParent();

  return true;
}

MachineBasicBlock *
RISCVInstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
  assert(MI.getDesc().isBranch() && "Unexpected opcode!");
  // The branch target is always the last operand.
  int NumOp = MI.getNumExplicitOperands();
  return MI.getOperand(NumOp - 1).getMBB();
}

bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
                                           int64_t BrOffset) const {
  unsigned XLen = STI.getXLen();
  // Ideally we could determine the supported branch offset from the
  // RISCVII::FormMask, but this can't be used for Pseudo instructions like
  // PseudoBR.
  switch (BranchOp) {
  default:
    llvm_unreachable("Unexpected opcode!");
  case RISCV::BEQ:
  case RISCV::BNE:
  case RISCV::BLT:
  case RISCV::BGE:
  case RISCV::BLTU:
  case RISCV::BGEU:
    return isIntN(13, BrOffset);
  case RISCV::JAL:
  case RISCV::PseudoBR:
    return isIntN(21, BrOffset);
  case RISCV::PseudoJump:
    return isIntN(32, SignExtend64(BrOffset + 0x800, XLen));
  }
}

// If the operation has a predicated pseudo instruction, return the pseudo
// instruction opcode. Otherwise, return RISCV::INSTRUCTION_LIST_END.
// TODO: Support more operations.
unsigned getPredicatedOpcode(unsigned Opcode) {
  switch (Opcode) {
  case RISCV::ADD:   return RISCV::PseudoCCADD;   break;
  case RISCV::SUB:   return RISCV::PseudoCCSUB;   break;
  case RISCV::SLL:   return RISCV::PseudoCCSLL;   break;
  case RISCV::SRL:   return RISCV::PseudoCCSRL;   break;
  case RISCV::SRA:   return RISCV::PseudoCCSRA;   break;
  case RISCV::AND:   return RISCV::PseudoCCAND;   break;
  case RISCV::OR:    return RISCV::PseudoCCOR;    break;
  case RISCV::XOR:   return RISCV::PseudoCCXOR;   break;

  case RISCV::ADDI:  return RISCV::PseudoCCADDI;  break;
  case RISCV::SLLI:  return RISCV::PseudoCCSLLI;  break;
  case RISCV::SRLI:  return RISCV::PseudoCCSRLI;  break;
  case RISCV::SRAI:  return RISCV::PseudoCCSRAI;  break;
  case RISCV::ANDI:  return RISCV::PseudoCCANDI;  break;
  case RISCV::ORI:   return RISCV::PseudoCCORI;   break;
  case RISCV::XORI:  return RISCV::PseudoCCXORI;  break;

  case RISCV::ADDW:  return RISCV::PseudoCCADDW;  break;
  case RISCV::SUBW:  return RISCV::PseudoCCSUBW;  break;
  case RISCV::SLLW:  return RISCV::PseudoCCSLLW;  break;
  case RISCV::SRLW:  return RISCV::PseudoCCSRLW;  break;
  case RISCV::SRAW:  return RISCV::PseudoCCSRAW;  break;

  case RISCV::ADDIW: return RISCV::PseudoCCADDIW; break;
  case RISCV::SLLIW: return RISCV::PseudoCCSLLIW; break;
  case RISCV::SRLIW: return RISCV::PseudoCCSRLIW; break;
  case RISCV::SRAIW: return RISCV::PseudoCCSRAIW; break;
  }

  return RISCV::INSTRUCTION_LIST_END;
}

/// Identify instructions that can be folded into a CCMOV instruction, and
/// return the defining instruction.
static MachineInstr *canFoldAsPredicatedOp(Register Reg,
                                           const MachineRegisterInfo &MRI,
                                           const TargetInstrInfo *TII) {
  if (!Reg.isVirtual())
    return nullptr;
  if (!MRI.hasOneNonDBGUse(Reg))
    return nullptr;
  MachineInstr *MI = MRI.getVRegDef(Reg);
  if (!MI)
    return nullptr;
  // Check if MI can be predicated and folded into the CCMOV.
  if (getPredicatedOpcode(MI->getOpcode()) == RISCV::INSTRUCTION_LIST_END)
    return nullptr;
  // Don't predicate li idiom.
  if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&
      MI->getOperand(1).getReg() == RISCV::X0)
    return nullptr;
  // Check if MI has any other defs or physreg uses.
  for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
    // Reject frame index operands, PEI can't handle the predicated pseudos.
    if (MO.isFI() || MO.isCPI() || MO.isJTI())
      return nullptr;
    if (!MO.isReg())
      continue;
    // MI can't have any tied operands, that would conflict with predication.
    if (MO.isTied())
      return nullptr;
    if (MO.isDef())
      return nullptr;
    // Allow constant physregs.
    if (MO.getReg().isPhysical() && !MRI.isConstantPhysReg(MO.getReg()))
      return nullptr;
  }
  bool DontMoveAcrossStores = true;
  if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores))
    return nullptr;
  return MI;
}

bool RISCVInstrInfo::analyzeSelect(const MachineInstr &MI,
                                   SmallVectorImpl<MachineOperand> &Cond,
                                   unsigned &TrueOp, unsigned &FalseOp,
                                   bool &Optimizable) const {
  assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&
         "Unknown select instruction");
  // CCMOV operands:
  // 0: Def.
  // 1: LHS of compare.
  // 2: RHS of compare.
  // 3: Condition code.
  // 4: False use.
  // 5: True use.
  TrueOp = 5;
  FalseOp = 4;
  Cond.push_back(MI.getOperand(1));
  Cond.push_back(MI.getOperand(2));
  Cond.push_back(MI.getOperand(3));
  // We can only fold when we support short forward branch opt.
  Optimizable = STI.hasShortForwardBranchOpt();
  return false;
}

MachineInstr *
RISCVInstrInfo::optimizeSelect(MachineInstr &MI,
                               SmallPtrSetImpl<MachineInstr *> &SeenMIs,
                               bool PreferFalse) const {
  assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&
         "Unknown select instruction");
  if (!STI.hasShortForwardBranchOpt())
    return nullptr;

  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  MachineInstr *DefMI =
      canFoldAsPredicatedOp(MI.getOperand(5).getReg(), MRI, this);
  bool Invert = !DefMI;
  if (!DefMI)
    DefMI = canFoldAsPredicatedOp(MI.getOperand(4).getReg(), MRI, this);
  if (!DefMI)
    return nullptr;

  // Find new register class to use.
  MachineOperand FalseReg = MI.getOperand(Invert ? 5 : 4);
  Register DestReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
  if (!MRI.constrainRegClass(DestReg, PreviousClass))
    return nullptr;

  unsigned PredOpc = getPredicatedOpcode(DefMI->getOpcode());
  assert(PredOpc != RISCV::INSTRUCTION_LIST_END && "Unexpected opcode!");

  // Create a new predicated version of DefMI.
  MachineInstrBuilder NewMI =
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(PredOpc), DestReg);

  // Copy the condition portion.
  NewMI.add(MI.getOperand(1));
  NewMI.add(MI.getOperand(2));

  // Add condition code, inverting if necessary.
  auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
  if (Invert)
    CC = RISCVCC::getOppositeBranchCondition(CC);
  NewMI.addImm(CC);

  // Copy the false register.
  NewMI.add(FalseReg);

  // Copy all the DefMI operands.
  const MCInstrDesc &DefDesc = DefMI->getDesc();
  for (unsigned i = 1, e = DefDesc.getNumOperands(); i != e; ++i)
    NewMI.add(DefMI->getOperand(i));

  // Update SeenMIs set: register newly created MI and erase removed DefMI.
  SeenMIs.insert(NewMI);
  SeenMIs.erase(DefMI);

  // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
  // DefMI would be invalid when transferred inside the loop. Checking for a
  // loop is expensive, but at least remove kill flags if they are in different
  // BBs.
  if (DefMI->getParent() != MI.getParent())
    NewMI->clearKillInfo();

  // The caller will erase MI, but not DefMI.
  DefMI->eraseFromParent();
  return NewMI;
}

unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  if (MI.isMetaInstruction())
    return 0;

  unsigned Opcode = MI.getOpcode();

  if (Opcode == TargetOpcode::INLINEASM ||
      Opcode == TargetOpcode::INLINEASM_BR) {
    const MachineFunction &MF = *MI.getParent()->getParent();
    const auto &TM = static_cast<const RISCVTargetMachine &>(MF.getTarget());
    return getInlineAsmLength(MI.getOperand(0).getSymbolName(),
                              *TM.getMCAsmInfo());
  }

  if (!MI.memoperands_empty()) {
    MachineMemOperand *MMO = *(MI.memoperands_begin());
    const MachineFunction &MF = *MI.getParent()->getParent();
    const auto &ST = MF.getSubtarget<RISCVSubtarget>();
    if (ST.hasStdExtZihintntl() && MMO->isNonTemporal()) {
      if (ST.hasStdExtCOrZca() && ST.enableRVCHintInstrs()) {
        if (isCompressibleInst(MI, STI))
          return 4; // c.ntl.all + c.load/c.store
        return 6;   // c.ntl.all + load/store
      }
      return 8; // ntl.all + load/store
    }
  }

  if (Opcode == TargetOpcode::BUNDLE)
    return getInstBundleLength(MI);

  if (MI.getParent() && MI.getParent()->getParent()) {
    if (isCompressibleInst(MI, STI))
      return 2;
  }

  switch (Opcode) {
  case TargetOpcode::STACKMAP:
    // The upper bound for a stackmap intrinsic is the full length of its shadow
    return StackMapOpers(&MI).getNumPatchBytes();
  case TargetOpcode::PATCHPOINT:
    // The size of the patchpoint intrinsic is the number of bytes requested
    return PatchPointOpers(&MI).getNumPatchBytes();
  case TargetOpcode::STATEPOINT:
    // The size of the statepoint intrinsic is the number of bytes requested
    return StatepointOpers(&MI).getNumPatchBytes();
  default:
    return get(Opcode).getSize();
  }
}

unsigned RISCVInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
  unsigned Size = 0;
  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
  while (++I != E && I->isInsideBundle()) {
    assert(!I->isBundle() && "No nested bundle!");
    Size += getInstSizeInBytes(*I);
  }
  return Size;
}

bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
  const unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  default:
    break;
  case RISCV::FSGNJ_D:
  case RISCV::FSGNJ_S:
  case RISCV::FSGNJ_H:
  case RISCV::FSGNJ_D_INX:
  case RISCV::FSGNJ_D_IN32X:
  case RISCV::FSGNJ_S_INX:
  case RISCV::FSGNJ_H_INX:
    // The canonical floating-point move is fsgnj rd, rs, rs.
    return MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
           MI.getOperand(1).getReg() == MI.getOperand(2).getReg();
  case RISCV::ADDI:
  case RISCV::ORI:
  case RISCV::XORI:
    return (MI.getOperand(1).isReg() &&
            MI.getOperand(1).getReg() == RISCV::X0) ||
           (MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0);
  }
  return MI.isAsCheapAsAMove();
}

std::optional<DestSourcePair>
RISCVInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
  if (MI.isMoveReg())
    return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
  switch (MI.getOpcode()) {
  default:
    break;
  case RISCV::ADDI:
    // Operand 1 can be a frameindex but callers expect registers
    if (MI.getOperand(1).isReg() && MI.getOperand(2).isImm() &&
        MI.getOperand(2).getImm() == 0)
      return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
    break;
  case RISCV::FSGNJ_D:
  case RISCV::FSGNJ_S:
  case RISCV::FSGNJ_H:
  case RISCV::FSGNJ_D_INX:
  case RISCV::FSGNJ_D_IN32X:
  case RISCV::FSGNJ_S_INX:
  case RISCV::FSGNJ_H_INX:
    // The canonical floating-point move is fsgnj rd, rs, rs.
    if (MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
        MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
      return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
    break;
  }
  return std::nullopt;
}

MachineTraceStrategy RISCVInstrInfo::getMachineCombinerTraceStrategy() const {
  if (ForceMachineCombinerStrategy.getNumOccurrences() == 0) {
    // The option is unused. Choose the Local strategy only for in-order cores.
    // When the scheduling model is unspecified, use the MinInstrCount strategy
    // as the more generic one.
    const auto &SchedModel = STI.getSchedModel();
    return (!SchedModel.hasInstrSchedModel() || SchedModel.isOutOfOrder())
               ? MachineTraceStrategy::TS_MinInstrCount
               : MachineTraceStrategy::TS_Local;
  }
  // The strategy was forced by the option.
  return ForceMachineCombinerStrategy;
}

void RISCVInstrInfo::finalizeInsInstrs(
    MachineInstr &Root, MachineCombinerPattern &P,
    SmallVectorImpl<MachineInstr *> &InsInstrs) const {
  int16_t FrmOpIdx =
      RISCV::getNamedOperandIdx(Root.getOpcode(), RISCV::OpName::frm);
  if (FrmOpIdx < 0) {
    assert(all_of(InsInstrs,
                  [](MachineInstr *MI) {
                    return RISCV::getNamedOperandIdx(MI->getOpcode(),
                                                     RISCV::OpName::frm) < 0;
                  }) &&
           "New instructions require FRM whereas the old one does not have it");
    return;
  }

  const MachineOperand &FRM = Root.getOperand(FrmOpIdx);
  MachineFunction &MF = *Root.getMF();

  for (auto *NewMI : InsInstrs) {
    assert(static_cast<unsigned>(RISCV::getNamedOperandIdx(
               NewMI->getOpcode(), RISCV::OpName::frm)) ==
               NewMI->getNumOperands() &&
           "Instruction has unexpected number of operands");
    MachineInstrBuilder MIB(MF, NewMI);
    MIB.add(FRM);
    if (FRM.getImm() == RISCVFPRndMode::DYN)
      MIB.addUse(RISCV::FRM, RegState::Implicit);
  }
}

static bool isFADD(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case RISCV::FADD_H:
  case RISCV::FADD_S:
  case RISCV::FADD_D:
    return true;
  }
}

static bool isFSUB(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case RISCV::FSUB_H:
  case RISCV::FSUB_S:
  case RISCV::FSUB_D:
    return true;
  }
}

static bool isFMUL(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case RISCV::FMUL_H:
  case RISCV::FMUL_S:
  case RISCV::FMUL_D:
    return true;
  }
}

bool RISCVInstrInfo::hasReassociableSibling(const MachineInstr &Inst,
                                            bool &Commuted) const {
  if (!TargetInstrInfo::hasReassociableSibling(Inst, Commuted))
    return false;

  const MachineRegisterInfo &MRI = Inst.getMF()->getRegInfo();
  unsigned OperandIdx = Commuted ? 2 : 1;
  const MachineInstr &Sibling =
      *MRI.getVRegDef(Inst.getOperand(OperandIdx).getReg());

  int16_t InstFrmOpIdx =
      RISCV::getNamedOperandIdx(Inst.getOpcode(), RISCV::OpName::frm);
  int16_t SiblingFrmOpIdx =
      RISCV::getNamedOperandIdx(Sibling.getOpcode(), RISCV::OpName::frm);

  return (InstFrmOpIdx < 0 && SiblingFrmOpIdx < 0) ||
         RISCV::hasEqualFRM(Inst, Sibling);
}

bool RISCVInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
                                                 bool Invert) const {
  unsigned Opc = Inst.getOpcode();
  if (Invert) {
    auto InverseOpcode = getInverseOpcode(Opc);
    if (!InverseOpcode)
      return false;
    Opc = *InverseOpcode;
  }

  if (isFADD(Opc) || isFMUL(Opc))
    return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
           Inst.getFlag(MachineInstr::MIFlag::FmNsz);

  switch (Opc) {
  default:
    return false;
  case RISCV::ADD:
  case RISCV::ADDW:
  case RISCV::AND:
  case RISCV::OR:
  case RISCV::XOR:
  // From RISC-V ISA spec, if both the high and low bits of the same product
  // are required, then the recommended code sequence is:
  //
  // MULH[[S]U] rdh, rs1, rs2
  // MUL        rdl, rs1, rs2
  // (source register specifiers must be in same order and rdh cannot be the
  //  same as rs1 or rs2)
  //
  // Microarchitectures can then fuse these into a single multiply operation
  // instead of performing two separate multiplies.
1728 // MachineCombiner may reassociate MUL operands and lose the fusion 1729 // opportunity. 1730 case RISCV::MUL: 1731 case RISCV::MULW: 1732 case RISCV::MIN: 1733 case RISCV::MINU: 1734 case RISCV::MAX: 1735 case RISCV::MAXU: 1736 case RISCV::FMIN_H: 1737 case RISCV::FMIN_S: 1738 case RISCV::FMIN_D: 1739 case RISCV::FMAX_H: 1740 case RISCV::FMAX_S: 1741 case RISCV::FMAX_D: 1742 return true; 1743 } 1744 1745 return false; 1746 } 1747 1748 std::optional<unsigned> 1749 RISCVInstrInfo::getInverseOpcode(unsigned Opcode) const { 1750 switch (Opcode) { 1751 default: 1752 return std::nullopt; 1753 case RISCV::FADD_H: 1754 return RISCV::FSUB_H; 1755 case RISCV::FADD_S: 1756 return RISCV::FSUB_S; 1757 case RISCV::FADD_D: 1758 return RISCV::FSUB_D; 1759 case RISCV::FSUB_H: 1760 return RISCV::FADD_H; 1761 case RISCV::FSUB_S: 1762 return RISCV::FADD_S; 1763 case RISCV::FSUB_D: 1764 return RISCV::FADD_D; 1765 case RISCV::ADD: 1766 return RISCV::SUB; 1767 case RISCV::SUB: 1768 return RISCV::ADD; 1769 case RISCV::ADDW: 1770 return RISCV::SUBW; 1771 case RISCV::SUBW: 1772 return RISCV::ADDW; 1773 } 1774 } 1775 1776 static bool canCombineFPFusedMultiply(const MachineInstr &Root, 1777 const MachineOperand &MO, 1778 bool DoRegPressureReduce) { 1779 if (!MO.isReg() || !MO.getReg().isVirtual()) 1780 return false; 1781 const MachineRegisterInfo &MRI = Root.getMF()->getRegInfo(); 1782 MachineInstr *MI = MRI.getVRegDef(MO.getReg()); 1783 if (!MI || !isFMUL(MI->getOpcode())) 1784 return false; 1785 1786 if (!Root.getFlag(MachineInstr::MIFlag::FmContract) || 1787 !MI->getFlag(MachineInstr::MIFlag::FmContract)) 1788 return false; 1789 1790 // Try combining even if fmul has more than one use as it eliminates 1791 // dependency between fadd(fsub) and fmul. However, it can extend liveranges 1792 // for fmul operands, so reject the transformation in register pressure 1793 // reduction mode. 1794 if (DoRegPressureReduce && !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg())) 1795 return false; 1796 1797 // Do not combine instructions from different basic blocks. 1798 if (Root.getParent() != MI->getParent()) 1799 return false; 1800 return RISCV::hasEqualFRM(Root, *MI); 1801 } 1802 1803 static bool 1804 getFPFusedMultiplyPatterns(MachineInstr &Root, 1805 SmallVectorImpl<MachineCombinerPattern> &Patterns, 1806 bool DoRegPressureReduce) { 1807 unsigned Opc = Root.getOpcode(); 1808 bool IsFAdd = isFADD(Opc); 1809 if (!IsFAdd && !isFSUB(Opc)) 1810 return false; 1811 bool Added = false; 1812 if (canCombineFPFusedMultiply(Root, Root.getOperand(1), 1813 DoRegPressureReduce)) { 1814 Patterns.push_back(IsFAdd ? MachineCombinerPattern::FMADD_AX 1815 : MachineCombinerPattern::FMSUB); 1816 Added = true; 1817 } 1818 if (canCombineFPFusedMultiply(Root, Root.getOperand(2), 1819 DoRegPressureReduce)) { 1820 Patterns.push_back(IsFAdd ? 
MachineCombinerPattern::FMADD_XA 1821 : MachineCombinerPattern::FNMSUB); 1822 Added = true; 1823 } 1824 return Added; 1825 } 1826 1827 static bool getFPPatterns(MachineInstr &Root, 1828 SmallVectorImpl<MachineCombinerPattern> &Patterns, 1829 bool DoRegPressureReduce) { 1830 return getFPFusedMultiplyPatterns(Root, Patterns, DoRegPressureReduce); 1831 } 1832 1833 bool RISCVInstrInfo::getMachineCombinerPatterns( 1834 MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns, 1835 bool DoRegPressureReduce) const { 1836 1837 if (getFPPatterns(Root, Patterns, DoRegPressureReduce)) 1838 return true; 1839 1840 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns, 1841 DoRegPressureReduce); 1842 } 1843 1844 static unsigned getFPFusedMultiplyOpcode(unsigned RootOpc, 1845 MachineCombinerPattern Pattern) { 1846 switch (RootOpc) { 1847 default: 1848 llvm_unreachable("Unexpected opcode"); 1849 case RISCV::FADD_H: 1850 return RISCV::FMADD_H; 1851 case RISCV::FADD_S: 1852 return RISCV::FMADD_S; 1853 case RISCV::FADD_D: 1854 return RISCV::FMADD_D; 1855 case RISCV::FSUB_H: 1856 return Pattern == MachineCombinerPattern::FMSUB ? RISCV::FMSUB_H 1857 : RISCV::FNMSUB_H; 1858 case RISCV::FSUB_S: 1859 return Pattern == MachineCombinerPattern::FMSUB ? RISCV::FMSUB_S 1860 : RISCV::FNMSUB_S; 1861 case RISCV::FSUB_D: 1862 return Pattern == MachineCombinerPattern::FMSUB ? RISCV::FMSUB_D 1863 : RISCV::FNMSUB_D; 1864 } 1865 } 1866 1867 static unsigned getAddendOperandIdx(MachineCombinerPattern Pattern) { 1868 switch (Pattern) { 1869 default: 1870 llvm_unreachable("Unexpected pattern"); 1871 case MachineCombinerPattern::FMADD_AX: 1872 case MachineCombinerPattern::FMSUB: 1873 return 2; 1874 case MachineCombinerPattern::FMADD_XA: 1875 case MachineCombinerPattern::FNMSUB: 1876 return 1; 1877 } 1878 } 1879 1880 static void combineFPFusedMultiply(MachineInstr &Root, MachineInstr &Prev, 1881 MachineCombinerPattern Pattern, 1882 SmallVectorImpl<MachineInstr *> &InsInstrs, 1883 SmallVectorImpl<MachineInstr *> &DelInstrs) { 1884 MachineFunction *MF = Root.getMF(); 1885 MachineRegisterInfo &MRI = MF->getRegInfo(); 1886 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); 1887 1888 MachineOperand &Mul1 = Prev.getOperand(1); 1889 MachineOperand &Mul2 = Prev.getOperand(2); 1890 MachineOperand &Dst = Root.getOperand(0); 1891 MachineOperand &Addend = Root.getOperand(getAddendOperandIdx(Pattern)); 1892 1893 Register DstReg = Dst.getReg(); 1894 unsigned FusedOpc = getFPFusedMultiplyOpcode(Root.getOpcode(), Pattern); 1895 uint32_t IntersectedFlags = Root.getFlags() & Prev.getFlags(); 1896 DebugLoc MergedLoc = 1897 DILocation::getMergedLocation(Root.getDebugLoc(), Prev.getDebugLoc()); 1898 1899 bool Mul1IsKill = Mul1.isKill(); 1900 bool Mul2IsKill = Mul2.isKill(); 1901 bool AddendIsKill = Addend.isKill(); 1902 1903 // We need to clear kill flags since we may be extending the live range past 1904 // a kill. If the mul had kill flags, we can preserve those since we know 1905 // where the previous range stopped. 
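  // Illustrative sketch of the rewrite built below (register names are
  // hypothetical):
  //   %mul = FMUL_S %a, %b
  //   %dst = FADD_S %mul, %c        ; Pattern == FMADD_AX
  // becomes
  //   %dst = FMADD_S %a, %b, %c
  // The FRM operand is appended later by finalizeInsInstrs, and the FMUL is
  // only scheduled for deletion if %mul has no other non-debug uses.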
1906 MRI.clearKillFlags(Mul1.getReg()); 1907 MRI.clearKillFlags(Mul2.getReg()); 1908 1909 MachineInstrBuilder MIB = 1910 BuildMI(*MF, MergedLoc, TII->get(FusedOpc), DstReg) 1911 .addReg(Mul1.getReg(), getKillRegState(Mul1IsKill)) 1912 .addReg(Mul2.getReg(), getKillRegState(Mul2IsKill)) 1913 .addReg(Addend.getReg(), getKillRegState(AddendIsKill)) 1914 .setMIFlags(IntersectedFlags); 1915 1916 InsInstrs.push_back(MIB); 1917 if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg())) 1918 DelInstrs.push_back(&Prev); 1919 DelInstrs.push_back(&Root); 1920 } 1921 1922 void RISCVInstrInfo::genAlternativeCodeSequence( 1923 MachineInstr &Root, MachineCombinerPattern Pattern, 1924 SmallVectorImpl<MachineInstr *> &InsInstrs, 1925 SmallVectorImpl<MachineInstr *> &DelInstrs, 1926 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const { 1927 MachineRegisterInfo &MRI = Root.getMF()->getRegInfo(); 1928 switch (Pattern) { 1929 default: 1930 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs, 1931 DelInstrs, InstrIdxForVirtReg); 1932 return; 1933 case MachineCombinerPattern::FMADD_AX: 1934 case MachineCombinerPattern::FMSUB: { 1935 MachineInstr &Prev = *MRI.getVRegDef(Root.getOperand(1).getReg()); 1936 combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs); 1937 return; 1938 } 1939 case MachineCombinerPattern::FMADD_XA: 1940 case MachineCombinerPattern::FNMSUB: { 1941 MachineInstr &Prev = *MRI.getVRegDef(Root.getOperand(2).getReg()); 1942 combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs); 1943 return; 1944 } 1945 } 1946 } 1947 1948 bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI, 1949 StringRef &ErrInfo) const { 1950 MCInstrDesc const &Desc = MI.getDesc(); 1951 1952 for (const auto &[Index, Operand] : enumerate(Desc.operands())) { 1953 unsigned OpType = Operand.OperandType; 1954 if (OpType >= RISCVOp::OPERAND_FIRST_RISCV_IMM && 1955 OpType <= RISCVOp::OPERAND_LAST_RISCV_IMM) { 1956 const MachineOperand &MO = MI.getOperand(Index); 1957 if (MO.isImm()) { 1958 int64_t Imm = MO.getImm(); 1959 bool Ok; 1960 switch (OpType) { 1961 default: 1962 llvm_unreachable("Unexpected operand type"); 1963 1964 // clang-format off 1965 #define CASE_OPERAND_UIMM(NUM) \ 1966 case RISCVOp::OPERAND_UIMM##NUM: \ 1967 Ok = isUInt<NUM>(Imm); \ 1968 break; 1969 CASE_OPERAND_UIMM(1) 1970 CASE_OPERAND_UIMM(2) 1971 CASE_OPERAND_UIMM(3) 1972 CASE_OPERAND_UIMM(4) 1973 CASE_OPERAND_UIMM(5) 1974 CASE_OPERAND_UIMM(6) 1975 CASE_OPERAND_UIMM(7) 1976 CASE_OPERAND_UIMM(8) 1977 CASE_OPERAND_UIMM(12) 1978 CASE_OPERAND_UIMM(20) 1979 // clang-format on 1980 case RISCVOp::OPERAND_UIMM2_LSB0: 1981 Ok = isShiftedUInt<1, 1>(Imm); 1982 break; 1983 case RISCVOp::OPERAND_UIMM7_LSB00: 1984 Ok = isShiftedUInt<5, 2>(Imm); 1985 break; 1986 case RISCVOp::OPERAND_UIMM8_LSB00: 1987 Ok = isShiftedUInt<6, 2>(Imm); 1988 break; 1989 case RISCVOp::OPERAND_UIMM8_LSB000: 1990 Ok = isShiftedUInt<5, 3>(Imm); 1991 break; 1992 case RISCVOp::OPERAND_UIMM8_GE32: 1993 Ok = isUInt<8>(Imm) && Imm >= 32; 1994 break; 1995 case RISCVOp::OPERAND_UIMM9_LSB000: 1996 Ok = isShiftedUInt<6, 3>(Imm); 1997 break; 1998 case RISCVOp::OPERAND_SIMM10_LSB0000_NONZERO: 1999 Ok = isShiftedInt<6, 4>(Imm) && (Imm != 0); 2000 break; 2001 case RISCVOp::OPERAND_UIMM10_LSB00_NONZERO: 2002 Ok = isShiftedUInt<8, 2>(Imm) && (Imm != 0); 2003 break; 2004 case RISCVOp::OPERAND_ZERO: 2005 Ok = Imm == 0; 2006 break; 2007 case RISCVOp::OPERAND_SIMM5: 2008 Ok = isInt<5>(Imm); 2009 break; 2010 case RISCVOp::OPERAND_SIMM5_PLUS1: 2011 Ok = (isInt<5>(Imm) && Imm 
!= -16) || Imm == 16; 2012 break; 2013 case RISCVOp::OPERAND_SIMM6: 2014 Ok = isInt<6>(Imm); 2015 break; 2016 case RISCVOp::OPERAND_SIMM6_NONZERO: 2017 Ok = Imm != 0 && isInt<6>(Imm); 2018 break; 2019 case RISCVOp::OPERAND_VTYPEI10: 2020 Ok = isUInt<10>(Imm); 2021 break; 2022 case RISCVOp::OPERAND_VTYPEI11: 2023 Ok = isUInt<11>(Imm); 2024 break; 2025 case RISCVOp::OPERAND_SIMM12: 2026 Ok = isInt<12>(Imm); 2027 break; 2028 case RISCVOp::OPERAND_SIMM12_LSB00000: 2029 Ok = isShiftedInt<7, 5>(Imm); 2030 break; 2031 case RISCVOp::OPERAND_UIMMLOG2XLEN: 2032 Ok = STI.is64Bit() ? isUInt<6>(Imm) : isUInt<5>(Imm); 2033 break; 2034 case RISCVOp::OPERAND_UIMMLOG2XLEN_NONZERO: 2035 Ok = STI.is64Bit() ? isUInt<6>(Imm) : isUInt<5>(Imm); 2036 Ok = Ok && Imm != 0; 2037 break; 2038 case RISCVOp::OPERAND_CLUI_IMM: 2039 Ok = (isUInt<5>(Imm) && Imm != 0) || 2040 (Imm >= 0xfffe0 && Imm <= 0xfffff); 2041 break; 2042 case RISCVOp::OPERAND_RVKRNUM: 2043 Ok = Imm >= 0 && Imm <= 10; 2044 break; 2045 case RISCVOp::OPERAND_RVKRNUM_0_7: 2046 Ok = Imm >= 0 && Imm <= 7; 2047 break; 2048 case RISCVOp::OPERAND_RVKRNUM_1_10: 2049 Ok = Imm >= 1 && Imm <= 10; 2050 break; 2051 case RISCVOp::OPERAND_RVKRNUM_2_14: 2052 Ok = Imm >= 2 && Imm <= 14; 2053 break; 2054 } 2055 if (!Ok) { 2056 ErrInfo = "Invalid immediate"; 2057 return false; 2058 } 2059 } 2060 } 2061 } 2062 2063 const uint64_t TSFlags = Desc.TSFlags; 2064 if (RISCVII::hasVLOp(TSFlags)) { 2065 const MachineOperand &Op = MI.getOperand(RISCVII::getVLOpNum(Desc)); 2066 if (!Op.isImm() && !Op.isReg()) { 2067 ErrInfo = "Invalid operand type for VL operand"; 2068 return false; 2069 } 2070 if (Op.isReg() && Op.getReg() != RISCV::NoRegister) { 2071 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 2072 auto *RC = MRI.getRegClass(Op.getReg()); 2073 if (!RISCV::GPRRegClass.hasSubClassEq(RC)) { 2074 ErrInfo = "Invalid register class for VL operand"; 2075 return false; 2076 } 2077 } 2078 if (!RISCVII::hasSEWOp(TSFlags)) { 2079 ErrInfo = "VL operand w/o SEW operand?"; 2080 return false; 2081 } 2082 } 2083 if (RISCVII::hasSEWOp(TSFlags)) { 2084 unsigned OpIdx = RISCVII::getSEWOpNum(Desc); 2085 if (!MI.getOperand(OpIdx).isImm()) { 2086 ErrInfo = "SEW value expected to be an immediate"; 2087 return false; 2088 } 2089 uint64_t Log2SEW = MI.getOperand(OpIdx).getImm(); 2090 if (Log2SEW > 31) { 2091 ErrInfo = "Unexpected SEW value"; 2092 return false; 2093 } 2094 unsigned SEW = Log2SEW ? 1 << Log2SEW : 8; 2095 if (!RISCVVType::isValidSEW(SEW)) { 2096 ErrInfo = "Unexpected SEW value"; 2097 return false; 2098 } 2099 } 2100 if (RISCVII::hasVecPolicyOp(TSFlags)) { 2101 unsigned OpIdx = RISCVII::getVecPolicyOpNum(Desc); 2102 if (!MI.getOperand(OpIdx).isImm()) { 2103 ErrInfo = "Policy operand expected to be an immediate"; 2104 return false; 2105 } 2106 uint64_t Policy = MI.getOperand(OpIdx).getImm(); 2107 if (Policy > (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC)) { 2108 ErrInfo = "Invalid Policy Value"; 2109 return false; 2110 } 2111 if (!RISCVII::hasVLOp(TSFlags)) { 2112 ErrInfo = "policy operand w/o VL operand?"; 2113 return false; 2114 } 2115 2116 // VecPolicy operands can only exist on instructions with passthru/merge 2117 // arguments. Note that not all arguments with passthru have vec policy 2118 // operands- some instructions have implicit policies. 
2119 unsigned UseOpIdx; 2120 if (!MI.isRegTiedToUseOperand(0, &UseOpIdx)) { 2121 ErrInfo = "policy operand w/o tied operand?"; 2122 return false; 2123 } 2124 } 2125 2126 return true; 2127 } 2128 2129 bool RISCVInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, 2130 const MachineInstr &AddrI, 2131 ExtAddrMode &AM) const { 2132 switch (MemI.getOpcode()) { 2133 default: 2134 return false; 2135 case RISCV::LB: 2136 case RISCV::LBU: 2137 case RISCV::LH: 2138 case RISCV::LHU: 2139 case RISCV::LW: 2140 case RISCV::LWU: 2141 case RISCV::LD: 2142 case RISCV::FLH: 2143 case RISCV::FLW: 2144 case RISCV::FLD: 2145 case RISCV::SB: 2146 case RISCV::SH: 2147 case RISCV::SW: 2148 case RISCV::SD: 2149 case RISCV::FSH: 2150 case RISCV::FSW: 2151 case RISCV::FSD: 2152 break; 2153 } 2154 2155 if (MemI.getOperand(0).getReg() == Reg) 2156 return false; 2157 2158 if (AddrI.getOpcode() != RISCV::ADDI || !AddrI.getOperand(1).isReg() || 2159 !AddrI.getOperand(2).isImm()) 2160 return false; 2161 2162 int64_t OldOffset = MemI.getOperand(2).getImm(); 2163 int64_t Disp = AddrI.getOperand(2).getImm(); 2164 int64_t NewOffset = OldOffset + Disp; 2165 if (!STI.is64Bit()) 2166 NewOffset = SignExtend64<32>(NewOffset); 2167 2168 if (!isInt<12>(NewOffset)) 2169 return false; 2170 2171 AM.BaseReg = AddrI.getOperand(1).getReg(); 2172 AM.ScaledReg = 0; 2173 AM.Scale = 0; 2174 AM.Displacement = NewOffset; 2175 AM.Form = ExtAddrMode::Formula::Basic; 2176 return true; 2177 } 2178 2179 MachineInstr *RISCVInstrInfo::emitLdStWithAddr(MachineInstr &MemI, 2180 const ExtAddrMode &AM) const { 2181 2182 const DebugLoc &DL = MemI.getDebugLoc(); 2183 MachineBasicBlock &MBB = *MemI.getParent(); 2184 2185 assert(AM.ScaledReg == 0 && AM.Scale == 0 && 2186 "Addressing mode not supported for folding"); 2187 2188 return BuildMI(MBB, MemI, DL, get(MemI.getOpcode())) 2189 .addReg(MemI.getOperand(0).getReg(), 2190 MemI.mayLoad() ? RegState::Define : 0) 2191 .addReg(AM.BaseReg) 2192 .addImm(AM.Displacement) 2193 .setMemRefs(MemI.memoperands()) 2194 .setMIFlags(MemI.getFlags()); 2195 } 2196 2197 bool RISCVInstrInfo::getMemOperandsWithOffsetWidth( 2198 const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps, 2199 int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, 2200 const TargetRegisterInfo *TRI) const { 2201 if (!LdSt.mayLoadOrStore()) 2202 return false; 2203 2204 // Conservatively, only handle scalar loads/stores for now. 2205 switch (LdSt.getOpcode()) { 2206 case RISCV::LB: 2207 case RISCV::LBU: 2208 case RISCV::SB: 2209 case RISCV::LH: 2210 case RISCV::LHU: 2211 case RISCV::FLH: 2212 case RISCV::SH: 2213 case RISCV::FSH: 2214 case RISCV::LW: 2215 case RISCV::LWU: 2216 case RISCV::FLW: 2217 case RISCV::SW: 2218 case RISCV::FSW: 2219 case RISCV::LD: 2220 case RISCV::FLD: 2221 case RISCV::SD: 2222 case RISCV::FSD: 2223 break; 2224 default: 2225 return false; 2226 } 2227 const MachineOperand *BaseOp; 2228 OffsetIsScalable = false; 2229 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI)) 2230 return false; 2231 BaseOps.push_back(BaseOp); 2232 return true; 2233 } 2234 2235 // TODO: This was copied from SIInstrInfo. Could it be lifted to a common 2236 // helper? 
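// Heuristically decide whether two memory operations share a base pointer:
// either their first base operands are identical, or both have a single
// memory operand whose underlying IR object is the same.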
2237 static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, 2238 ArrayRef<const MachineOperand *> BaseOps1, 2239 const MachineInstr &MI2, 2240 ArrayRef<const MachineOperand *> BaseOps2) { 2241 // Only examine the first "base" operand of each instruction, on the 2242 // assumption that it represents the real base address of the memory access. 2243 // Other operands are typically offsets or indices from this base address. 2244 if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front())) 2245 return true; 2246 2247 if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand()) 2248 return false; 2249 2250 auto MO1 = *MI1.memoperands_begin(); 2251 auto MO2 = *MI2.memoperands_begin(); 2252 if (MO1->getAddrSpace() != MO2->getAddrSpace()) 2253 return false; 2254 2255 auto Base1 = MO1->getValue(); 2256 auto Base2 = MO2->getValue(); 2257 if (!Base1 || !Base2) 2258 return false; 2259 Base1 = getUnderlyingObject(Base1); 2260 Base2 = getUnderlyingObject(Base2); 2261 2262 if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2)) 2263 return false; 2264 2265 return Base1 == Base2; 2266 } 2267 2268 bool RISCVInstrInfo::shouldClusterMemOps( 2269 ArrayRef<const MachineOperand *> BaseOps1, int64_t Offset1, 2270 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2, 2271 int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, 2272 unsigned NumBytes) const { 2273 // If the mem ops (to be clustered) do not have the same base ptr, then they 2274 // should not be clustered 2275 if (!BaseOps1.empty() && !BaseOps2.empty()) { 2276 const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent(); 2277 const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent(); 2278 if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOps1, SecondLdSt, BaseOps2)) 2279 return false; 2280 } else if (!BaseOps1.empty() || !BaseOps2.empty()) { 2281 // If only one base op is empty, they do not have the same base ptr 2282 return false; 2283 } 2284 2285 unsigned CacheLineSize = 2286 BaseOps1.front()->getParent()->getMF()->getSubtarget().getCacheLineSize(); 2287 // Assume a cache line size of 64 bytes if no size is set in RISCVSubtarget. 2288 CacheLineSize = CacheLineSize ? CacheLineSize : 64; 2289 // Cluster if the memory operations are on the same or a neighbouring cache 2290 // line, but limit the maximum ClusterSize to avoid creating too much 2291 // additional register pressure. 2292 return ClusterSize <= 4 && std::abs(Offset1 - Offset2) < CacheLineSize; 2293 } 2294 2295 // Set BaseReg (the base register operand), Offset (the byte offset being 2296 // accessed) and the access Width of the passed instruction that reads/writes 2297 // memory. Returns false if the instruction does not read/write memory or the 2298 // BaseReg/Offset/Width can't be determined. Is not guaranteed to always 2299 // recognise base operands and offsets in all cases. 2300 // TODO: Add an IsScalable bool ref argument (like the equivalent AArch64 2301 // function) and set it as appropriate. 2302 bool RISCVInstrInfo::getMemOperandWithOffsetWidth( 2303 const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset, 2304 unsigned &Width, const TargetRegisterInfo *TRI) const { 2305 if (!LdSt.mayLoadOrStore()) 2306 return false; 2307 2308 // Here we assume the standard RISC-V ISA, which uses a base+offset 2309 // addressing mode. You'll need to relax these conditions to support custom 2310 // load/store instructions. 
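  // The expected shape is the usual three-operand scalar access, e.g. a load
  // such as "LW dst, base, imm" where operand 0 is the value register,
  // operand 1 the base register (or frame index) and operand 2 the immediate
  // offset; anything else is rejected below.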
2311 if (LdSt.getNumExplicitOperands() != 3) 2312 return false; 2313 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) || 2314 !LdSt.getOperand(2).isImm()) 2315 return false; 2316 2317 if (!LdSt.hasOneMemOperand()) 2318 return false; 2319 2320 Width = (*LdSt.memoperands_begin())->getSize(); 2321 BaseReg = &LdSt.getOperand(1); 2322 Offset = LdSt.getOperand(2).getImm(); 2323 return true; 2324 } 2325 2326 bool RISCVInstrInfo::areMemAccessesTriviallyDisjoint( 2327 const MachineInstr &MIa, const MachineInstr &MIb) const { 2328 assert(MIa.mayLoadOrStore() && "MIa must be a load or store."); 2329 assert(MIb.mayLoadOrStore() && "MIb must be a load or store."); 2330 2331 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() || 2332 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef()) 2333 return false; 2334 2335 // Retrieve the base register, offset from the base register and width. Width 2336 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4). If 2337 // base registers are identical, and the offset of a lower memory access + 2338 // the width doesn't overlap the offset of a higher memory access, 2339 // then the memory accesses are different. 2340 const TargetRegisterInfo *TRI = STI.getRegisterInfo(); 2341 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr; 2342 int64_t OffsetA = 0, OffsetB = 0; 2343 unsigned int WidthA = 0, WidthB = 0; 2344 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) && 2345 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) { 2346 if (BaseOpA->isIdenticalTo(*BaseOpB)) { 2347 int LowOffset = std::min(OffsetA, OffsetB); 2348 int HighOffset = std::max(OffsetA, OffsetB); 2349 int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB; 2350 if (LowOffset + LowWidth <= HighOffset) 2351 return true; 2352 } 2353 } 2354 return false; 2355 } 2356 2357 std::pair<unsigned, unsigned> 2358 RISCVInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { 2359 const unsigned Mask = RISCVII::MO_DIRECT_FLAG_MASK; 2360 return std::make_pair(TF & Mask, TF & ~Mask); 2361 } 2362 2363 ArrayRef<std::pair<unsigned, const char *>> 2364 RISCVInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { 2365 using namespace RISCVII; 2366 static const std::pair<unsigned, const char *> TargetFlags[] = { 2367 {MO_CALL, "riscv-call"}, 2368 {MO_PLT, "riscv-plt"}, 2369 {MO_LO, "riscv-lo"}, 2370 {MO_HI, "riscv-hi"}, 2371 {MO_PCREL_LO, "riscv-pcrel-lo"}, 2372 {MO_PCREL_HI, "riscv-pcrel-hi"}, 2373 {MO_GOT_HI, "riscv-got-hi"}, 2374 {MO_TPREL_LO, "riscv-tprel-lo"}, 2375 {MO_TPREL_HI, "riscv-tprel-hi"}, 2376 {MO_TPREL_ADD, "riscv-tprel-add"}, 2377 {MO_TLS_GOT_HI, "riscv-tls-got-hi"}, 2378 {MO_TLS_GD_HI, "riscv-tls-gd-hi"}}; 2379 return ArrayRef(TargetFlags); 2380 } 2381 bool RISCVInstrInfo::isFunctionSafeToOutlineFrom( 2382 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const { 2383 const Function &F = MF.getFunction(); 2384 2385 // Can F be deduplicated by the linker? If it can, don't outline from it. 2386 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage()) 2387 return false; 2388 2389 // Don't outline from functions with section markings; the program could 2390 // expect that all the code is in the named section. 2391 if (F.hasSection()) 2392 return false; 2393 2394 // It's safe to outline from MF. 
  return true;
}

bool RISCVInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
                                            unsigned &Flags) const {
  // More accurate safety checking is done in getOutliningCandidateInfo.
  return TargetInstrInfo::isMBBSafeToOutlineFrom(MBB, Flags);
}

// Enum values indicating how an outlined call should be constructed.
enum MachineOutlinerConstructionID {
  MachineOutlinerDefault
};

bool RISCVInstrInfo::shouldOutlineFromFunctionByDefault(
    MachineFunction &MF) const {
  return MF.getFunction().hasMinSize();
}

std::optional<outliner::OutlinedFunction>
RISCVInstrInfo::getOutliningCandidateInfo(
    std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {

  // First we need to filter out candidates where the X5 register (i.e. t0)
  // can't be used to set up the function call.
  auto CannotInsertCall = [](outliner::Candidate &C) {
    const TargetRegisterInfo *TRI = C.getMF()->getSubtarget().getRegisterInfo();
    return !C.isAvailableAcrossAndOutOfSeq(RISCV::X5, *TRI);
  };

  llvm::erase_if(RepeatedSequenceLocs, CannotInsertCall);

  // If the sequence doesn't have enough candidates left, then we're done.
  if (RepeatedSequenceLocs.size() < 2)
    return std::nullopt;

  unsigned SequenceSize = 0;

  auto I = RepeatedSequenceLocs[0].front();
  auto E = std::next(RepeatedSequenceLocs[0].back());
  for (; I != E; ++I)
    SequenceSize += getInstSizeInBytes(*I);

  // call t0, function = 8 bytes.
  unsigned CallOverhead = 8;
  for (auto &C : RepeatedSequenceLocs)
    C.setCallInfo(MachineOutlinerDefault, CallOverhead);

  // jr t0 = 4 bytes, 2 bytes if compressed instructions are enabled.
  unsigned FrameOverhead = 4;
  if (RepeatedSequenceLocs[0]
          .getMF()
          ->getSubtarget<RISCVSubtarget>()
          .hasStdExtCOrZca())
    FrameOverhead = 2;

  return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
                                    FrameOverhead, MachineOutlinerDefault);
}

outliner::InstrType
RISCVInstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MBBI,
                                     unsigned Flags) const {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock *MBB = MI.getParent();
  const TargetRegisterInfo *TRI =
      MBB->getParent()->getSubtarget().getRegisterInfo();
  const auto &F = MI.getMF()->getFunction();

  // We can manually strip out CFI instructions later.
  if (MI.isCFIInstruction())
    // If the current function has exception handling code, we can't outline &
    // strip these CFI instructions since that may break the .eh_frame section
    // needed for unwinding.
    return F.needsUnwindTableEntry() ? outliner::InstrType::Illegal
                                     : outliner::InstrType::Invisible;

  // We need support for tail calls to outlined functions before return
  // statements can be allowed.
  if (MI.isReturn())
    return outliner::InstrType::Illegal;

  // Don't allow modifying the X5 register, which we use for return addresses
  // in these outlined functions.
  if (MI.modifiesRegister(RISCV::X5, TRI) ||
      MI.getDesc().hasImplicitDefOfPhysReg(RISCV::X5))
    return outliner::InstrType::Illegal;

  // Make sure the operands don't reference something unsafe.
  for (const auto &MO : MI.operands()) {

    // pcrel-hi and pcrel-lo can't be put in separate sections; filter that out
    // where possible.
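    // A %pcrel_lo operand refers back to a matching %pcrel_hi, and the pair
    // must stay in the same section for the relocation to resolve. If the
    // outlined body could end up in a different section (function sections,
    // comdats, or an explicit section attribute), that pairing would break.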
2488 if (MO.getTargetFlags() == RISCVII::MO_PCREL_LO && 2489 (MI.getMF()->getTarget().getFunctionSections() || F.hasComdat() || 2490 F.hasSection())) 2491 return outliner::InstrType::Illegal; 2492 } 2493 2494 return outliner::InstrType::Legal; 2495 } 2496 2497 void RISCVInstrInfo::buildOutlinedFrame( 2498 MachineBasicBlock &MBB, MachineFunction &MF, 2499 const outliner::OutlinedFunction &OF) const { 2500 2501 // Strip out any CFI instructions 2502 bool Changed = true; 2503 while (Changed) { 2504 Changed = false; 2505 auto I = MBB.begin(); 2506 auto E = MBB.end(); 2507 for (; I != E; ++I) { 2508 if (I->isCFIInstruction()) { 2509 I->removeFromParent(); 2510 Changed = true; 2511 break; 2512 } 2513 } 2514 } 2515 2516 MBB.addLiveIn(RISCV::X5); 2517 2518 // Add in a return instruction to the end of the outlined frame. 2519 MBB.insert(MBB.end(), BuildMI(MF, DebugLoc(), get(RISCV::JALR)) 2520 .addReg(RISCV::X0, RegState::Define) 2521 .addReg(RISCV::X5) 2522 .addImm(0)); 2523 } 2524 2525 MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall( 2526 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, 2527 MachineFunction &MF, outliner::Candidate &C) const { 2528 2529 // Add in a call instruction to the outlined function at the given location. 2530 It = MBB.insert(It, 2531 BuildMI(MF, DebugLoc(), get(RISCV::PseudoCALLReg), RISCV::X5) 2532 .addGlobalAddress(M.getNamedValue(MF.getName()), 0, 2533 RISCVII::MO_CALL)); 2534 return It; 2535 } 2536 2537 std::optional<RegImmPair> RISCVInstrInfo::isAddImmediate(const MachineInstr &MI, 2538 Register Reg) const { 2539 // TODO: Handle cases where Reg is a super- or sub-register of the 2540 // destination register. 2541 const MachineOperand &Op0 = MI.getOperand(0); 2542 if (!Op0.isReg() || Reg != Op0.getReg()) 2543 return std::nullopt; 2544 2545 // Don't consider ADDIW as a candidate because the caller may not be aware 2546 // of its sign extension behaviour. 2547 if (MI.getOpcode() == RISCV::ADDI && MI.getOperand(1).isReg() && 2548 MI.getOperand(2).isImm()) 2549 return RegImmPair{MI.getOperand(1).getReg(), MI.getOperand(2).getImm()}; 2550 2551 return std::nullopt; 2552 } 2553 2554 // MIR printer helper function to annotate Operands with a comment. 2555 std::string RISCVInstrInfo::createMIROperandComment( 2556 const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx, 2557 const TargetRegisterInfo *TRI) const { 2558 // Print a generic comment for this operand if there is one. 2559 std::string GenericComment = 2560 TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI); 2561 if (!GenericComment.empty()) 2562 return GenericComment; 2563 2564 // If not, we must have an immediate operand. 2565 if (!Op.isImm()) 2566 return std::string(); 2567 2568 std::string Comment; 2569 raw_string_ostream OS(Comment); 2570 2571 uint64_t TSFlags = MI.getDesc().TSFlags; 2572 2573 // Print the full VType operand of vsetvli/vsetivli instructions, and the SEW 2574 // operand of vector codegen pseudos. 2575 if ((MI.getOpcode() == RISCV::VSETVLI || MI.getOpcode() == RISCV::VSETIVLI || 2576 MI.getOpcode() == RISCV::PseudoVSETVLI || 2577 MI.getOpcode() == RISCV::PseudoVSETIVLI || 2578 MI.getOpcode() == RISCV::PseudoVSETVLIX0) && 2579 OpIdx == 2) { 2580 unsigned Imm = MI.getOperand(OpIdx).getImm(); 2581 RISCVVType::printVType(Imm, OS); 2582 } else if (RISCVII::hasSEWOp(TSFlags) && 2583 OpIdx == RISCVII::getSEWOpNum(MI.getDesc())) { 2584 unsigned Log2SEW = MI.getOperand(OpIdx).getImm(); 2585 unsigned SEW = Log2SEW ? 
1 << Log2SEW : 8; 2586 assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW"); 2587 OS << "e" << SEW; 2588 } else if (RISCVII::hasVecPolicyOp(TSFlags) && 2589 OpIdx == RISCVII::getVecPolicyOpNum(MI.getDesc())) { 2590 unsigned Policy = MI.getOperand(OpIdx).getImm(); 2591 assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) && 2592 "Invalid Policy Value"); 2593 OS << (Policy & RISCVII::TAIL_AGNOSTIC ? "ta" : "tu") << ", " 2594 << (Policy & RISCVII::MASK_AGNOSTIC ? "ma" : "mu"); 2595 } 2596 2597 OS.flush(); 2598 return Comment; 2599 } 2600 2601 // clang-format off 2602 #define CASE_VFMA_OPCODE_COMMON(OP, TYPE, LMUL) \ 2603 RISCV::PseudoV##OP##_##TYPE##_##LMUL 2604 2605 #define CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE) \ 2606 CASE_VFMA_OPCODE_COMMON(OP, TYPE, M1): \ 2607 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M2): \ 2608 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M4): \ 2609 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M8) 2610 2611 #define CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE) \ 2612 CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF2): \ 2613 case CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE) 2614 2615 #define CASE_VFMA_OPCODE_LMULS_MF4(OP, TYPE) \ 2616 CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF4): \ 2617 case CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE) 2618 2619 #define CASE_VFMA_OPCODE_LMULS(OP, TYPE) \ 2620 CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF8): \ 2621 case CASE_VFMA_OPCODE_LMULS_MF4(OP, TYPE) 2622 2623 #define CASE_VFMA_SPLATS(OP) \ 2624 CASE_VFMA_OPCODE_LMULS_MF4(OP, VFPR16): \ 2625 case CASE_VFMA_OPCODE_LMULS_MF2(OP, VFPR32): \ 2626 case CASE_VFMA_OPCODE_LMULS_M1(OP, VFPR64) 2627 // clang-format on 2628 2629 bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI, 2630 unsigned &SrcOpIdx1, 2631 unsigned &SrcOpIdx2) const { 2632 const MCInstrDesc &Desc = MI.getDesc(); 2633 if (!Desc.isCommutable()) 2634 return false; 2635 2636 switch (MI.getOpcode()) { 2637 case RISCV::TH_MVEQZ: 2638 case RISCV::TH_MVNEZ: 2639 // We can't commute operands if operand 2 (i.e., rs1 in 2640 // mveqz/mvnez rd,rs1,rs2) is the zero-register (as it is 2641 // not valid as the in/out-operand 1). 2642 if (MI.getOperand(2).getReg() == RISCV::X0) 2643 return false; 2644 // Operands 1 and 2 are commutable, if we switch the opcode. 2645 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1, 2); 2646 case RISCV::TH_MULA: 2647 case RISCV::TH_MULAW: 2648 case RISCV::TH_MULAH: 2649 case RISCV::TH_MULS: 2650 case RISCV::TH_MULSW: 2651 case RISCV::TH_MULSH: 2652 // Operands 2 and 3 are commutable. 2653 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3); 2654 case RISCV::PseudoCCMOVGPR: 2655 // Operands 4 and 5 are commutable. 2656 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 4, 5); 2657 case CASE_VFMA_SPLATS(FMADD): 2658 case CASE_VFMA_SPLATS(FMSUB): 2659 case CASE_VFMA_SPLATS(FMACC): 2660 case CASE_VFMA_SPLATS(FMSAC): 2661 case CASE_VFMA_SPLATS(FNMADD): 2662 case CASE_VFMA_SPLATS(FNMSUB): 2663 case CASE_VFMA_SPLATS(FNMACC): 2664 case CASE_VFMA_SPLATS(FNMSAC): 2665 case CASE_VFMA_OPCODE_LMULS_MF4(FMACC, VV): 2666 case CASE_VFMA_OPCODE_LMULS_MF4(FMSAC, VV): 2667 case CASE_VFMA_OPCODE_LMULS_MF4(FNMACC, VV): 2668 case CASE_VFMA_OPCODE_LMULS_MF4(FNMSAC, VV): 2669 case CASE_VFMA_OPCODE_LMULS(MADD, VX): 2670 case CASE_VFMA_OPCODE_LMULS(NMSUB, VX): 2671 case CASE_VFMA_OPCODE_LMULS(MACC, VX): 2672 case CASE_VFMA_OPCODE_LMULS(NMSAC, VX): 2673 case CASE_VFMA_OPCODE_LMULS(MACC, VV): 2674 case CASE_VFMA_OPCODE_LMULS(NMSAC, VV): { 2675 // If the tail policy is undisturbed we can't commute. 
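    // Bit 0 of the trailing policy operand is the tail policy
    // (RISCVII::TAIL_AGNOSTIC). When it is clear, the tail is undisturbed and
    // must come from the register tied to the destination, so making a
    // different register the tied operand would change the result.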
    assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
    if ((MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 1) == 0)
      return false;

    // For these instructions we can only swap operand 1 and operand 3 by
    // changing the opcode.
    unsigned CommutableOpIdx1 = 1;
    unsigned CommutableOpIdx2 = 3;
    if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
                              CommutableOpIdx2))
      return false;
    return true;
  }
  case CASE_VFMA_OPCODE_LMULS_MF4(FMADD, VV):
  case CASE_VFMA_OPCODE_LMULS_MF4(FMSUB, VV):
  case CASE_VFMA_OPCODE_LMULS_MF4(FNMADD, VV):
  case CASE_VFMA_OPCODE_LMULS_MF4(FNMSUB, VV):
  case CASE_VFMA_OPCODE_LMULS(MADD, VV):
  case CASE_VFMA_OPCODE_LMULS(NMSUB, VV): {
    // If the tail policy is undisturbed we can't commute.
    assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
    if ((MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 1) == 0)
      return false;

    // For these instructions we have more freedom. We can commute with the
    // other multiplicand or with the addend/subtrahend/minuend.

    // Any fixed operand must be from source 1, 2 or 3.
    if (SrcOpIdx1 != CommuteAnyOperandIndex && SrcOpIdx1 > 3)
      return false;
    if (SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx2 > 3)
      return false;

    // If both ops are fixed, one must be the tied source.
    if (SrcOpIdx1 != CommuteAnyOperandIndex &&
        SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx1 != 1 && SrcOpIdx2 != 1)
      return false;

    // Look for two different register operands assumed to be commutable
    // regardless of the FMA opcode. The FMA opcode is adjusted later if
    // needed.
    if (SrcOpIdx1 == CommuteAnyOperandIndex ||
        SrcOpIdx2 == CommuteAnyOperandIndex) {
      // At least one of the operands to be commuted is not specified and
      // this method is free to choose appropriate commutable operands.
      unsigned CommutableOpIdx1 = SrcOpIdx1;
      if (SrcOpIdx1 == SrcOpIdx2) {
        // Neither operand is fixed. Set one of the commutable
        // operands to the tied source.
        CommutableOpIdx1 = 1;
      } else if (SrcOpIdx1 == CommuteAnyOperandIndex) {
        // Only one of the operands is not fixed.
        CommutableOpIdx1 = SrcOpIdx2;
      }

      // CommutableOpIdx1 is well defined now. Let's choose another commutable
      // operand and assign its index to CommutableOpIdx2.
      unsigned CommutableOpIdx2;
      if (CommutableOpIdx1 != 1) {
        // If we haven't already used the tied source, we must use it now.
        CommutableOpIdx2 = 1;
      } else {
        Register Op1Reg = MI.getOperand(CommutableOpIdx1).getReg();

        // The commuted operands should have different registers.
        // Otherwise, the commute transformation does not change anything and
        // is useless. We use this as a hint to make our decision.
        if (Op1Reg != MI.getOperand(2).getReg())
          CommutableOpIdx2 = 2;
        else
          CommutableOpIdx2 = 3;
      }

      // Assign the found pair of commutable indices to SrcOpIdx1 and
      // SrcOpIdx2 to return those values.
2751 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1, 2752 CommutableOpIdx2)) 2753 return false; 2754 } 2755 2756 return true; 2757 } 2758 } 2759 2760 return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); 2761 } 2762 2763 #define CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL) \ 2764 case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL: \ 2765 Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL; \ 2766 break; 2767 2768 #define CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE) \ 2769 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1) \ 2770 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2) \ 2771 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4) \ 2772 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8) 2773 2774 #define CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE) \ 2775 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2) \ 2776 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE) 2777 2778 #define CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE) \ 2779 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4) \ 2780 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE) 2781 2782 #define CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE) \ 2783 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8) \ 2784 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE) 2785 2786 #define CASE_VFMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \ 2787 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16) \ 2788 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32) \ 2789 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64) 2790 2791 MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI, 2792 bool NewMI, 2793 unsigned OpIdx1, 2794 unsigned OpIdx2) const { 2795 auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & { 2796 if (NewMI) 2797 return *MI.getParent()->getParent()->CloneMachineInstr(&MI); 2798 return MI; 2799 }; 2800 2801 switch (MI.getOpcode()) { 2802 case RISCV::TH_MVEQZ: 2803 case RISCV::TH_MVNEZ: { 2804 auto &WorkingMI = cloneIfNew(MI); 2805 WorkingMI.setDesc(get(MI.getOpcode() == RISCV::TH_MVEQZ ? RISCV::TH_MVNEZ 2806 : RISCV::TH_MVEQZ)); 2807 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, false, OpIdx1, 2808 OpIdx2); 2809 } 2810 case RISCV::PseudoCCMOVGPR: { 2811 // CCMOV can be commuted by inverting the condition. 2812 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm()); 2813 CC = RISCVCC::getOppositeBranchCondition(CC); 2814 auto &WorkingMI = cloneIfNew(MI); 2815 WorkingMI.getOperand(3).setImm(CC); 2816 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI*/ false, 2817 OpIdx1, OpIdx2); 2818 } 2819 case CASE_VFMA_SPLATS(FMACC): 2820 case CASE_VFMA_SPLATS(FMADD): 2821 case CASE_VFMA_SPLATS(FMSAC): 2822 case CASE_VFMA_SPLATS(FMSUB): 2823 case CASE_VFMA_SPLATS(FNMACC): 2824 case CASE_VFMA_SPLATS(FNMADD): 2825 case CASE_VFMA_SPLATS(FNMSAC): 2826 case CASE_VFMA_SPLATS(FNMSUB): 2827 case CASE_VFMA_OPCODE_LMULS_MF4(FMACC, VV): 2828 case CASE_VFMA_OPCODE_LMULS_MF4(FMSAC, VV): 2829 case CASE_VFMA_OPCODE_LMULS_MF4(FNMACC, VV): 2830 case CASE_VFMA_OPCODE_LMULS_MF4(FNMSAC, VV): 2831 case CASE_VFMA_OPCODE_LMULS(MADD, VX): 2832 case CASE_VFMA_OPCODE_LMULS(NMSUB, VX): 2833 case CASE_VFMA_OPCODE_LMULS(MACC, VX): 2834 case CASE_VFMA_OPCODE_LMULS(NMSAC, VX): 2835 case CASE_VFMA_OPCODE_LMULS(MACC, VV): 2836 case CASE_VFMA_OPCODE_LMULS(NMSAC, VV): { 2837 // It only make sense to toggle these between clobbering the 2838 // addend/subtrahend/minuend one of the multiplicands. 
2839 assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index"); 2840 assert((OpIdx1 == 3 || OpIdx2 == 3) && "Unexpected opcode index"); 2841 unsigned Opc; 2842 switch (MI.getOpcode()) { 2843 default: 2844 llvm_unreachable("Unexpected opcode"); 2845 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMACC, FMADD) 2846 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMADD, FMACC) 2847 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSAC, FMSUB) 2848 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSUB, FMSAC) 2849 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMACC, FNMADD) 2850 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMADD, FNMACC) 2851 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSAC, FNMSUB) 2852 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSUB, FNMSAC) 2853 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FMACC, FMADD, VV) 2854 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FMSAC, FMSUB, VV) 2855 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FNMACC, FNMADD, VV) 2856 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FNMSAC, FNMSUB, VV) 2857 CASE_VFMA_CHANGE_OPCODE_LMULS(MACC, MADD, VX) 2858 CASE_VFMA_CHANGE_OPCODE_LMULS(MADD, MACC, VX) 2859 CASE_VFMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VX) 2860 CASE_VFMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VX) 2861 CASE_VFMA_CHANGE_OPCODE_LMULS(MACC, MADD, VV) 2862 CASE_VFMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VV) 2863 } 2864 2865 auto &WorkingMI = cloneIfNew(MI); 2866 WorkingMI.setDesc(get(Opc)); 2867 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, 2868 OpIdx1, OpIdx2); 2869 } 2870 case CASE_VFMA_OPCODE_LMULS_MF4(FMADD, VV): 2871 case CASE_VFMA_OPCODE_LMULS_MF4(FMSUB, VV): 2872 case CASE_VFMA_OPCODE_LMULS_MF4(FNMADD, VV): 2873 case CASE_VFMA_OPCODE_LMULS_MF4(FNMSUB, VV): 2874 case CASE_VFMA_OPCODE_LMULS(MADD, VV): 2875 case CASE_VFMA_OPCODE_LMULS(NMSUB, VV): { 2876 assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index"); 2877 // If one of the operands, is the addend we need to change opcode. 2878 // Otherwise we're just swapping 2 of the multiplicands. 2879 if (OpIdx1 == 3 || OpIdx2 == 3) { 2880 unsigned Opc; 2881 switch (MI.getOpcode()) { 2882 default: 2883 llvm_unreachable("Unexpected opcode"); 2884 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FMADD, FMACC, VV) 2885 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FMSUB, FMSAC, VV) 2886 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FNMADD, FNMACC, VV) 2887 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FNMSUB, FNMSAC, VV) 2888 CASE_VFMA_CHANGE_OPCODE_LMULS(MADD, MACC, VV) 2889 CASE_VFMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VV) 2890 } 2891 2892 auto &WorkingMI = cloneIfNew(MI); 2893 WorkingMI.setDesc(get(Opc)); 2894 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, 2895 OpIdx1, OpIdx2); 2896 } 2897 // Let the default code handle it. 
2898 break; 2899 } 2900 } 2901 2902 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); 2903 } 2904 2905 #undef CASE_VFMA_CHANGE_OPCODE_SPLATS 2906 #undef CASE_VFMA_CHANGE_OPCODE_LMULS 2907 #undef CASE_VFMA_CHANGE_OPCODE_COMMON 2908 #undef CASE_VFMA_SPLATS 2909 #undef CASE_VFMA_OPCODE_LMULS 2910 #undef CASE_VFMA_OPCODE_COMMON 2911 2912 // clang-format off 2913 #define CASE_WIDEOP_OPCODE_COMMON(OP, LMUL) \ 2914 RISCV::PseudoV##OP##_##LMUL##_TIED 2915 2916 #define CASE_WIDEOP_OPCODE_LMULS_MF4(OP) \ 2917 CASE_WIDEOP_OPCODE_COMMON(OP, MF4): \ 2918 case CASE_WIDEOP_OPCODE_COMMON(OP, MF2): \ 2919 case CASE_WIDEOP_OPCODE_COMMON(OP, M1): \ 2920 case CASE_WIDEOP_OPCODE_COMMON(OP, M2): \ 2921 case CASE_WIDEOP_OPCODE_COMMON(OP, M4) 2922 2923 #define CASE_WIDEOP_OPCODE_LMULS(OP) \ 2924 CASE_WIDEOP_OPCODE_COMMON(OP, MF8): \ 2925 case CASE_WIDEOP_OPCODE_LMULS_MF4(OP) 2926 // clang-format on 2927 2928 #define CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL) \ 2929 case RISCV::PseudoV##OP##_##LMUL##_TIED: \ 2930 NewOpc = RISCV::PseudoV##OP##_##LMUL; \ 2931 break; 2932 2933 #define CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP) \ 2934 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4) \ 2935 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2) \ 2936 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1) \ 2937 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2) \ 2938 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4) 2939 2940 #define CASE_WIDEOP_CHANGE_OPCODE_LMULS(OP) \ 2941 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF8) \ 2942 CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP) 2943 2944 MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI, 2945 LiveVariables *LV, 2946 LiveIntervals *LIS) const { 2947 MachineInstrBuilder MIB; 2948 switch (MI.getOpcode()) { 2949 default: 2950 return nullptr; 2951 case CASE_WIDEOP_OPCODE_LMULS_MF4(FWADD_WV): 2952 case CASE_WIDEOP_OPCODE_LMULS_MF4(FWSUB_WV): { 2953 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) && 2954 MI.getNumExplicitOperands() == 7 && 2955 "Expect 7 explicit operands rd, rs2, rs1, rm, vl, sew, policy"); 2956 // If the tail policy is undisturbed we can't convert. 2957 if ((MI.getOperand(RISCVII::getVecPolicyOpNum(MI.getDesc())).getImm() & 2958 1) == 0) 2959 return nullptr; 2960 // clang-format off 2961 unsigned NewOpc; 2962 switch (MI.getOpcode()) { 2963 default: 2964 llvm_unreachable("Unexpected opcode"); 2965 CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWADD_WV) 2966 CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWSUB_WV) 2967 } 2968 // clang-format on 2969 2970 MachineBasicBlock &MBB = *MI.getParent(); 2971 MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc)) 2972 .add(MI.getOperand(0)) 2973 .addReg(MI.getOperand(0).getReg(), RegState::Undef) 2974 .add(MI.getOperand(1)) 2975 .add(MI.getOperand(2)) 2976 .add(MI.getOperand(3)) 2977 .add(MI.getOperand(4)) 2978 .add(MI.getOperand(5)) 2979 .add(MI.getOperand(6)); 2980 break; 2981 } 2982 case CASE_WIDEOP_OPCODE_LMULS(WADD_WV): 2983 case CASE_WIDEOP_OPCODE_LMULS(WADDU_WV): 2984 case CASE_WIDEOP_OPCODE_LMULS(WSUB_WV): 2985 case CASE_WIDEOP_OPCODE_LMULS(WSUBU_WV): { 2986 // If the tail policy is undisturbed we can't convert. 
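    // The _TIED pseudos have no separate passthru operand (the destination is
    // tied to the wide source), so the untied replacement built below supplies
    // an undef passthru. That is only sound when the tail is agnostic; with a
    // tail-undisturbed policy the tail would need a real passthru value.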
2987 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) && 2988 MI.getNumExplicitOperands() == 6); 2989 if ((MI.getOperand(5).getImm() & 1) == 0) 2990 return nullptr; 2991 2992 // clang-format off 2993 unsigned NewOpc; 2994 switch (MI.getOpcode()) { 2995 default: 2996 llvm_unreachable("Unexpected opcode"); 2997 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADD_WV) 2998 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADDU_WV) 2999 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUB_WV) 3000 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUBU_WV) 3001 } 3002 // clang-format on 3003 3004 MachineBasicBlock &MBB = *MI.getParent(); 3005 MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc)) 3006 .add(MI.getOperand(0)) 3007 .addReg(MI.getOperand(0).getReg(), RegState::Undef) 3008 .add(MI.getOperand(1)) 3009 .add(MI.getOperand(2)) 3010 .add(MI.getOperand(3)) 3011 .add(MI.getOperand(4)) 3012 .add(MI.getOperand(5)); 3013 break; 3014 } 3015 } 3016 MIB.copyImplicitOps(MI); 3017 3018 if (LV) { 3019 unsigned NumOps = MI.getNumOperands(); 3020 for (unsigned I = 1; I < NumOps; ++I) { 3021 MachineOperand &Op = MI.getOperand(I); 3022 if (Op.isReg() && Op.isKill()) 3023 LV->replaceKillInstruction(Op.getReg(), MI, *MIB); 3024 } 3025 } 3026 3027 if (LIS) { 3028 SlotIndex Idx = LIS->ReplaceMachineInstrInMaps(MI, *MIB); 3029 3030 if (MI.getOperand(0).isEarlyClobber()) { 3031 // Use operand 1 was tied to early-clobber def operand 0, so its live 3032 // interval could have ended at an early-clobber slot. Now they are not 3033 // tied we need to update it to the normal register slot. 3034 LiveInterval &LI = LIS->getInterval(MI.getOperand(1).getReg()); 3035 LiveRange::Segment *S = LI.getSegmentContaining(Idx); 3036 if (S->end == Idx.getRegSlot(true)) 3037 S->end = Idx.getRegSlot(); 3038 } 3039 } 3040 3041 return MIB; 3042 } 3043 3044 #undef CASE_WIDEOP_CHANGE_OPCODE_LMULS 3045 #undef CASE_WIDEOP_CHANGE_OPCODE_COMMON 3046 #undef CASE_WIDEOP_OPCODE_LMULS 3047 #undef CASE_WIDEOP_OPCODE_COMMON 3048 3049 void RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF, 3050 MachineBasicBlock &MBB, 3051 MachineBasicBlock::iterator II, 3052 const DebugLoc &DL, Register DestReg, 3053 int64_t Amount, 3054 MachineInstr::MIFlag Flag) const { 3055 assert(Amount > 0 && "There is no need to get VLEN scaled value."); 3056 assert(Amount % 8 == 0 && 3057 "Reserve the stack by the multiple of one vector size."); 3058 3059 MachineRegisterInfo &MRI = MF.getRegInfo(); 3060 int64_t NumOfVReg = Amount / 8; 3061 3062 BuildMI(MBB, II, DL, get(RISCV::PseudoReadVLENB), DestReg).setMIFlag(Flag); 3063 assert(isInt<32>(NumOfVReg) && 3064 "Expect the number of vector registers within 32-bits."); 3065 if (llvm::has_single_bit<uint32_t>(NumOfVReg)) { 3066 uint32_t ShiftAmount = Log2_32(NumOfVReg); 3067 if (ShiftAmount == 0) 3068 return; 3069 BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg) 3070 .addReg(DestReg, RegState::Kill) 3071 .addImm(ShiftAmount) 3072 .setMIFlag(Flag); 3073 } else if (STI.hasStdExtZba() && 3074 ((NumOfVReg % 3 == 0 && isPowerOf2_64(NumOfVReg / 3)) || 3075 (NumOfVReg % 5 == 0 && isPowerOf2_64(NumOfVReg / 5)) || 3076 (NumOfVReg % 9 == 0 && isPowerOf2_64(NumOfVReg / 9)))) { 3077 // We can use Zba SHXADD+SLLI instructions for multiply in some cases. 
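    // Worked example (illustrative): NumOfVReg == 40 == 5 * 8 yields
    //   slli   dest, dest, 3       ; dest = VLENB * 8
    //   sh2add dest, dest, dest    ; dest = dest * 4 + dest = VLENB * 40
    // avoiding a MUL and therefore the M/Zmmul requirement below.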
3078 unsigned Opc; 3079 uint32_t ShiftAmount; 3080 if (NumOfVReg % 9 == 0) { 3081 Opc = RISCV::SH3ADD; 3082 ShiftAmount = Log2_64(NumOfVReg / 9); 3083 } else if (NumOfVReg % 5 == 0) { 3084 Opc = RISCV::SH2ADD; 3085 ShiftAmount = Log2_64(NumOfVReg / 5); 3086 } else if (NumOfVReg % 3 == 0) { 3087 Opc = RISCV::SH1ADD; 3088 ShiftAmount = Log2_64(NumOfVReg / 3); 3089 } else { 3090 llvm_unreachable("Unexpected number of vregs"); 3091 } 3092 if (ShiftAmount) 3093 BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg) 3094 .addReg(DestReg, RegState::Kill) 3095 .addImm(ShiftAmount) 3096 .setMIFlag(Flag); 3097 BuildMI(MBB, II, DL, get(Opc), DestReg) 3098 .addReg(DestReg, RegState::Kill) 3099 .addReg(DestReg) 3100 .setMIFlag(Flag); 3101 } else if (llvm::has_single_bit<uint32_t>(NumOfVReg - 1)) { 3102 Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass); 3103 uint32_t ShiftAmount = Log2_32(NumOfVReg - 1); 3104 BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister) 3105 .addReg(DestReg) 3106 .addImm(ShiftAmount) 3107 .setMIFlag(Flag); 3108 BuildMI(MBB, II, DL, get(RISCV::ADD), DestReg) 3109 .addReg(ScaledRegister, RegState::Kill) 3110 .addReg(DestReg, RegState::Kill) 3111 .setMIFlag(Flag); 3112 } else if (llvm::has_single_bit<uint32_t>(NumOfVReg + 1)) { 3113 Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass); 3114 uint32_t ShiftAmount = Log2_32(NumOfVReg + 1); 3115 BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister) 3116 .addReg(DestReg) 3117 .addImm(ShiftAmount) 3118 .setMIFlag(Flag); 3119 BuildMI(MBB, II, DL, get(RISCV::SUB), DestReg) 3120 .addReg(ScaledRegister, RegState::Kill) 3121 .addReg(DestReg, RegState::Kill) 3122 .setMIFlag(Flag); 3123 } else { 3124 Register N = MRI.createVirtualRegister(&RISCV::GPRRegClass); 3125 movImm(MBB, II, DL, N, NumOfVReg, Flag); 3126 if (!STI.hasStdExtM() && !STI.hasStdExtZmmul()) 3127 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{ 3128 MF.getFunction(), 3129 "M- or Zmmul-extension must be enabled to calculate the vscaled size/" 3130 "offset."}); 3131 BuildMI(MBB, II, DL, get(RISCV::MUL), DestReg) 3132 .addReg(DestReg, RegState::Kill) 3133 .addReg(N, RegState::Kill) 3134 .setMIFlag(Flag); 3135 } 3136 } 3137 3138 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>> 3139 RISCVInstrInfo::getSerializableMachineMemOperandTargetFlags() const { 3140 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] = 3141 {{MONontemporalBit0, "riscv-nontemporal-domain-bit-0"}, 3142 {MONontemporalBit1, "riscv-nontemporal-domain-bit-1"}}; 3143 return ArrayRef(TargetFlags); 3144 } 3145 3146 // Returns true if this is the sext.w pattern, addiw rd, rs1, 0. 3147 bool RISCV::isSEXT_W(const MachineInstr &MI) { 3148 return MI.getOpcode() == RISCV::ADDIW && MI.getOperand(1).isReg() && 3149 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0; 3150 } 3151 3152 // Returns true if this is the zext.w pattern, adduw rd, rs1, x0. 3153 bool RISCV::isZEXT_W(const MachineInstr &MI) { 3154 return MI.getOpcode() == RISCV::ADD_UW && MI.getOperand(1).isReg() && 3155 MI.getOperand(2).isReg() && MI.getOperand(2).getReg() == RISCV::X0; 3156 } 3157 3158 // Returns true if this is the zext.b pattern, andi rd, rs1, 255. 
3159 bool RISCV::isZEXT_B(const MachineInstr &MI) { 3160 return MI.getOpcode() == RISCV::ANDI && MI.getOperand(1).isReg() && 3161 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 255; 3162 } 3163 3164 static bool isRVVWholeLoadStore(unsigned Opcode) { 3165 switch (Opcode) { 3166 default: 3167 return false; 3168 case RISCV::VS1R_V: 3169 case RISCV::VS2R_V: 3170 case RISCV::VS4R_V: 3171 case RISCV::VS8R_V: 3172 case RISCV::VL1RE8_V: 3173 case RISCV::VL2RE8_V: 3174 case RISCV::VL4RE8_V: 3175 case RISCV::VL8RE8_V: 3176 case RISCV::VL1RE16_V: 3177 case RISCV::VL2RE16_V: 3178 case RISCV::VL4RE16_V: 3179 case RISCV::VL8RE16_V: 3180 case RISCV::VL1RE32_V: 3181 case RISCV::VL2RE32_V: 3182 case RISCV::VL4RE32_V: 3183 case RISCV::VL8RE32_V: 3184 case RISCV::VL1RE64_V: 3185 case RISCV::VL2RE64_V: 3186 case RISCV::VL4RE64_V: 3187 case RISCV::VL8RE64_V: 3188 return true; 3189 } 3190 } 3191 3192 bool RISCV::isRVVSpill(const MachineInstr &MI) { 3193 // RVV lacks any support for immediate addressing for stack addresses, so be 3194 // conservative. 3195 unsigned Opcode = MI.getOpcode(); 3196 if (!RISCVVPseudosTable::getPseudoInfo(Opcode) && 3197 !isRVVWholeLoadStore(Opcode) && !isRVVSpillForZvlsseg(Opcode)) 3198 return false; 3199 return true; 3200 } 3201 3202 std::optional<std::pair<unsigned, unsigned>> 3203 RISCV::isRVVSpillForZvlsseg(unsigned Opcode) { 3204 switch (Opcode) { 3205 default: 3206 return std::nullopt; 3207 case RISCV::PseudoVSPILL2_M1: 3208 case RISCV::PseudoVRELOAD2_M1: 3209 return std::make_pair(2u, 1u); 3210 case RISCV::PseudoVSPILL2_M2: 3211 case RISCV::PseudoVRELOAD2_M2: 3212 return std::make_pair(2u, 2u); 3213 case RISCV::PseudoVSPILL2_M4: 3214 case RISCV::PseudoVRELOAD2_M4: 3215 return std::make_pair(2u, 4u); 3216 case RISCV::PseudoVSPILL3_M1: 3217 case RISCV::PseudoVRELOAD3_M1: 3218 return std::make_pair(3u, 1u); 3219 case RISCV::PseudoVSPILL3_M2: 3220 case RISCV::PseudoVRELOAD3_M2: 3221 return std::make_pair(3u, 2u); 3222 case RISCV::PseudoVSPILL4_M1: 3223 case RISCV::PseudoVRELOAD4_M1: 3224 return std::make_pair(4u, 1u); 3225 case RISCV::PseudoVSPILL4_M2: 3226 case RISCV::PseudoVRELOAD4_M2: 3227 return std::make_pair(4u, 2u); 3228 case RISCV::PseudoVSPILL5_M1: 3229 case RISCV::PseudoVRELOAD5_M1: 3230 return std::make_pair(5u, 1u); 3231 case RISCV::PseudoVSPILL6_M1: 3232 case RISCV::PseudoVRELOAD6_M1: 3233 return std::make_pair(6u, 1u); 3234 case RISCV::PseudoVSPILL7_M1: 3235 case RISCV::PseudoVRELOAD7_M1: 3236 return std::make_pair(7u, 1u); 3237 case RISCV::PseudoVSPILL8_M1: 3238 case RISCV::PseudoVRELOAD8_M1: 3239 return std::make_pair(8u, 1u); 3240 } 3241 } 3242 3243 bool RISCV::isFaultFirstLoad(const MachineInstr &MI) { 3244 return MI.getNumExplicitDefs() == 2 && MI.modifiesRegister(RISCV::VL) && 3245 !MI.isInlineAsm(); 3246 } 3247 3248 bool RISCV::hasEqualFRM(const MachineInstr &MI1, const MachineInstr &MI2) { 3249 int16_t MI1FrmOpIdx = 3250 RISCV::getNamedOperandIdx(MI1.getOpcode(), RISCV::OpName::frm); 3251 int16_t MI2FrmOpIdx = 3252 RISCV::getNamedOperandIdx(MI2.getOpcode(), RISCV::OpName::frm); 3253 if (MI1FrmOpIdx < 0 || MI2FrmOpIdx < 0) 3254 return false; 3255 MachineOperand FrmOp1 = MI1.getOperand(MI1FrmOpIdx); 3256 MachineOperand FrmOp2 = MI2.getOperand(MI2FrmOpIdx); 3257 return FrmOp1.getImm() == FrmOp2.getImm(); 3258 } 3259 3260 std::optional<unsigned> 3261 RISCV::getVectorLowDemandedScalarBits(uint16_t Opcode, unsigned Log2SEW) { 3262 // TODO: Handle Zvbb instructions 3263 switch (Opcode) { 3264 default: 3265 return std::nullopt; 3266 3267 // 11.6. 
Vector Single-Width Shift Instructions 3268 case RISCV::VSLL_VX: 3269 case RISCV::VSRL_VX: 3270 case RISCV::VSRA_VX: 3271 // 12.4. Vector Single-Width Scaling Shift Instructions 3272 case RISCV::VSSRL_VX: 3273 case RISCV::VSSRA_VX: 3274 // Only the low lg2(SEW) bits of the shift-amount value are used. 3275 return Log2SEW; 3276 3277 // 11.7 Vector Narrowing Integer Right Shift Instructions 3278 case RISCV::VNSRL_WX: 3279 case RISCV::VNSRA_WX: 3280 // 12.5. Vector Narrowing Fixed-Point Clip Instructions 3281 case RISCV::VNCLIPU_WX: 3282 case RISCV::VNCLIP_WX: 3283 // Only the low lg2(2*SEW) bits of the shift-amount value are used. 3284 return Log2SEW + 1; 3285 3286 // 11.1. Vector Single-Width Integer Add and Subtract 3287 case RISCV::VADD_VX: 3288 case RISCV::VSUB_VX: 3289 case RISCV::VRSUB_VX: 3290 // 11.2. Vector Widening Integer Add/Subtract 3291 case RISCV::VWADDU_VX: 3292 case RISCV::VWSUBU_VX: 3293 case RISCV::VWADD_VX: 3294 case RISCV::VWSUB_VX: 3295 case RISCV::VWADDU_WX: 3296 case RISCV::VWSUBU_WX: 3297 case RISCV::VWADD_WX: 3298 case RISCV::VWSUB_WX: 3299 // 11.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions 3300 case RISCV::VADC_VXM: 3301 case RISCV::VADC_VIM: 3302 case RISCV::VMADC_VXM: 3303 case RISCV::VMADC_VIM: 3304 case RISCV::VMADC_VX: 3305 case RISCV::VSBC_VXM: 3306 case RISCV::VMSBC_VXM: 3307 case RISCV::VMSBC_VX: 3308 // 11.5 Vector Bitwise Logical Instructions 3309 case RISCV::VAND_VX: 3310 case RISCV::VOR_VX: 3311 case RISCV::VXOR_VX: 3312 // 11.8. Vector Integer Compare Instructions 3313 case RISCV::VMSEQ_VX: 3314 case RISCV::VMSNE_VX: 3315 case RISCV::VMSLTU_VX: 3316 case RISCV::VMSLT_VX: 3317 case RISCV::VMSLEU_VX: 3318 case RISCV::VMSLE_VX: 3319 case RISCV::VMSGTU_VX: 3320 case RISCV::VMSGT_VX: 3321 // 11.9. Vector Integer Min/Max Instructions 3322 case RISCV::VMINU_VX: 3323 case RISCV::VMIN_VX: 3324 case RISCV::VMAXU_VX: 3325 case RISCV::VMAX_VX: 3326 // 11.10. Vector Single-Width Integer Multiply Instructions 3327 case RISCV::VMUL_VX: 3328 case RISCV::VMULH_VX: 3329 case RISCV::VMULHU_VX: 3330 case RISCV::VMULHSU_VX: 3331 // 11.11. Vector Integer Divide Instructions 3332 case RISCV::VDIVU_VX: 3333 case RISCV::VDIV_VX: 3334 case RISCV::VREMU_VX: 3335 case RISCV::VREM_VX: 3336 // 11.12. Vector Widening Integer Multiply Instructions 3337 case RISCV::VWMUL_VX: 3338 case RISCV::VWMULU_VX: 3339 case RISCV::VWMULSU_VX: 3340 // 11.13. Vector Single-Width Integer Multiply-Add Instructions 3341 case RISCV::VMACC_VX: 3342 case RISCV::VNMSAC_VX: 3343 case RISCV::VMADD_VX: 3344 case RISCV::VNMSUB_VX: 3345 // 11.14. Vector Widening Integer Multiply-Add Instructions 3346 case RISCV::VWMACCU_VX: 3347 case RISCV::VWMACC_VX: 3348 case RISCV::VWMACCSU_VX: 3349 case RISCV::VWMACCUS_VX: 3350 // 11.15. Vector Integer Merge Instructions 3351 case RISCV::VMERGE_VXM: 3352 // 11.16. Vector Integer Move Instructions 3353 case RISCV::VMV_V_X: 3354 // 12.1. Vector Single-Width Saturating Add and Subtract 3355 case RISCV::VSADDU_VX: 3356 case RISCV::VSADD_VX: 3357 case RISCV::VSSUBU_VX: 3358 case RISCV::VSSUB_VX: 3359 // 12.2. Vector Single-Width Averaging Add and Subtract 3360 case RISCV::VAADDU_VX: 3361 case RISCV::VAADD_VX: 3362 case RISCV::VASUBU_VX: 3363 case RISCV::VASUB_VX: 3364 // 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation 3365 case RISCV::VSMUL_VX: 3366 // 16.1. 
Integer Scalar Move Instructions 3367 case RISCV::VMV_S_X: 3368 return 1U << Log2SEW; 3369 } 3370 } 3371 3372 unsigned RISCV::getRVVMCOpcode(unsigned RVVPseudoOpcode) { 3373 const RISCVVPseudosTable::PseudoInfo *RVV = 3374 RISCVVPseudosTable::getPseudoInfo(RVVPseudoOpcode); 3375 if (!RVV) 3376 return 0; 3377 return RVV->BaseInstr; 3378 } 3379