1518292dbSQingShan Zhang //===- PPCMacroFusion.cpp - PowerPC Macro Fusion --------------------------===// 2518292dbSQingShan Zhang // 3518292dbSQingShan Zhang // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4518292dbSQingShan Zhang // See https://llvm.org/LICENSE.txt for license information. 5518292dbSQingShan Zhang // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6518292dbSQingShan Zhang // 7518292dbSQingShan Zhang //===----------------------------------------------------------------------===// 8518292dbSQingShan Zhang // 9518292dbSQingShan Zhang /// \file This file contains the PowerPC implementation of the DAG scheduling 10518292dbSQingShan Zhang /// mutation to pair instructions back to back. 11518292dbSQingShan Zhang // 12518292dbSQingShan Zhang //===----------------------------------------------------------------------===// 13518292dbSQingShan Zhang 14518292dbSQingShan Zhang #include "PPC.h" 15518292dbSQingShan Zhang #include "PPCSubtarget.h" 16518292dbSQingShan Zhang #include "llvm/ADT/DenseSet.h" 17518292dbSQingShan Zhang #include "llvm/CodeGen/MacroFusion.h" 18989f1c72Sserge-sans-paille #include "llvm/CodeGen/ScheduleDAGMutation.h" 19518292dbSQingShan Zhang 20518292dbSQingShan Zhang using namespace llvm; 21518292dbSQingShan Zhang namespace { 22518292dbSQingShan Zhang 23518292dbSQingShan Zhang class FusionFeature { 24518292dbSQingShan Zhang public: 25518292dbSQingShan Zhang typedef SmallDenseSet<unsigned> FusionOpSet; 26518292dbSQingShan Zhang 27518292dbSQingShan Zhang enum FusionKind { 28518292dbSQingShan Zhang #define FUSION_KIND(KIND) FK_##KIND 29518292dbSQingShan Zhang #define FUSION_FEATURE(KIND, HAS_FEATURE, DEP_OP_IDX, OPSET1, OPSET2) \ 30518292dbSQingShan Zhang FUSION_KIND(KIND), 31518292dbSQingShan Zhang #include "PPCMacroFusion.def" 32518292dbSQingShan Zhang FUSION_KIND(END) 33518292dbSQingShan Zhang }; 34518292dbSQingShan Zhang private: 35518292dbSQingShan Zhang // Each fusion feature is assigned with one fusion kind. All the 36518292dbSQingShan Zhang // instructions with the same fusion kind have the same fusion characteristic. 37518292dbSQingShan Zhang FusionKind Kd; 38518292dbSQingShan Zhang // True if this feature is enabled. 39518292dbSQingShan Zhang bool Supported; 40518292dbSQingShan Zhang // li rx, si 41518292dbSQingShan Zhang // load rt, ra, rx 42518292dbSQingShan Zhang // The dependent operand index in the second op(load). And the negative means 43518292dbSQingShan Zhang // it could be any one. 44518292dbSQingShan Zhang int DepOpIdx; 45518292dbSQingShan Zhang // The first fusion op set. 46518292dbSQingShan Zhang FusionOpSet OpSet1; 47518292dbSQingShan Zhang // The second fusion op set. 48518292dbSQingShan Zhang FusionOpSet OpSet2; 49518292dbSQingShan Zhang public: 50518292dbSQingShan Zhang FusionFeature(FusionKind Kind, bool HasFeature, int Index, 51518292dbSQingShan Zhang const FusionOpSet &First, const FusionOpSet &Second) : 52518292dbSQingShan Zhang Kd(Kind), Supported(HasFeature), DepOpIdx(Index), OpSet1(First), 53518292dbSQingShan Zhang OpSet2(Second) {} 54518292dbSQingShan Zhang 55b7c5e0b0SKazu Hirata bool hasOp1(unsigned Opc) const { return OpSet1.contains(Opc); } 56b7c5e0b0SKazu Hirata bool hasOp2(unsigned Opc) const { return OpSet2.contains(Opc); } 57518292dbSQingShan Zhang bool isSupported() const { return Supported; } 58518292dbSQingShan Zhang Optional<unsigned> depOpIdx() const { 59518292dbSQingShan Zhang if (DepOpIdx < 0) 60518292dbSQingShan Zhang return None; 61518292dbSQingShan Zhang return DepOpIdx; 62518292dbSQingShan Zhang } 63518292dbSQingShan Zhang 64518292dbSQingShan Zhang FusionKind getKind() const { return Kd; } 65518292dbSQingShan Zhang }; 66518292dbSQingShan Zhang 67518292dbSQingShan Zhang static bool matchingRegOps(const MachineInstr &FirstMI, 68518292dbSQingShan Zhang int FirstMIOpIndex, 69518292dbSQingShan Zhang const MachineInstr &SecondMI, 70518292dbSQingShan Zhang int SecondMIOpIndex) { 71518292dbSQingShan Zhang const MachineOperand &Op1 = FirstMI.getOperand(FirstMIOpIndex); 72518292dbSQingShan Zhang const MachineOperand &Op2 = SecondMI.getOperand(SecondMIOpIndex); 73518292dbSQingShan Zhang if (!Op1.isReg() || !Op2.isReg()) 74518292dbSQingShan Zhang return false; 75518292dbSQingShan Zhang 76518292dbSQingShan Zhang return Op1.getReg() == Op2.getReg(); 77518292dbSQingShan Zhang } 78518292dbSQingShan Zhang 799b5e2b52SQiu Chaofan static bool matchingImmOps(const MachineInstr &MI, 809b5e2b52SQiu Chaofan int MIOpIndex, 819b5e2b52SQiu Chaofan int64_t Expect, 829b5e2b52SQiu Chaofan unsigned ExtendFrom = 64) { 839b5e2b52SQiu Chaofan const MachineOperand &Op = MI.getOperand(MIOpIndex); 849b5e2b52SQiu Chaofan if (!Op.isImm()) 859b5e2b52SQiu Chaofan return false; 869b5e2b52SQiu Chaofan int64_t Imm = Op.getImm(); 879b5e2b52SQiu Chaofan if (ExtendFrom < 64) 889b5e2b52SQiu Chaofan Imm = SignExtend64(Imm, ExtendFrom); 899b5e2b52SQiu Chaofan return Imm == Expect; 909b5e2b52SQiu Chaofan } 919b5e2b52SQiu Chaofan 92518292dbSQingShan Zhang // Return true if the FirstMI meets the constraints of SecondMI according to 93518292dbSQingShan Zhang // fusion specification. 94518292dbSQingShan Zhang static bool checkOpConstraints(FusionFeature::FusionKind Kd, 95518292dbSQingShan Zhang const MachineInstr &FirstMI, 96518292dbSQingShan Zhang const MachineInstr &SecondMI) { 97518292dbSQingShan Zhang switch (Kd) { 98518292dbSQingShan Zhang // The hardware didn't require any specific check for the fused instructions' 99518292dbSQingShan Zhang // operands. Therefore, return true to indicate that, it is fusable. 100518292dbSQingShan Zhang default: return true; 101518292dbSQingShan Zhang // [addi rt,ra,si - lxvd2x xt,ra,rb] etc. 102518292dbSQingShan Zhang case FusionFeature::FK_AddiLoad: { 103518292dbSQingShan Zhang // lxvd2x(ra) cannot be zero 104518292dbSQingShan Zhang const MachineOperand &RA = SecondMI.getOperand(1); 105518292dbSQingShan Zhang if (!RA.isReg()) 106518292dbSQingShan Zhang return true; 107518292dbSQingShan Zhang 108518292dbSQingShan Zhang return Register::isVirtualRegister(RA.getReg()) || 109518292dbSQingShan Zhang (RA.getReg() != PPC::ZERO && RA.getReg() != PPC::ZERO8); 110518292dbSQingShan Zhang } 111518292dbSQingShan Zhang // [addis rt,ra,si - ld rt,ds(ra)] etc. 112518292dbSQingShan Zhang case FusionFeature::FK_AddisLoad: { 113518292dbSQingShan Zhang const MachineOperand &RT = SecondMI.getOperand(0); 114518292dbSQingShan Zhang if (!RT.isReg()) 115518292dbSQingShan Zhang return true; 116518292dbSQingShan Zhang 117518292dbSQingShan Zhang // Only check it for non-virtual register. 118518292dbSQingShan Zhang if (!Register::isVirtualRegister(RT.getReg())) 119518292dbSQingShan Zhang // addis(rt) = ld(ra) = ld(rt) 120518292dbSQingShan Zhang // ld(rt) cannot be zero 121518292dbSQingShan Zhang if (!matchingRegOps(SecondMI, 0, SecondMI, 2) || 122518292dbSQingShan Zhang (RT.getReg() == PPC::ZERO || RT.getReg() == PPC::ZERO8)) 123518292dbSQingShan Zhang return false; 124518292dbSQingShan Zhang 125518292dbSQingShan Zhang // addis(si) first 12 bits must be all 1s or all 0s 126518292dbSQingShan Zhang const MachineOperand &SI = FirstMI.getOperand(2); 127518292dbSQingShan Zhang if (!SI.isImm()) 128518292dbSQingShan Zhang return true; 129518292dbSQingShan Zhang int64_t Imm = SI.getImm(); 130b83490bdSQingShan Zhang if (((Imm & 0xFFF0) != 0) && ((Imm & 0xFFF0) != 0xFFF0)) 131518292dbSQingShan Zhang return false; 132518292dbSQingShan Zhang 133518292dbSQingShan Zhang // If si = 1111111111110000 and the msb of the d/ds field of the load equals 134518292dbSQingShan Zhang // 1, then fusion does not occur. 135518292dbSQingShan Zhang if ((Imm & 0xFFF0) == 0xFFF0) { 136518292dbSQingShan Zhang const MachineOperand &D = SecondMI.getOperand(1); 137518292dbSQingShan Zhang if (!D.isImm()) 138518292dbSQingShan Zhang return true; 139518292dbSQingShan Zhang 140518292dbSQingShan Zhang // 14 bit for DS field, while 16 bit for D field. 141518292dbSQingShan Zhang int MSB = 15; 142518292dbSQingShan Zhang if (SecondMI.getOpcode() == PPC::LD) 143518292dbSQingShan Zhang MSB = 13; 144518292dbSQingShan Zhang 145518292dbSQingShan Zhang return (D.getImm() & (1ULL << MSB)) == 0; 146518292dbSQingShan Zhang } 147518292dbSQingShan Zhang return true; 148518292dbSQingShan Zhang } 1499b5e2b52SQiu Chaofan 1509b5e2b52SQiu Chaofan case FusionFeature::FK_SldiAdd: 1519b5e2b52SQiu Chaofan return (matchingImmOps(FirstMI, 2, 3) && matchingImmOps(FirstMI, 3, 60)) || 1529b5e2b52SQiu Chaofan (matchingImmOps(FirstMI, 2, 6) && matchingImmOps(FirstMI, 3, 57)); 15359f4b3d3SQiu Chaofan 15459f4b3d3SQiu Chaofan // rldicl rx, ra, 1, 0 - xor 15559f4b3d3SQiu Chaofan case FusionFeature::FK_RotateLeftXor: 15659f4b3d3SQiu Chaofan return matchingImmOps(FirstMI, 2, 1) && matchingImmOps(FirstMI, 3, 0); 15759f4b3d3SQiu Chaofan 15859f4b3d3SQiu Chaofan // rldicr rx, ra, 1, 63 - xor 15959f4b3d3SQiu Chaofan case FusionFeature::FK_RotateRightXor: 16059f4b3d3SQiu Chaofan return matchingImmOps(FirstMI, 2, 1) && matchingImmOps(FirstMI, 3, 63); 16159f4b3d3SQiu Chaofan 16259f4b3d3SQiu Chaofan // We actually use CMPW* and CMPD*, 'l' doesn't exist as an operand in instr. 16359f4b3d3SQiu Chaofan 16459f4b3d3SQiu Chaofan // { lbz,lbzx,lhz,lhzx,lwz,lwzx } - cmpi 0,1,rx,{ 0,1,-1 } 16559f4b3d3SQiu Chaofan // { lbz,lbzx,lhz,lhzx,lwz,lwzx } - cmpli 0,L,rx,{ 0,1 } 16659f4b3d3SQiu Chaofan case FusionFeature::FK_LoadCmp1: 16759f4b3d3SQiu Chaofan // { ld,ldx } - cmpi 0,1,rx,{ 0,1,-1 } 16859f4b3d3SQiu Chaofan // { ld,ldx } - cmpli 0,1,rx,{ 0,1 } 16959f4b3d3SQiu Chaofan case FusionFeature::FK_LoadCmp2: { 17059f4b3d3SQiu Chaofan const MachineOperand &BT = SecondMI.getOperand(0); 17159f4b3d3SQiu Chaofan if (!BT.isReg() || 17259f4b3d3SQiu Chaofan (!Register::isVirtualRegister(BT.getReg()) && BT.getReg() != PPC::CR0)) 17359f4b3d3SQiu Chaofan return false; 17459f4b3d3SQiu Chaofan if (SecondMI.getOpcode() == PPC::CMPDI && 17559f4b3d3SQiu Chaofan matchingImmOps(SecondMI, 2, -1, 16)) 17659f4b3d3SQiu Chaofan return true; 17759f4b3d3SQiu Chaofan return matchingImmOps(SecondMI, 2, 0) || matchingImmOps(SecondMI, 2, 1); 17859f4b3d3SQiu Chaofan } 17959f4b3d3SQiu Chaofan 18059f4b3d3SQiu Chaofan // { lha,lhax,lwa,lwax } - cmpi 0,L,rx,{ 0,1,-1 } 18159f4b3d3SQiu Chaofan case FusionFeature::FK_LoadCmp3: { 18259f4b3d3SQiu Chaofan const MachineOperand &BT = SecondMI.getOperand(0); 18359f4b3d3SQiu Chaofan if (!BT.isReg() || 18459f4b3d3SQiu Chaofan (!Register::isVirtualRegister(BT.getReg()) && BT.getReg() != PPC::CR0)) 18559f4b3d3SQiu Chaofan return false; 18659f4b3d3SQiu Chaofan return matchingImmOps(SecondMI, 2, 0) || matchingImmOps(SecondMI, 2, 1) || 18759f4b3d3SQiu Chaofan matchingImmOps(SecondMI, 2, -1, 16); 18859f4b3d3SQiu Chaofan } 18959f4b3d3SQiu Chaofan 19059f4b3d3SQiu Chaofan // mtctr - { bcctr,bcctrl } 19159f4b3d3SQiu Chaofan case FusionFeature::FK_ZeroMoveCTR: 19259f4b3d3SQiu Chaofan // ( mtctr rx ) is alias of ( mtspr 9, rx ) 19359f4b3d3SQiu Chaofan return (FirstMI.getOpcode() != PPC::MTSPR && 19459f4b3d3SQiu Chaofan FirstMI.getOpcode() != PPC::MTSPR8) || 19559f4b3d3SQiu Chaofan matchingImmOps(FirstMI, 0, 9); 19659f4b3d3SQiu Chaofan 19759f4b3d3SQiu Chaofan // mtlr - { bclr,bclrl } 19859f4b3d3SQiu Chaofan case FusionFeature::FK_ZeroMoveLR: 19959f4b3d3SQiu Chaofan // ( mtlr rx ) is alias of ( mtspr 8, rx ) 20059f4b3d3SQiu Chaofan return (FirstMI.getOpcode() != PPC::MTSPR && 20159f4b3d3SQiu Chaofan FirstMI.getOpcode() != PPC::MTSPR8) || 20259f4b3d3SQiu Chaofan matchingImmOps(FirstMI, 0, 8); 20359f4b3d3SQiu Chaofan 20459f4b3d3SQiu Chaofan // addis rx,ra,si - addi rt,rx,SI, SI >= 0 20559f4b3d3SQiu Chaofan case FusionFeature::FK_AddisAddi: { 20659f4b3d3SQiu Chaofan const MachineOperand &RA = FirstMI.getOperand(1); 20759f4b3d3SQiu Chaofan const MachineOperand &SI = SecondMI.getOperand(2); 20859f4b3d3SQiu Chaofan if (!SI.isImm() || !RA.isReg()) 20959f4b3d3SQiu Chaofan return false; 21059f4b3d3SQiu Chaofan if (RA.getReg() == PPC::ZERO || RA.getReg() == PPC::ZERO8) 21159f4b3d3SQiu Chaofan return false; 21259f4b3d3SQiu Chaofan return SignExtend64(SI.getImm(), 16) >= 0; 21359f4b3d3SQiu Chaofan } 21459f4b3d3SQiu Chaofan 21559f4b3d3SQiu Chaofan // addi rx,ra,si - addis rt,rx,SI, ra > 0, SI >= 2 21659f4b3d3SQiu Chaofan case FusionFeature::FK_AddiAddis: { 21759f4b3d3SQiu Chaofan const MachineOperand &RA = FirstMI.getOperand(1); 21859f4b3d3SQiu Chaofan const MachineOperand &SI = FirstMI.getOperand(2); 21959f4b3d3SQiu Chaofan if (!SI.isImm() || !RA.isReg()) 22059f4b3d3SQiu Chaofan return false; 22159f4b3d3SQiu Chaofan if (RA.getReg() == PPC::ZERO || RA.getReg() == PPC::ZERO8) 22259f4b3d3SQiu Chaofan return false; 22359f4b3d3SQiu Chaofan int64_t ExtendedSI = SignExtend64(SI.getImm(), 16); 22459f4b3d3SQiu Chaofan return ExtendedSI >= 2; 22559f4b3d3SQiu Chaofan } 226518292dbSQingShan Zhang } 227518292dbSQingShan Zhang 228518292dbSQingShan Zhang llvm_unreachable("All the cases should have been handled"); 229518292dbSQingShan Zhang return true; 230518292dbSQingShan Zhang } 231518292dbSQingShan Zhang 232518292dbSQingShan Zhang /// Check if the instr pair, FirstMI and SecondMI, should be fused together. 233518292dbSQingShan Zhang /// Given SecondMI, when FirstMI is unspecified, then check if SecondMI may be 234518292dbSQingShan Zhang /// part of a fused pair at all. 235518292dbSQingShan Zhang static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, 236518292dbSQingShan Zhang const TargetSubtargetInfo &TSI, 237518292dbSQingShan Zhang const MachineInstr *FirstMI, 238518292dbSQingShan Zhang const MachineInstr &SecondMI) { 239518292dbSQingShan Zhang // We use the PPC namespace to avoid the need to prefix opcodes with PPC:: in 240518292dbSQingShan Zhang // the def file. 241518292dbSQingShan Zhang using namespace PPC; 242518292dbSQingShan Zhang 243518292dbSQingShan Zhang const PPCSubtarget &ST = static_cast<const PPCSubtarget&>(TSI); 244518292dbSQingShan Zhang static const FusionFeature FusionFeatures[] = { 245518292dbSQingShan Zhang #define FUSION_FEATURE(KIND, HAS_FEATURE, DEP_OP_IDX, OPSET1, OPSET2) { \ 246518292dbSQingShan Zhang FusionFeature::FUSION_KIND(KIND), ST.HAS_FEATURE(), DEP_OP_IDX, { OPSET1 },\ 247518292dbSQingShan Zhang { OPSET2 } }, 248518292dbSQingShan Zhang #include "PPCMacroFusion.def" 249518292dbSQingShan Zhang }; 250518292dbSQingShan Zhang #undef FUSION_KIND 251518292dbSQingShan Zhang 252518292dbSQingShan Zhang for (auto &Feature : FusionFeatures) { 253518292dbSQingShan Zhang // Skip if the feature is not supported. 254518292dbSQingShan Zhang if (!Feature.isSupported()) 255518292dbSQingShan Zhang continue; 256518292dbSQingShan Zhang 257518292dbSQingShan Zhang // Only when the SecondMI is fusable, we are starting to look for the 258518292dbSQingShan Zhang // fusable FirstMI. 259518292dbSQingShan Zhang if (Feature.hasOp2(SecondMI.getOpcode())) { 260518292dbSQingShan Zhang // If FirstMI == nullptr, that means, we're only checking whether SecondMI 261518292dbSQingShan Zhang // can be fused at all. 262518292dbSQingShan Zhang if (!FirstMI) 263518292dbSQingShan Zhang return true; 264518292dbSQingShan Zhang 265518292dbSQingShan Zhang // Checking if the FirstMI is fusable with the SecondMI. 266518292dbSQingShan Zhang if (!Feature.hasOp1(FirstMI->getOpcode())) 267518292dbSQingShan Zhang continue; 268518292dbSQingShan Zhang 269518292dbSQingShan Zhang auto DepOpIdx = Feature.depOpIdx(); 270*a7938c74SKazu Hirata if (DepOpIdx) { 271518292dbSQingShan Zhang // Checking if the result of the FirstMI is the desired operand of the 272518292dbSQingShan Zhang // SecondMI if the DepOpIdx is set. Otherwise, ignore it. 273518292dbSQingShan Zhang if (!matchingRegOps(*FirstMI, 0, SecondMI, *DepOpIdx)) 274518292dbSQingShan Zhang return false; 275518292dbSQingShan Zhang } 276518292dbSQingShan Zhang 277518292dbSQingShan Zhang // Checking more on the instruction operands. 278518292dbSQingShan Zhang if (checkOpConstraints(Feature.getKind(), *FirstMI, SecondMI)) 279518292dbSQingShan Zhang return true; 280518292dbSQingShan Zhang } 281518292dbSQingShan Zhang } 282518292dbSQingShan Zhang 283518292dbSQingShan Zhang return false; 284518292dbSQingShan Zhang } 285518292dbSQingShan Zhang 286518292dbSQingShan Zhang } // end anonymous namespace 287518292dbSQingShan Zhang 288518292dbSQingShan Zhang namespace llvm { 289518292dbSQingShan Zhang 290518292dbSQingShan Zhang std::unique_ptr<ScheduleDAGMutation> createPowerPCMacroFusionDAGMutation () { 291518292dbSQingShan Zhang return createMacroFusionDAGMutation(shouldScheduleAdjacent); 292518292dbSQingShan Zhang } 293518292dbSQingShan Zhang 294518292dbSQingShan Zhang } // end namespace llvm 295