xref: /llvm-project/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp (revision cfee7152d4eb673976b51b831295dcf5b1811634)
//===- PPCMacroFusion.cpp - PowerPC Macro Fusion --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This file contains the PowerPC implementation of the DAG scheduling
///  mutation to pair instructions back to back.
//
//===----------------------------------------------------------------------===//
13518292dbSQingShan Zhang 
14518292dbSQingShan Zhang #include "PPC.h"
15518292dbSQingShan Zhang #include "PPCSubtarget.h"
16518292dbSQingShan Zhang #include "llvm/ADT/DenseSet.h"
17518292dbSQingShan Zhang #include "llvm/CodeGen/MacroFusion.h"
18989f1c72Sserge-sans-paille #include "llvm/CodeGen/ScheduleDAGMutation.h"
19c672bb67SKazu Hirata #include <optional>
20518292dbSQingShan Zhang 
21518292dbSQingShan Zhang using namespace llvm;
22518292dbSQingShan Zhang namespace {
23518292dbSQingShan Zhang 
24518292dbSQingShan Zhang class FusionFeature {
25518292dbSQingShan Zhang public:
26518292dbSQingShan Zhang   typedef SmallDenseSet<unsigned> FusionOpSet;
27518292dbSQingShan Zhang 
28518292dbSQingShan Zhang   enum FusionKind {
29518292dbSQingShan Zhang   #define FUSION_KIND(KIND) FK_##KIND
30518292dbSQingShan Zhang   #define FUSION_FEATURE(KIND, HAS_FEATURE, DEP_OP_IDX, OPSET1, OPSET2) \
31518292dbSQingShan Zhang     FUSION_KIND(KIND),
32518292dbSQingShan Zhang   #include "PPCMacroFusion.def"
33518292dbSQingShan Zhang   FUSION_KIND(END)
34518292dbSQingShan Zhang   };
35518292dbSQingShan Zhang private:
36518292dbSQingShan Zhang   // Each fusion feature is assigned with one fusion kind. All the
37518292dbSQingShan Zhang   // instructions with the same fusion kind have the same fusion characteristic.
38518292dbSQingShan Zhang   FusionKind Kd;
39518292dbSQingShan Zhang   // True if this feature is enabled.
40518292dbSQingShan Zhang   bool Supported;
41518292dbSQingShan Zhang   // li rx, si
42518292dbSQingShan Zhang   // load rt, ra, rx
43518292dbSQingShan Zhang   // The dependent operand index in the second op(load). And the negative means
44518292dbSQingShan Zhang   // it could be any one.
45518292dbSQingShan Zhang   int DepOpIdx;
46518292dbSQingShan Zhang   // The first fusion op set.
47518292dbSQingShan Zhang   FusionOpSet OpSet1;
48518292dbSQingShan Zhang   // The second fusion op set.
49518292dbSQingShan Zhang   FusionOpSet OpSet2;
50518292dbSQingShan Zhang public:
FusionFeature(FusionKind Kind,bool HasFeature,int Index,const FusionOpSet & First,const FusionOpSet & Second)51518292dbSQingShan Zhang   FusionFeature(FusionKind Kind, bool HasFeature, int Index,
52518292dbSQingShan Zhang                 const FusionOpSet &First, const FusionOpSet &Second) :
53518292dbSQingShan Zhang     Kd(Kind), Supported(HasFeature), DepOpIdx(Index), OpSet1(First),
54518292dbSQingShan Zhang     OpSet2(Second) {}
55518292dbSQingShan Zhang 
hasOp1(unsigned Opc) const56b7c5e0b0SKazu Hirata   bool hasOp1(unsigned Opc) const { return OpSet1.contains(Opc); }
hasOp2(unsigned Opc) const57b7c5e0b0SKazu Hirata   bool hasOp2(unsigned Opc) const { return OpSet2.contains(Opc); }
isSupported() const58518292dbSQingShan Zhang   bool isSupported() const { return Supported; }
depOpIdx() const59c672bb67SKazu Hirata   std::optional<unsigned> depOpIdx() const {
60518292dbSQingShan Zhang     if (DepOpIdx < 0)
6120cde154SKazu Hirata       return std::nullopt;
62518292dbSQingShan Zhang     return DepOpIdx;
63518292dbSQingShan Zhang   }
64518292dbSQingShan Zhang 
getKind() const65518292dbSQingShan Zhang   FusionKind getKind() const { return Kd; }
66518292dbSQingShan Zhang };
67518292dbSQingShan Zhang 
matchingRegOps(const MachineInstr & FirstMI,int FirstMIOpIndex,const MachineInstr & SecondMI,int SecondMIOpIndex)68518292dbSQingShan Zhang static bool matchingRegOps(const MachineInstr &FirstMI,
69518292dbSQingShan Zhang                            int FirstMIOpIndex,
70518292dbSQingShan Zhang                            const MachineInstr &SecondMI,
71518292dbSQingShan Zhang                            int SecondMIOpIndex) {
72518292dbSQingShan Zhang   const MachineOperand &Op1 = FirstMI.getOperand(FirstMIOpIndex);
73518292dbSQingShan Zhang   const MachineOperand &Op2 = SecondMI.getOperand(SecondMIOpIndex);
74518292dbSQingShan Zhang   if (!Op1.isReg() || !Op2.isReg())
75518292dbSQingShan Zhang     return false;
76518292dbSQingShan Zhang 
77518292dbSQingShan Zhang   return Op1.getReg() == Op2.getReg();
78518292dbSQingShan Zhang }
79518292dbSQingShan Zhang 
matchingImmOps(const MachineInstr & MI,int MIOpIndex,int64_t Expect,unsigned ExtendFrom=64)809b5e2b52SQiu Chaofan static bool matchingImmOps(const MachineInstr &MI,
819b5e2b52SQiu Chaofan                            int MIOpIndex,
829b5e2b52SQiu Chaofan                            int64_t Expect,
839b5e2b52SQiu Chaofan                            unsigned ExtendFrom = 64) {
849b5e2b52SQiu Chaofan   const MachineOperand &Op = MI.getOperand(MIOpIndex);
859b5e2b52SQiu Chaofan   if (!Op.isImm())
869b5e2b52SQiu Chaofan     return false;
879b5e2b52SQiu Chaofan   int64_t Imm = Op.getImm();
889b5e2b52SQiu Chaofan   if (ExtendFrom < 64)
899b5e2b52SQiu Chaofan     Imm = SignExtend64(Imm, ExtendFrom);
909b5e2b52SQiu Chaofan   return Imm == Expect;
919b5e2b52SQiu Chaofan }
929b5e2b52SQiu Chaofan 
93518292dbSQingShan Zhang // Return true if the FirstMI meets the constraints of SecondMI according to
94518292dbSQingShan Zhang // fusion specification.
checkOpConstraints(FusionFeature::FusionKind Kd,const MachineInstr & FirstMI,const MachineInstr & SecondMI)95518292dbSQingShan Zhang static bool checkOpConstraints(FusionFeature::FusionKind Kd,
96518292dbSQingShan Zhang                                const MachineInstr &FirstMI,
97518292dbSQingShan Zhang                                const MachineInstr &SecondMI) {
98518292dbSQingShan Zhang   switch (Kd) {
99518292dbSQingShan Zhang   // The hardware didn't require any specific check for the fused instructions'
100518292dbSQingShan Zhang   // operands. Therefore, return true to indicate that, it is fusable.
101518292dbSQingShan Zhang   default: return true;
102518292dbSQingShan Zhang   // [addi rt,ra,si - lxvd2x xt,ra,rb] etc.
103518292dbSQingShan Zhang   case FusionFeature::FK_AddiLoad: {
104518292dbSQingShan Zhang     // lxvd2x(ra) cannot be zero
105518292dbSQingShan Zhang     const MachineOperand &RA = SecondMI.getOperand(1);
106518292dbSQingShan Zhang     if (!RA.isReg())
107518292dbSQingShan Zhang       return true;
108518292dbSQingShan Zhang 
109*79858d19SCraig Topper     return RA.getReg().isVirtual() ||
110518292dbSQingShan Zhang            (RA.getReg() != PPC::ZERO && RA.getReg() != PPC::ZERO8);
111518292dbSQingShan Zhang   }
112518292dbSQingShan Zhang   // [addis rt,ra,si - ld rt,ds(ra)] etc.
113518292dbSQingShan Zhang   case FusionFeature::FK_AddisLoad: {
114518292dbSQingShan Zhang     const MachineOperand &RT = SecondMI.getOperand(0);
115518292dbSQingShan Zhang     if (!RT.isReg())
116518292dbSQingShan Zhang       return true;
117518292dbSQingShan Zhang 
118518292dbSQingShan Zhang     // Only check it for non-virtual register.
119*79858d19SCraig Topper     if (!RT.getReg().isVirtual())
120518292dbSQingShan Zhang       // addis(rt) = ld(ra) = ld(rt)
121518292dbSQingShan Zhang       // ld(rt) cannot be zero
122518292dbSQingShan Zhang       if (!matchingRegOps(SecondMI, 0, SecondMI, 2) ||
123518292dbSQingShan Zhang           (RT.getReg() == PPC::ZERO || RT.getReg() == PPC::ZERO8))
124518292dbSQingShan Zhang           return false;
125518292dbSQingShan Zhang 
126518292dbSQingShan Zhang     // addis(si) first 12 bits must be all 1s or all 0s
127518292dbSQingShan Zhang     const MachineOperand &SI = FirstMI.getOperand(2);
128518292dbSQingShan Zhang     if (!SI.isImm())
129518292dbSQingShan Zhang       return true;
130518292dbSQingShan Zhang     int64_t Imm = SI.getImm();
131b83490bdSQingShan Zhang     if (((Imm & 0xFFF0) != 0) && ((Imm & 0xFFF0) != 0xFFF0))
132518292dbSQingShan Zhang       return false;
133518292dbSQingShan Zhang 
134518292dbSQingShan Zhang     // If si = 1111111111110000 and the msb of the d/ds field of the load equals
135518292dbSQingShan Zhang     // 1, then fusion does not occur.
136518292dbSQingShan Zhang     if ((Imm & 0xFFF0) == 0xFFF0) {
137518292dbSQingShan Zhang       const MachineOperand &D = SecondMI.getOperand(1);
138518292dbSQingShan Zhang       if (!D.isImm())
139518292dbSQingShan Zhang         return true;
140518292dbSQingShan Zhang 
141518292dbSQingShan Zhang       // 14 bit for DS field, while 16 bit for D field.
142518292dbSQingShan Zhang       int MSB = 15;
143518292dbSQingShan Zhang       if (SecondMI.getOpcode() == PPC::LD)
144518292dbSQingShan Zhang         MSB = 13;
145518292dbSQingShan Zhang 
146518292dbSQingShan Zhang       return (D.getImm() & (1ULL << MSB)) == 0;
147518292dbSQingShan Zhang     }
148518292dbSQingShan Zhang     return true;
149518292dbSQingShan Zhang   }
1509b5e2b52SQiu Chaofan 
1519b5e2b52SQiu Chaofan   case FusionFeature::FK_SldiAdd:
1529b5e2b52SQiu Chaofan     return (matchingImmOps(FirstMI, 2, 3) && matchingImmOps(FirstMI, 3, 60)) ||
1539b5e2b52SQiu Chaofan            (matchingImmOps(FirstMI, 2, 6) && matchingImmOps(FirstMI, 3, 57));
15459f4b3d3SQiu Chaofan 
15559f4b3d3SQiu Chaofan   // rldicl rx, ra, 1, 0  - xor
15659f4b3d3SQiu Chaofan   case FusionFeature::FK_RotateLeftXor:
15759f4b3d3SQiu Chaofan     return matchingImmOps(FirstMI, 2, 1) && matchingImmOps(FirstMI, 3, 0);
15859f4b3d3SQiu Chaofan 
15959f4b3d3SQiu Chaofan   // rldicr rx, ra, 1, 63 - xor
16059f4b3d3SQiu Chaofan   case FusionFeature::FK_RotateRightXor:
16159f4b3d3SQiu Chaofan     return matchingImmOps(FirstMI, 2, 1) && matchingImmOps(FirstMI, 3, 63);
16259f4b3d3SQiu Chaofan 
16359f4b3d3SQiu Chaofan   // We actually use CMPW* and CMPD*, 'l' doesn't exist as an operand in instr.
16459f4b3d3SQiu Chaofan 
16559f4b3d3SQiu Chaofan   // { lbz,lbzx,lhz,lhzx,lwz,lwzx } - cmpi 0,1,rx,{ 0,1,-1 }
16659f4b3d3SQiu Chaofan   // { lbz,lbzx,lhz,lhzx,lwz,lwzx } - cmpli 0,L,rx,{ 0,1 }
16759f4b3d3SQiu Chaofan   case FusionFeature::FK_LoadCmp1:
16859f4b3d3SQiu Chaofan   // { ld,ldx } - cmpi 0,1,rx,{ 0,1,-1 }
16959f4b3d3SQiu Chaofan   // { ld,ldx } - cmpli 0,1,rx,{ 0,1 }
17059f4b3d3SQiu Chaofan   case FusionFeature::FK_LoadCmp2: {
17159f4b3d3SQiu Chaofan     const MachineOperand &BT = SecondMI.getOperand(0);
172*79858d19SCraig Topper     if (!BT.isReg() || (!BT.getReg().isVirtual() && BT.getReg() != PPC::CR0))
17359f4b3d3SQiu Chaofan       return false;
17459f4b3d3SQiu Chaofan     if (SecondMI.getOpcode() == PPC::CMPDI &&
17559f4b3d3SQiu Chaofan         matchingImmOps(SecondMI, 2, -1, 16))
17659f4b3d3SQiu Chaofan       return true;
17759f4b3d3SQiu Chaofan     return matchingImmOps(SecondMI, 2, 0) || matchingImmOps(SecondMI, 2, 1);
17859f4b3d3SQiu Chaofan   }
17959f4b3d3SQiu Chaofan 
18059f4b3d3SQiu Chaofan   // { lha,lhax,lwa,lwax } - cmpi 0,L,rx,{ 0,1,-1 }
18159f4b3d3SQiu Chaofan   case FusionFeature::FK_LoadCmp3: {
18259f4b3d3SQiu Chaofan     const MachineOperand &BT = SecondMI.getOperand(0);
183*79858d19SCraig Topper     if (!BT.isReg() || (!BT.getReg().isVirtual() && BT.getReg() != PPC::CR0))
18459f4b3d3SQiu Chaofan       return false;
18559f4b3d3SQiu Chaofan     return matchingImmOps(SecondMI, 2, 0) || matchingImmOps(SecondMI, 2, 1) ||
18659f4b3d3SQiu Chaofan            matchingImmOps(SecondMI, 2, -1, 16);
18759f4b3d3SQiu Chaofan   }
18859f4b3d3SQiu Chaofan 
18959f4b3d3SQiu Chaofan   // mtctr - { bcctr,bcctrl }
19059f4b3d3SQiu Chaofan   case FusionFeature::FK_ZeroMoveCTR:
19159f4b3d3SQiu Chaofan     // ( mtctr rx ) is alias of ( mtspr 9, rx )
19259f4b3d3SQiu Chaofan     return (FirstMI.getOpcode() != PPC::MTSPR &&
19359f4b3d3SQiu Chaofan             FirstMI.getOpcode() != PPC::MTSPR8) ||
19459f4b3d3SQiu Chaofan            matchingImmOps(FirstMI, 0, 9);
19559f4b3d3SQiu Chaofan 
19659f4b3d3SQiu Chaofan   // mtlr - { bclr,bclrl }
19759f4b3d3SQiu Chaofan   case FusionFeature::FK_ZeroMoveLR:
19859f4b3d3SQiu Chaofan     // ( mtlr rx ) is alias of ( mtspr 8, rx )
19959f4b3d3SQiu Chaofan     return (FirstMI.getOpcode() != PPC::MTSPR &&
20059f4b3d3SQiu Chaofan             FirstMI.getOpcode() != PPC::MTSPR8) ||
20159f4b3d3SQiu Chaofan            matchingImmOps(FirstMI, 0, 8);
20259f4b3d3SQiu Chaofan 
20359f4b3d3SQiu Chaofan   // addis rx,ra,si - addi rt,rx,SI, SI >= 0
20459f4b3d3SQiu Chaofan   case FusionFeature::FK_AddisAddi: {
20559f4b3d3SQiu Chaofan     const MachineOperand &RA = FirstMI.getOperand(1);
20659f4b3d3SQiu Chaofan     const MachineOperand &SI = SecondMI.getOperand(2);
20759f4b3d3SQiu Chaofan     if (!SI.isImm() || !RA.isReg())
20859f4b3d3SQiu Chaofan       return false;
20959f4b3d3SQiu Chaofan     if (RA.getReg() == PPC::ZERO || RA.getReg() == PPC::ZERO8)
21059f4b3d3SQiu Chaofan       return false;
21159f4b3d3SQiu Chaofan     return SignExtend64(SI.getImm(), 16) >= 0;
21259f4b3d3SQiu Chaofan   }
21359f4b3d3SQiu Chaofan 
21459f4b3d3SQiu Chaofan   // addi rx,ra,si - addis rt,rx,SI, ra > 0, SI >= 2
21559f4b3d3SQiu Chaofan   case FusionFeature::FK_AddiAddis: {
21659f4b3d3SQiu Chaofan     const MachineOperand &RA = FirstMI.getOperand(1);
21759f4b3d3SQiu Chaofan     const MachineOperand &SI = FirstMI.getOperand(2);
21859f4b3d3SQiu Chaofan     if (!SI.isImm() || !RA.isReg())
21959f4b3d3SQiu Chaofan       return false;
22059f4b3d3SQiu Chaofan     if (RA.getReg() == PPC::ZERO || RA.getReg() == PPC::ZERO8)
22159f4b3d3SQiu Chaofan       return false;
22259f4b3d3SQiu Chaofan     int64_t ExtendedSI = SignExtend64(SI.getImm(), 16);
22359f4b3d3SQiu Chaofan     return ExtendedSI >= 2;
22459f4b3d3SQiu Chaofan   }
225518292dbSQingShan Zhang   }
226518292dbSQingShan Zhang 
227518292dbSQingShan Zhang   llvm_unreachable("All the cases should have been handled");
228518292dbSQingShan Zhang   return true;
229518292dbSQingShan Zhang }
230518292dbSQingShan Zhang 
231518292dbSQingShan Zhang /// Check if the instr pair, FirstMI and SecondMI, should be fused together.
232518292dbSQingShan Zhang /// Given SecondMI, when FirstMI is unspecified, then check if SecondMI may be
233518292dbSQingShan Zhang /// part of a fused pair at all.
shouldScheduleAdjacent(const TargetInstrInfo & TII,const TargetSubtargetInfo & TSI,const MachineInstr * FirstMI,const MachineInstr & SecondMI)234518292dbSQingShan Zhang static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
235518292dbSQingShan Zhang                                    const TargetSubtargetInfo &TSI,
236518292dbSQingShan Zhang                                    const MachineInstr *FirstMI,
237518292dbSQingShan Zhang                                    const MachineInstr &SecondMI) {
238518292dbSQingShan Zhang   // We use the PPC namespace to avoid the need to prefix opcodes with PPC:: in
239518292dbSQingShan Zhang   // the def file.
240518292dbSQingShan Zhang   using namespace PPC;
241518292dbSQingShan Zhang 
242518292dbSQingShan Zhang   const PPCSubtarget &ST = static_cast<const PPCSubtarget&>(TSI);
243518292dbSQingShan Zhang   static const FusionFeature FusionFeatures[] = {
244518292dbSQingShan Zhang   #define FUSION_FEATURE(KIND, HAS_FEATURE, DEP_OP_IDX, OPSET1, OPSET2) { \
245518292dbSQingShan Zhang     FusionFeature::FUSION_KIND(KIND), ST.HAS_FEATURE(), DEP_OP_IDX, { OPSET1 },\
246518292dbSQingShan Zhang     { OPSET2 } },
247518292dbSQingShan Zhang    #include "PPCMacroFusion.def"
248518292dbSQingShan Zhang   };
249518292dbSQingShan Zhang   #undef FUSION_KIND
250518292dbSQingShan Zhang 
251518292dbSQingShan Zhang   for (auto &Feature : FusionFeatures) {
252518292dbSQingShan Zhang     // Skip if the feature is not supported.
253518292dbSQingShan Zhang     if (!Feature.isSupported())
254518292dbSQingShan Zhang       continue;
255518292dbSQingShan Zhang 
256518292dbSQingShan Zhang     // Only when the SecondMI is fusable, we are starting to look for the
257518292dbSQingShan Zhang     // fusable FirstMI.
258518292dbSQingShan Zhang     if (Feature.hasOp2(SecondMI.getOpcode())) {
259518292dbSQingShan Zhang       // If FirstMI == nullptr, that means, we're only checking whether SecondMI
260518292dbSQingShan Zhang       // can be fused at all.
261518292dbSQingShan Zhang       if (!FirstMI)
262518292dbSQingShan Zhang         return true;
263518292dbSQingShan Zhang 
264518292dbSQingShan Zhang       // Checking if the FirstMI is fusable with the SecondMI.
265518292dbSQingShan Zhang       if (!Feature.hasOp1(FirstMI->getOpcode()))
266518292dbSQingShan Zhang         continue;
267518292dbSQingShan Zhang 
268518292dbSQingShan Zhang       auto DepOpIdx = Feature.depOpIdx();
269a7938c74SKazu Hirata       if (DepOpIdx) {
270518292dbSQingShan Zhang         // Checking if the result of the FirstMI is the desired operand of the
271518292dbSQingShan Zhang         // SecondMI if the DepOpIdx is set. Otherwise, ignore it.
272518292dbSQingShan Zhang         if (!matchingRegOps(*FirstMI, 0, SecondMI, *DepOpIdx))
273518292dbSQingShan Zhang           return false;
274518292dbSQingShan Zhang       }
275518292dbSQingShan Zhang 
276518292dbSQingShan Zhang       // Checking more on the instruction operands.
277518292dbSQingShan Zhang       if (checkOpConstraints(Feature.getKind(), *FirstMI, SecondMI))
278518292dbSQingShan Zhang         return true;
279518292dbSQingShan Zhang     }
280518292dbSQingShan Zhang   }
281518292dbSQingShan Zhang 
282518292dbSQingShan Zhang   return false;
283518292dbSQingShan Zhang }
284518292dbSQingShan Zhang 
285518292dbSQingShan Zhang } // end anonymous namespace
286518292dbSQingShan Zhang 
287518292dbSQingShan Zhang namespace llvm {
288518292dbSQingShan Zhang 
createPowerPCMacroFusionDAGMutation()289518292dbSQingShan Zhang std::unique_ptr<ScheduleDAGMutation> createPowerPCMacroFusionDAGMutation() {
290518292dbSQingShan Zhang   return createMacroFusionDAGMutation(shouldScheduleAdjacent);
291518292dbSQingShan Zhang }
292518292dbSQingShan Zhang 
293518292dbSQingShan Zhang } // end namespace llvm
294