xref: /llvm-project/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp (revision f71cb9dbb739bb58ce7e52e49fe384ff2ff11687)
1 //===-- PPCExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass that expands atomic pseudo instructions into
10 // target instructions post RA. With this approach, each LL/SC loop is treated
11 // as a single unit, which makes spilling inside the LL/SC loop unlikely.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "MCTargetDesc/PPCPredicates.h"
16 #include "PPC.h"
17 #include "PPCInstrInfo.h"
18 
19 #include "llvm/CodeGen/LivePhysRegs.h"
20 #include "llvm/CodeGen/MachineFunctionPass.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 
23 using namespace llvm;
24 
25 #define DEBUG_TYPE "ppc-atomic-expand"
26 
27 namespace {
28 
29 class PPCExpandAtomicPseudo : public MachineFunctionPass {
30 public:
31   const PPCInstrInfo *TII;
32   const PPCRegisterInfo *TRI;
33   static char ID;
34 
35   PPCExpandAtomicPseudo() : MachineFunctionPass(ID) {
36     initializePPCExpandAtomicPseudoPass(*PassRegistry::getPassRegistry());
37   }
38 
39   bool runOnMachineFunction(MachineFunction &MF) override;
40 
41 private:
42   bool expandMI(MachineBasicBlock &MBB, MachineInstr &MI,
43                 MachineBasicBlock::iterator &NMBBI);
44   bool expandAtomicRMW128(MachineBasicBlock &MBB, MachineInstr &MI,
45                           MachineBasicBlock::iterator &NMBBI);
46   bool expandAtomicCmpSwap128(MachineBasicBlock &MBB, MachineInstr &MI,
47                               MachineBasicBlock::iterator &NMBBI);
48 };
49 
50 static void PairedCopy(const PPCInstrInfo *TII, MachineBasicBlock &MBB,
51                        MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
52                        Register Dest0, Register Dest1, Register Src0,
53                        Register Src1) {
54   const MCInstrDesc &OR = TII->get(PPC::OR8);
55   const MCInstrDesc &XOR = TII->get(PPC::XOR8);
56   if (Dest0 == Src1 && Dest1 == Src0) {
57     // The most tricky case, swapping values.
58     BuildMI(MBB, MBBI, DL, XOR, Dest0).addReg(Dest0).addReg(Dest1);
59     BuildMI(MBB, MBBI, DL, XOR, Dest1).addReg(Dest0).addReg(Dest1);
60     BuildMI(MBB, MBBI, DL, XOR, Dest0).addReg(Dest0).addReg(Dest1);
61   } else if (Dest0 != Src0 || Dest1 != Src1) {
62     if (Dest0 == Src1 || Dest1 != Src0) {
63       BuildMI(MBB, MBBI, DL, OR, Dest1).addReg(Src1).addReg(Src1);
64       BuildMI(MBB, MBBI, DL, OR, Dest0).addReg(Src0).addReg(Src0);
65     } else {
66       BuildMI(MBB, MBBI, DL, OR, Dest0).addReg(Src0).addReg(Src0);
67       BuildMI(MBB, MBBI, DL, OR, Dest1).addReg(Src1).addReg(Src1);
68     }
69   }
70 }
71 
72 bool PPCExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) {
73   bool Changed = false;
74   TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
75   TRI = &TII->getRegisterInfo();
76   for (MachineBasicBlock &MBB : MF) {
77     for (MachineBasicBlock::iterator MBBI = MBB.begin(), MBBE = MBB.end();
78          MBBI != MBBE;) {
79       MachineInstr &MI = *MBBI;
80       MachineBasicBlock::iterator NMBBI = std::next(MBBI);
81       Changed |= expandMI(MBB, MI, NMBBI);
82       MBBI = NMBBI;
83     }
84   }
85   if (Changed)
86     MF.RenumberBlocks();
87   return Changed;
88 }
89 
90 bool PPCExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB, MachineInstr &MI,
91                                      MachineBasicBlock::iterator &NMBBI) {
92   switch (MI.getOpcode()) {
93   case PPC::ATOMIC_SWAP_I128:
94   case PPC::ATOMIC_LOAD_ADD_I128:
95   case PPC::ATOMIC_LOAD_SUB_I128:
96   case PPC::ATOMIC_LOAD_XOR_I128:
97   case PPC::ATOMIC_LOAD_NAND_I128:
98   case PPC::ATOMIC_LOAD_AND_I128:
99   case PPC::ATOMIC_LOAD_OR_I128:
100     return expandAtomicRMW128(MBB, MI, NMBBI);
101   case PPC::ATOMIC_CMP_SWAP_I128:
102     return expandAtomicCmpSwap128(MBB, MI, NMBBI);
103   case PPC::BUILD_QUADWORD: {
104     Register Dst = MI.getOperand(0).getReg();
105     Register DstHi = TRI->getSubReg(Dst, PPC::sub_gp8_x0);
106     Register DstLo = TRI->getSubReg(Dst, PPC::sub_gp8_x1);
107     Register Lo = MI.getOperand(1).getReg();
108     Register Hi = MI.getOperand(2).getReg();
109     PairedCopy(TII, MBB, MI, MI.getDebugLoc(), DstHi, DstLo, Hi, Lo);
110     MI.eraseFromParent();
111     return true;
112   }
113   default:
114     return false;
115   }
116 }
117 
118 bool PPCExpandAtomicPseudo::expandAtomicRMW128(
119     MachineBasicBlock &MBB, MachineInstr &MI,
120     MachineBasicBlock::iterator &NMBBI) {
121   const MCInstrDesc &LL = TII->get(PPC::LQARX);
122   const MCInstrDesc &SC = TII->get(PPC::STQCX);
123   DebugLoc DL = MI.getDebugLoc();
124   MachineFunction *MF = MBB.getParent();
125   const BasicBlock *BB = MBB.getBasicBlock();
126   // Create layout of control flow.
127   MachineFunction::iterator MFI = ++MBB.getIterator();
128   MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(BB);
129   MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(BB);
130   MF->insert(MFI, LoopMBB);
131   MF->insert(MFI, ExitMBB);
132   ExitMBB->splice(ExitMBB->begin(), &MBB, std::next(MI.getIterator()),
133                   MBB.end());
134   ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
135   MBB.addSuccessor(LoopMBB);
136 
137   // For non-min/max operations, control flow is kinda like:
138   // MBB:
139   //   ...
140   // LoopMBB:
141   //   lqarx in, ptr
142   //   addc out.sub_x1, in.sub_x1, op.sub_x1
143   //   adde out.sub_x0, in.sub_x0, op.sub_x0
144   //   stqcx out, ptr
145   //   bne- LoopMBB
146   // ExitMBB:
147   //   ...
148   Register Old = MI.getOperand(0).getReg();
149   Register OldHi = TRI->getSubReg(Old, PPC::sub_gp8_x0);
150   Register OldLo = TRI->getSubReg(Old, PPC::sub_gp8_x1);
151   Register Scratch = MI.getOperand(1).getReg();
152   Register ScratchHi = TRI->getSubReg(Scratch, PPC::sub_gp8_x0);
153   Register ScratchLo = TRI->getSubReg(Scratch, PPC::sub_gp8_x1);
154   Register RA = MI.getOperand(2).getReg();
155   Register RB = MI.getOperand(3).getReg();
156   Register IncrLo = MI.getOperand(4).getReg();
157   Register IncrHi = MI.getOperand(5).getReg();
158   unsigned RMWOpcode = MI.getOpcode();
159 
160   MachineBasicBlock *CurrentMBB = LoopMBB;
161   BuildMI(CurrentMBB, DL, LL, Old).addReg(RA).addReg(RB);
162 
163   switch (RMWOpcode) {
164   case PPC::ATOMIC_SWAP_I128:
165     PairedCopy(TII, *CurrentMBB, CurrentMBB->end(), DL, ScratchHi, ScratchLo,
166                IncrHi, IncrLo);
167     break;
168   case PPC::ATOMIC_LOAD_ADD_I128:
169     BuildMI(CurrentMBB, DL, TII->get(PPC::ADDC8), ScratchLo)
170         .addReg(IncrLo)
171         .addReg(OldLo);
172     BuildMI(CurrentMBB, DL, TII->get(PPC::ADDE8), ScratchHi)
173         .addReg(IncrHi)
174         .addReg(OldHi);
175     break;
176   case PPC::ATOMIC_LOAD_SUB_I128:
177     BuildMI(CurrentMBB, DL, TII->get(PPC::SUBFC8), ScratchLo)
178         .addReg(IncrLo)
179         .addReg(OldLo);
180     BuildMI(CurrentMBB, DL, TII->get(PPC::SUBFE8), ScratchHi)
181         .addReg(IncrHi)
182         .addReg(OldHi);
183     break;
184 
185 #define TRIVIAL_ATOMICRMW(Opcode, Instr)                                       \
186   case Opcode:                                                                 \
187     BuildMI(CurrentMBB, DL, TII->get((Instr)), ScratchLo)                      \
188         .addReg(IncrLo)                                                        \
189         .addReg(OldLo);                                                        \
190     BuildMI(CurrentMBB, DL, TII->get((Instr)), ScratchHi)                      \
191         .addReg(IncrHi)                                                        \
192         .addReg(OldHi);                                                        \
193     break
194 
195     TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_OR_I128, PPC::OR8);
196     TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_XOR_I128, PPC::XOR8);
197     TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_AND_I128, PPC::AND8);
198     TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_NAND_I128, PPC::NAND8);
199 #undef TRIVIAL_ATOMICRMW
200   default:
201     llvm_unreachable("Unhandled atomic RMW operation");
202   }
203   BuildMI(CurrentMBB, DL, SC).addReg(Scratch).addReg(RA).addReg(RB);
204   BuildMI(CurrentMBB, DL, TII->get(PPC::BCC))
205       .addImm(PPC::PRED_NE)
206       .addReg(PPC::CR0)
207       .addMBB(LoopMBB);
208   CurrentMBB->addSuccessor(LoopMBB);
209   CurrentMBB->addSuccessor(ExitMBB);
210   fullyRecomputeLiveIns({ExitMBB, LoopMBB});
211   NMBBI = MBB.end();
212   MI.eraseFromParent();
213   return true;
214 }
215 
216 bool PPCExpandAtomicPseudo::expandAtomicCmpSwap128(
217     MachineBasicBlock &MBB, MachineInstr &MI,
218     MachineBasicBlock::iterator &NMBBI) {
219   const MCInstrDesc &LL = TII->get(PPC::LQARX);
220   const MCInstrDesc &SC = TII->get(PPC::STQCX);
221   DebugLoc DL = MI.getDebugLoc();
222   MachineFunction *MF = MBB.getParent();
223   const BasicBlock *BB = MBB.getBasicBlock();
224   Register Old = MI.getOperand(0).getReg();
225   Register OldHi = TRI->getSubReg(Old, PPC::sub_gp8_x0);
226   Register OldLo = TRI->getSubReg(Old, PPC::sub_gp8_x1);
227   Register Scratch = MI.getOperand(1).getReg();
228   Register ScratchHi = TRI->getSubReg(Scratch, PPC::sub_gp8_x0);
229   Register ScratchLo = TRI->getSubReg(Scratch, PPC::sub_gp8_x1);
230   Register RA = MI.getOperand(2).getReg();
231   Register RB = MI.getOperand(3).getReg();
232   Register CmpLo = MI.getOperand(4).getReg();
233   Register CmpHi = MI.getOperand(5).getReg();
234   Register NewLo = MI.getOperand(6).getReg();
235   Register NewHi = MI.getOperand(7).getReg();
236   // Create layout of control flow.
237   // loop:
238   //   old = lqarx ptr
239   //   <compare old, cmp>
240   //   bne 0, exit
241   // succ:
242   //   stqcx new ptr
243   //   bne 0, loop
244   // exit:
245   //   ....
246   MachineFunction::iterator MFI = ++MBB.getIterator();
247   MachineBasicBlock *LoopCmpMBB = MF->CreateMachineBasicBlock(BB);
248   MachineBasicBlock *CmpSuccMBB = MF->CreateMachineBasicBlock(BB);
249   MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(BB);
250   MF->insert(MFI, LoopCmpMBB);
251   MF->insert(MFI, CmpSuccMBB);
252   MF->insert(MFI, ExitMBB);
253   ExitMBB->splice(ExitMBB->begin(), &MBB, std::next(MI.getIterator()),
254                   MBB.end());
255   ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
256   MBB.addSuccessor(LoopCmpMBB);
257   // Build loop.
258   MachineBasicBlock *CurrentMBB = LoopCmpMBB;
259   BuildMI(CurrentMBB, DL, LL, Old).addReg(RA).addReg(RB);
260   BuildMI(CurrentMBB, DL, TII->get(PPC::XOR8), ScratchLo)
261       .addReg(OldLo)
262       .addReg(CmpLo);
263   BuildMI(CurrentMBB, DL, TII->get(PPC::XOR8), ScratchHi)
264       .addReg(OldHi)
265       .addReg(CmpHi);
266   BuildMI(CurrentMBB, DL, TII->get(PPC::OR8_rec), ScratchLo)
267       .addReg(ScratchLo)
268       .addReg(ScratchHi);
269   BuildMI(CurrentMBB, DL, TII->get(PPC::BCC))
270       .addImm(PPC::PRED_NE)
271       .addReg(PPC::CR0)
272       .addMBB(ExitMBB);
273   CurrentMBB->addSuccessor(CmpSuccMBB);
274   CurrentMBB->addSuccessor(ExitMBB);
275   // Build succ.
276   CurrentMBB = CmpSuccMBB;
277   PairedCopy(TII, *CurrentMBB, CurrentMBB->end(), DL, ScratchHi, ScratchLo,
278              NewHi, NewLo);
279   BuildMI(CurrentMBB, DL, SC).addReg(Scratch).addReg(RA).addReg(RB);
280   BuildMI(CurrentMBB, DL, TII->get(PPC::BCC))
281       .addImm(PPC::PRED_NE)
282       .addReg(PPC::CR0)
283       .addMBB(LoopCmpMBB);
284   CurrentMBB->addSuccessor(LoopCmpMBB);
285   CurrentMBB->addSuccessor(ExitMBB);
286 
287   fullyRecomputeLiveIns({ExitMBB, CmpSuccMBB, LoopCmpMBB});
288   NMBBI = MBB.end();
289   MI.eraseFromParent();
290   return true;
291 }
292 
293 } // namespace
294 
295 INITIALIZE_PASS(PPCExpandAtomicPseudo, DEBUG_TYPE, "PowerPC Expand Atomic",
296                 false, false)
297 
298 char PPCExpandAtomicPseudo::ID = 0;
299 FunctionPass *llvm::createPPCExpandAtomicPseudoPass() {
300   return new PPCExpandAtomicPseudo();
301 }
302