1b9c3941cSKai Luo //===-- PPCExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. -----===// 2b9c3941cSKai Luo // 3b9c3941cSKai Luo // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4b9c3941cSKai Luo // See https://llvm.org/LICENSE.txt for license information. 5b9c3941cSKai Luo // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6b9c3941cSKai Luo // 7b9c3941cSKai Luo //===----------------------------------------------------------------------===// 8b9c3941cSKai Luo // 9b9c3941cSKai Luo // This file contains a pass that expands atomic pseudo instructions into 10b9c3941cSKai Luo // target instructions post RA. With such method, LL/SC loop is considered as 11b9c3941cSKai Luo // a whole blob and make spilling unlikely happens in the LL/SC loop. 12b9c3941cSKai Luo // 13b9c3941cSKai Luo //===----------------------------------------------------------------------===// 14b9c3941cSKai Luo 15b9c3941cSKai Luo #include "MCTargetDesc/PPCPredicates.h" 16b9c3941cSKai Luo #include "PPC.h" 17b9c3941cSKai Luo #include "PPCInstrInfo.h" 18b9c3941cSKai Luo 19b9c3941cSKai Luo #include "llvm/CodeGen/LivePhysRegs.h" 20b9c3941cSKai Luo #include "llvm/CodeGen/MachineFunctionPass.h" 21b9c3941cSKai Luo #include "llvm/CodeGen/MachineInstrBuilder.h" 22b9c3941cSKai Luo 23b9c3941cSKai Luo using namespace llvm; 24b9c3941cSKai Luo 25b9c3941cSKai Luo #define DEBUG_TYPE "ppc-atomic-expand" 26b9c3941cSKai Luo 27b9c3941cSKai Luo namespace { 28b9c3941cSKai Luo 29b9c3941cSKai Luo class PPCExpandAtomicPseudo : public MachineFunctionPass { 30b9c3941cSKai Luo public: 31b9c3941cSKai Luo const PPCInstrInfo *TII; 32b9c3941cSKai Luo const PPCRegisterInfo *TRI; 33b9c3941cSKai Luo static char ID; 34b9c3941cSKai Luo 35b9c3941cSKai Luo PPCExpandAtomicPseudo() : MachineFunctionPass(ID) { 36b9c3941cSKai Luo initializePPCExpandAtomicPseudoPass(*PassRegistry::getPassRegistry()); 37b9c3941cSKai Luo } 38b9c3941cSKai Luo 39b9c3941cSKai Luo bool runOnMachineFunction(MachineFunction &MF) override; 40b9c3941cSKai Luo 41b9c3941cSKai Luo private: 42b9c3941cSKai Luo bool expandMI(MachineBasicBlock &MBB, MachineInstr &MI, 43b9c3941cSKai Luo MachineBasicBlock::iterator &NMBBI); 44b9c3941cSKai Luo bool expandAtomicRMW128(MachineBasicBlock &MBB, MachineInstr &MI, 45b9c3941cSKai Luo MachineBasicBlock::iterator &NMBBI); 46b9c3941cSKai Luo bool expandAtomicCmpSwap128(MachineBasicBlock &MBB, MachineInstr &MI, 47b9c3941cSKai Luo MachineBasicBlock::iterator &NMBBI); 48b9c3941cSKai Luo }; 49b9c3941cSKai Luo 50b9c3941cSKai Luo static void PairedCopy(const PPCInstrInfo *TII, MachineBasicBlock &MBB, 51b9c3941cSKai Luo MachineBasicBlock::iterator MBBI, const DebugLoc &DL, 52b9c3941cSKai Luo Register Dest0, Register Dest1, Register Src0, 53b9c3941cSKai Luo Register Src1) { 54b9c3941cSKai Luo const MCInstrDesc &OR = TII->get(PPC::OR8); 55b9c3941cSKai Luo const MCInstrDesc &XOR = TII->get(PPC::XOR8); 56b9c3941cSKai Luo if (Dest0 == Src1 && Dest1 == Src0) { 57b9c3941cSKai Luo // The most tricky case, swapping values. 58b9c3941cSKai Luo BuildMI(MBB, MBBI, DL, XOR, Dest0).addReg(Dest0).addReg(Dest1); 59b9c3941cSKai Luo BuildMI(MBB, MBBI, DL, XOR, Dest1).addReg(Dest0).addReg(Dest1); 60b9c3941cSKai Luo BuildMI(MBB, MBBI, DL, XOR, Dest0).addReg(Dest0).addReg(Dest1); 61b9c3941cSKai Luo } else if (Dest0 != Src0 || Dest1 != Src1) { 62b9c3941cSKai Luo if (Dest0 == Src1 || Dest1 != Src0) { 63b9c3941cSKai Luo BuildMI(MBB, MBBI, DL, OR, Dest1).addReg(Src1).addReg(Src1); 64b9c3941cSKai Luo BuildMI(MBB, MBBI, DL, OR, Dest0).addReg(Src0).addReg(Src0); 65b9c3941cSKai Luo } else { 66b9c3941cSKai Luo BuildMI(MBB, MBBI, DL, OR, Dest0).addReg(Src0).addReg(Src0); 67b9c3941cSKai Luo BuildMI(MBB, MBBI, DL, OR, Dest1).addReg(Src1).addReg(Src1); 68b9c3941cSKai Luo } 69b9c3941cSKai Luo } 70b9c3941cSKai Luo } 71b9c3941cSKai Luo 72b9c3941cSKai Luo bool PPCExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) { 73b9c3941cSKai Luo bool Changed = false; 74b9c3941cSKai Luo TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo()); 75b9c3941cSKai Luo TRI = &TII->getRegisterInfo(); 76d5b73a70SKazu Hirata for (MachineBasicBlock &MBB : MF) { 77b9c3941cSKai Luo for (MachineBasicBlock::iterator MBBI = MBB.begin(), MBBE = MBB.end(); 78b9c3941cSKai Luo MBBI != MBBE;) { 79b9c3941cSKai Luo MachineInstr &MI = *MBBI; 80b9c3941cSKai Luo MachineBasicBlock::iterator NMBBI = std::next(MBBI); 81b9c3941cSKai Luo Changed |= expandMI(MBB, MI, NMBBI); 82b9c3941cSKai Luo MBBI = NMBBI; 83b9c3941cSKai Luo } 84b9c3941cSKai Luo } 85b9c3941cSKai Luo if (Changed) 86b9c3941cSKai Luo MF.RenumberBlocks(); 87b9c3941cSKai Luo return Changed; 88b9c3941cSKai Luo } 89b9c3941cSKai Luo 90b9c3941cSKai Luo bool PPCExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB, MachineInstr &MI, 91b9c3941cSKai Luo MachineBasicBlock::iterator &NMBBI) { 92b9c3941cSKai Luo switch (MI.getOpcode()) { 93b9c3941cSKai Luo case PPC::ATOMIC_SWAP_I128: 94b9c3941cSKai Luo case PPC::ATOMIC_LOAD_ADD_I128: 95b9c3941cSKai Luo case PPC::ATOMIC_LOAD_SUB_I128: 96b9c3941cSKai Luo case PPC::ATOMIC_LOAD_XOR_I128: 97b9c3941cSKai Luo case PPC::ATOMIC_LOAD_NAND_I128: 98b9c3941cSKai Luo case PPC::ATOMIC_LOAD_AND_I128: 99b9c3941cSKai Luo case PPC::ATOMIC_LOAD_OR_I128: 100b9c3941cSKai Luo return expandAtomicRMW128(MBB, MI, NMBBI); 101b9c3941cSKai Luo case PPC::ATOMIC_CMP_SWAP_I128: 102b9c3941cSKai Luo return expandAtomicCmpSwap128(MBB, MI, NMBBI); 1035eaebd5dSKai Luo case PPC::BUILD_QUADWORD: { 1045eaebd5dSKai Luo Register Dst = MI.getOperand(0).getReg(); 1055eaebd5dSKai Luo Register DstHi = TRI->getSubReg(Dst, PPC::sub_gp8_x0); 1065eaebd5dSKai Luo Register DstLo = TRI->getSubReg(Dst, PPC::sub_gp8_x1); 1075eaebd5dSKai Luo Register Lo = MI.getOperand(1).getReg(); 1085eaebd5dSKai Luo Register Hi = MI.getOperand(2).getReg(); 1095eaebd5dSKai Luo PairedCopy(TII, MBB, MI, MI.getDebugLoc(), DstHi, DstLo, Hi, Lo); 1105eaebd5dSKai Luo MI.eraseFromParent(); 1115eaebd5dSKai Luo return true; 1125eaebd5dSKai Luo } 113b9c3941cSKai Luo default: 114b9c3941cSKai Luo return false; 115b9c3941cSKai Luo } 116b9c3941cSKai Luo } 117b9c3941cSKai Luo 118b9c3941cSKai Luo bool PPCExpandAtomicPseudo::expandAtomicRMW128( 119b9c3941cSKai Luo MachineBasicBlock &MBB, MachineInstr &MI, 120b9c3941cSKai Luo MachineBasicBlock::iterator &NMBBI) { 121b9c3941cSKai Luo const MCInstrDesc &LL = TII->get(PPC::LQARX); 122b9c3941cSKai Luo const MCInstrDesc &SC = TII->get(PPC::STQCX); 123b9c3941cSKai Luo DebugLoc DL = MI.getDebugLoc(); 124b9c3941cSKai Luo MachineFunction *MF = MBB.getParent(); 125b9c3941cSKai Luo const BasicBlock *BB = MBB.getBasicBlock(); 126b9c3941cSKai Luo // Create layout of control flow. 127b9c3941cSKai Luo MachineFunction::iterator MFI = ++MBB.getIterator(); 128b9c3941cSKai Luo MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(BB); 129b9c3941cSKai Luo MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(BB); 130b9c3941cSKai Luo MF->insert(MFI, LoopMBB); 131b9c3941cSKai Luo MF->insert(MFI, ExitMBB); 132b9c3941cSKai Luo ExitMBB->splice(ExitMBB->begin(), &MBB, std::next(MI.getIterator()), 133b9c3941cSKai Luo MBB.end()); 134b9c3941cSKai Luo ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB); 135b9c3941cSKai Luo MBB.addSuccessor(LoopMBB); 136b9c3941cSKai Luo 137b9c3941cSKai Luo // For non-min/max operations, control flow is kinda like: 138b9c3941cSKai Luo // MBB: 139b9c3941cSKai Luo // ... 140b9c3941cSKai Luo // LoopMBB: 141b9c3941cSKai Luo // lqarx in, ptr 142b9c3941cSKai Luo // addc out.sub_x1, in.sub_x1, op.sub_x1 143b9c3941cSKai Luo // adde out.sub_x0, in.sub_x0, op.sub_x0 144b9c3941cSKai Luo // stqcx out, ptr 145b9c3941cSKai Luo // bne- LoopMBB 146b9c3941cSKai Luo // ExitMBB: 147b9c3941cSKai Luo // ... 148b9c3941cSKai Luo Register Old = MI.getOperand(0).getReg(); 149b9c3941cSKai Luo Register OldHi = TRI->getSubReg(Old, PPC::sub_gp8_x0); 150b9c3941cSKai Luo Register OldLo = TRI->getSubReg(Old, PPC::sub_gp8_x1); 151b9c3941cSKai Luo Register Scratch = MI.getOperand(1).getReg(); 152b9c3941cSKai Luo Register ScratchHi = TRI->getSubReg(Scratch, PPC::sub_gp8_x0); 153b9c3941cSKai Luo Register ScratchLo = TRI->getSubReg(Scratch, PPC::sub_gp8_x1); 154b9c3941cSKai Luo Register RA = MI.getOperand(2).getReg(); 155b9c3941cSKai Luo Register RB = MI.getOperand(3).getReg(); 156b9c3941cSKai Luo Register IncrLo = MI.getOperand(4).getReg(); 157b9c3941cSKai Luo Register IncrHi = MI.getOperand(5).getReg(); 158b9c3941cSKai Luo unsigned RMWOpcode = MI.getOpcode(); 159b9c3941cSKai Luo 160b9c3941cSKai Luo MachineBasicBlock *CurrentMBB = LoopMBB; 161b9c3941cSKai Luo BuildMI(CurrentMBB, DL, LL, Old).addReg(RA).addReg(RB); 162b9c3941cSKai Luo 163b9c3941cSKai Luo switch (RMWOpcode) { 164b9c3941cSKai Luo case PPC::ATOMIC_SWAP_I128: 165b9c3941cSKai Luo PairedCopy(TII, *CurrentMBB, CurrentMBB->end(), DL, ScratchHi, ScratchLo, 166b9c3941cSKai Luo IncrHi, IncrLo); 167b9c3941cSKai Luo break; 168b9c3941cSKai Luo case PPC::ATOMIC_LOAD_ADD_I128: 169b9c3941cSKai Luo BuildMI(CurrentMBB, DL, TII->get(PPC::ADDC8), ScratchLo) 170b9c3941cSKai Luo .addReg(IncrLo) 171b9c3941cSKai Luo .addReg(OldLo); 172b9c3941cSKai Luo BuildMI(CurrentMBB, DL, TII->get(PPC::ADDE8), ScratchHi) 173b9c3941cSKai Luo .addReg(IncrHi) 174b9c3941cSKai Luo .addReg(OldHi); 175b9c3941cSKai Luo break; 176b9c3941cSKai Luo case PPC::ATOMIC_LOAD_SUB_I128: 177b9c3941cSKai Luo BuildMI(CurrentMBB, DL, TII->get(PPC::SUBFC8), ScratchLo) 178b9c3941cSKai Luo .addReg(IncrLo) 179b9c3941cSKai Luo .addReg(OldLo); 180b9c3941cSKai Luo BuildMI(CurrentMBB, DL, TII->get(PPC::SUBFE8), ScratchHi) 181b9c3941cSKai Luo .addReg(IncrHi) 182b9c3941cSKai Luo .addReg(OldHi); 183b9c3941cSKai Luo break; 184b9c3941cSKai Luo 185b9c3941cSKai Luo #define TRIVIAL_ATOMICRMW(Opcode, Instr) \ 186b9c3941cSKai Luo case Opcode: \ 187b9c3941cSKai Luo BuildMI(CurrentMBB, DL, TII->get((Instr)), ScratchLo) \ 188b9c3941cSKai Luo .addReg(IncrLo) \ 189b9c3941cSKai Luo .addReg(OldLo); \ 190b9c3941cSKai Luo BuildMI(CurrentMBB, DL, TII->get((Instr)), ScratchHi) \ 191b9c3941cSKai Luo .addReg(IncrHi) \ 192b9c3941cSKai Luo .addReg(OldHi); \ 193b9c3941cSKai Luo break 194b9c3941cSKai Luo 195b9c3941cSKai Luo TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_OR_I128, PPC::OR8); 196b9c3941cSKai Luo TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_XOR_I128, PPC::XOR8); 197b9c3941cSKai Luo TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_AND_I128, PPC::AND8); 198b9c3941cSKai Luo TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_NAND_I128, PPC::NAND8); 199b9c3941cSKai Luo #undef TRIVIAL_ATOMICRMW 200b9c3941cSKai Luo default: 201b9c3941cSKai Luo llvm_unreachable("Unhandled atomic RMW operation"); 202b9c3941cSKai Luo } 203b9c3941cSKai Luo BuildMI(CurrentMBB, DL, SC).addReg(Scratch).addReg(RA).addReg(RB); 204b9c3941cSKai Luo BuildMI(CurrentMBB, DL, TII->get(PPC::BCC)) 205b9c3941cSKai Luo .addImm(PPC::PRED_NE) 206b9c3941cSKai Luo .addReg(PPC::CR0) 207b9c3941cSKai Luo .addMBB(LoopMBB); 208b9c3941cSKai Luo CurrentMBB->addSuccessor(LoopMBB); 209b9c3941cSKai Luo CurrentMBB->addSuccessor(ExitMBB); 210*21d17709SKai Nacke fullyRecomputeLiveIns({ExitMBB, LoopMBB}); 211b9c3941cSKai Luo NMBBI = MBB.end(); 212b9c3941cSKai Luo MI.eraseFromParent(); 213b9c3941cSKai Luo return true; 214b9c3941cSKai Luo } 215b9c3941cSKai Luo 216b9c3941cSKai Luo bool PPCExpandAtomicPseudo::expandAtomicCmpSwap128( 217b9c3941cSKai Luo MachineBasicBlock &MBB, MachineInstr &MI, 218b9c3941cSKai Luo MachineBasicBlock::iterator &NMBBI) { 219b9c3941cSKai Luo const MCInstrDesc &LL = TII->get(PPC::LQARX); 220b9c3941cSKai Luo const MCInstrDesc &SC = TII->get(PPC::STQCX); 221b9c3941cSKai Luo DebugLoc DL = MI.getDebugLoc(); 222b9c3941cSKai Luo MachineFunction *MF = MBB.getParent(); 223b9c3941cSKai Luo const BasicBlock *BB = MBB.getBasicBlock(); 224b9c3941cSKai Luo Register Old = MI.getOperand(0).getReg(); 225b9c3941cSKai Luo Register OldHi = TRI->getSubReg(Old, PPC::sub_gp8_x0); 226b9c3941cSKai Luo Register OldLo = TRI->getSubReg(Old, PPC::sub_gp8_x1); 227b9c3941cSKai Luo Register Scratch = MI.getOperand(1).getReg(); 228b9c3941cSKai Luo Register ScratchHi = TRI->getSubReg(Scratch, PPC::sub_gp8_x0); 229b9c3941cSKai Luo Register ScratchLo = TRI->getSubReg(Scratch, PPC::sub_gp8_x1); 230b9c3941cSKai Luo Register RA = MI.getOperand(2).getReg(); 231b9c3941cSKai Luo Register RB = MI.getOperand(3).getReg(); 232b9c3941cSKai Luo Register CmpLo = MI.getOperand(4).getReg(); 233b9c3941cSKai Luo Register CmpHi = MI.getOperand(5).getReg(); 234b9c3941cSKai Luo Register NewLo = MI.getOperand(6).getReg(); 235b9c3941cSKai Luo Register NewHi = MI.getOperand(7).getReg(); 236b9c3941cSKai Luo // Create layout of control flow. 237b9c3941cSKai Luo // loop: 238b9c3941cSKai Luo // old = lqarx ptr 239b9c3941cSKai Luo // <compare old, cmp> 24031046816SKai Luo // bne 0, exit 241b9c3941cSKai Luo // succ: 242b9c3941cSKai Luo // stqcx new ptr 243b9c3941cSKai Luo // bne 0, loop 244b9c3941cSKai Luo // exit: 245b9c3941cSKai Luo // .... 246b9c3941cSKai Luo MachineFunction::iterator MFI = ++MBB.getIterator(); 247b9c3941cSKai Luo MachineBasicBlock *LoopCmpMBB = MF->CreateMachineBasicBlock(BB); 248b9c3941cSKai Luo MachineBasicBlock *CmpSuccMBB = MF->CreateMachineBasicBlock(BB); 249b9c3941cSKai Luo MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(BB); 250b9c3941cSKai Luo MF->insert(MFI, LoopCmpMBB); 251b9c3941cSKai Luo MF->insert(MFI, CmpSuccMBB); 252b9c3941cSKai Luo MF->insert(MFI, ExitMBB); 253b9c3941cSKai Luo ExitMBB->splice(ExitMBB->begin(), &MBB, std::next(MI.getIterator()), 254b9c3941cSKai Luo MBB.end()); 255b9c3941cSKai Luo ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB); 256b9c3941cSKai Luo MBB.addSuccessor(LoopCmpMBB); 257b9c3941cSKai Luo // Build loop. 258b9c3941cSKai Luo MachineBasicBlock *CurrentMBB = LoopCmpMBB; 259b9c3941cSKai Luo BuildMI(CurrentMBB, DL, LL, Old).addReg(RA).addReg(RB); 260b9c3941cSKai Luo BuildMI(CurrentMBB, DL, TII->get(PPC::XOR8), ScratchLo) 261b9c3941cSKai Luo .addReg(OldLo) 262b9c3941cSKai Luo .addReg(CmpLo); 263b9c3941cSKai Luo BuildMI(CurrentMBB, DL, TII->get(PPC::XOR8), ScratchHi) 264b9c3941cSKai Luo .addReg(OldHi) 265b9c3941cSKai Luo .addReg(CmpHi); 266b9c3941cSKai Luo BuildMI(CurrentMBB, DL, TII->get(PPC::OR8_rec), ScratchLo) 267b9c3941cSKai Luo .addReg(ScratchLo) 268b9c3941cSKai Luo .addReg(ScratchHi); 269b9c3941cSKai Luo BuildMI(CurrentMBB, DL, TII->get(PPC::BCC)) 270b9c3941cSKai Luo .addImm(PPC::PRED_NE) 271b9c3941cSKai Luo .addReg(PPC::CR0) 27231046816SKai Luo .addMBB(ExitMBB); 273b9c3941cSKai Luo CurrentMBB->addSuccessor(CmpSuccMBB); 27431046816SKai Luo CurrentMBB->addSuccessor(ExitMBB); 275b9c3941cSKai Luo // Build succ. 276b9c3941cSKai Luo CurrentMBB = CmpSuccMBB; 277b9c3941cSKai Luo PairedCopy(TII, *CurrentMBB, CurrentMBB->end(), DL, ScratchHi, ScratchLo, 278b9c3941cSKai Luo NewHi, NewLo); 279b9c3941cSKai Luo BuildMI(CurrentMBB, DL, SC).addReg(Scratch).addReg(RA).addReg(RB); 280b9c3941cSKai Luo BuildMI(CurrentMBB, DL, TII->get(PPC::BCC)) 281b9c3941cSKai Luo .addImm(PPC::PRED_NE) 282b9c3941cSKai Luo .addReg(PPC::CR0) 283b9c3941cSKai Luo .addMBB(LoopCmpMBB); 284b9c3941cSKai Luo CurrentMBB->addSuccessor(LoopCmpMBB); 285b9c3941cSKai Luo CurrentMBB->addSuccessor(ExitMBB); 286b9c3941cSKai Luo 287*21d17709SKai Nacke fullyRecomputeLiveIns({ExitMBB, CmpSuccMBB, LoopCmpMBB}); 288b9c3941cSKai Luo NMBBI = MBB.end(); 289b9c3941cSKai Luo MI.eraseFromParent(); 290b9c3941cSKai Luo return true; 291b9c3941cSKai Luo } 292b9c3941cSKai Luo 293b9c3941cSKai Luo } // namespace 294b9c3941cSKai Luo 295b9c3941cSKai Luo INITIALIZE_PASS(PPCExpandAtomicPseudo, DEBUG_TYPE, "PowerPC Expand Atomic", 296b9c3941cSKai Luo false, false) 297b9c3941cSKai Luo 298b9c3941cSKai Luo char PPCExpandAtomicPseudo::ID = 0; 299b9c3941cSKai Luo FunctionPass *llvm::createPPCExpandAtomicPseudoPass() { 300b9c3941cSKai Luo return new PPCExpandAtomicPseudo(); 301b9c3941cSKai Luo } 302