xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp (revision 68d75eff68281c1b445e3010bb975eae07aac225)
1 //===-- MVEVPTBlockPass.cpp - Insert MVE VPT blocks -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ARM.h"
10 #include "ARMMachineFunctionInfo.h"
11 #include "ARMSubtarget.h"
12 #include "MCTargetDesc/ARMBaseInfo.h"
13 #include "Thumb2InstrInfo.h"
14 #include "llvm/ADT/SmallSet.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/CodeGen/MachineBasicBlock.h"
19 #include "llvm/CodeGen/MachineFunction.h"
20 #include "llvm/CodeGen/MachineFunctionPass.h"
21 #include "llvm/CodeGen/MachineInstr.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineInstrBundle.h"
24 #include "llvm/CodeGen/MachineOperand.h"
25 #include "llvm/IR/DebugLoc.h"
26 #include "llvm/MC/MCInstrDesc.h"
27 #include "llvm/MC/MCRegisterInfo.h"
28 #include "llvm/Support/Debug.h"
29 #include <cassert>
30 #include <new>
31 
32 using namespace llvm;
33 
34 #define DEBUG_TYPE "arm-mve-vpt"
35 
36 namespace {
37   class MVEVPTBlock : public MachineFunctionPass {
38   public:
39     static char ID;
40     const Thumb2InstrInfo *TII;
41     const TargetRegisterInfo *TRI;
42 
43     MVEVPTBlock() : MachineFunctionPass(ID) {}
44 
45     bool runOnMachineFunction(MachineFunction &Fn) override;
46 
47     MachineFunctionProperties getRequiredProperties() const override {
48       return MachineFunctionProperties().set(
49           MachineFunctionProperties::Property::NoVRegs);
50     }
51 
52     StringRef getPassName() const override {
53       return "MVE VPT block insertion pass";
54     }
55 
56   private:
57     bool InsertVPTBlocks(MachineBasicBlock &MBB);
58   };
59 
60   char MVEVPTBlock::ID = 0;
61 
62 } // end anonymous namespace
63 
64 INITIALIZE_PASS(MVEVPTBlock, DEBUG_TYPE, "ARM MVE VPT block pass", false, false)
65 
/// 4-bit mask operand values for VPT/VPST instructions, named after the
/// then/else pattern of the instructions in the block.
///
/// As the values below show, the lowest set bit terminates the mask, so its
/// position gives the block length. Reading from bit 3 downwards, every bit
/// above the terminator is 1 when the corresponding instruction switches
/// between "then" and "else" relative to the previous instruction, and 0 when
/// it keeps the same sense.
enum VPTMaskValue {
  // Blocks of one instruction.
  T     = 0x8, // 0b1000

  // Blocks of two instructions.
  TT    = 0x4, // 0b0100
  TE    = 0xC, // 0b1100

  // Blocks of three instructions.
  TTT   = 0x2, // 0b0010
  TTE   = 0x6, // 0b0110
  TEE   = 0xA, // 0b1010
  TET   = 0xE, // 0b1110

  // Blocks of four instructions.
  TTTT  = 0x1, // 0b0001
  TTTE  = 0x3, // 0b0011
  TTEE  = 0x5, // 0b0101
  TTET  = 0x7, // 0b0111
  TEEE  = 0x9, // 0b1001
  TEET  = 0xB, // 0b1011
  TETT  = 0xD, // 0b1101
  TETE  = 0xF  // 0b1111
};
83 
84 static unsigned VCMPOpcodeToVPT(unsigned Opcode) {
85   switch (Opcode) {
86   case ARM::MVE_VCMPf32:
87     return ARM::MVE_VPTv4f32;
88   case ARM::MVE_VCMPf16:
89     return ARM::MVE_VPTv8f16;
90   case ARM::MVE_VCMPi8:
91     return ARM::MVE_VPTv16i8;
92   case ARM::MVE_VCMPi16:
93     return ARM::MVE_VPTv8i16;
94   case ARM::MVE_VCMPi32:
95     return ARM::MVE_VPTv4i32;
96   case ARM::MVE_VCMPu8:
97     return ARM::MVE_VPTv16u8;
98   case ARM::MVE_VCMPu16:
99     return ARM::MVE_VPTv8u16;
100   case ARM::MVE_VCMPu32:
101     return ARM::MVE_VPTv4u32;
102   case ARM::MVE_VCMPs8:
103     return ARM::MVE_VPTv16s8;
104   case ARM::MVE_VCMPs16:
105     return ARM::MVE_VPTv8s16;
106   case ARM::MVE_VCMPs32:
107     return ARM::MVE_VPTv4s32;
108 
109   case ARM::MVE_VCMPf32r:
110     return ARM::MVE_VPTv4f32r;
111   case ARM::MVE_VCMPf16r:
112     return ARM::MVE_VPTv8f16r;
113   case ARM::MVE_VCMPi8r:
114     return ARM::MVE_VPTv16i8r;
115   case ARM::MVE_VCMPi16r:
116     return ARM::MVE_VPTv8i16r;
117   case ARM::MVE_VCMPi32r:
118     return ARM::MVE_VPTv4i32r;
119   case ARM::MVE_VCMPu8r:
120     return ARM::MVE_VPTv16u8r;
121   case ARM::MVE_VCMPu16r:
122     return ARM::MVE_VPTv8u16r;
123   case ARM::MVE_VCMPu32r:
124     return ARM::MVE_VPTv4u32r;
125   case ARM::MVE_VCMPs8r:
126     return ARM::MVE_VPTv16s8r;
127   case ARM::MVE_VCMPs16r:
128     return ARM::MVE_VPTv8s16r;
129   case ARM::MVE_VCMPs32r:
130     return ARM::MVE_VPTv4s32r;
131 
132   default:
133     return 0;
134   }
135 }
136 
137 static MachineInstr *findVCMPToFoldIntoVPST(MachineBasicBlock::iterator MI,
138                                             const TargetRegisterInfo *TRI,
139                                             unsigned &NewOpcode) {
140   // Search backwards to the instruction that defines VPR. This may or not
141   // be a VCMP, we check that after this loop. If we find another instruction
142   // that reads cpsr, we return nullptr.
143   MachineBasicBlock::iterator CmpMI = MI;
144   while (CmpMI != MI->getParent()->begin()) {
145     --CmpMI;
146     if (CmpMI->modifiesRegister(ARM::VPR, TRI))
147       break;
148     if (CmpMI->readsRegister(ARM::VPR, TRI))
149       break;
150   }
151 
152   if (CmpMI == MI)
153     return nullptr;
154   NewOpcode = VCMPOpcodeToVPT(CmpMI->getOpcode());
155   if (NewOpcode == 0)
156     return nullptr;
157 
158   // Search forward from CmpMI to MI, checking if either register was def'd
159   if (registerDefinedBetween(CmpMI->getOperand(1).getReg(), std::next(CmpMI),
160                              MI, TRI))
161     return nullptr;
162   if (registerDefinedBetween(CmpMI->getOperand(2).getReg(), std::next(CmpMI),
163                              MI, TRI))
164     return nullptr;
165   return &*CmpMI;
166 }
167 
168 bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) {
169   bool Modified = false;
170   MachineBasicBlock::instr_iterator MBIter = Block.instr_begin();
171   MachineBasicBlock::instr_iterator EndIter = Block.instr_end();
172 
173   while (MBIter != EndIter) {
174     MachineInstr *MI = &*MBIter;
175     unsigned PredReg = 0;
176     DebugLoc dl = MI->getDebugLoc();
177 
178     ARMVCC::VPTCodes Pred = getVPTInstrPredicate(*MI, PredReg);
179 
180     // The idea of the predicate is that None, Then and Else are for use when
181     // handling assembly language: they correspond to the three possible
182     // suffixes "", "t" and "e" on the mnemonic. So when instructions are read
183     // from assembly source or disassembled from object code, you expect to see
184     // a mixture whenever there's a long VPT block. But in code generation, we
185     // hope we'll never generate an Else as input to this pass.
186     assert(Pred != ARMVCC::Else && "VPT block pass does not expect Else preds");
187 
188     if (Pred == ARMVCC::None) {
189       ++MBIter;
190       continue;
191     }
192 
193     LLVM_DEBUG(dbgs() << "VPT block created for: "; MI->dump());
194     int VPTInstCnt = 1;
195     ARMVCC::VPTCodes NextPred;
196 
197     // Look at subsequent instructions, checking if they can be in the same VPT
198     // block.
199     ++MBIter;
200     while (MBIter != EndIter && VPTInstCnt < 4) {
201       NextPred = getVPTInstrPredicate(*MBIter, PredReg);
202       assert(NextPred != ARMVCC::Else &&
203              "VPT block pass does not expect Else preds");
204       if (NextPred != Pred)
205         break;
206       LLVM_DEBUG(dbgs() << "  adding : "; MBIter->dump());
207       ++VPTInstCnt;
208       ++MBIter;
209     };
210 
211     unsigned BlockMask = 0;
212     switch (VPTInstCnt) {
213     case 1:
214       BlockMask = VPTMaskValue::T;
215       break;
216     case 2:
217       BlockMask = VPTMaskValue::TT;
218       break;
219     case 3:
220       BlockMask = VPTMaskValue::TTT;
221       break;
222     case 4:
223       BlockMask = VPTMaskValue::TTTT;
224       break;
225     default:
226       llvm_unreachable("Unexpected number of instruction in a VPT block");
227     };
228 
229     // Search back for a VCMP that can be folded to create a VPT, or else create
230     // a VPST directly
231     MachineInstrBuilder MIBuilder;
232     unsigned NewOpcode;
233     MachineInstr *VCMP = findVCMPToFoldIntoVPST(MI, TRI, NewOpcode);
234     if (VCMP) {
235       LLVM_DEBUG(dbgs() << "  folding VCMP into VPST: "; VCMP->dump());
236       MIBuilder = BuildMI(Block, MI, dl, TII->get(NewOpcode));
237       MIBuilder.addImm(BlockMask);
238       MIBuilder.add(VCMP->getOperand(1));
239       MIBuilder.add(VCMP->getOperand(2));
240       MIBuilder.add(VCMP->getOperand(3));
241       VCMP->eraseFromParent();
242     } else {
243       MIBuilder = BuildMI(Block, MI, dl, TII->get(ARM::MVE_VPST));
244       MIBuilder.addImm(BlockMask);
245     }
246 
247     finalizeBundle(
248         Block, MachineBasicBlock::instr_iterator(MIBuilder.getInstr()), MBIter);
249 
250     Modified = true;
251   }
252   return Modified;
253 }
254 
255 bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) {
256   const ARMSubtarget &STI =
257       static_cast<const ARMSubtarget &>(Fn.getSubtarget());
258 
259   if (!STI.isThumb2() || !STI.hasMVEIntegerOps())
260     return false;
261 
262   TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
263   TRI = STI.getRegisterInfo();
264 
265   LLVM_DEBUG(dbgs() << "********** ARM MVE VPT BLOCKS **********\n"
266                     << "********** Function: " << Fn.getName() << '\n');
267 
268   bool Modified = false;
269   for (MachineBasicBlock &MBB : Fn)
270     Modified |= InsertVPTBlocks(MBB);
271 
272   LLVM_DEBUG(dbgs() << "**************************************\n");
273   return Modified;
274 }
275 
276 /// createMVEVPTBlock - Returns an instance of the MVE VPT block
277 /// insertion pass.
278 FunctionPass *llvm::createMVEVPTBlockPass() { return new MVEVPTBlock(); }
279