//=== lib/CodeGen/GlobalISel/AArch64PreLegalizerCombiner.cpp --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// before the legalizer.
//
//===----------------------------------------------------------------------===//

#include "AArch64GlobalISelUtils.h"
#include "AArch64TargetMachine.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "aarch64-prelegalizer-combiner"

using namespace llvm;
using namespace MIPatternMatch;

/// Return true if a G_FCONSTANT instruction is known to be better-represented
/// as a G_CONSTANT.
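///
/// e.g. if every user of the constant is a store, the bit pattern can be
/// materialized in a GPR instead of needing an FMOV:
///
///   %f = G_FCONSTANT float 1.0  -->  %f = G_CONSTANT i32 0x3F800000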
static bool matchFConstantToConstant(MachineInstr &MI,
                                     MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
  Register DstReg = MI.getOperand(0).getReg();
  const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
  if (DstSize != 32 && DstSize != 64)
    return false;

  // When we're storing a value, it doesn't matter what register bank it's on.
  // Since not all floating point constants can be materialized using a fmov,
  // it makes more sense to just use a GPR.
  return all_of(MRI.use_nodbg_instructions(DstReg),
                [](const MachineInstr &Use) { return Use.mayStore(); });
}

/// Change a G_FCONSTANT into a G_CONSTANT.
static void applyFConstantToConstant(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
  MachineIRBuilder MIB(MI);
  const APFloat &ImmValAPF = MI.getOperand(1).getFPImm()->getValueAPF();
  MIB.buildConstant(MI.getOperand(0).getReg(), ImmValAPF.bitcastToAPInt());
  MI.eraseFromParent();
}

/// Try to match a G_ICMP of a G_TRUNC with zero, in which the truncated bits
/// are sign bits. In this case, we can transform the G_ICMP to compare the
/// wide value directly against zero.
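///
/// e.g.
///
///   %tr = G_TRUNC %wide       ; truncated-away bits are all sign bits
///   %cmp = G_ICMP eq %tr, 0   -->   %cmp = G_ICMP eq %wide, 0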
static bool matchICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
                                    GISelKnownBits *KB, Register &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_ICMP && KB);

  auto Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
  if (!ICmpInst::isEquality(Pred))
    return false;

  Register LHS = MI.getOperand(2).getReg();
  LLT LHSTy = MRI.getType(LHS);
  if (!LHSTy.isScalar())
    return false;

  Register RHS = MI.getOperand(3).getReg();
  Register WideReg;

  if (!mi_match(LHS, MRI, m_GTrunc(m_Reg(WideReg))) ||
      !mi_match(RHS, MRI, m_SpecificICst(0)))
    return false;

  LLT WideTy = MRI.getType(WideReg);
  if (KB->computeNumSignBits(WideReg) <=
      WideTy.getSizeInBits() - LHSTy.getSizeInBits())
    return false;

  MatchInfo = WideReg;
  return true;
}

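/// Rewrite the matched G_ICMP to compare the wide source register against a
/// zero of matching width, leaving the G_TRUNC dead.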
static bool applyICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
                                    MachineIRBuilder &Builder,
                                    GISelChangeObserver &Observer,
                                    Register &WideReg) {
  assert(MI.getOpcode() == TargetOpcode::G_ICMP);

  LLT WideTy = MRI.getType(WideReg);
  // We're going to use the wide register directly as the LHS, and compare it
  // against a zero of the same width on the RHS.
  Builder.setInstrAndDebugLoc(MI);
  auto WideZero = Builder.buildConstant(WideTy, 0);
  Observer.changingInstr(MI);
  MI.getOperand(2).setReg(WideReg);
  MI.getOperand(3).setReg(WideZero.getReg(0));
  Observer.changedInstr(MI);
  return true;
}

/// \returns true if it is possible to fold a constant into a G_GLOBAL_VALUE.
///
/// e.g.
///
/// %g = G_GLOBAL_VALUE @x -> %g = G_GLOBAL_VALUE @x + cst
static bool matchFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
                                  std::pair<uint64_t, uint64_t> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
  MachineFunction &MF = *MI.getMF();
  auto &GlobalOp = MI.getOperand(1);
  auto *GV = GlobalOp.getGlobal();
  if (GV->isThreadLocal())
    return false;

  // Don't allow anything that could represent offsets etc.
  if (MF.getSubtarget<AArch64Subtarget>().ClassifyGlobalReference(
          GV, MF.getTarget()) != AArch64II::MO_NO_FLAG)
    return false;

  // Look for a G_GLOBAL_VALUE only used by G_PTR_ADDs against constants:
  //
  //  %g = G_GLOBAL_VALUE @x
  //  %ptr1 = G_PTR_ADD %g, cst1
  //  %ptr2 = G_PTR_ADD %g, cst2
  //  ...
  //  %ptrN = G_PTR_ADD %g, cstN
  //
  // Identify the *smallest* constant. We want to be able to form this:
  //
  //  %offset_g = G_GLOBAL_VALUE @x + min_cst
  //  %g = G_PTR_ADD %offset_g, -min_cst
  //  %ptr1 = G_PTR_ADD %g, cst1
  //  ...
  Register Dst = MI.getOperand(0).getReg();
  uint64_t MinOffset = -1ull;
  for (auto &UseInstr : MRI.use_nodbg_instructions(Dst)) {
    if (UseInstr.getOpcode() != TargetOpcode::G_PTR_ADD)
      return false;
    auto Cst =
        getConstantVRegValWithLookThrough(UseInstr.getOperand(2).getReg(), MRI);
    if (!Cst)
      return false;
    MinOffset = std::min(MinOffset, Cst->Value.getZExtValue());
  }

  // Require that the new offset is larger than the existing one to avoid
  // infinite loops.
  uint64_t CurrOffset = GlobalOp.getOffset();
  uint64_t NewOffset = MinOffset + CurrOffset;
  if (NewOffset <= CurrOffset)
    return false;

  // Check whether folding this offset is legal. It must not go out of bounds of
  // the referenced object to avoid violating the code model, and must be
  // smaller than 2^21 because this is the largest offset expressible in all
  // object formats.
  //
  // This check also prevents us from folding negative offsets, which will end
  // up being treated in the same way as large positive ones. They could also
  // cause code model violations, and aren't really common enough to matter.
  if (NewOffset >= (1 << 21))
    return false;

  Type *T = GV->getValueType();
  if (!T->isSized() ||
      NewOffset > GV->getParent()->getDataLayout().getTypeAllocSize(T))
    return false;
  MatchInfo = std::make_pair(NewOffset, MinOffset);
  return true;
}

static bool applyFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
                                  MachineIRBuilder &B,
                                  GISelChangeObserver &Observer,
                                  std::pair<uint64_t, uint64_t> &MatchInfo) {
  // Change:
  //
  //  %g = G_GLOBAL_VALUE @x
  //  %ptr1 = G_PTR_ADD %g, cst1
  //  %ptr2 = G_PTR_ADD %g, cst2
  //  ...
  //  %ptrN = G_PTR_ADD %g, cstN
  //
  // To:
  //
  //  %offset_g = G_GLOBAL_VALUE @x + min_cst
  //  %g = G_PTR_ADD %offset_g, -min_cst
  //  %ptr1 = G_PTR_ADD %g, cst1
  //  ...
  //  %ptrN = G_PTR_ADD %g, cstN
  //
  // Then, the original G_PTR_ADDs should be folded later on so that they look
  // like this:
  //
  //  %ptrN = G_PTR_ADD %offset_g, cstN - min_cst
  uint64_t Offset, MinOffset;
  std::tie(Offset, MinOffset) = MatchInfo;
  B.setInstrAndDebugLoc(MI);
  Observer.changingInstr(MI);
  auto &GlobalOp = MI.getOperand(1);
  auto *GV = GlobalOp.getGlobal();
  GlobalOp.ChangeToGA(GV, Offset, GlobalOp.getTargetFlags());
  Register Dst = MI.getOperand(0).getReg();
  Register NewGVDst = MRI.cloneVirtualRegister(Dst);
  MI.getOperand(0).setReg(NewGVDst);
  Observer.changedInstr(MI);
  B.buildPtrAdd(
      Dst, NewGVDst,
      B.buildConstant(LLT::scalar(64), -static_cast<int64_t>(MinOffset)));
  return true;
}

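/// State shared with the TableGen-generated combine rules; the generated
/// helper class derives from this so the rules can reach the CombinerHelper.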
class AArch64PreLegalizerCombinerHelperState {
protected:
  CombinerHelper &Helper;

public:
  AArch64PreLegalizerCombinerHelperState(CombinerHelper &Helper)
      : Helper(Helper) {}
};

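// The generated combiner (AArch64GenPreLegalizeGICombiner.inc) is included in
// three stages: its dependencies, the helper class declaration, and the
// helper class implementation.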
#define AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AArch64GenPreLegalizeGICombiner.inc"
#undef AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS

namespace {
#define AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
#include "AArch64GenPreLegalizeGICombiner.inc"
#undef AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H

class AArch64PreLegalizerCombinerInfo : public CombinerInfo {
  GISelKnownBits *KB;
  MachineDominatorTree *MDT;
  AArch64GenPreLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;

public:
  AArch64PreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
                                  GISelKnownBits *KB, MachineDominatorTree *MDT)
      : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
                     /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize),
        KB(KB), MDT(MDT) {
    if (!GeneratedRuleCfg.parseCommandLineOption())
      report_fatal_error("Invalid rule identifier");
  }

  virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
                       MachineIRBuilder &B) const override;
};

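// Give the TableGen-generated rules the first shot at each instruction, then
// fall back to the manually written combines below.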
bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
                                              MachineInstr &MI,
                                              MachineIRBuilder &B) const {
  CombinerHelper Helper(Observer, B, KB, MDT);
  AArch64GenPreLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper);

  if (Generated.tryCombineAll(Observer, MI, B))
    return true;

  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_CONCAT_VECTORS:
    return Helper.tryCombineConcatVectors(MI);
  case TargetOpcode::G_SHUFFLE_VECTOR:
    return Helper.tryCombineShuffleVector(MI);
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET: {
    // If we're at -O0, set a maxlen of 32 to inline; otherwise let the other
    // heuristics decide.
    unsigned MaxLen = EnableOpt ? 0 : 32;
    // Try to inline the memcpy/memmove/memset family of calls.
    if (Helper.tryCombineMemCpyFamily(MI, MaxLen))
      return true;
    if (Opc == TargetOpcode::G_MEMSET)
      return llvm::AArch64GISelUtils::tryEmitBZero(MI, B, EnableMinSize);
    return false;
  }
  }

  return false;
}

#define AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
#include "AArch64GenPreLegalizeGICombiner.inc"
#undef AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP

// Pass boilerplate
// ================

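/// MachineFunctionPass that wires the combiner into the pass pipeline.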
class AArch64PreLegalizerCombiner : public MachineFunctionPass {
public:
  static char ID;

  AArch64PreLegalizerCombiner();

  StringRef getPassName() const override {
    return "AArch64PreLegalizerCombiner";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;
};
} // end anonymous namespace

void AArch64PreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  AU.addRequired<GISelKnownBitsAnalysis>();
  AU.addPreserved<GISelKnownBitsAnalysis>();
  AU.addRequired<MachineDominatorTree>();
  AU.addPreserved<MachineDominatorTree>();
  AU.addRequired<GISelCSEAnalysisWrapperPass>();
  AU.addPreserved<GISelCSEAnalysisWrapperPass>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

AArch64PreLegalizerCombiner::AArch64PreLegalizerCombiner()
    : MachineFunctionPass(ID) {
  initializeAArch64PreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
}

bool AArch64PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
  // Don't run the combiner if instruction selection has already failed for
  // this function.
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  auto &TPC = getAnalysis<TargetPassConfig>();

  // Enable CSE.
  GISelCSEAnalysisWrapper &Wrapper =
      getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
  auto *CSEInfo = &Wrapper.get(TPC.getCSEConfig());

  const Function &F = MF.getFunction();
  bool EnableOpt =
      MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
  MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>();
  AArch64PreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
                                         F.hasMinSize(), KB, MDT);
  Combiner C(PCInfo, &TPC);
  return C.combineMachineInstrs(MF, CSEInfo);
}

char AArch64PreLegalizerCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(AArch64PreLegalizerCombiner, DEBUG_TYPE,
                      "Combine AArch64 machine instrs before legalization",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
INITIALIZE_PASS_END(AArch64PreLegalizerCombiner, DEBUG_TYPE,
                    "Combine AArch64 machine instrs before legalization", false,
                    false)

namespace llvm {
FunctionPass *createAArch64PreLegalizerCombiner() {
  return new AArch64PreLegalizerCombiner();
}
} // end namespace llvm