1 //===-- lib/CodeGen/GlobalISel/CombinerHelper.cpp -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
9 #include "llvm/ADT/SetVector.h"
10 #include "llvm/ADT/SmallBitVector.h"
11 #include "llvm/CodeGen/GlobalISel/Combiner.h"
12 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
13 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
14 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
15 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
16 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
17 #include "llvm/CodeGen/GlobalISel/Utils.h"
18 #include "llvm/CodeGen/LowLevelType.h"
19 #include "llvm/CodeGen/MachineBasicBlock.h"
20 #include "llvm/CodeGen/MachineDominators.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineInstr.h"
23 #include "llvm/CodeGen/MachineMemOperand.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/CodeGen/TargetInstrInfo.h"
26 #include "llvm/CodeGen/TargetLowering.h"
27 #include "llvm/CodeGen/TargetOpcodes.h"
28 #include "llvm/Support/MathExtras.h"
29 #include "llvm/Target/TargetMachine.h"
30
31 #define DEBUG_TYPE "gi-combiner"
32
33 using namespace llvm;
34 using namespace MIPatternMatch;
35
36 // Option to allow testing of the combiner while no targets know about indexed
37 // addressing.
38 static cl::opt<bool>
39 ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
40 cl::desc("Force all indexed operations to be "
41 "legal for the GlobalISel combiner"));
42
43 CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
44 MachineIRBuilder &B, GISelKnownBits *KB,
45 MachineDominatorTree *MDT,
46 const LegalizerInfo *LI)
47 : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer),
48 KB(KB), MDT(MDT), LI(LI) {
49 (void)this->KB;
50 }
51
52 const TargetLowering &CombinerHelper::getTargetLowering() const {
53 return *Builder.getMF().getSubtarget().getTargetLowering();
54 }
55
56 /// \returns The little endian in-memory byte position of byte \p I in a
57 /// \p ByteWidth bytes wide type.
58 ///
59 /// E.g. Given a 4-byte type x, x[0] -> byte 0
60 static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
61 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
62 return I;
63 }
64
65 /// \returns The big endian in-memory byte position of byte \p I in a
66 /// \p ByteWidth bytes wide type.
67 ///
68 /// E.g. Given a 4-byte type x, x[0] -> byte 3
69 static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
70 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
71 return ByteWidth - I - 1;
72 }
73
74 /// Given a map from byte offsets in memory to indices in a load/store,
75 /// determine if that map corresponds to a little or big endian byte pattern.
76 ///
77 /// \param MemOffset2Idx maps memory offsets to address offsets.
78 /// \param LowestIdx is the lowest index in \p MemOffset2Idx.
79 ///
80 /// \returns true if the map corresponds to a big endian byte pattern, false
81 /// if it corresponds to a little endian byte pattern, and None otherwise.
82 ///
83 /// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
84 /// are as follows:
85 ///
86 /// AddrOffset Little endian Big endian
87 /// 0 0 3
88 /// 1 1 2
89 /// 2 2 1
90 /// 3 3 0
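///
/// For example (illustrative), with a width of 4 and LowestIdx of 0, the map
/// {0 -> 0, 1 -> 1, 2 -> 2, 3 -> 3} yields false (little endian), while
/// {0 -> 3, 1 -> 2, 2 -> 1, 3 -> 0} yields true (big endian).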
91 static Optional<bool>
92 isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
93 int64_t LowestIdx) {
94 // Need at least two byte positions to decide on endianness.
95 unsigned Width = MemOffset2Idx.size();
96 if (Width < 2)
97 return None;
98 bool BigEndian = true, LittleEndian = true;
99 for (unsigned MemOffset = 0; MemOffset < Width; ++MemOffset) {
100 auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
101 if (MemOffsetAndIdx == MemOffset2Idx.end())
102 return None;
103 const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
104 assert(Idx >= 0 && "Expected non-negative byte offset?");
105 LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
106 BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
107 if (!BigEndian && !LittleEndian)
108 return None;
109 }
110
111 assert((BigEndian != LittleEndian) &&
112 "Pattern cannot be both big and little endian!");
113 return BigEndian;
114 }
115
116 bool CombinerHelper::isLegalOrBeforeLegalizer(
117 const LegalityQuery &Query) const {
118 return !LI || LI->getAction(Query).Action == LegalizeActions::Legal;
119 }
120
121 void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
122 Register ToReg) const {
123 Observer.changingAllUsesOfReg(MRI, FromReg);
124
125 if (MRI.constrainRegAttrs(ToReg, FromReg))
126 MRI.replaceRegWith(FromReg, ToReg);
127 else
128 Builder.buildCopy(ToReg, FromReg);
129
130 Observer.finishedChangingAllUsesOfReg();
131 }
132
133 void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
134 MachineOperand &FromRegOp,
135 Register ToReg) const {
136 assert(FromRegOp.getParent() && "Expected an operand in an MI");
137 Observer.changingInstr(*FromRegOp.getParent());
138
139 FromRegOp.setReg(ToReg);
140
141 Observer.changedInstr(*FromRegOp.getParent());
142 }
143
144 bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
145 if (matchCombineCopy(MI)) {
146 applyCombineCopy(MI);
147 return true;
148 }
149 return false;
150 }
151 bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
152 if (MI.getOpcode() != TargetOpcode::COPY)
153 return false;
154 Register DstReg = MI.getOperand(0).getReg();
155 Register SrcReg = MI.getOperand(1).getReg();
156 return canReplaceReg(DstReg, SrcReg, MRI);
157 }
158 void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
159 Register DstReg = MI.getOperand(0).getReg();
160 Register SrcReg = MI.getOperand(1).getReg();
161 MI.eraseFromParent();
162 replaceRegWith(MRI, DstReg, SrcReg);
163 }
164
165 bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) {
166 bool IsUndef = false;
167 SmallVector<Register, 4> Ops;
168 if (matchCombineConcatVectors(MI, IsUndef, Ops)) {
169 applyCombineConcatVectors(MI, IsUndef, Ops);
170 return true;
171 }
172 return false;
173 }
174
175 bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
176 SmallVectorImpl<Register> &Ops) {
177 assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
178 "Invalid instruction");
179 IsUndef = true;
180 MachineInstr *Undef = nullptr;
181
182 // Walk over all the operands of concat vectors and check if they are
183 // build_vector themselves or undef.
184 // Then collect their operands in Ops.
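// For example (illustrative), given
//   %a:_(<2 x s32>) = G_BUILD_VECTOR %x:_(s32), %y:_(s32)
//   %b:_(<2 x s32>) = G_BUILD_VECTOR %z:_(s32), %w:_(s32)
//   %c:_(<4 x s32>) = G_CONCAT_VECTORS %a(<2 x s32>), %b(<2 x s32>)
// Ops ends up as {%x, %y, %z, %w}, ready to form a single flattened
// build_vector.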
185 for (const MachineOperand &MO : MI.uses()) {
186 Register Reg = MO.getReg();
187 MachineInstr *Def = MRI.getVRegDef(Reg);
188 assert(Def && "Operand not defined");
189 switch (Def->getOpcode()) {
190 case TargetOpcode::G_BUILD_VECTOR:
191 IsUndef = false;
192 // Remember the operands of the build_vector to fold
193 // them into the yet-to-build flattened concat vectors.
194 for (const MachineOperand &BuildVecMO : Def->uses())
195 Ops.push_back(BuildVecMO.getReg());
196 break;
197 case TargetOpcode::G_IMPLICIT_DEF: {
198 LLT OpType = MRI.getType(Reg);
199 // Keep one undef value for all the undef operands.
200 if (!Undef) {
201 Builder.setInsertPt(*MI.getParent(), MI);
202 Undef = Builder.buildUndef(OpType.getScalarType());
203 }
204 assert(MRI.getType(Undef->getOperand(0).getReg()) ==
205 OpType.getScalarType() &&
206 "All undefs should have the same type");
207 // Break the undef vector into as many scalar elements as needed
208 // for the flattening.
209 for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
210 EltIdx != EltEnd; ++EltIdx)
211 Ops.push_back(Undef->getOperand(0).getReg());
212 break;
213 }
214 default:
215 return false;
216 }
217 }
218 return true;
219 }
220 void CombinerHelper::applyCombineConcatVectors(
221 MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) {
222 // We determined that the concat_vectors can be flattened.
223 // Generate the flattened build_vector.
224 Register DstReg = MI.getOperand(0).getReg();
225 Builder.setInsertPt(*MI.getParent(), MI);
226 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
227
228 // Note: IsUndef is sort of redundant. We could have determined it by
229 // checking that all Ops are undef. Alternatively, we could have
230 // generated a build_vector of undefs and relied on another combine to
231 // clean that up. For now, given we already gather this information
232 // in tryCombineConcatVectors, just save compile time and issue the
233 // right thing.
234 if (IsUndef)
235 Builder.buildUndef(NewDstReg);
236 else
237 Builder.buildBuildVector(NewDstReg, Ops);
238 MI.eraseFromParent();
239 replaceRegWith(MRI, DstReg, NewDstReg);
240 }
241
242 bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
243 SmallVector<Register, 4> Ops;
244 if (matchCombineShuffleVector(MI, Ops)) {
245 applyCombineShuffleVector(MI, Ops);
246 return true;
247 }
248 return false;
249 }
250
251 bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
252 SmallVectorImpl<Register> &Ops) {
253 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
254 "Invalid instruction kind");
255 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
256 Register Src1 = MI.getOperand(1).getReg();
257 LLT SrcType = MRI.getType(Src1);
258 // As bizarre as it may look, a shuffle vector can actually produce a
259 // scalar! This is because at the IR level a <1 x ty> shuffle
260 // vector is perfectly valid.
261 unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
262 unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
263
264 // If the resulting vector is smaller than the size of the source
265 // vectors being concatenated, we won't be able to replace the
266 // shuffle vector with a concat_vectors.
267 //
268 // Note: We may still be able to produce a concat_vectors fed by
269 // extract_vector_elt and so on. It is less clear that would
270 // be better though, so don't bother for now.
271 //
272 // If the destination is a scalar, the size of the sources doesn't
273 // matter. We will lower the shuffle to a plain copy. This will
274 // work only if the source and destination have the same size. But
275 // that's covered by the next condition.
276 //
277 // TODO: If the sizes of the source and destination don't match
278 // we could still emit an extract vector element in that case.
279 if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
280 return false;
281
282 // Check that the shuffle mask can be broken evenly between the
283 // different sources.
284 if (DstNumElts % SrcNumElts != 0)
285 return false;
286
287 // Mask length is a multiple of the source vector length.
288 // Check if the shuffle is some kind of concatenation of the input
289 // vectors.
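// For example (illustrative), with %a and %b of type <2 x s32>, a mask of
// (0, 1, 2, 3) takes both elements of %a followed by both elements of %b,
// i.e. the shuffle is just a concatenation of the two sources.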
290 unsigned NumConcat = DstNumElts / SrcNumElts;
291 SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
292 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
293 for (unsigned i = 0; i != DstNumElts; ++i) {
294 int Idx = Mask[i];
295 // Undef value.
296 if (Idx < 0)
297 continue;
298 // Ensure the indices in each SrcType sized piece are sequential and that
299 // the same source is used for the whole piece.
300 if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
301 (ConcatSrcs[i / SrcNumElts] >= 0 &&
302 ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
303 return false;
304 // Remember which source this index came from.
305 ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
306 }
307
308 // The shuffle is concatenating multiple vectors together.
309 // Collect the different operands for that.
310 Register UndefReg;
311 Register Src2 = MI.getOperand(2).getReg();
312 for (auto Src : ConcatSrcs) {
313 if (Src < 0) {
314 if (!UndefReg) {
315 Builder.setInsertPt(*MI.getParent(), MI);
316 UndefReg = Builder.buildUndef(SrcType).getReg(0);
317 }
318 Ops.push_back(UndefReg);
319 } else if (Src == 0)
320 Ops.push_back(Src1);
321 else
322 Ops.push_back(Src2);
323 }
324 return true;
325 }
326
327 void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
328 const ArrayRef<Register> Ops) {
329 Register DstReg = MI.getOperand(0).getReg();
330 Builder.setInsertPt(*MI.getParent(), MI);
331 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
332
333 if (Ops.size() == 1)
334 Builder.buildCopy(NewDstReg, Ops[0]);
335 else
336 Builder.buildMerge(NewDstReg, Ops);
337
338 MI.eraseFromParent();
339 replaceRegWith(MRI, DstReg, NewDstReg);
340 }
341
342 namespace {
343
344 /// Select a preference between two uses. CurrentUse is the current preference
345 /// while *ForCandidate holds the attributes of the candidate under consideration.
346 PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
347 const LLT TyForCandidate,
348 unsigned OpcodeForCandidate,
349 MachineInstr *MIForCandidate) {
350 if (!CurrentUse.Ty.isValid()) {
351 if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
352 CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
353 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
354 return CurrentUse;
355 }
356
357 // We permit the extend to hoist through basic blocks but this is only
358 // sensible if the target has extending loads. If you end up lowering back
359 // into a load and extend during the legalizer then the end result is
360 // hoisting the extend up to the load.
361
362 // Prefer defined extensions to undefined extensions as these are more
363 // likely to reduce the number of instructions.
364 if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
365 CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
366 return CurrentUse;
367 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
368 OpcodeForCandidate != TargetOpcode::G_ANYEXT)
369 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
370
371 // Prefer sign extensions to zero extensions as sign-extensions tend to be
372 // more expensive, so folding the sign extension into the load saves more.
373 if (CurrentUse.Ty == TyForCandidate) {
374 if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
375 OpcodeForCandidate == TargetOpcode::G_ZEXT)
376 return CurrentUse;
377 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
378 OpcodeForCandidate == TargetOpcode::G_SEXT)
379 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
380 }
381
382 // This is potentially target specific. We've chosen the largest type
383 // because G_TRUNC is usually free. One potential catch with this is that
384 // some targets have a reduced number of larger registers than smaller
385 // registers and this choice potentially increases the live-range for the
386 // larger value.
387 if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
388 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
389 }
390 return CurrentUse;
391 }
392
393 /// Find a suitable place to insert some instructions and insert them. This
394 /// function accounts for special cases like inserting before a PHI node.
395 /// The current strategy for inserting before PHI's is to duplicate the
396 /// instructions for each predecessor. However, while that's ok for G_TRUNC
397 /// on most targets since it generally requires no code, other targets/cases may
398 /// want to try harder to find a dominating block.
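///
/// For example (illustrative), if the use is an operand of
///   %phi:_(s32) = G_PHI %v(s32), %bb.1, ...
/// the instructions are inserted into the predecessor %bb.1 rather than
/// immediately before the PHI itself.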
399 static void InsertInsnsWithoutSideEffectsBeforeUse(
400 MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
401 std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
402 MachineOperand &UseMO)>
403 Inserter) {
404 MachineInstr &UseMI = *UseMO.getParent();
405
406 MachineBasicBlock *InsertBB = UseMI.getParent();
407
408 // If the use is a PHI then we want the predecessor block instead.
409 if (UseMI.isPHI()) {
410 MachineOperand *PredBB = std::next(&UseMO);
411 InsertBB = PredBB->getMBB();
412 }
413
414 // If the block is the same block as the def then we want to insert just after
415 // the def instead of at the start of the block.
416 if (InsertBB == DefMI.getParent()) {
417 MachineBasicBlock::iterator InsertPt = &DefMI;
418 Inserter(InsertBB, std::next(InsertPt), UseMO);
419 return;
420 }
421
422 // Otherwise we want the start of the BB
423 Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
424 }
425 } // end anonymous namespace
426
427 bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
428 PreferredTuple Preferred;
429 if (matchCombineExtendingLoads(MI, Preferred)) {
430 applyCombineExtendingLoads(MI, Preferred);
431 return true;
432 }
433 return false;
434 }
435
436 bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
437 PreferredTuple &Preferred) {
438 // We match the loads and follow the uses to the extend instead of matching
439 // the extends and following the def to the load. This is because the load
440 // must remain in the same position for correctness (unless we also add code
441 // to find a safe place to sink it) whereas the extend is freely movable.
442 // It also prevents us from duplicating the load for the volatile case or just
443 // for performance.
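//
// For example (illustrative), this combine rewrites
//   %v:_(s16) = G_LOAD %ptr
//   %e:_(s32) = G_SEXT %v(s16)
// into
//   %e:_(s32) = G_SEXTLOAD %ptr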
444
445 if (MI.getOpcode() != TargetOpcode::G_LOAD &&
446 MI.getOpcode() != TargetOpcode::G_SEXTLOAD &&
447 MI.getOpcode() != TargetOpcode::G_ZEXTLOAD)
448 return false;
449
450 auto &LoadValue = MI.getOperand(0);
451 assert(LoadValue.isReg() && "Result wasn't a register?");
452
453 LLT LoadValueTy = MRI.getType(LoadValue.getReg());
454 if (!LoadValueTy.isScalar())
455 return false;
456
457 // Most architectures are going to legalize loads narrower than s8 into at least
458 // a 1-byte load, and MMOs can only describe memory accesses in multiples of bytes.
459 // If we try to perform extload combining on those, we can end up with
460 // %a(s8) = extload %ptr (load 1 byte from %ptr)
461 // ... which is an illegal extload instruction.
462 if (LoadValueTy.getSizeInBits() < 8)
463 return false;
464
465 // For non power-of-2 types, they will very likely be legalized into multiple
466 // loads. Don't bother trying to match them into extending loads.
467 if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
468 return false;
469
470 // Find the preferred type aside from the any-extends (unless it's the only
471 // one) and non-extending ops. We'll emit an extending load to that type and
472 // emit a variant of (extend (trunc X)) for the others according to the
473 // relative type sizes. At the same time, pick an extend to use based on the
474 // extend involved in the chosen type.
475 unsigned PreferredOpcode = MI.getOpcode() == TargetOpcode::G_LOAD
476 ? TargetOpcode::G_ANYEXT
477 : MI.getOpcode() == TargetOpcode::G_SEXTLOAD
478 ? TargetOpcode::G_SEXT
479 : TargetOpcode::G_ZEXT;
480 Preferred = {LLT(), PreferredOpcode, nullptr};
481 for (auto &UseMI : MRI.use_nodbg_instructions(LoadValue.getReg())) {
482 if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
483 UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
484 (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
485 const auto &MMO = **MI.memoperands_begin();
486 // For atomics, only form anyextending loads.
487 if (MMO.isAtomic() && UseMI.getOpcode() != TargetOpcode::G_ANYEXT)
488 continue;
489 // Check for legality.
490 if (LI) {
491 LegalityQuery::MemDesc MMDesc;
492 MMDesc.SizeInBits = MMO.getSizeInBits();
493 MMDesc.AlignInBits = MMO.getAlign().value() * 8;
494 MMDesc.Ordering = MMO.getOrdering();
495 LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
496 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
497 if (LI->getAction({MI.getOpcode(), {UseTy, SrcTy}, {MMDesc}}).Action !=
498 LegalizeActions::Legal)
499 continue;
500 }
501 Preferred = ChoosePreferredUse(Preferred,
502 MRI.getType(UseMI.getOperand(0).getReg()),
503 UseMI.getOpcode(), &UseMI);
504 }
505 }
506
507 // There were no extends
508 if (!Preferred.MI)
509 return false;
510 // It should be impossible to choose an extend without selecting a different
511 // type since by definition the result of an extend is larger.
512 assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
513
514 LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
515 return true;
516 }
517
518 void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
519 PreferredTuple &Preferred) {
520 // Rewrite the load to the chosen extending load.
521 Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
522
523 // Inserter to insert a truncate back to the original type at a given point
524 // with some basic CSE to limit truncate duplication to one per BB.
525 DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
526 auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
527 MachineBasicBlock::iterator InsertBefore,
528 MachineOperand &UseMO) {
529 MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
530 if (PreviouslyEmitted) {
531 Observer.changingInstr(*UseMO.getParent());
532 UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
533 Observer.changedInstr(*UseMO.getParent());
534 return;
535 }
536
537 Builder.setInsertPt(*InsertIntoBB, InsertBefore);
538 Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
539 MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
540 EmittedInsns[InsertIntoBB] = NewMI;
541 replaceRegOpWith(MRI, UseMO, NewDstReg);
542 };
543
544 Observer.changingInstr(MI);
545 MI.setDesc(
546 Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT
547 ? TargetOpcode::G_SEXTLOAD
548 : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT
549 ? TargetOpcode::G_ZEXTLOAD
550 : TargetOpcode::G_LOAD));
551
552 // Rewrite all the uses to fix up the types.
553 auto &LoadValue = MI.getOperand(0);
554 SmallVector<MachineOperand *, 4> Uses;
555 for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
556 Uses.push_back(&UseMO);
557
558 for (auto *UseMO : Uses) {
559 MachineInstr *UseMI = UseMO->getParent();
560
561 // If the extend is compatible with the preferred extend then we should fix
562 // up the type and extend so that it uses the preferred use.
563 if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
564 UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
565 Register UseDstReg = UseMI->getOperand(0).getReg();
566 MachineOperand &UseSrcMO = UseMI->getOperand(1);
567 const LLT UseDstTy = MRI.getType(UseDstReg);
568 if (UseDstReg != ChosenDstReg) {
569 if (Preferred.Ty == UseDstTy) {
570 // If the use has the same type as the preferred use, then merge
571 // the vregs and erase the extend. For example:
572 // %1:_(s8) = G_LOAD ...
573 // %2:_(s32) = G_SEXT %1(s8)
574 // %3:_(s32) = G_ANYEXT %1(s8)
575 // ... = ... %3(s32)
576 // rewrites to:
577 // %2:_(s32) = G_SEXTLOAD ...
578 // ... = ... %2(s32)
579 replaceRegWith(MRI, UseDstReg, ChosenDstReg);
580 Observer.erasingInstr(*UseMO->getParent());
581 UseMO->getParent()->eraseFromParent();
582 } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
583 // If the preferred size is smaller, then keep the extend but extend
584 // from the result of the extending load. For example:
585 // %1:_(s8) = G_LOAD ...
586 // %2:_(s32) = G_SEXT %1(s8)
587 // %3:_(s64) = G_ANYEXT %1(s8)
588 // ... = ... %3(s64)
589 // rewrites to:
590 // %2:_(s32) = G_SEXTLOAD ...
591 // %3:_(s64) = G_ANYEXT %2:_(s32)
592 // ... = ... %3(s64)
593 replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
594 } else {
595 // If the preferred size is larger, then insert a truncate. For
596 // example:
597 // %1:_(s8) = G_LOAD ...
598 // %2:_(s64) = G_SEXT %1(s8)
599 // %3:_(s32) = G_ZEXT %1(s8)
600 // ... = ... %3(s32)
601 // rewrites to:
602 // %2:_(s64) = G_SEXTLOAD ...
603 // %4:_(s8) = G_TRUNC %2:_(s64)
604 // %3:_(s32) = G_ZEXT %4:_(s8)
605 // ... = ... %3(s32)
606 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
607 InsertTruncAt);
608 }
609 continue;
610 }
611 // The use is (one of) the uses of the preferred use we chose earlier.
612 // We're going to update the load to def this value later so just erase
613 // the old extend.
614 Observer.erasingInstr(*UseMO->getParent());
615 UseMO->getParent()->eraseFromParent();
616 continue;
617 }
618
619 // The use isn't an extend. Truncate back to the type we originally loaded.
620 // This is free on many targets.
621 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
622 }
623
624 MI.getOperand(0).setReg(ChosenDstReg);
625 Observer.changedInstr(MI);
626 }
627
628 bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
629 const MachineInstr &UseMI) {
630 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
631 "shouldn't consider debug uses");
632 assert(DefMI.getParent() == UseMI.getParent());
633 if (&DefMI == &UseMI)
634 return false;
635 const MachineBasicBlock &MBB = *DefMI.getParent();
636 auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
637 return &MI == &DefMI || &MI == &UseMI;
638 });
639 if (DefOrUse == MBB.end())
640 llvm_unreachable("Block must contain both DefMI and UseMI!");
641 return &*DefOrUse == &DefMI;
642 }
643
644 bool CombinerHelper::dominates(const MachineInstr &DefMI,
645 const MachineInstr &UseMI) {
646 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
647 "shouldn't consider debug uses");
648 if (MDT)
649 return MDT->dominates(&DefMI, &UseMI);
650 else if (DefMI.getParent() != UseMI.getParent())
651 return false;
652
653 return isPredecessor(DefMI, UseMI);
654 }
655
656 bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) {
657 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
658 Register SrcReg = MI.getOperand(1).getReg();
659 Register LoadUser = SrcReg;
660
661 if (MRI.getType(SrcReg).isVector())
662 return false;
663
664 Register TruncSrc;
665 if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
666 LoadUser = TruncSrc;
667
668 uint64_t SizeInBits = MI.getOperand(2).getImm();
669 // If the source is a G_SEXTLOAD from the same bit width, then we don't
670 // need any extend at all, just a truncate.
671 if (auto *LoadMI = getOpcodeDef(TargetOpcode::G_SEXTLOAD, LoadUser, MRI)) {
672 const auto &MMO = **LoadMI->memoperands_begin();
673 // If truncating more than the original extended value, abort.
674 if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < MMO.getSizeInBits())
675 return false;
676 if (MMO.getSizeInBits() == SizeInBits)
677 return true;
678 }
679 return false;
680 }
681
682 bool CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) {
683 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
684 Builder.setInstrAndDebugLoc(MI);
685 Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
686 MI.eraseFromParent();
687 return true;
688 }
689
690 bool CombinerHelper::matchSextInRegOfLoad(
691 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
692 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
693
694 // Only supports scalars for now.
695 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
696 return false;
697
698 Register SrcReg = MI.getOperand(1).getReg();
699 MachineInstr *LoadDef = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
700 if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg()))
701 return false;
702
703 // If the sign extend extends from a narrower width than the load's width,
704 // then we can narrow the load width when we combine to a G_SEXTLOAD.
705 auto &MMO = **LoadDef->memoperands_begin();
706 // Don't do this for non-simple loads.
707 if (MMO.isAtomic() || MMO.isVolatile())
708 return false;
709
710 // Avoid widening the load at all.
711 unsigned NewSizeBits =
712 std::min((uint64_t)MI.getOperand(2).getImm(), MMO.getSizeInBits());
713
714 // Don't generate G_SEXTLOADs with a < 1 byte width.
715 if (NewSizeBits < 8)
716 return false;
717 // Don't bother creating a non-power-of-2 sextload, it will likely be broken up
718 // anyway for most targets.
719 if (!isPowerOf2_32(NewSizeBits))
720 return false;
721 MatchInfo = std::make_tuple(LoadDef->getOperand(0).getReg(), NewSizeBits);
722 return true;
723 }
724
725 bool CombinerHelper::applySextInRegOfLoad(
726 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
727 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
728 Register LoadReg;
729 unsigned ScalarSizeBits;
730 std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
731 auto *LoadDef = MRI.getVRegDef(LoadReg);
732 assert(LoadDef && "Expected a load reg");
733
734 // If we have the following:
735 // %ld = G_LOAD %ptr, (load 2)
736 // %ext = G_SEXT_INREG %ld, 8
737 // ==>
738 // %ld = G_SEXTLOAD %ptr (load 1)
739
740 auto &MMO = **LoadDef->memoperands_begin();
741 Builder.setInstrAndDebugLoc(*LoadDef);
742 auto &MF = Builder.getMF();
743 auto PtrInfo = MMO.getPointerInfo();
744 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
745 Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
746 LoadDef->getOperand(1).getReg(), *NewMMO);
747 MI.eraseFromParent();
748 return true;
749 }
750
751 bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
752 Register &Base, Register &Offset) {
753 auto &MF = *MI.getParent()->getParent();
754 const auto &TLI = *MF.getSubtarget().getTargetLowering();
755
756 #ifndef NDEBUG
757 unsigned Opcode = MI.getOpcode();
758 assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
759 Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
760 #endif
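// For example (illustrative), a post-indexed form folds
//   %v:_(s32) = G_LOAD %base(p0)
//   %next:_(p0) = G_PTR_ADD %base, %off
// into a single G_INDEXED_LOAD producing both %v and the updated address.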
761
762 Base = MI.getOperand(1).getReg();
763 MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base);
764 if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
765 return false;
766
767 LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
768 // FIXME: The following use traversal needs a bail out for pathological cases.
769 for (auto &Use : MRI.use_nodbg_instructions(Base)) {
770 if (Use.getOpcode() != TargetOpcode::G_PTR_ADD)
771 continue;
772
773 Offset = Use.getOperand(2).getReg();
774 if (!ForceLegalIndexing &&
775 !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) {
776 LLVM_DEBUG(dbgs() << " Ignoring candidate with illegal addrmode: "
777 << Use);
778 continue;
779 }
780
781 // Make sure the offset calculation is before the potentially indexed op.
782 // FIXME: we really care about dependency here. The offset calculation might
783 // be movable.
784 MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset);
785 if (!OffsetDef || !dominates(*OffsetDef, MI)) {
786 LLVM_DEBUG(dbgs() << " Ignoring candidate with offset after mem-op: "
787 << Use);
788 continue;
789 }
790
791 // FIXME: check whether all uses of Base are loads/stores with foldable
792 // addressing modes. If so, using the normal addr-modes is better than
793 // forming an indexed one.
794
795 bool MemOpDominatesAddrUses = true;
796 for (auto &PtrAddUse :
797 MRI.use_nodbg_instructions(Use.getOperand(0).getReg())) {
798 if (!dominates(MI, PtrAddUse)) {
799 MemOpDominatesAddrUses = false;
800 break;
801 }
802 }
803
804 if (!MemOpDominatesAddrUses) {
805 LLVM_DEBUG(
806 dbgs() << " Ignoring candidate as memop does not dominate uses: "
807 << Use);
808 continue;
809 }
810
811 LLVM_DEBUG(dbgs() << " Found match: " << Use);
812 Addr = Use.getOperand(0).getReg();
813 return true;
814 }
815
816 return false;
817 }
818
819 bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
820 Register &Base, Register &Offset) {
821 auto &MF = *MI.getParent()->getParent();
822 const auto &TLI = *MF.getSubtarget().getTargetLowering();
823
824 #ifndef NDEBUG
825 unsigned Opcode = MI.getOpcode();
826 assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
827 Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
828 #endif
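// For example (illustrative), a pre-indexed form folds
//   %addr:_(p0) = G_PTR_ADD %base, %off
//   %v:_(s32) = G_LOAD %addr(p0)
// into a single G_INDEXED_LOAD producing %v along with the written-back %addr.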
829
830 Addr = MI.getOperand(1).getReg();
831 MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_PTR_ADD, Addr, MRI);
832 if (!AddrDef || MRI.hasOneNonDBGUse(Addr))
833 return false;
834
835 Base = AddrDef->getOperand(1).getReg();
836 Offset = AddrDef->getOperand(2).getReg();
837
838 LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI);
839
840 if (!ForceLegalIndexing &&
841 !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) {
842 LLVM_DEBUG(dbgs() << " Skipping, not legal for target");
843 return false;
844 }
845
846 MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
847 if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
848 LLVM_DEBUG(dbgs() << " Skipping, frame index would need copy anyway.");
849 return false;
850 }
851
852 if (MI.getOpcode() == TargetOpcode::G_STORE) {
853 // Would require a copy.
854 if (Base == MI.getOperand(0).getReg()) {
855 LLVM_DEBUG(dbgs() << " Skipping, storing base so need copy anyway.");
856 return false;
857 }
858
859 // We're expecting one use of Addr in MI, but it could also be the
860 // value stored, which isn't actually dominated by the instruction.
861 if (MI.getOperand(0).getReg() == Addr) {
862 LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses");
863 return false;
864 }
865 }
866
867 // FIXME: check whether all uses of the base pointer are constant PtrAdds.
868 // That might allow us to end base's liveness here by adjusting the constant.
869
870 for (auto &UseMI : MRI.use_nodbg_instructions(Addr)) {
871 if (!dominates(MI, UseMI)) {
872 LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses.");
873 return false;
874 }
875 }
876
877 return true;
878 }
879
880 bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
881 IndexedLoadStoreMatchInfo MatchInfo;
882 if (matchCombineIndexedLoadStore(MI, MatchInfo)) {
883 applyCombineIndexedLoadStore(MI, MatchInfo);
884 return true;
885 }
886 return false;
887 }
888
889 bool CombinerHelper::matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
890 unsigned Opcode = MI.getOpcode();
891 if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
892 Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
893 return false;
894
895 // For now, no targets actually support these opcodes so don't waste time
896 // running these unless we're forced to for testing.
897 if (!ForceLegalIndexing)
898 return false;
899
900 MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
901 MatchInfo.Offset);
902 if (!MatchInfo.IsPre &&
903 !findPostIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
904 MatchInfo.Offset))
905 return false;
906
907 return true;
908 }
909
910 void CombinerHelper::applyCombineIndexedLoadStore(
911 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
912 MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
913 MachineIRBuilder MIRBuilder(MI);
914 unsigned Opcode = MI.getOpcode();
915 bool IsStore = Opcode == TargetOpcode::G_STORE;
916 unsigned NewOpcode;
917 switch (Opcode) {
918 case TargetOpcode::G_LOAD:
919 NewOpcode = TargetOpcode::G_INDEXED_LOAD;
920 break;
921 case TargetOpcode::G_SEXTLOAD:
922 NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
923 break;
924 case TargetOpcode::G_ZEXTLOAD:
925 NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
926 break;
927 case TargetOpcode::G_STORE:
928 NewOpcode = TargetOpcode::G_INDEXED_STORE;
929 break;
930 default:
931 llvm_unreachable("Unknown load/store opcode");
932 }
933
934 auto MIB = MIRBuilder.buildInstr(NewOpcode);
935 if (IsStore) {
936 MIB.addDef(MatchInfo.Addr);
937 MIB.addUse(MI.getOperand(0).getReg());
938 } else {
939 MIB.addDef(MI.getOperand(0).getReg());
940 MIB.addDef(MatchInfo.Addr);
941 }
942
943 MIB.addUse(MatchInfo.Base);
944 MIB.addUse(MatchInfo.Offset);
945 MIB.addImm(MatchInfo.IsPre);
946 MI.eraseFromParent();
947 AddrDef.eraseFromParent();
948
949 LLVM_DEBUG(dbgs() << " Combined to indexed operation");
950 }
951
952 bool CombinerHelper::matchCombineDivRem(MachineInstr &MI,
953 MachineInstr *&OtherMI) {
954 unsigned Opcode = MI.getOpcode();
955 bool IsDiv, IsSigned;
956
957 switch (Opcode) {
958 default:
959 llvm_unreachable("Unexpected opcode!");
960 case TargetOpcode::G_SDIV:
961 case TargetOpcode::G_UDIV: {
962 IsDiv = true;
963 IsSigned = Opcode == TargetOpcode::G_SDIV;
964 break;
965 }
966 case TargetOpcode::G_SREM:
967 case TargetOpcode::G_UREM: {
968 IsDiv = false;
969 IsSigned = Opcode == TargetOpcode::G_SREM;
970 break;
971 }
972 }
973
974 Register Src1 = MI.getOperand(1).getReg();
975 unsigned DivOpcode, RemOpcode, DivremOpcode;
976 if (IsSigned) {
977 DivOpcode = TargetOpcode::G_SDIV;
978 RemOpcode = TargetOpcode::G_SREM;
979 DivremOpcode = TargetOpcode::G_SDIVREM;
980 } else {
981 DivOpcode = TargetOpcode::G_UDIV;
982 RemOpcode = TargetOpcode::G_UREM;
983 DivremOpcode = TargetOpcode::G_UDIVREM;
984 }
985
986 if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
987 return false;
988
989 // Combine:
990 // %div:_ = G_[SU]DIV %src1:_, %src2:_
991 // %rem:_ = G_[SU]REM %src1:_, %src2:_
992 // into:
993 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
994
995 // Combine:
996 // %rem:_ = G_[SU]REM %src1:_, %src2:_
997 // %div:_ = G_[SU]DIV %src1:_, %src2:_
998 // into:
999 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1000
1001 for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
1002 if (MI.getParent() == UseMI.getParent() &&
1003 ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
1004 (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
1005 matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2))) {
1006 OtherMI = &UseMI;
1007 return true;
1008 }
1009 }
1010
1011 return false;
1012 }
1013
1014 void CombinerHelper::applyCombineDivRem(MachineInstr &MI,
1015 MachineInstr *&OtherMI) {
1016 unsigned Opcode = MI.getOpcode();
1017 assert(OtherMI && "OtherMI shouldn't be null.");
1018
1019 Register DestDivReg, DestRemReg;
1020 if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
1021 DestDivReg = MI.getOperand(0).getReg();
1022 DestRemReg = OtherMI->getOperand(0).getReg();
1023 } else {
1024 DestDivReg = OtherMI->getOperand(0).getReg();
1025 DestRemReg = MI.getOperand(0).getReg();
1026 }
1027
1028 bool IsSigned =
1029 Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
1030
1031 // Check which instruction is first in the block so we don't break def-use
1032 // deps by "moving" the instruction incorrectly.
1033 if (dominates(MI, *OtherMI))
1034 Builder.setInstrAndDebugLoc(MI);
1035 else
1036 Builder.setInstrAndDebugLoc(*OtherMI);
1037
1038 Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
1039 : TargetOpcode::G_UDIVREM,
1040 {DestDivReg, DestRemReg},
1041 {MI.getOperand(1).getReg(), MI.getOperand(2).getReg()});
1042 MI.eraseFromParent();
1043 OtherMI->eraseFromParent();
1044 }
1045
1046 bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI,
1047 MachineInstr *&BrCond) {
1048 assert(MI.getOpcode() == TargetOpcode::G_BR);
1049
1050 // Try to match the following:
1051 // bb1:
1052 // G_BRCOND %c1, %bb2
1053 // G_BR %bb3
1054 // bb2:
1055 // ...
1056 // bb3:
1057
1058 // The above pattern does not have a fall through to the successor bb2, always
1059 // resulting in a branch no matter which path is taken. Here we try to find
1060 // and replace that pattern with a conditional branch to bb3, otherwise falling
1061 // through to bb2. This is generally better for branch predictors.
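//
// For example (illustrative), the blocks above are rewritten to:
// bb1:
//   %t:_(s1) = G_CONSTANT i1 true
//   %inv:_(s1) = G_XOR %c1, %t
//   G_BRCOND %inv, %bb3
//   G_BR %bb2
// Since bb2 is the layout successor, the unconditional branch to it can later
// be removed as a simple fallthrough.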
1062
1063 MachineBasicBlock *MBB = MI.getParent();
1064 MachineBasicBlock::iterator BrIt(MI);
1065 if (BrIt == MBB->begin())
1066 return false;
1067 assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
1068
1069 BrCond = &*std::prev(BrIt);
1070 if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
1071 return false;
1072
1073 // Check that the next block is the conditional branch target. Also make sure
1074 // that it isn't the same as the G_BR's target (otherwise, this will loop.)
1075 MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
1076 return BrCondTarget != MI.getOperand(0).getMBB() &&
1077 MBB->isLayoutSuccessor(BrCondTarget);
1078 }
1079
1080 void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
1081 MachineInstr *&BrCond) {
1082 MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
1083 Builder.setInstrAndDebugLoc(*BrCond);
1084 LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
1085 // FIXME: Does int/fp matter for this? If so, we might need to restrict
1086 // this to i1 only since we might not know for sure what kind of
1087 // compare generated the condition value.
1088 auto True = Builder.buildConstant(
1089 Ty, getICmpTrueVal(getTargetLowering(), false, false));
1090 auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
1091
1092 auto *FallthroughBB = BrCond->getOperand(1).getMBB();
1093 Observer.changingInstr(MI);
1094 MI.getOperand(0).setMBB(FallthroughBB);
1095 Observer.changedInstr(MI);
1096
1097 // Change the conditional branch to use the inverted condition and
1098 // new target block.
1099 Observer.changingInstr(*BrCond);
1100 BrCond->getOperand(0).setReg(Xor.getReg(0));
1101 BrCond->getOperand(1).setMBB(BrTarget);
1102 Observer.changedInstr(*BrCond);
1103 }
1104
1105 static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
1106 // On Darwin, -Os means optimize for size without hurting performance, so
1107 // only really optimize for size when -Oz (MinSize) is used.
1108 if (MF.getTarget().getTargetTriple().isOSDarwin())
1109 return MF.getFunction().hasMinSize();
1110 return MF.getFunction().hasOptSize();
1111 }
1112
1113 // Returns a list of types to use for memory op lowering in MemOps. A partial
1114 // port of findOptimalMemOpLowering in TargetLowering.
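// For example (illustrative), a 12-byte copy for which the target prefers s64
// might produce MemOps = {s64, s32}, i.e. one 8-byte and one 4-byte access.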
1115 static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
1116 unsigned Limit, const MemOp &Op,
1117 unsigned DstAS, unsigned SrcAS,
1118 const AttributeList &FuncAttributes,
1119 const TargetLowering &TLI) {
1120 if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
1121 return false;
1122
1123 LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
1124
1125 if (Ty == LLT()) {
1126 // Use the largest scalar type whose alignment constraints are satisfied.
1127 // We only need to check DstAlign here as SrcAlign is always greater or
1128 // equal to DstAlign (or zero).
1129 Ty = LLT::scalar(64);
1130 if (Op.isFixedDstAlign())
1131 while (Op.getDstAlign() < Ty.getSizeInBytes() &&
1132 !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
1133 Ty = LLT::scalar(Ty.getSizeInBytes());
1134 assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
1135 // FIXME: check for the largest legal type we can load/store to.
1136 }
1137
1138 unsigned NumMemOps = 0;
1139 uint64_t Size = Op.size();
1140 while (Size) {
1141 unsigned TySize = Ty.getSizeInBytes();
1142 while (TySize > Size) {
1143 // For now, only use non-vector loads / stores for the left-over pieces.
1144 LLT NewTy = Ty;
1145 // FIXME: check for mem op safety and legality of the types. Not all of
1146 // SDAGisms map cleanly to GISel concepts.
1147 if (NewTy.isVector())
1148 NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
1149 NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
1150 unsigned NewTySize = NewTy.getSizeInBytes();
1151 assert(NewTySize > 0 && "Could not find appropriate type");
1152
1153 // If the new LLT cannot cover all of the remaining bits, then consider
1154 // issuing one (or a pair of) unaligned and overlapping load / store ops.
1155 bool Fast;
1156 // Need to get a VT equivalent for allowsMisalignedMemoryAccesses().
1157 MVT VT = getMVTForLLT(Ty);
1158 if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
1159 TLI.allowsMisalignedMemoryAccesses(
1160 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
1161 MachineMemOperand::MONone, &Fast) &&
1162 Fast)
1163 TySize = Size;
1164 else {
1165 Ty = NewTy;
1166 TySize = NewTySize;
1167 }
1168 }
1169
1170 if (++NumMemOps > Limit)
1171 return false;
1172
1173 MemOps.push_back(Ty);
1174 Size -= TySize;
1175 }
1176
1177 return true;
1178 }
1179
1180 static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
1181 if (Ty.isVector())
1182 return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
1183 Ty.getNumElements());
1184 return IntegerType::get(C, Ty.getSizeInBits());
1185 }
1186
1187 // Get a vectorized representation of the memset value operand, GISel edition.
1188 static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
1189 MachineRegisterInfo &MRI = *MIB.getMRI();
1190 unsigned NumBits = Ty.getScalarSizeInBits();
1191 auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
1192 if (!Ty.isVector() && ValVRegAndVal) {
1193 APInt Scalar = ValVRegAndVal->Value.truncOrSelf(8);
1194 APInt SplatVal = APInt::getSplat(NumBits, Scalar);
1195 return MIB.buildConstant(Ty, SplatVal).getReg(0);
1196 }
1197
1198 // Extend the byte value to the larger type, and then multiply by a magic
1199 // value 0x010101... in order to replicate it across every byte.
1200 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
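// For example, replicating 0xAB into an s32 gives 0xAB * 0x01010101 =
// 0xABABABAB.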
1201 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
1202 return MIB.buildConstant(Ty, 0).getReg(0);
1203 }
1204
1205 LLT ExtType = Ty.getScalarType();
1206 auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
1207 if (NumBits > 8) {
1208 APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
1209 auto MagicMI = MIB.buildConstant(ExtType, Magic);
1210 Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
1211 }
1212
1213 // For vector types create a G_BUILD_VECTOR.
1214 if (Ty.isVector())
1215 Val = MIB.buildSplatVector(Ty, Val).getReg(0);
1216
1217 return Val;
1218 }
1219
1220 bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst,
1221 Register Val, unsigned KnownLen,
1222 Align Alignment, bool IsVolatile) {
1223 auto &MF = *MI.getParent()->getParent();
1224 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1225 auto &DL = MF.getDataLayout();
1226 LLVMContext &C = MF.getFunction().getContext();
1227
1228 assert(KnownLen != 0 && "Have a zero length memset length!");
1229
1230 bool DstAlignCanChange = false;
1231 MachineFrameInfo &MFI = MF.getFrameInfo();
1232 bool OptSize = shouldLowerMemFuncForSize(MF);
1233
1234 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1235 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1236 DstAlignCanChange = true;
1237
1238 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
1239 std::vector<LLT> MemOps;
1240
1241 const auto &DstMMO = **MI.memoperands_begin();
1242 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1243
1244 auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
1245 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
1246
1247 if (!findGISelOptimalMemOpLowering(MemOps, Limit,
1248 MemOp::Set(KnownLen, DstAlignCanChange,
1249 Alignment,
1250 /*IsZeroMemset=*/IsZeroVal,
1251 /*IsVolatile=*/IsVolatile),
1252 DstPtrInfo.getAddrSpace(), ~0u,
1253 MF.getFunction().getAttributes(), TLI))
1254 return false;
1255
1256 if (DstAlignCanChange) {
1257 // Get an estimate of the type from the LLT.
1258 Type *IRTy = getTypeForLLT(MemOps[0], C);
1259 Align NewAlign = DL.getABITypeAlign(IRTy);
1260 if (NewAlign > Alignment) {
1261 Alignment = NewAlign;
1262 unsigned FI = FIDef->getOperand(1).getIndex();
1263 // Give the stack frame object a larger alignment if needed.
1264 if (MFI.getObjectAlign(FI) < Alignment)
1265 MFI.setObjectAlignment(FI, Alignment);
1266 }
1267 }
1268
1269 MachineIRBuilder MIB(MI);
1270 // Find the largest store and generate the bit pattern for it.
1271 LLT LargestTy = MemOps[0];
1272 for (unsigned i = 1; i < MemOps.size(); i++)
1273 if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
1274 LargestTy = MemOps[i];
1275
1276 // The memset stored value is always defined as an s8, so in order to make it
1277 // work with larger store types we need to repeat the bit pattern across the
1278 // wider type.
1279 Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
1280
1281 if (!MemSetValue)
1282 return false;
1283
1284 // Generate the stores. For each store type in the list, we generate the
1285 // matching store of that type to the destination address.
1286 LLT PtrTy = MRI.getType(Dst);
1287 unsigned DstOff = 0;
1288 unsigned Size = KnownLen;
1289 for (unsigned I = 0; I < MemOps.size(); I++) {
1290 LLT Ty = MemOps[I];
1291 unsigned TySize = Ty.getSizeInBytes();
1292 if (TySize > Size) {
1293 // Issuing an unaligned load / store pair that overlaps with the previous
1294 // pair. Adjust the offset accordingly.
1295 assert(I == MemOps.size() - 1 && I != 0);
1296 DstOff -= TySize - Size;
1297 }
1298
1299 // If this store is smaller than the largest store, see whether we can get
1300 // the smaller value for free with a truncate.
1301 Register Value = MemSetValue;
1302 if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
1303 MVT VT = getMVTForLLT(Ty);
1304 MVT LargestVT = getMVTForLLT(LargestTy);
1305 if (!LargestTy.isVector() && !Ty.isVector() &&
1306 TLI.isTruncateFree(LargestVT, VT))
1307 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
1308 else
1309 Value = getMemsetValue(Val, Ty, MIB);
1310 if (!Value)
1311 return false;
1312 }
1313
1314 auto *StoreMMO =
1315 MF.getMachineMemOperand(&DstMMO, DstOff, Ty.getSizeInBytes());
1316
1317 Register Ptr = Dst;
1318 if (DstOff != 0) {
1319 auto Offset =
1320 MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
1321 Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1322 }
1323
1324 MIB.buildStore(Value, Ptr, *StoreMMO);
1325 DstOff += Ty.getSizeInBytes();
1326 Size -= TySize;
1327 }
1328
1329 MI.eraseFromParent();
1330 return true;
1331 }
1332
1333 bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
1334 Register Src, unsigned KnownLen,
1335 Align DstAlign, Align SrcAlign,
1336 bool IsVolatile) {
1337 auto &MF = *MI.getParent()->getParent();
1338 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1339 auto &DL = MF.getDataLayout();
1340 LLVMContext &C = MF.getFunction().getContext();
1341
1342 assert(KnownLen != 0 && "Have a zero length memcpy length!");
1343
1344 bool DstAlignCanChange = false;
1345 MachineFrameInfo &MFI = MF.getFrameInfo();
1346 bool OptSize = shouldLowerMemFuncForSize(MF);
1347 Align Alignment = commonAlignment(DstAlign, SrcAlign);
1348
1349 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1350 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1351 DstAlignCanChange = true;
1352
1353 // FIXME: infer better src pointer alignment like SelectionDAG does here.
1354 // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
1355 // if the memcpy is in a tail call position.
1356
1357 unsigned Limit = TLI.getMaxStoresPerMemcpy(OptSize);
1358 std::vector<LLT> MemOps;
1359
1360 const auto &DstMMO = **MI.memoperands_begin();
1361 const auto &SrcMMO = **std::next(MI.memoperands_begin());
1362 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1363 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
1364
1365 if (!findGISelOptimalMemOpLowering(
1366 MemOps, Limit,
1367 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
1368 IsVolatile),
1369 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
1370 MF.getFunction().getAttributes(), TLI))
1371 return false;
1372
1373 if (DstAlignCanChange) {
1374 // Get an estimate of the type from the LLT.
1375 Type *IRTy = getTypeForLLT(MemOps[0], C);
1376 Align NewAlign = DL.getABITypeAlign(IRTy);
1377
1378 // Don't promote to an alignment that would require dynamic stack
1379 // realignment.
1380 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1381 if (!TRI->hasStackRealignment(MF))
1382 while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
1383 NewAlign = NewAlign / 2;
1384
1385 if (NewAlign > Alignment) {
1386 Alignment = NewAlign;
1387 unsigned FI = FIDef->getOperand(1).getIndex();
1388 // Give the stack frame object a larger alignment if needed.
1389 if (MFI.getObjectAlign(FI) < Alignment)
1390 MFI.setObjectAlignment(FI, Alignment);
1391 }
1392 }
1393
1394 LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
1395
1396 MachineIRBuilder MIB(MI);
1397 // Now we need to emit a load/store pair for each of the types we've
1398 // collected. I.e. for each type, generate a load from the source pointer of
1399 // that type width, and then generate a corresponding store to the dest buffer
1400 // of that value loaded. This can result in a sequence of loads and stores of
1401 // mixed types, depending on what the target specifies as good types to use.
1402 unsigned CurrOffset = 0;
1403 LLT PtrTy = MRI.getType(Src);
1404 unsigned Size = KnownLen;
1405 for (auto CopyTy : MemOps) {
1406 // Issuing an unaligned load / store pair that overlaps with the previous
1407 // pair. Adjust the offset accordingly.
1408 if (CopyTy.getSizeInBytes() > Size)
1409 CurrOffset -= CopyTy.getSizeInBytes() - Size;
1410
1411 // Construct MMOs for the accesses.
1412 auto *LoadMMO =
1413 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
1414 auto *StoreMMO =
1415 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
1416
1417 // Create the load.
1418 Register LoadPtr = Src;
1419 Register Offset;
1420 if (CurrOffset != 0) {
1421 Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
1422 .getReg(0);
1423 LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
1424 }
1425 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
1426
1427 // Create the store.
1428 Register StorePtr =
1429 CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1430 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
1431 CurrOffset += CopyTy.getSizeInBytes();
1432 Size -= CopyTy.getSizeInBytes();
1433 }
1434
1435 MI.eraseFromParent();
1436 return true;
1437 }
1438
1439 bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
1440 Register Src, unsigned KnownLen,
1441 Align DstAlign, Align SrcAlign,
1442 bool IsVolatile) {
1443 auto &MF = *MI.getParent()->getParent();
1444 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1445 auto &DL = MF.getDataLayout();
1446 LLVMContext &C = MF.getFunction().getContext();
1447
1448 assert(KnownLen != 0 && "Have a zero length memmove length!");
1449
1450 bool DstAlignCanChange = false;
1451 MachineFrameInfo &MFI = MF.getFrameInfo();
1452 bool OptSize = shouldLowerMemFuncForSize(MF);
1453 Align Alignment = commonAlignment(DstAlign, SrcAlign);
1454
1455 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1456 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1457 DstAlignCanChange = true;
1458
1459 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
1460 std::vector<LLT> MemOps;
1461
1462 const auto &DstMMO = **MI.memoperands_begin();
1463 const auto &SrcMMO = **std::next(MI.memoperands_begin());
1464 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1465 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
1466
1467 // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
1468   // to a bug in its findOptimalMemOpLowering implementation. For now do the
1469 // same thing here.
1470 if (!findGISelOptimalMemOpLowering(
1471 MemOps, Limit,
1472 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
1473 /*IsVolatile*/ true),
1474 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
1475 MF.getFunction().getAttributes(), TLI))
1476 return false;
1477
1478 if (DstAlignCanChange) {
1479 // Get an estimate of the type from the LLT.
1480 Type *IRTy = getTypeForLLT(MemOps[0], C);
1481 Align NewAlign = DL.getABITypeAlign(IRTy);
1482
1483 // Don't promote to an alignment that would require dynamic stack
1484 // realignment.
1485 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1486 if (!TRI->hasStackRealignment(MF))
1487 while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
1488 NewAlign = NewAlign / 2;
1489
1490 if (NewAlign > Alignment) {
1491 Alignment = NewAlign;
1492 unsigned FI = FIDef->getOperand(1).getIndex();
1493 // Give the stack frame object a larger alignment if needed.
1494 if (MFI.getObjectAlign(FI) < Alignment)
1495 MFI.setObjectAlignment(FI, Alignment);
1496 }
1497 }
1498
1499 LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
1500
1501 MachineIRBuilder MIB(MI);
1502   // Memmove requires that we perform all of the loads before any of the stores.
1503   // Apart from that, this loop does essentially the same thing as the memcpy
1504   // codegen function above.
1505 unsigned CurrOffset = 0;
1506 LLT PtrTy = MRI.getType(Src);
1507 SmallVector<Register, 16> LoadVals;
1508 for (auto CopyTy : MemOps) {
1509 // Construct MMO for the load.
1510 auto *LoadMMO =
1511 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
1512
1513 // Create the load.
1514 Register LoadPtr = Src;
1515 if (CurrOffset != 0) {
1516 auto Offset =
1517 MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
1518 LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
1519 }
1520 LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
1521 CurrOffset += CopyTy.getSizeInBytes();
1522 }
1523
1524 CurrOffset = 0;
1525 for (unsigned I = 0; I < MemOps.size(); ++I) {
1526 LLT CopyTy = MemOps[I];
1527 // Now store the values loaded.
1528 auto *StoreMMO =
1529 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
1530
1531 Register StorePtr = Dst;
1532 if (CurrOffset != 0) {
1533 auto Offset =
1534 MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
1535 StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1536 }
1537 MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
1538 CurrOffset += CopyTy.getSizeInBytes();
1539 }
1540 MI.eraseFromParent();
1541 return true;
1542 }
1543
1544 bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
1545 const unsigned Opc = MI.getOpcode();
1546 // This combine is fairly complex so it's not written with a separate
1547 // matcher function.
1548 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
1549          Opc == TargetOpcode::G_MEMSET) && "Expected a memcpy-like instruction");
1550
1551 auto MMOIt = MI.memoperands_begin();
1552 const MachineMemOperand *MemOp = *MMOIt;
1553 bool IsVolatile = MemOp->isVolatile();
1554 // Don't try to optimize volatile.
1555 if (IsVolatile)
1556 return false;
1557
1558 Align DstAlign = MemOp->getBaseAlign();
1559 Align SrcAlign;
1560 Register Dst = MI.getOperand(0).getReg();
1561 Register Src = MI.getOperand(1).getReg();
1562 Register Len = MI.getOperand(2).getReg();
1563
1564 if (Opc != TargetOpcode::G_MEMSET) {
1565     assert(std::next(MMOIt) != MI.memoperands_end() && "Expected a second MMO on MI");
1566     MemOp = *(++MMOIt);
1567 SrcAlign = MemOp->getBaseAlign();
1568 }
1569
1570 // See if this is a constant length copy
1571 auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
1572 if (!LenVRegAndVal)
1573 return false; // Leave it to the legalizer to lower it to a libcall.
1574 unsigned KnownLen = LenVRegAndVal->Value.getZExtValue();
1575
1576 if (KnownLen == 0) {
1577 MI.eraseFromParent();
1578 return true;
1579 }
1580
1581 if (MaxLen && KnownLen > MaxLen)
1582 return false;
1583
1584 if (Opc == TargetOpcode::G_MEMCPY)
1585 return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
1586 if (Opc == TargetOpcode::G_MEMMOVE)
1587 return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
1588 if (Opc == TargetOpcode::G_MEMSET)
1589 return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
1590 return false;
1591 }
1592
1593 static Optional<APFloat> constantFoldFpUnary(unsigned Opcode, LLT DstTy,
1594 const Register Op,
1595 const MachineRegisterInfo &MRI) {
1596 const ConstantFP *MaybeCst = getConstantFPVRegVal(Op, MRI);
1597 if (!MaybeCst)
1598 return None;
1599
1600 APFloat V = MaybeCst->getValueAPF();
1601 switch (Opcode) {
1602 default:
1603 llvm_unreachable("Unexpected opcode!");
1604 case TargetOpcode::G_FNEG: {
1605 V.changeSign();
1606 return V;
1607 }
1608 case TargetOpcode::G_FABS: {
1609 V.clearSign();
1610 return V;
1611 }
1612 case TargetOpcode::G_FPTRUNC:
1613 break;
1614 case TargetOpcode::G_FSQRT: {
1615 bool Unused;
1616 V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
1617 V = APFloat(sqrt(V.convertToDouble()));
1618 break;
1619 }
1620 case TargetOpcode::G_FLOG2: {
1621 bool Unused;
1622 V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
1623 V = APFloat(log2(V.convertToDouble()));
1624 break;
1625 }
1626 }
1627 // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
1628 // `buildFConstant` will assert on size mismatch. Only `G_FPTRUNC`, `G_FSQRT`,
1629 // and `G_FLOG2` reach here.
1630 bool Unused;
1631 V.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven, &Unused);
1632 return V;
1633 }
1634
1635 bool CombinerHelper::matchCombineConstantFoldFpUnary(MachineInstr &MI,
1636 Optional<APFloat> &Cst) {
1637 Register DstReg = MI.getOperand(0).getReg();
1638 Register SrcReg = MI.getOperand(1).getReg();
1639 LLT DstTy = MRI.getType(DstReg);
1640 Cst = constantFoldFpUnary(MI.getOpcode(), DstTy, SrcReg, MRI);
1641 return Cst.hasValue();
1642 }
1643
1644 bool CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
1645 Optional<APFloat> &Cst) {
1646 assert(Cst.hasValue() && "Optional is unexpectedly empty!");
1647 Builder.setInstrAndDebugLoc(MI);
1648 MachineFunction &MF = Builder.getMF();
1649 auto *FPVal = ConstantFP::get(MF.getFunction().getContext(), *Cst);
1650 Register DstReg = MI.getOperand(0).getReg();
1651 Builder.buildFConstant(DstReg, *FPVal);
1652 MI.eraseFromParent();
1653 return true;
1654 }
1655
1656 bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
1657 PtrAddChain &MatchInfo) {
1658 // We're trying to match the following pattern:
1659 // %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1660 // %root = G_PTR_ADD %t1, G_CONSTANT imm2
1661 // -->
1662 // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
1663
1664 if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1665 return false;
1666
1667 Register Add2 = MI.getOperand(1).getReg();
1668 Register Imm1 = MI.getOperand(2).getReg();
1669 auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
1670 if (!MaybeImmVal)
1671 return false;
1672
1673 MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2);
1674 if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1675 return false;
1676
1677 Register Base = Add2Def->getOperand(1).getReg();
1678 Register Imm2 = Add2Def->getOperand(2).getReg();
1679 auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
1680 if (!MaybeImm2Val)
1681 return false;
1682
1683 // Pass the combined immediate to the apply function.
1684 MatchInfo.Imm = (MaybeImmVal->Value + MaybeImm2Val->Value).getSExtValue();
1685 MatchInfo.Base = Base;
1686 return true;
1687 }
1688
1689 bool CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
1690 PtrAddChain &MatchInfo) {
1691 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1692 MachineIRBuilder MIB(MI);
1693 LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1694 auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1695 Observer.changingInstr(MI);
1696 MI.getOperand(1).setReg(MatchInfo.Base);
1697 MI.getOperand(2).setReg(NewOffset.getReg(0));
1698 Observer.changedInstr(MI);
1699 return true;
1700 }
1701
1702 bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
1703 RegisterImmPair &MatchInfo) {
1704 // We're trying to match the following pattern with any of
1705 // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
1706 // %t1 = SHIFT %base, G_CONSTANT imm1
1707 // %root = SHIFT %t1, G_CONSTANT imm2
1708 // -->
1709 // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
1710
1711 unsigned Opcode = MI.getOpcode();
1712 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1713 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1714 Opcode == TargetOpcode::G_USHLSAT) &&
1715 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1716
1717 Register Shl2 = MI.getOperand(1).getReg();
1718 Register Imm1 = MI.getOperand(2).getReg();
1719 auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
1720 if (!MaybeImmVal)
1721 return false;
1722
1723 MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
1724 if (Shl2Def->getOpcode() != Opcode)
1725 return false;
1726
1727 Register Base = Shl2Def->getOperand(1).getReg();
1728 Register Imm2 = Shl2Def->getOperand(2).getReg();
1729 auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
1730 if (!MaybeImm2Val)
1731 return false;
1732
1733 // Pass the combined immediate to the apply function.
1734 MatchInfo.Imm =
1735 (MaybeImmVal->Value.getSExtValue() + MaybeImm2Val->Value).getSExtValue();
1736 MatchInfo.Reg = Base;
1737
1738 // There is no simple replacement for a saturating unsigned left shift that
1739 // exceeds the scalar size.
1740 if (Opcode == TargetOpcode::G_USHLSAT &&
1741 MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
1742 return false;
1743
1744 return true;
1745 }
1746
1747 bool CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
1748 RegisterImmPair &MatchInfo) {
1749 unsigned Opcode = MI.getOpcode();
1750 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1751 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1752 Opcode == TargetOpcode::G_USHLSAT) &&
1753 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1754
1755 Builder.setInstrAndDebugLoc(MI);
1756 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1757 unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
1758 auto Imm = MatchInfo.Imm;
1759
1760 if (Imm >= ScalarSizeInBits) {
1761 // Any logical shift that exceeds scalar size will produce zero.
1762 if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
1763 Builder.buildConstant(MI.getOperand(0), 0);
1764 MI.eraseFromParent();
1765 return true;
1766 }
1767 // Arithmetic shift and saturating signed left shift have no effect beyond
1768 // scalar size.
1769 Imm = ScalarSizeInBits - 1;
1770 }
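  // E.g. (illustrative): folding (G_LSHR (G_LSHR s32:%x, 20), 20) gives a
  // combined shift of 40, which becomes the constant 0, while the equivalent
  // G_ASHR chain is clamped to G_ASHR %x, 31.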
1771
1772 LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
1773 Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
1774 Observer.changingInstr(MI);
1775 MI.getOperand(1).setReg(MatchInfo.Reg);
1776 MI.getOperand(2).setReg(NewImm);
1777 Observer.changedInstr(MI);
1778 return true;
1779 }
1780
1781 bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
1782 ShiftOfShiftedLogic &MatchInfo) {
1783 // We're trying to match the following pattern with any of
1784 // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
1785 // with any of G_AND/G_OR/G_XOR logic instructions.
1786 // %t1 = SHIFT %X, G_CONSTANT C0
1787 // %t2 = LOGIC %t1, %Y
1788 // %root = SHIFT %t2, G_CONSTANT C1
1789 // -->
1790 // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
1791 // %t4 = SHIFT %Y, G_CONSTANT C1
1792 // %root = LOGIC %t3, %t4
1793 unsigned ShiftOpcode = MI.getOpcode();
1794 assert((ShiftOpcode == TargetOpcode::G_SHL ||
1795 ShiftOpcode == TargetOpcode::G_ASHR ||
1796 ShiftOpcode == TargetOpcode::G_LSHR ||
1797 ShiftOpcode == TargetOpcode::G_USHLSAT ||
1798 ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
1799 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1800
1801 // Match a one-use bitwise logic op.
1802 Register LogicDest = MI.getOperand(1).getReg();
1803 if (!MRI.hasOneNonDBGUse(LogicDest))
1804 return false;
1805
1806 MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
1807 unsigned LogicOpcode = LogicMI->getOpcode();
1808 if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
1809 LogicOpcode != TargetOpcode::G_XOR)
1810 return false;
1811
1812 // Find a matching one-use shift by constant.
1813 const Register C1 = MI.getOperand(2).getReg();
1814 auto MaybeImmVal = getConstantVRegValWithLookThrough(C1, MRI);
1815 if (!MaybeImmVal)
1816 return false;
1817
1818 const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
1819
1820 auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
1821 // Shift should match previous one and should be a one-use.
1822 if (MI->getOpcode() != ShiftOpcode ||
1823 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1824 return false;
1825
1826 // Must be a constant.
1827 auto MaybeImmVal =
1828 getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1829 if (!MaybeImmVal)
1830 return false;
1831
1832 ShiftVal = MaybeImmVal->Value.getSExtValue();
1833 return true;
1834 };
1835
1836 // Logic ops are commutative, so check each operand for a match.
1837 Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
1838 MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
1839 Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
1840 MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
1841 uint64_t C0Val;
1842
1843 if (matchFirstShift(LogicMIOp1, C0Val)) {
1844 MatchInfo.LogicNonShiftReg = LogicMIReg2;
1845 MatchInfo.Shift2 = LogicMIOp1;
1846 } else if (matchFirstShift(LogicMIOp2, C0Val)) {
1847 MatchInfo.LogicNonShiftReg = LogicMIReg1;
1848 MatchInfo.Shift2 = LogicMIOp2;
1849 } else
1850 return false;
1851
1852 MatchInfo.ValSum = C0Val + C1Val;
1853
1854 // The fold is not valid if the sum of the shift values exceeds bitwidth.
1855 if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
1856 return false;
1857
1858 MatchInfo.Logic = LogicMI;
1859 return true;
1860 }
1861
1862 bool CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
1863 ShiftOfShiftedLogic &MatchInfo) {
1864 unsigned Opcode = MI.getOpcode();
1865 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1866 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
1867 Opcode == TargetOpcode::G_SSHLSAT) &&
1868 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1869
1870 LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
1871 LLT DestType = MRI.getType(MI.getOperand(0).getReg());
1872 Builder.setInstrAndDebugLoc(MI);
1873
1874 Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
1875
1876 Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
1877 Register Shift1 =
1878 Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
1879
1880 Register Shift2Const = MI.getOperand(2).getReg();
1881 Register Shift2 = Builder
1882 .buildInstr(Opcode, {DestType},
1883 {MatchInfo.LogicNonShiftReg, Shift2Const})
1884 .getReg(0);
1885
1886 Register Dest = MI.getOperand(0).getReg();
1887 Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
1888
1889   // These were single-use, so it's safe to remove them.
1890 MatchInfo.Shift2->eraseFromParent();
1891 MatchInfo.Logic->eraseFromParent();
1892
1893 MI.eraseFromParent();
1894 return true;
1895 }
1896
1897 bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
1898 unsigned &ShiftVal) {
1899 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
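  // Match a multiply by a power-of-two constant so it can be rewritten as a
  // left shift, e.g. (illustrative):
  //   %r:_(s32) = G_MUL %x, 8   -->   %r:_(s32) = G_SHL %x, 3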
1900 auto MaybeImmVal =
1901 getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
1902 if (!MaybeImmVal)
1903 return false;
1904
1905 ShiftVal = MaybeImmVal->Value.exactLogBase2();
1906 return (static_cast<int32_t>(ShiftVal) != -1);
1907 }
1908
1909 bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
1910 unsigned &ShiftVal) {
1911 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
1912 MachineIRBuilder MIB(MI);
1913 LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
1914 auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
1915 Observer.changingInstr(MI);
1916 MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
1917 MI.getOperand(2).setReg(ShiftCst.getReg(0));
1918 Observer.changedInstr(MI);
1919 return true;
1920 }
1921
1922 // shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
1923 bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
1924 RegisterImmPair &MatchData) {
1925 assert(MI.getOpcode() == TargetOpcode::G_SHL && KB);
1926
1927 Register LHS = MI.getOperand(1).getReg();
1928
1929 Register ExtSrc;
1930 if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
1931 !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
1932 !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
1933 return false;
1934
1935 // TODO: Should handle vector splat.
1936 Register RHS = MI.getOperand(2).getReg();
1937 auto MaybeShiftAmtVal = getConstantVRegValWithLookThrough(RHS, MRI);
1938 if (!MaybeShiftAmtVal)
1939 return false;
1940
1941 if (LI) {
1942 LLT SrcTy = MRI.getType(ExtSrc);
1943
1944     // We only really care about legality for the type of the shifted value. We
1945     // can pick any type for the constant shift amount, so ask the target what
1946     // to use rather than guessing and hoping it is reported as legal.
1947 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
1948 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
1949 return false;
1950 }
1951
1952 int64_t ShiftAmt = MaybeShiftAmtVal->Value.getSExtValue();
1953 MatchData.Reg = ExtSrc;
1954 MatchData.Imm = ShiftAmt;
1955
1956 unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countLeadingOnes();
1957 return MinLeadingZeros >= ShiftAmt;
1958 }
1959
1960 bool CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
1961 const RegisterImmPair &MatchData) {
1962 Register ExtSrcReg = MatchData.Reg;
1963 int64_t ShiftAmtVal = MatchData.Imm;
1964
1965 LLT ExtSrcTy = MRI.getType(ExtSrcReg);
1966 Builder.setInstrAndDebugLoc(MI);
1967 auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
1968 auto NarrowShift =
1969 Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
1970 Builder.buildZExt(MI.getOperand(0), NarrowShift);
1971 MI.eraseFromParent();
1972 return true;
1973 }
1974
1975 static Register peekThroughBitcast(Register Reg,
1976 const MachineRegisterInfo &MRI) {
1977 while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
1978 ;
1979
1980 return Reg;
1981 }
1982
1983 bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(
1984 MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
1985 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
1986 "Expected an unmerge");
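  // E.g. (illustrative):
  //   %m:_(s64) = G_MERGE_VALUES %a:_(s32), %b:_(s32)
  //   %x:_(s32), %y:_(s32) = G_UNMERGE_VALUES %m
  // allows %x and %y to be replaced directly with %a and %b.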
1987 Register SrcReg =
1988 peekThroughBitcast(MI.getOperand(MI.getNumOperands() - 1).getReg(), MRI);
1989
1990 MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
1991 if (SrcInstr->getOpcode() != TargetOpcode::G_MERGE_VALUES &&
1992 SrcInstr->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
1993 SrcInstr->getOpcode() != TargetOpcode::G_CONCAT_VECTORS)
1994 return false;
1995
1996 // Check the source type of the merge.
1997 LLT SrcMergeTy = MRI.getType(SrcInstr->getOperand(1).getReg());
1998 LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
1999 bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
2000 if (SrcMergeTy != Dst0Ty && !SameSize)
2001 return false;
2002 // They are the same now (modulo a bitcast).
2003 // We can collect all the src registers.
2004 for (unsigned Idx = 1, EndIdx = SrcInstr->getNumOperands(); Idx != EndIdx;
2005 ++Idx)
2006 Operands.push_back(SrcInstr->getOperand(Idx).getReg());
2007 return true;
2008 }
2009
2010 bool CombinerHelper::applyCombineUnmergeMergeToPlainValues(
2011 MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
2012 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2013 "Expected an unmerge");
2014 assert((MI.getNumOperands() - 1 == Operands.size()) &&
2015 "Not enough operands to replace all defs");
2016 unsigned NumElems = MI.getNumOperands() - 1;
2017
2018 LLT SrcTy = MRI.getType(Operands[0]);
2019 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2020 bool CanReuseInputDirectly = DstTy == SrcTy;
2021 Builder.setInstrAndDebugLoc(MI);
2022 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2023 Register DstReg = MI.getOperand(Idx).getReg();
2024 Register SrcReg = Operands[Idx];
2025 if (CanReuseInputDirectly)
2026 replaceRegWith(MRI, DstReg, SrcReg);
2027 else
2028 Builder.buildCast(DstReg, SrcReg);
2029 }
2030 MI.eraseFromParent();
2031 return true;
2032 }
2033
2034 bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI,
2035 SmallVectorImpl<APInt> &Csts) {
2036 unsigned SrcIdx = MI.getNumOperands() - 1;
2037 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2038 MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
2039 if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
2040 SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
2041 return false;
2042   // Break the big constant down into smaller pieces.
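  // E.g. (illustrative): unmerging two s32 pieces out of
  //   %c:_(s64) = G_CONSTANT i64 0x1122334455667788
  // produces the constants 0x55667788 and 0x11223344, lowest bits first.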
2043 const MachineOperand &CstVal = SrcInstr->getOperand(1);
2044 APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
2045 ? CstVal.getCImm()->getValue()
2046 : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
2047
2048 LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
2049 unsigned ShiftAmt = Dst0Ty.getSizeInBits();
2050 // Unmerge a constant.
2051 for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
2052 Csts.emplace_back(Val.trunc(ShiftAmt));
2053 Val = Val.lshr(ShiftAmt);
2054 }
2055
2056 return true;
2057 }
2058
2059 bool CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
2060 SmallVectorImpl<APInt> &Csts) {
2061 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2062 "Expected an unmerge");
2063 assert((MI.getNumOperands() - 1 == Csts.size()) &&
2064 "Not enough operands to replace all defs");
2065 unsigned NumElems = MI.getNumOperands() - 1;
2066 Builder.setInstrAndDebugLoc(MI);
2067 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2068 Register DstReg = MI.getOperand(Idx).getReg();
2069 Builder.buildConstant(DstReg, Csts[Idx]);
2070 }
2071
2072 MI.eraseFromParent();
2073 return true;
2074 }
2075
2076 bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
2077 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2078 "Expected an unmerge");
2079 // Check that all the lanes are dead except the first one.
2080 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2081 if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
2082 return false;
2083 }
2084 return true;
2085 }
2086
2087 bool CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
2088 Builder.setInstrAndDebugLoc(MI);
2089 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2090 // Truncating a vector is going to truncate every single lane,
2091   // whereas we want the full low bits.
2092 // Do the operation on a scalar instead.
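  // E.g. (illustrative): for
  //   %a:_(s32), %b:_(s32) = G_UNMERGE_VALUES %v:_(<2 x s32>)
  // where only %a is used, bitcast %v to s64 first and truncate that scalar
  // to produce %a.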
2093 LLT SrcTy = MRI.getType(SrcReg);
2094 if (SrcTy.isVector())
2095 SrcReg =
2096 Builder.buildCast(LLT::scalar(SrcTy.getSizeInBits()), SrcReg).getReg(0);
2097
2098 Register Dst0Reg = MI.getOperand(0).getReg();
2099 LLT Dst0Ty = MRI.getType(Dst0Reg);
2100 if (Dst0Ty.isVector()) {
2101 auto MIB = Builder.buildTrunc(LLT::scalar(Dst0Ty.getSizeInBits()), SrcReg);
2102 Builder.buildCast(Dst0Reg, MIB);
2103 } else
2104 Builder.buildTrunc(Dst0Reg, SrcReg);
2105 MI.eraseFromParent();
2106 return true;
2107 }
2108
2109 bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) {
2110 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2111 "Expected an unmerge");
2112 Register Dst0Reg = MI.getOperand(0).getReg();
2113 LLT Dst0Ty = MRI.getType(Dst0Reg);
2114   // G_ZEXT on a vector applies to each lane, so it will
2115 // affect all destinations. Therefore we won't be able
2116 // to simplify the unmerge to just the first definition.
2117 if (Dst0Ty.isVector())
2118 return false;
2119 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2120 LLT SrcTy = MRI.getType(SrcReg);
2121 if (SrcTy.isVector())
2122 return false;
2123
2124 Register ZExtSrcReg;
2125 if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
2126 return false;
2127
2128 // Finally we can replace the first definition with
2129 // a zext of the source if the definition is big enough to hold
2130 // all of ZExtSrc bits.
2131 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2132 return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
2133 }
2134
2135 bool CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) {
2136 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2137 "Expected an unmerge");
2138
2139 Register Dst0Reg = MI.getOperand(0).getReg();
2140
2141 MachineInstr *ZExtInstr =
2142 MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
2143 assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
2144 "Expecting a G_ZEXT");
2145
2146 Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
2147 LLT Dst0Ty = MRI.getType(Dst0Reg);
2148 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2149
2150 Builder.setInstrAndDebugLoc(MI);
2151
2152 if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
2153 Builder.buildZExt(Dst0Reg, ZExtSrcReg);
2154 } else {
2155 assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
2156 "ZExt src doesn't fit in destination");
2157 replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
2158 }
2159
2160 Register ZeroReg;
2161 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2162 if (!ZeroReg)
2163 ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
2164 replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
2165 }
2166 MI.eraseFromParent();
2167 return true;
2168 }
2169
2170 bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
2171 unsigned TargetShiftSize,
2172 unsigned &ShiftVal) {
2173 assert((MI.getOpcode() == TargetOpcode::G_SHL ||
2174 MI.getOpcode() == TargetOpcode::G_LSHR ||
2175 MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
2176
2177 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2178   if (Ty.isVector()) // TODO: Handle vector types.
2179 return false;
2180
2181 // Don't narrow further than the requested size.
2182 unsigned Size = Ty.getSizeInBits();
2183 if (Size <= TargetShiftSize)
2184 return false;
2185
2186 auto MaybeImmVal =
2187 getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2188 if (!MaybeImmVal)
2189 return false;
2190
2191 ShiftVal = MaybeImmVal->Value.getSExtValue();
2192 return ShiftVal >= Size / 2 && ShiftVal < Size;
2193 }
2194
2195 bool CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
2196 const unsigned &ShiftVal) {
2197 Register DstReg = MI.getOperand(0).getReg();
2198 Register SrcReg = MI.getOperand(1).getReg();
2199 LLT Ty = MRI.getType(SrcReg);
2200 unsigned Size = Ty.getSizeInBits();
2201 unsigned HalfSize = Size / 2;
2202 assert(ShiftVal >= HalfSize);
2203
2204 LLT HalfTy = LLT::scalar(HalfSize);
2205
2206 Builder.setInstr(MI);
2207 auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
2208 unsigned NarrowShiftAmt = ShiftVal - HalfSize;
2209
2210 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2211 Register Narrowed = Unmerge.getReg(1);
2212
2213 // dst = G_LSHR s64:x, C for C >= 32
2214 // =>
2215 // lo, hi = G_UNMERGE_VALUES x
2216 // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
2217
2218 if (NarrowShiftAmt != 0) {
2219 Narrowed = Builder.buildLShr(HalfTy, Narrowed,
2220 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2221 }
2222
2223 auto Zero = Builder.buildConstant(HalfTy, 0);
2224 Builder.buildMerge(DstReg, { Narrowed, Zero });
2225 } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
2226 Register Narrowed = Unmerge.getReg(0);
2227 // dst = G_SHL s64:x, C for C >= 32
2228 // =>
2229 // lo, hi = G_UNMERGE_VALUES x
2230 // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
2231 if (NarrowShiftAmt != 0) {
2232 Narrowed = Builder.buildShl(HalfTy, Narrowed,
2233 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2234 }
2235
2236 auto Zero = Builder.buildConstant(HalfTy, 0);
2237 Builder.buildMerge(DstReg, { Zero, Narrowed });
2238 } else {
2239 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
2240 auto Hi = Builder.buildAShr(
2241 HalfTy, Unmerge.getReg(1),
2242 Builder.buildConstant(HalfTy, HalfSize - 1));
2243
2244 if (ShiftVal == HalfSize) {
2245 // (G_ASHR i64:x, 32) ->
2246 // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
2247 Builder.buildMerge(DstReg, { Unmerge.getReg(1), Hi });
2248 } else if (ShiftVal == Size - 1) {
2249 // Don't need a second shift.
2250 // (G_ASHR i64:x, 63) ->
2251 // %narrowed = (G_ASHR hi_32(x), 31)
2252 // G_MERGE_VALUES %narrowed, %narrowed
2253 Builder.buildMerge(DstReg, { Hi, Hi });
2254 } else {
2255 auto Lo = Builder.buildAShr(
2256 HalfTy, Unmerge.getReg(1),
2257 Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
2258
2259 // (G_ASHR i64:x, C) ->, for C >= 32
2260 // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
2261 Builder.buildMerge(DstReg, { Lo, Hi });
2262 }
2263 }
2264
2265 MI.eraseFromParent();
2266 return true;
2267 }
2268
2269 bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
2270 unsigned TargetShiftAmount) {
2271 unsigned ShiftAmt;
2272 if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
2273 applyCombineShiftToUnmerge(MI, ShiftAmt);
2274 return true;
2275 }
2276
2277 return false;
2278 }
2279
2280 bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
2281 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
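  // Fold G_INTTOPTR (G_PTRTOINT %p) to a copy of %p when the pointer type
  // round-trips, e.g. (illustrative):
  //   %i:_(s64) = G_PTRTOINT %p:_(p0)
  //   %q:_(p0)  = G_INTTOPTR %i
  // -->
  //   %q:_(p0)  = COPY %p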
2282 Register DstReg = MI.getOperand(0).getReg();
2283 LLT DstTy = MRI.getType(DstReg);
2284 Register SrcReg = MI.getOperand(1).getReg();
2285 return mi_match(SrcReg, MRI,
2286 m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
2287 }
2288
2289 bool CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
2290 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2291 Register DstReg = MI.getOperand(0).getReg();
2292 Builder.setInstr(MI);
2293 Builder.buildCopy(DstReg, Reg);
2294 MI.eraseFromParent();
2295 return true;
2296 }
2297
2298 bool CombinerHelper::matchCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
2299 assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
2300 Register SrcReg = MI.getOperand(1).getReg();
2301 return mi_match(SrcReg, MRI, m_GIntToPtr(m_Reg(Reg)));
2302 }
2303
2304 bool CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
2305 assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
2306 Register DstReg = MI.getOperand(0).getReg();
2307 Builder.setInstr(MI);
2308 Builder.buildZExtOrTrunc(DstReg, Reg);
2309 MI.eraseFromParent();
2310 return true;
2311 }
2312
2313 bool CombinerHelper::matchCombineAddP2IToPtrAdd(
2314 MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2315 assert(MI.getOpcode() == TargetOpcode::G_ADD);
2316 Register LHS = MI.getOperand(1).getReg();
2317 Register RHS = MI.getOperand(2).getReg();
2318 LLT IntTy = MRI.getType(LHS);
2319
2320 // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
2321 // instruction.
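  // E.g. (illustrative):
  //   %i:_(s64) = G_PTRTOINT %p:_(p0)
  //   %a:_(s64) = G_ADD %x, %i
  // becomes G_PTRTOINT (G_PTR_ADD %p, %x); PtrReg.second records whether the
  // operands had to be commuted to put the pointer on the LHS.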
2322 PtrReg.second = false;
2323 for (Register SrcReg : {LHS, RHS}) {
2324 if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
2325 // Don't handle cases where the integer is implicitly converted to the
2326 // pointer width.
2327 LLT PtrTy = MRI.getType(PtrReg.first);
2328 if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
2329 return true;
2330 }
2331
2332 PtrReg.second = true;
2333 }
2334
2335 return false;
2336 }
2337
2338 bool CombinerHelper::applyCombineAddP2IToPtrAdd(
2339 MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2340 Register Dst = MI.getOperand(0).getReg();
2341 Register LHS = MI.getOperand(1).getReg();
2342 Register RHS = MI.getOperand(2).getReg();
2343
2344 const bool DoCommute = PtrReg.second;
2345 if (DoCommute)
2346 std::swap(LHS, RHS);
2347 LHS = PtrReg.first;
2348
2349 LLT PtrTy = MRI.getType(LHS);
2350
2351 Builder.setInstrAndDebugLoc(MI);
2352 auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
2353 Builder.buildPtrToInt(Dst, PtrAdd);
2354 MI.eraseFromParent();
2355 return true;
2356 }
2357
2358 bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
2359 int64_t &NewCst) {
2360 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD");
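  // Fold a G_PTR_ADD whose base is a G_INTTOPTR of a constant, e.g.
  // (illustrative):
  //   %p:_(p0) = G_INTTOPTR %c1:_(s64)
  //   %q:_(p0) = G_PTR_ADD %p, %c2:_(s64)
  // where %c1 and %c2 are constants, into a single constant %c1 + %c2.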
2361 Register LHS = MI.getOperand(1).getReg();
2362 Register RHS = MI.getOperand(2).getReg();
2363 MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
2364
2365 if (auto RHSCst = getConstantVRegSExtVal(RHS, MRI)) {
2366 int64_t Cst;
2367 if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
2368 NewCst = Cst + *RHSCst;
2369 return true;
2370 }
2371 }
2372
2373 return false;
2374 }
2375
2376 bool CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
2377 int64_t &NewCst) {
2378 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD");
2379 Register Dst = MI.getOperand(0).getReg();
2380
2381 Builder.setInstrAndDebugLoc(MI);
2382 Builder.buildConstant(Dst, NewCst);
2383 MI.eraseFromParent();
2384 return true;
2385 }
2386
2387 bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
2388 assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
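  // Fold G_ANYEXT (G_TRUNC %x) when %x already has the destination type, e.g.
  // (illustrative):
  //   %t:_(s32) = G_TRUNC %x:_(s64)
  //   %a:_(s64) = G_ANYEXT %t
  // allows %a to be replaced directly with %x.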
2389 Register DstReg = MI.getOperand(0).getReg();
2390 Register SrcReg = MI.getOperand(1).getReg();
2391 LLT DstTy = MRI.getType(DstReg);
2392 return mi_match(SrcReg, MRI,
2393 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
2394 }
2395
2396 bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI, Register &Reg) {
2397 assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
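  // Fold G_ZEXT (G_TRUNC %x) back to %x when the truncated-away high bits of
  // %x are already known to be zero, e.g. (illustrative):
  //   %t:_(s8)  = G_TRUNC %x:_(s32)
  //   %z:_(s32) = G_ZEXT %t
  // allows %z to be replaced with %x if the top 24 bits of %x are known zero.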
2398 Register DstReg = MI.getOperand(0).getReg();
2399 Register SrcReg = MI.getOperand(1).getReg();
2400 LLT DstTy = MRI.getType(DstReg);
2401 if (mi_match(SrcReg, MRI,
2402 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))))) {
2403 unsigned DstSize = DstTy.getScalarSizeInBits();
2404 unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
2405 return KB->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
2406 }
2407 return false;
2408 }
2409
2410 bool CombinerHelper::matchCombineExtOfExt(
2411 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
2412 assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2413 MI.getOpcode() == TargetOpcode::G_SEXT ||
2414 MI.getOpcode() == TargetOpcode::G_ZEXT) &&
2415 "Expected a G_[ASZ]EXT");
2416 Register SrcReg = MI.getOperand(1).getReg();
2417 MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2418 // Match exts with the same opcode, anyext([sz]ext) and sext(zext).
2419 unsigned Opc = MI.getOpcode();
2420 unsigned SrcOpc = SrcMI->getOpcode();
2421 if (Opc == SrcOpc ||
2422 (Opc == TargetOpcode::G_ANYEXT &&
2423 (SrcOpc == TargetOpcode::G_SEXT || SrcOpc == TargetOpcode::G_ZEXT)) ||
2424 (Opc == TargetOpcode::G_SEXT && SrcOpc == TargetOpcode::G_ZEXT)) {
2425 MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc);
2426 return true;
2427 }
2428 return false;
2429 }
2430
2431 bool CombinerHelper::applyCombineExtOfExt(
2432 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
2433 assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2434 MI.getOpcode() == TargetOpcode::G_SEXT ||
2435 MI.getOpcode() == TargetOpcode::G_ZEXT) &&
2436 "Expected a G_[ASZ]EXT");
2437
2438 Register Reg = std::get<0>(MatchInfo);
2439 unsigned SrcExtOp = std::get<1>(MatchInfo);
2440
2441 // Combine exts with the same opcode.
2442 if (MI.getOpcode() == SrcExtOp) {
2443 Observer.changingInstr(MI);
2444 MI.getOperand(1).setReg(Reg);
2445 Observer.changedInstr(MI);
2446 return true;
2447 }
2448
2449 // Combine:
2450 // - anyext([sz]ext x) to [sz]ext x
2451 // - sext(zext x) to zext x
2452 if (MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2453 (MI.getOpcode() == TargetOpcode::G_SEXT &&
2454 SrcExtOp == TargetOpcode::G_ZEXT)) {
2455 Register DstReg = MI.getOperand(0).getReg();
2456 Builder.setInstrAndDebugLoc(MI);
2457 Builder.buildInstr(SrcExtOp, {DstReg}, {Reg});
2458 MI.eraseFromParent();
2459 return true;
2460 }
2461
2462 return false;
2463 }
2464
2465 bool CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) {
2466 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2467 Register DstReg = MI.getOperand(0).getReg();
2468 Register SrcReg = MI.getOperand(1).getReg();
2469 LLT DstTy = MRI.getType(DstReg);
2470
2471 Builder.setInstrAndDebugLoc(MI);
2472 Builder.buildSub(DstReg, Builder.buildConstant(DstTy, 0), SrcReg,
2473 MI.getFlags());
2474 MI.eraseFromParent();
2475 return true;
2476 }
2477
2478 bool CombinerHelper::matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg) {
2479 assert(MI.getOpcode() == TargetOpcode::G_FNEG && "Expected a G_FNEG");
2480 Register SrcReg = MI.getOperand(1).getReg();
2481 return mi_match(SrcReg, MRI, m_GFNeg(m_Reg(Reg)));
2482 }
2483
2484 bool CombinerHelper::matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) {
2485 assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS");
2486 Src = MI.getOperand(1).getReg();
2487 Register AbsSrc;
2488 return mi_match(Src, MRI, m_GFabs(m_Reg(AbsSrc)));
2489 }
2490
2491 bool CombinerHelper::matchCombineTruncOfExt(
2492 MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
2493 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
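  // Fold trunc ([asz]ext %x) into a direct ext, copy, or trunc of %x depending
  // on the relative sizes, e.g. (illustrative):
  //   %e:_(s64) = G_ZEXT %x:_(s16)
  //   %t:_(s32) = G_TRUNC %e
  // -->
  //   %t:_(s32) = G_ZEXT %x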
2494 Register SrcReg = MI.getOperand(1).getReg();
2495 MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2496 unsigned SrcOpc = SrcMI->getOpcode();
2497 if (SrcOpc == TargetOpcode::G_ANYEXT || SrcOpc == TargetOpcode::G_SEXT ||
2498 SrcOpc == TargetOpcode::G_ZEXT) {
2499 MatchInfo = std::make_pair(SrcMI->getOperand(1).getReg(), SrcOpc);
2500 return true;
2501 }
2502 return false;
2503 }
2504
2505 bool CombinerHelper::applyCombineTruncOfExt(
2506 MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
2507 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2508 Register SrcReg = MatchInfo.first;
2509 unsigned SrcExtOp = MatchInfo.second;
2510 Register DstReg = MI.getOperand(0).getReg();
2511 LLT SrcTy = MRI.getType(SrcReg);
2512 LLT DstTy = MRI.getType(DstReg);
2513 if (SrcTy == DstTy) {
2514 MI.eraseFromParent();
2515 replaceRegWith(MRI, DstReg, SrcReg);
2516 return true;
2517 }
2518 Builder.setInstrAndDebugLoc(MI);
2519 if (SrcTy.getSizeInBits() < DstTy.getSizeInBits())
2520 Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg});
2521 else
2522 Builder.buildTrunc(DstReg, SrcReg);
2523 MI.eraseFromParent();
2524 return true;
2525 }
2526
2527 bool CombinerHelper::matchCombineTruncOfShl(
2528 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
2529 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
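  // Fold trunc (shl %x, %amt) to shl (trunc %x), %amt when the shift amount
  // is known to fit the narrow type, e.g. (illustrative):
  //   %s:_(s64) = G_SHL %x:_(s64), %amt
  //   %t:_(s32) = G_TRUNC %s
  // -->
  //   %t:_(s32) = G_SHL (G_TRUNC %x), %amt   (when %amt is known to be < 32)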
2530 Register DstReg = MI.getOperand(0).getReg();
2531 Register SrcReg = MI.getOperand(1).getReg();
2532 LLT DstTy = MRI.getType(DstReg);
2533 Register ShiftSrc;
2534 Register ShiftAmt;
2535
2536 if (MRI.hasOneNonDBGUse(SrcReg) &&
2537 mi_match(SrcReg, MRI, m_GShl(m_Reg(ShiftSrc), m_Reg(ShiftAmt))) &&
2538 isLegalOrBeforeLegalizer(
2539 {TargetOpcode::G_SHL,
2540 {DstTy, getTargetLowering().getPreferredShiftAmountTy(DstTy)}})) {
2541 KnownBits Known = KB->getKnownBits(ShiftAmt);
2542 unsigned Size = DstTy.getSizeInBits();
2543 if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
2544 MatchInfo = std::make_pair(ShiftSrc, ShiftAmt);
2545 return true;
2546 }
2547 }
2548 return false;
2549 }
2550
2551 bool CombinerHelper::applyCombineTruncOfShl(
2552 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
2553 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2554 Register DstReg = MI.getOperand(0).getReg();
2555 Register SrcReg = MI.getOperand(1).getReg();
2556 LLT DstTy = MRI.getType(DstReg);
2557 MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2558
2559 Register ShiftSrc = MatchInfo.first;
2560 Register ShiftAmt = MatchInfo.second;
2561 Builder.setInstrAndDebugLoc(MI);
2562 auto TruncShiftSrc = Builder.buildTrunc(DstTy, ShiftSrc);
2563 Builder.buildShl(DstReg, TruncShiftSrc, ShiftAmt, SrcMI->getFlags());
2564 MI.eraseFromParent();
2565 return true;
2566 }
2567
2568 bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
2569 return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2570 return MO.isReg() &&
2571 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2572 });
2573 }
2574
2575 bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) {
2576 return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2577 return !MO.isReg() ||
2578 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2579 });
2580 }
2581
2582 bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) {
2583 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
2584 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
2585 return all_of(Mask, [](int Elt) { return Elt < 0; });
2586 }
2587
2588 bool CombinerHelper::matchUndefStore(MachineInstr &MI) {
2589 assert(MI.getOpcode() == TargetOpcode::G_STORE);
2590 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
2591 MRI);
2592 }
2593
2594 bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) {
2595 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2596 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
2597 MRI);
2598 }
2599
2600 bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
2601 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2602 if (auto MaybeCstCmp =
2603 getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI)) {
2604 OpIdx = MaybeCstCmp->Value.isNullValue() ? 3 : 2;
2605 return true;
2606 }
2607 return false;
2608 }
2609
2610 bool CombinerHelper::eraseInst(MachineInstr &MI) {
2611 MI.eraseFromParent();
2612 return true;
2613 }
2614
2615 bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
2616 const MachineOperand &MOP2) {
2617 if (!MOP1.isReg() || !MOP2.isReg())
2618 return false;
2619 MachineInstr *I1 = getDefIgnoringCopies(MOP1.getReg(), MRI);
2620 if (!I1)
2621 return false;
2622 MachineInstr *I2 = getDefIgnoringCopies(MOP2.getReg(), MRI);
2623 if (!I2)
2624 return false;
2625
2626 // Handle a case like this:
2627 //
2628 // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
2629 //
2630 // Even though %0 and %1 are produced by the same instruction they are not
2631 // the same values.
2632 if (I1 == I2)
2633 return MOP1.getReg() == MOP2.getReg();
2634
2635 // If we have an instruction which loads or stores, we can't guarantee that
2636 // it is identical.
2637 //
2638 // For example, we may have
2639 //
2640 // %x1 = G_LOAD %addr (load N from @somewhere)
2641 // ...
2642 // call @foo
2643 // ...
2644 // %x2 = G_LOAD %addr (load N from @somewhere)
2645 // ...
2646 // %or = G_OR %x1, %x2
2647 //
2648 // It's possible that @foo will modify whatever lives at the address we're
2649 // loading from. To be safe, let's just assume that all loads and stores
2650 // are different (unless we have something which is guaranteed to not
2651 // change.)
2652 if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad(nullptr))
2653 return false;
2654
2655 // Check for physical registers on the instructions first to avoid cases
2656 // like this:
2657 //
2658 // %a = COPY $physreg
2659 // ...
2660 // SOMETHING implicit-def $physreg
2661 // ...
2662 // %b = COPY $physreg
2663 //
2664 // These copies are not equivalent.
2665 if (any_of(I1->uses(), [](const MachineOperand &MO) {
2666 return MO.isReg() && MO.getReg().isPhysical();
2667 })) {
2668 // Check if we have a case like this:
2669 //
2670 // %a = COPY $physreg
2671 // %b = COPY %a
2672 //
2673 // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
2674 // From that, we know that they must have the same value, since they must
2675 // have come from the same COPY.
2676 return I1->isIdenticalTo(*I2);
2677 }
2678
2679 // We don't have any physical registers, so we don't necessarily need the
2680 // same vreg defs.
2681 //
2682 // On the off-chance that there's some target instruction feeding into the
2683 // instruction, let's use produceSameValue instead of isIdenticalTo.
2684 return Builder.getTII().produceSameValue(*I1, *I2, &MRI);
2685 }
2686
2687 bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
2688 if (!MOP.isReg())
2689 return false;
2690 // MIPatternMatch doesn't let us look through G_ZEXT etc.
2691 auto ValAndVReg = getConstantVRegValWithLookThrough(MOP.getReg(), MRI);
2692 return ValAndVReg && ValAndVReg->Value == C;
2693 }
2694
2695 bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
2696 unsigned OpIdx) {
2697 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2698 Register OldReg = MI.getOperand(0).getReg();
2699 Register Replacement = MI.getOperand(OpIdx).getReg();
2700 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2701 MI.eraseFromParent();
2702 replaceRegWith(MRI, OldReg, Replacement);
2703 return true;
2704 }
2705
2706 bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
2707 Register Replacement) {
2708 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2709 Register OldReg = MI.getOperand(0).getReg();
2710 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2711 MI.eraseFromParent();
2712 replaceRegWith(MRI, OldReg, Replacement);
2713 return true;
2714 }
2715
2716 bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
2717 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2718 // Match (cond ? x : x)
2719 return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
2720 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
2721 MRI);
2722 }
2723
2724 bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) {
2725 return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
2726 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
2727 MRI);
2728 }
2729
2730 bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) {
2731 return matchConstantOp(MI.getOperand(OpIdx), 0) &&
2732 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
2733 MRI);
2734 }
2735
2736 bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) {
2737 MachineOperand &MO = MI.getOperand(OpIdx);
2738 return MO.isReg() &&
2739 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2740 }
2741
2742 bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI,
2743 unsigned OpIdx) {
2744 MachineOperand &MO = MI.getOperand(OpIdx);
2745 return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB);
2746 }
2747
2748 bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
2749 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2750 Builder.setInstr(MI);
2751 Builder.buildFConstant(MI.getOperand(0), C);
2752 MI.eraseFromParent();
2753 return true;
2754 }
2755
2756 bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
2757 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2758 Builder.setInstr(MI);
2759 Builder.buildConstant(MI.getOperand(0), C);
2760 MI.eraseFromParent();
2761 return true;
2762 }
2763
2764 bool CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
2765 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2766 Builder.setInstr(MI);
2767 Builder.buildUndef(MI.getOperand(0));
2768 MI.eraseFromParent();
2769 return true;
2770 }
2771
2772 bool CombinerHelper::matchSimplifyAddToSub(
2773 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
2774 Register LHS = MI.getOperand(1).getReg();
2775 Register RHS = MI.getOperand(2).getReg();
2776 Register &NewLHS = std::get<0>(MatchInfo);
2777 Register &NewRHS = std::get<1>(MatchInfo);
2778
2779 // Helper lambda to check for opportunities for
2780 // ((0-A) + B) -> B - A
2781 // (A + (0-B)) -> A - B
2782 auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
2783 if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
2784 return false;
2785 NewLHS = MaybeNewLHS;
2786 return true;
2787 };
2788
2789 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
2790 }
2791
2792 bool CombinerHelper::matchCombineInsertVecElts(
2793 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
2794 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
2795 "Invalid opcode");
2796 Register DstReg = MI.getOperand(0).getReg();
2797 LLT DstTy = MRI.getType(DstReg);
2798 assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
2799 unsigned NumElts = DstTy.getNumElements();
2800 // If this MI is part of a sequence of insert_vec_elts, then
2801 // don't do the combine in the middle of the sequence.
2802 if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
2803 TargetOpcode::G_INSERT_VECTOR_ELT)
2804 return false;
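  // E.g. (illustrative): a chain such as
  //   %v1 = G_INSERT_VECTOR_ELT %undef, %a:_(s32), 0
  //   %v  = G_INSERT_VECTOR_ELT %v1, %b:_(s32), 1
  // rooted at %v collects {%a, %b} so it can be rebuilt as
  //   %v = G_BUILD_VECTOR %a, %b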
2805 MachineInstr *CurrInst = &MI;
2806 MachineInstr *TmpInst;
2807 int64_t IntImm;
2808 Register TmpReg;
2809 MatchInfo.resize(NumElts);
2810 while (mi_match(
2811 CurrInst->getOperand(0).getReg(), MRI,
2812 m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
2813 if (IntImm >= NumElts)
2814 return false;
2815 if (!MatchInfo[IntImm])
2816 MatchInfo[IntImm] = TmpReg;
2817 CurrInst = TmpInst;
2818 }
2819 // Variable index.
2820 if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
2821 return false;
2822 if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
2823 for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
2824 if (!MatchInfo[I - 1].isValid())
2825 MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
2826 }
2827 return true;
2828 }
2829 // If we didn't end in a G_IMPLICIT_DEF, bail out.
2830 return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF;
2831 }
2832
2833 bool CombinerHelper::applyCombineInsertVecElts(
2834 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
2835 Builder.setInstr(MI);
2836 Register UndefReg;
2837 auto GetUndef = [&]() {
2838 if (UndefReg)
2839 return UndefReg;
2840 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2841 UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
2842 return UndefReg;
2843 };
2844 for (unsigned I = 0; I < MatchInfo.size(); ++I) {
2845 if (!MatchInfo[I])
2846 MatchInfo[I] = GetUndef();
2847 }
2848 Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
2849 MI.eraseFromParent();
2850 return true;
2851 }
2852
2853 bool CombinerHelper::applySimplifyAddToSub(
2854 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
2855 Builder.setInstr(MI);
2856 Register SubLHS, SubRHS;
2857 std::tie(SubLHS, SubRHS) = MatchInfo;
2858 Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
2859 MI.eraseFromParent();
2860 return true;
2861 }
2862
2863 bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
2864 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
2865 // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
2866 //
2867 // Creates the new hand + logic instructions (but does not insert them).
2868 //
2869 // On success, MatchInfo is populated with the new instructions. These are
2870 // inserted in applyHoistLogicOpWithSameOpcodeHands.
2871 unsigned LogicOpcode = MI.getOpcode();
2872 assert(LogicOpcode == TargetOpcode::G_AND ||
2873 LogicOpcode == TargetOpcode::G_OR ||
2874 LogicOpcode == TargetOpcode::G_XOR);
2875 MachineIRBuilder MIB(MI);
2876 Register Dst = MI.getOperand(0).getReg();
2877 Register LHSReg = MI.getOperand(1).getReg();
2878 Register RHSReg = MI.getOperand(2).getReg();
2879
2880 // Don't recompute anything.
2881 if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
2882 return false;
2883
2884 // Make sure we have (hand x, ...), (hand y, ...)
2885 MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
2886 MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
2887 if (!LeftHandInst || !RightHandInst)
2888 return false;
2889 unsigned HandOpcode = LeftHandInst->getOpcode();
2890 if (HandOpcode != RightHandInst->getOpcode())
2891 return false;
2892 if (!LeftHandInst->getOperand(1).isReg() ||
2893 !RightHandInst->getOperand(1).isReg())
2894 return false;
2895
2896 // Make sure the types match up, and if we're doing this post-legalization,
2897 // we end up with legal types.
2898 Register X = LeftHandInst->getOperand(1).getReg();
2899 Register Y = RightHandInst->getOperand(1).getReg();
2900 LLT XTy = MRI.getType(X);
2901 LLT YTy = MRI.getType(Y);
2902 if (XTy != YTy)
2903 return false;
2904 if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
2905 return false;
2906
2907 // Optional extra source register.
2908 Register ExtraHandOpSrcReg;
2909 switch (HandOpcode) {
2910 default:
2911 return false;
2912 case TargetOpcode::G_ANYEXT:
2913 case TargetOpcode::G_SEXT:
2914 case TargetOpcode::G_ZEXT: {
2915 // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
2916 break;
2917 }
2918 case TargetOpcode::G_AND:
2919 case TargetOpcode::G_ASHR:
2920 case TargetOpcode::G_LSHR:
2921 case TargetOpcode::G_SHL: {
2922 // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
2923 MachineOperand &ZOp = LeftHandInst->getOperand(2);
2924 if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
2925 return false;
2926 ExtraHandOpSrcReg = ZOp.getReg();
2927 break;
2928 }
2929 }
2930
2931 // Record the steps to build the new instructions.
2932 //
2933 // Steps to build (logic x, y)
2934 auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
2935 OperandBuildSteps LogicBuildSteps = {
2936 [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
2937 [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
2938 [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
2939 InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
2940
2941 // Steps to build hand (logic x, y), ...z
2942 OperandBuildSteps HandBuildSteps = {
2943 [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
2944 [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
2945 if (ExtraHandOpSrcReg.isValid())
2946 HandBuildSteps.push_back(
2947 [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
2948 InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
2949
2950 MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
2951 return true;
2952 }
2953
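/// Build each instruction described by \p MatchInfo at the position of \p MI,
/// then erase \p MI. This is the generic "apply" used by combines (such as
/// matchHoistLogicOpWithSameOpcodeHands) that record instructions to build
/// instead of creating them while matching.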
2954 bool CombinerHelper::applyBuildInstructionSteps(
2955 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
2956 assert(MatchInfo.InstrsToBuild.size() &&
2957 "Expected at least one instr to build?");
2958 Builder.setInstr(MI);
2959 for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
2960 assert(InstrToBuild.Opcode && "Expected a valid opcode?");
2961 assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
2962 MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
2963 for (auto &OperandFn : InstrToBuild.OperandFns)
2964 OperandFn(Instr);
2965 }
2966 MI.eraseFromParent();
2967 return true;
2968 }
2969
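/// Match (ashr (shl x, c), c) and rewrite it as a sign extension of the low
/// bits that survive the shifts. E.g. for a 32-bit value:
///   %shl:_(s32) = G_SHL %x, 24
///   %res:_(s32) = G_ASHR %shl, 24
/// ==>
///   %res:_(s32) = G_SEXT_INREG %x, 8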
2970 bool CombinerHelper::matchAshrShlToSextInreg(
2971 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
2972 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
2973 int64_t ShlCst, AshrCst;
2974 Register Src;
2975 // FIXME: detect splat constant vectors.
2976 if (!mi_match(MI.getOperand(0).getReg(), MRI,
2977 m_GAShr(m_GShl(m_Reg(Src), m_ICst(ShlCst)), m_ICst(AshrCst))))
2978 return false;
2979 if (ShlCst != AshrCst)
2980 return false;
2981 if (!isLegalOrBeforeLegalizer(
2982 {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
2983 return false;
2984 MatchInfo = std::make_tuple(Src, ShlCst);
2985 return true;
2986 }
2987 bool CombinerHelper::applyAshShlToSextInreg(
2988 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
2989 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
2990 Register Src;
2991 int64_t ShiftAmt;
2992 std::tie(Src, ShiftAmt) = MatchInfo;
2993 unsigned Size = MRI.getType(Src).getScalarSizeInBits();
2994 Builder.setInstrAndDebugLoc(MI);
2995 Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
2996 MI.eraseFromParent();
2997 return true;
2998 }
2999
3000 bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
3001 Register &Replacement) {
3002 // Given
3003 //
3004 // %y:_(sN) = G_SOMETHING
3005 // %x:_(sN) = G_SOMETHING
3006 // %res:_(sN) = G_AND %x, %y
3007 //
3008 // Eliminate the G_AND when it is known that x & y == x or x & y == y.
3009 //
3010 // Patterns like this can appear as a result of legalization. E.g.
3011 //
3012 // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
3013 // %one:_(s32) = G_CONSTANT i32 1
3014 // %and:_(s32) = G_AND %cmp, %one
3015 //
3016 // In this case, G_ICMP only produces a single bit, so x & 1 == x.
3017 assert(MI.getOpcode() == TargetOpcode::G_AND);
3018 if (!KB)
3019 return false;
3020
3021 Register AndDst = MI.getOperand(0).getReg();
3022 LLT DstTy = MRI.getType(AndDst);
3023
3024 // FIXME: This should be removed once GISelKnownBits supports vectors.
3025 if (DstTy.isVector())
3026 return false;
3027
3028 Register LHS = MI.getOperand(1).getReg();
3029 Register RHS = MI.getOperand(2).getReg();
3030 KnownBits LHSBits = KB->getKnownBits(LHS);
3031 KnownBits RHSBits = KB->getKnownBits(RHS);
3032
3033 // Check that x & Mask == x.
3034 // x & 1 == x, always
3035 // x & 0 == x, only if x is also 0
3036 // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
3037 //
3038 // Check if we can replace AndDst with the LHS of the G_AND
3039 if (canReplaceReg(AndDst, LHS, MRI) &&
3040 (LHSBits.Zero | RHSBits.One).isAllOnesValue()) {
3041 Replacement = LHS;
3042 return true;
3043 }
3044
3045 // Check if we can replace AndDst with the RHS of the G_AND
3046 if (canReplaceReg(AndDst, RHS, MRI) &&
3047 (LHSBits.One | RHSBits.Zero).isAllOnesValue()) {
3048 Replacement = RHS;
3049 return true;
3050 }
3051
3052 return false;
3053 }
3054
3055 bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) {
3056 // Given
3057 //
3058 // %y:_(sN) = G_SOMETHING
3059 // %x:_(sN) = G_SOMETHING
3060 // %res:_(sN) = G_OR %x, %y
3061 //
3062 // Eliminate the G_OR when it is known that x | y == x or x | y == y.
3063 assert(MI.getOpcode() == TargetOpcode::G_OR);
3064 if (!KB)
3065 return false;
3066
3067 Register OrDst = MI.getOperand(0).getReg();
3068 LLT DstTy = MRI.getType(OrDst);
3069
3070 // FIXME: This should be removed once GISelKnownBits supports vectors.
3071 if (DstTy.isVector())
3072 return false;
3073
3074 Register LHS = MI.getOperand(1).getReg();
3075 Register RHS = MI.getOperand(2).getReg();
3076 KnownBits LHSBits = KB->getKnownBits(LHS);
3077 KnownBits RHSBits = KB->getKnownBits(RHS);
3078
3079 // Check that x | Mask == x.
3080 // x | 0 == x, always
3081 // x | 1 == x, only if x is also 1
3082 // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
3083 //
3084 // Check if we can replace OrDst with the LHS of the G_OR
3085 if (canReplaceReg(OrDst, LHS, MRI) &&
3086 (LHSBits.One | RHSBits.Zero).isAllOnesValue()) {
3087 Replacement = LHS;
3088 return true;
3089 }
3090
3091 // Check if we can replace OrDst with the RHS of the G_OR
3092 if (canReplaceReg(OrDst, RHS, MRI) &&
3093 (LHSBits.Zero | RHSBits.One).isAllOnesValue()) {
3094 Replacement = RHS;
3095 return true;
3096 }
3097
3098 return false;
3099 }
3100
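/// Match a G_SEXT_INREG whose source already has at least as many sign bits
/// as the extension would produce, making the instruction redundant.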
3101 bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) {
3102 // If the input is already sign extended, just drop the extension.
3103 Register Src = MI.getOperand(1).getReg();
3104 unsigned ExtBits = MI.getOperand(2).getImm();
3105 unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
3106 return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
3107 }
3108
3109 static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
3110 int64_t Cst, bool IsVector, bool IsFP) {
3111 // For i1, Cst will always be -1 regardless of boolean contents.
3112 return (ScalarSizeBits == 1 && Cst == -1) ||
3113 isConstTrueVal(TLI, Cst, IsVector, IsFP);
3114 }
3115
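/// Match (xor %tree, true) where %tree is a single-use tree of G_ICMP/G_FCMP
/// results combined with G_AND/G_OR. The xor is folded away by inverting each
/// comparison predicate and swapping G_AND/G_OR (De Morgan's laws).
/// RegsToNegate collects the registers whose defining instructions must be
/// rewritten by applyNotCmp.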
3116 bool CombinerHelper::matchNotCmp(MachineInstr &MI,
3117 SmallVectorImpl<Register> &RegsToNegate) {
3118 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3119 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3120 const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
3121 Register XorSrc;
3122 Register CstReg;
3123 // We match xor(src, true) here.
3124 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3125 m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
3126 return false;
3127
3128 if (!MRI.hasOneNonDBGUse(XorSrc))
3129 return false;
3130
3131 // Check that XorSrc is the root of a tree of comparisons combined with ANDs
3132 // and ORs. The suffix of RegsToNegate starting from index I is used as a
3133 // work list of tree nodes to visit.
3134 RegsToNegate.push_back(XorSrc);
3135 // Remember whether the comparisons are all integer or all floating point.
3136 bool IsInt = false;
3137 bool IsFP = false;
3138 for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
3139 Register Reg = RegsToNegate[I];
3140 if (!MRI.hasOneNonDBGUse(Reg))
3141 return false;
3142 MachineInstr *Def = MRI.getVRegDef(Reg);
3143 switch (Def->getOpcode()) {
3144 default:
3145 // Don't match if the tree contains anything other than ANDs, ORs and
3146 // comparisons.
3147 return false;
3148 case TargetOpcode::G_ICMP:
3149 if (IsFP)
3150 return false;
3151 IsInt = true;
3152 // When we apply the combine we will invert the predicate.
3153 break;
3154 case TargetOpcode::G_FCMP:
3155 if (IsInt)
3156 return false;
3157 IsFP = true;
3158 // When we apply the combine we will invert the predicate.
3159 break;
3160 case TargetOpcode::G_AND:
3161 case TargetOpcode::G_OR:
3162 // Implement De Morgan's laws:
3163 // ~(x & y) -> ~x | ~y
3164 // ~(x | y) -> ~x & ~y
3165 // When we apply the combine we will change the opcode and recursively
3166 // negate the operands.
3167 RegsToNegate.push_back(Def->getOperand(1).getReg());
3168 RegsToNegate.push_back(Def->getOperand(2).getReg());
3169 break;
3170 }
3171 }
3172
3173 // Now we know whether the comparisons are integer or floating point, check
3174 // the constant in the xor.
3175 int64_t Cst;
3176 if (Ty.isVector()) {
3177 MachineInstr *CstDef = MRI.getVRegDef(CstReg);
3178 auto MaybeCst = getBuildVectorConstantSplat(*CstDef, MRI);
3179 if (!MaybeCst)
3180 return false;
3181 if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
3182 return false;
3183 } else {
3184 if (!mi_match(CstReg, MRI, m_ICst(Cst)))
3185 return false;
3186 if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
3187 return false;
3188 }
3189
3190 return true;
3191 }
3192
3193 bool CombinerHelper::applyNotCmp(MachineInstr &MI,
3194 SmallVectorImpl<Register> &RegsToNegate) {
3195 for (Register Reg : RegsToNegate) {
3196 MachineInstr *Def = MRI.getVRegDef(Reg);
3197 Observer.changingInstr(*Def);
3198 // For each comparison, invert the opcode. For each AND and OR, change the
3199 // opcode.
3200 switch (Def->getOpcode()) {
3201 default:
3202 llvm_unreachable("Unexpected opcode");
3203 case TargetOpcode::G_ICMP:
3204 case TargetOpcode::G_FCMP: {
3205 MachineOperand &PredOp = Def->getOperand(1);
3206 CmpInst::Predicate NewP = CmpInst::getInversePredicate(
3207 (CmpInst::Predicate)PredOp.getPredicate());
3208 PredOp.setPredicate(NewP);
3209 break;
3210 }
3211 case TargetOpcode::G_AND:
3212 Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
3213 break;
3214 case TargetOpcode::G_OR:
3215 Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3216 break;
3217 }
3218 Observer.changedInstr(*Def);
3219 }
3220
3221 replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
3222 MI.eraseFromParent();
3223 return true;
3224 }
3225
3226 bool CombinerHelper::matchXorOfAndWithSameReg(
3227 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
3228 // Match (xor (and x, y), y) (or any of its commuted cases)
3229 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3230 Register &X = MatchInfo.first;
3231 Register &Y = MatchInfo.second;
3232 Register AndReg = MI.getOperand(1).getReg();
3233 Register SharedReg = MI.getOperand(2).getReg();
3234
3235 // Find a G_AND on either side of the G_XOR.
3236 // Look for one of
3237 //
3238 // (xor (and x, y), SharedReg)
3239 // (xor SharedReg, (and x, y))
3240 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
3241 std::swap(AndReg, SharedReg);
3242 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
3243 return false;
3244 }
3245
3246 // Only do this if we'll eliminate the G_AND.
3247 if (!MRI.hasOneNonDBGUse(AndReg))
3248 return false;
3249
3250 // We can combine if SharedReg is the same as either the LHS or RHS of the
3251 // G_AND.
3252 if (Y != SharedReg)
3253 std::swap(X, Y);
3254 return Y == SharedReg;
3255 }
3256
3257 bool CombinerHelper::applyXorOfAndWithSameReg(
3258 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
3259 // Fold (xor (and x, y), y) -> (and (not x), y)
3260 Builder.setInstrAndDebugLoc(MI);
3261 Register X, Y;
3262 std::tie(X, Y) = MatchInfo;
3263 auto Not = Builder.buildNot(MRI.getType(X), X);
3264 Observer.changingInstr(MI);
3265 MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3266 MI.getOperand(1).setReg(Not->getOperand(0).getReg());
3267 MI.getOperand(2).setReg(Y);
3268 Observer.changedInstr(MI);
3269 return true;
3270 }
3271
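/// Match a G_PTR_ADD whose base pointer is null (a zero constant or an
/// all-zero build vector), so the result is simply the offset cast to a
/// pointer. E.g.
///   %null:_(p0) = G_CONSTANT i64 0
///   %res:_(p0) = G_PTR_ADD %null, %offset
/// ==>
///   %res:_(p0) = G_INTTOPTR %offset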
3272 bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) {
3273 Register DstReg = MI.getOperand(0).getReg();
3274 LLT Ty = MRI.getType(DstReg);
3275 const DataLayout &DL = Builder.getMF().getDataLayout();
3276
3277 if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
3278 return false;
3279
3280 if (Ty.isPointer()) {
3281 auto ConstVal = getConstantVRegVal(MI.getOperand(1).getReg(), MRI);
3282 return ConstVal && *ConstVal == 0;
3283 }
3284
3285 assert(Ty.isVector() && "Expecting a vector type");
3286 const MachineInstr *VecMI = MRI.getVRegDef(MI.getOperand(1).getReg());
3287 return isBuildVectorAllZeros(*VecMI, MRI);
3288 }
3289
3290 bool CombinerHelper::applyPtrAddZero(MachineInstr &MI) {
3291 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD);
3292 Builder.setInstrAndDebugLoc(MI);
3293 Builder.buildIntToPtr(MI.getOperand(0), MI.getOperand(2));
3294 MI.eraseFromParent();
3295 return true;
3296 }
3297
3298 /// The second source operand is known to be a power of 2.
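/// Fold (urem x, pow2) -> (and x, pow2 - 1); e.g. (urem x, 8) -> (and x, 7).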
3299 bool CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) {
3300 Register DstReg = MI.getOperand(0).getReg();
3301 Register Src0 = MI.getOperand(1).getReg();
3302 Register Pow2Src1 = MI.getOperand(2).getReg();
3303 LLT Ty = MRI.getType(DstReg);
3304 Builder.setInstrAndDebugLoc(MI);
3305
3306 // Fold (urem x, pow2) -> (and x, pow2-1)
3307 auto NegOne = Builder.buildConstant(Ty, -1);
3308 auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
3309 Builder.buildAnd(DstReg, Src0, Add);
3310 MI.eraseFromParent();
3311 return true;
3312 }
3313
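/// Walk the tree of G_ORs rooted at \p Root and collect the leaf registers
/// feeding it. Each leaf is expected to be a (possibly shifted) narrow load;
/// that is verified later by findLoadOffsetsForLoadOrCombine. Returns None if
/// any interior value has more than one use or the number of leaves is not
/// even.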
3314 Optional<SmallVector<Register, 8>>
3315 CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
3316 assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
3317 // We want to detect if Root is part of a tree which represents a bunch
3318 // of loads being merged into a larger load. We'll try to recognize patterns
3319 // like, for example:
3320 //
3321 // Reg Reg
3322 // \ /
3323 // OR_1 Reg
3324 // \ /
3325 // OR_2
3326 // \ Reg
3327 // .. /
3328 // Root
3329 //
3330 // Reg Reg Reg Reg
3331 // \ / \ /
3332 // OR_1 OR_2
3333 // \ /
3334 // \ /
3335 // ...
3336 // Root
3337 //
3338 // Each "Reg" may have been produced by a load + some arithmetic. This
3339 // function will save each of them.
3340 SmallVector<Register, 8> RegsToVisit;
3341 SmallVector<const MachineInstr *, 7> Ors = {Root};
3342
3343 // In the "worst" case, we're dealing with a load for each byte. So, there
3344 // are at most #bytes - 1 ORs.
3345 const unsigned MaxIter =
3346 MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
3347 for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
3348 if (Ors.empty())
3349 break;
3350 const MachineInstr *Curr = Ors.pop_back_val();
3351 Register OrLHS = Curr->getOperand(1).getReg();
3352 Register OrRHS = Curr->getOperand(2).getReg();
3353
3354 // In the combine, we want to eliminate the entire tree.
3355 if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
3356 return None;
3357
3358 // If it's a G_OR, save it and continue to walk. If it's not, then it's
3359 // something that may be a load + arithmetic.
3360 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
3361 Ors.push_back(Or);
3362 else
3363 RegsToVisit.push_back(OrLHS);
3364 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
3365 Ors.push_back(Or);
3366 else
3367 RegsToVisit.push_back(OrRHS);
3368 }
3369
3370 // We're going to try and merge each register into a wider power-of-2 type,
3371 // so we ought to have an even number of registers.
3372 if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
3373 return None;
3374 return RegsToVisit;
3375 }
3376
3377 /// Helper function for findLoadOffsetsForLoadOrCombine.
3378 ///
3379 /// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
3380 /// and then moving that value into a specific byte offset.
3381 ///
3382 /// e.g. x[i] << 24
3383 ///
3384 /// \returns The load instruction and the byte offset it is moved into.
3385 static Optional<std::pair<MachineInstr *, int64_t>>
3386 matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
3387 const MachineRegisterInfo &MRI) {
3388 assert(MRI.hasOneNonDBGUse(Reg) &&
3389 "Expected Reg to only have one non-debug use?");
3390 Register MaybeLoad;
3391 int64_t Shift;
3392 if (!mi_match(Reg, MRI,
3393 m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
3394 Shift = 0;
3395 MaybeLoad = Reg;
3396 }
3397
3398 if (Shift % MemSizeInBits != 0)
3399 return None;
3400
3401 // TODO: Handle other types of loads.
3402 auto *Load = getOpcodeDef(TargetOpcode::G_ZEXTLOAD, MaybeLoad, MRI);
3403 if (!Load)
3404 return None;
3405
3406 const auto &MMO = **Load->memoperands_begin();
3407 if (!MMO.isUnordered() || MMO.getSizeInBits() != MemSizeInBits)
3408 return None;
3409
3410 return std::make_pair(Load, Shift / MemSizeInBits);
3411 }
3412
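/// For each register in \p RegsToVisit, find the narrow load that feeds it
/// and record in \p MemOffset2Idx which byte of the combined value that load
/// supplies. All loads must be in the same basic block, share a base pointer,
/// and have no load-fold barriers (stores, calls, etc.) between them. Returns
/// the load with the lowest memory offset together with that offset.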
3413 Optional<std::pair<MachineInstr *, int64_t>>
3414 CombinerHelper::findLoadOffsetsForLoadOrCombine(
3415 SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
3416 const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) {
3417
3418 // Each load found for the pattern. There should be one for each RegsToVisit.
3419 SmallSetVector<const MachineInstr *, 8> Loads;
3420
3421 // The lowest index used in any load. (The lowest "i" for each x[i].)
3422 int64_t LowestIdx = INT64_MAX;
3423
3424 // The load which uses the lowest index.
3425 MachineInstr *LowestIdxLoad = nullptr;
3426
3427 // Keeps track of the load indices we see. We shouldn't see any indices twice.
3428 SmallSet<int64_t, 8> SeenIdx;
3429
3430 // Ensure each load is in the same MBB.
3431 // TODO: Support multiple MachineBasicBlocks.
3432 MachineBasicBlock *MBB = nullptr;
3433 const MachineMemOperand *MMO = nullptr;
3434
3435 // Earliest instruction-order load in the pattern.
3436 MachineInstr *EarliestLoad = nullptr;
3437
3438 // Latest instruction-order load in the pattern.
3439 MachineInstr *LatestLoad = nullptr;
3440
3441 // Base pointer which every load should share.
3442 Register BasePtr;
3443
3444 // We want to find a load for each register. Each load should have some
3445 // appropriate bit twiddling arithmetic. During this loop, we will also keep
3446 // track of the load which uses the lowest index. Later, we will check if we
3447 // can use its pointer in the final, combined load.
3448 for (auto Reg : RegsToVisit) {
3449 // Find the load, and find the byte position that its value ends up at in
3450 // the (possibly shifted) combined value.
3451 auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
3452 if (!LoadAndPos)
3453 return None;
3454 MachineInstr *Load;
3455 int64_t DstPos;
3456 std::tie(Load, DstPos) = *LoadAndPos;
3457
3458 // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
3459 // it is difficult to check for stores/calls/etc between loads.
3460 MachineBasicBlock *LoadMBB = Load->getParent();
3461 if (!MBB)
3462 MBB = LoadMBB;
3463 if (LoadMBB != MBB)
3464 return None;
3465
3466 // Make sure that the MachineMemOperands of every seen load are compatible.
3467 const MachineMemOperand *LoadMMO = *Load->memoperands_begin();
3468 if (!MMO)
3469 MMO = LoadMMO;
3470 if (MMO->getAddrSpace() != LoadMMO->getAddrSpace())
3471 return None;
3472
3473 // Find out what the base pointer and index for the load is.
3474 Register LoadPtr;
3475 int64_t Idx;
3476 if (!mi_match(Load->getOperand(1).getReg(), MRI,
3477 m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
3478 LoadPtr = Load->getOperand(1).getReg();
3479 Idx = 0;
3480 }
3481
3482 // Don't combine things like a[i], a[i] -> a bigger load.
3483 if (!SeenIdx.insert(Idx).second)
3484 return None;
3485
3486 // Every load must share the same base pointer; don't combine things like:
3487 //
3488 // a[i], b[i + 1] -> a bigger load.
3489 if (!BasePtr.isValid())
3490 BasePtr = LoadPtr;
3491 if (BasePtr != LoadPtr)
3492 return None;
3493
3494 if (Idx < LowestIdx) {
3495 LowestIdx = Idx;
3496 LowestIdxLoad = Load;
3497 }
3498
3499 // Keep track of the byte offset that this load ends up at. If we have seen
3500 // the byte offset, then stop here. We do not want to combine:
3501 //
3502 // a[i] << 16, a[i + k] << 16 -> a bigger load.
3503 if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
3504 return None;
3505 Loads.insert(Load);
3506
3507 // Keep track of the position of the earliest/latest loads in the pattern.
3508 // We will check that there are no load fold barriers between them later
3509 // on.
3510 //
3511 // FIXME: Is there a better way to check for load fold barriers?
3512 if (!EarliestLoad || dominates(*Load, *EarliestLoad))
3513 EarliestLoad = Load;
3514 if (!LatestLoad || dominates(*LatestLoad, *Load))
3515 LatestLoad = Load;
3516 }
3517
3518 // We found a load for each register. Let's check if each load satisfies the
3519 // pattern.
3520 assert(Loads.size() == RegsToVisit.size() &&
3521 "Expected to find a load for each register?");
3522 assert(EarliestLoad != LatestLoad && EarliestLoad &&
3523 LatestLoad && "Expected at least two loads?");
3524
3525 // Check if there are any stores, calls, etc. between any of the loads. If
3526 // there are, then we can't safely perform the combine.
3527 //
3528 // MaxIter is chosen based on the (worst case) number of iterations it
3529 // typically takes to succeed in the LLVM test suite plus some padding.
3530 //
3531 // FIXME: Is there a better way to check for load fold barriers?
3532 const unsigned MaxIter = 20;
3533 unsigned Iter = 0;
3534 for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
3535 LatestLoad->getIterator())) {
3536 if (Loads.count(&MI))
3537 continue;
3538 if (MI.isLoadFoldBarrier())
3539 return None;
3540 if (Iter++ == MaxIter)
3541 return None;
3542 }
3543
3544 return std::make_pair(LowestIdxLoad, LowestIdx);
3545 }
3546
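/// Match a tree of G_ORs of shifted narrow loads that together reconstruct a
/// wider value, and replace it with a single wide load (plus a G_BSWAP when
/// the byte order is reversed relative to the target's endianness). The
/// rewrite itself is captured in \p MatchInfo as a function to be run by the
/// apply step (see applyBuildFn).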
3547 bool CombinerHelper::matchLoadOrCombine(
3548 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3549 assert(MI.getOpcode() == TargetOpcode::G_OR);
3550 MachineFunction &MF = *MI.getMF();
3551 // Assuming a little-endian target, transform:
3552 // s8 *a = ...
3553 // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
3554 // =>
3555 // s32 val = *((i32)a)
3556 //
3557 // s8 *a = ...
3558 // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
3559 // =>
3560 // s32 val = BSWAP(*((s32)a))
3561 Register Dst = MI.getOperand(0).getReg();
3562 LLT Ty = MRI.getType(Dst);
3563 if (Ty.isVector())
3564 return false;
3565
3566 // We need to combine at least two loads into this type. Since the smallest
3567 // possible load is into a byte, we need at least a 16-bit wide type.
3568 const unsigned WideMemSizeInBits = Ty.getSizeInBits();
3569 if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
3570 return false;
3571
3572 // Match a collection of non-OR instructions in the pattern.
3573 auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
3574 if (!RegsToVisit)
3575 return false;
3576
3577 // We have a collection of non-OR instructions. Figure out how wide each of
3578 // the small loads should be based on the number of potential loads we
3579 // found.
3580 const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
3581 if (NarrowMemSizeInBits % 8 != 0)
3582 return false;
3583
3584 // Check if each register feeding into each OR is a load from the same
3585 // base pointer + some arithmetic.
3586 //
3587 // e.g. a[0], a[1] << 8, a[2] << 16, etc.
3588 //
3589 // Also verify that each of these ends up putting a[i] into the same memory
3590 // offset as a load into a wide type would.
3591 SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx;
3592 MachineInstr *LowestIdxLoad;
3593 int64_t LowestIdx;
3594 auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
3595 MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
3596 if (!MaybeLoadInfo)
3597 return false;
3598 std::tie(LowestIdxLoad, LowestIdx) = *MaybeLoadInfo;
3599
3600 // We have a bunch of loads being OR'd together. Using the addresses + offsets
3601 // we found before, check if this corresponds to a big or little endian byte
3602 // pattern. If it does, then we can represent it using a load + possibly a
3603 // BSWAP.
3604 bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
3605 Optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
3606 if (!IsBigEndian.hasValue())
3607 return false;
3608 bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
3609 if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
3610 return false;
3611
3612 // Make sure that the load from the lowest index produces offset 0 in the
3613 // final value.
3614 //
3615 // This ensures that we won't combine something like this:
3616 //
3617 // load x[i] -> byte 2
3618 // load x[i+1] -> byte 0 ---> wide_load x[i]
3619 // load x[i+2] -> byte 1
3620 const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
3621 const unsigned ZeroByteOffset =
3622 *IsBigEndian
3623 ? bigEndianByteAt(NumLoadsInTy, 0)
3624 : littleEndianByteAt(NumLoadsInTy, 0);
3625 auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
3626 if (ZeroOffsetIdx == MemOffset2Idx.end() ||
3627 ZeroOffsetIdx->second != LowestIdx)
3628 return false;
3629
3630 // We will reuse the pointer from the load which ends up at byte offset 0. It
3631 // may not use index 0.
3632 Register Ptr = LowestIdxLoad->getOperand(1).getReg();
3633 const MachineMemOperand &MMO = **LowestIdxLoad->memoperands_begin();
3634 LegalityQuery::MemDesc MMDesc;
3635 MMDesc.SizeInBits = WideMemSizeInBits;
3636 MMDesc.AlignInBits = MMO.getAlign().value() * 8;
3637 MMDesc.Ordering = MMO.getOrdering();
3638 if (!isLegalOrBeforeLegalizer(
3639 {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
3640 return false;
3641 auto PtrInfo = MMO.getPointerInfo();
3642 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
3643
3644 // Load must be allowed and fast on the target.
3645 LLVMContext &C = MF.getFunction().getContext();
3646 auto &DL = MF.getDataLayout();
3647 bool Fast = false;
3648 if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
3649 !Fast)
3650 return false;
3651
3652 MatchInfo = [=](MachineIRBuilder &MIB) {
3653 Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
3654 MIB.buildLoad(LoadDst, Ptr, *NewMMO);
3655 if (NeedsBSwap)
3656 MIB.buildBSwap(Dst, LoadDst);
3657 };
3658 return true;
3659 }
3660
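/// Match a G_PHI whose only user is a G_SEXT/G_ZEXT/G_ANYEXT and whose
/// incoming values are defined by a small number of loads, truncs, extends or
/// constants. In that case the extend can be propagated into the incoming
/// blocks (see applyExtendThroughPhis), where it is likely to fold with the
/// incoming instructions.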
3661 bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI,
3662 MachineInstr *&ExtMI) {
3663 assert(MI.getOpcode() == TargetOpcode::G_PHI);
3664
3665 Register DstReg = MI.getOperand(0).getReg();
3666
3667 // TODO: Extending a vector may be expensive; don't do this until the
3668 // heuristics are better.
3669 if (MRI.getType(DstReg).isVector())
3670 return false;
3671
3672 // Try to match a phi, whose only use is an extend.
3673 if (!MRI.hasOneNonDBGUse(DstReg))
3674 return false;
3675 ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
3676 switch (ExtMI->getOpcode()) {
3677 case TargetOpcode::G_ANYEXT:
3678 return true; // G_ANYEXT is usually free.
3679 case TargetOpcode::G_ZEXT:
3680 case TargetOpcode::G_SEXT:
3681 break;
3682 default:
3683 return false;
3684 }
3685
3686 // If the target is likely to fold this extend away, don't propagate.
3687 if (Builder.getTII().isExtendLikelyToBeFolded(*ExtMI, MRI))
3688 return false;
3689
3690 // We don't want to propagate the extends unless there's a good chance that
3691 // they'll be optimized in some way.
3692 // Collect the unique incoming values.
3693 SmallPtrSet<MachineInstr *, 4> InSrcs;
3694 for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
3695 auto *DefMI = getDefIgnoringCopies(MI.getOperand(Idx).getReg(), MRI);
3696 switch (DefMI->getOpcode()) {
3697 case TargetOpcode::G_LOAD:
3698 case TargetOpcode::G_TRUNC:
3699 case TargetOpcode::G_SEXT:
3700 case TargetOpcode::G_ZEXT:
3701 case TargetOpcode::G_ANYEXT:
3702 case TargetOpcode::G_CONSTANT:
3703 InSrcs.insert(getDefIgnoringCopies(MI.getOperand(Idx).getReg(), MRI));
3704 // Don't try to propagate if there are too many places to create new
3705 // extends, chances are it'll increase code size.
3706 if (InSrcs.size() > 2)
3707 return false;
3708 break;
3709 default:
3710 return false;
3711 }
3712 }
3713 return true;
3714 }
3715
3716 bool CombinerHelper::applyExtendThroughPhis(MachineInstr &MI,
3717 MachineInstr *&ExtMI) {
3718 assert(MI.getOpcode() == TargetOpcode::G_PHI);
3719 Register DstReg = ExtMI->getOperand(0).getReg();
3720 LLT ExtTy = MRI.getType(DstReg);
3721
3722 // Propagate the extension into each incoming register's defining block.
3723 // Use a SetVector here because PHIs can have duplicate edges, and we want
3724 // deterministic iteration order.
3725 SmallSetVector<MachineInstr *, 8> SrcMIs;
3726 SmallDenseMap<MachineInstr *, MachineInstr *, 8> OldToNewSrcMap;
3727 for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); SrcIdx += 2) {
3728 auto *SrcMI = MRI.getVRegDef(MI.getOperand(SrcIdx).getReg());
3729 if (!SrcMIs.insert(SrcMI))
3730 continue;
3731
3732 // Build an extend after each src inst.
3733 auto *MBB = SrcMI->getParent();
3734 MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
3735 if (InsertPt != MBB->end() && InsertPt->isPHI())
3736 InsertPt = MBB->getFirstNonPHI();
3737
3738 Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
3739 Builder.setDebugLoc(MI.getDebugLoc());
3740 auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy,
3741 SrcMI->getOperand(0).getReg());
3742 OldToNewSrcMap[SrcMI] = NewExt;
3743 }
3744
3745 // Create a new phi with the extended inputs.
3746 Builder.setInstrAndDebugLoc(MI);
3747 auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
3748 NewPhi.addDef(DstReg);
3749 for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); ++SrcIdx) {
3750 auto &MO = MI.getOperand(SrcIdx);
3751 if (!MO.isReg()) {
3752 NewPhi.addMBB(MO.getMBB());
3753 continue;
3754 }
3755 auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
3756 NewPhi.addUse(NewSrc->getOperand(0).getReg());
3757 }
3758 Builder.insertInstr(NewPhi);
3759 ExtMI->eraseFromParent();
3760 return true;
3761 }
3762
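/// Match a G_EXTRACT_VECTOR_ELT with a constant in-range index whose source
/// is a G_BUILD_VECTOR (or G_BUILD_VECTOR_TRUNC), so the extract can be
/// replaced by the corresponding scalar source. E.g.
///   %vec:_(<2 x s32>) = G_BUILD_VECTOR %a(s32), %b(s32)
///   %res:_(s32) = G_EXTRACT_VECTOR_ELT %vec, 1
/// ==>
///   replace all uses of %res with %b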
3763 bool CombinerHelper::matchExtractVecEltBuildVec(MachineInstr &MI,
3764 Register &Reg) {
3765 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
3766 // If we have a constant index, look for a G_BUILD_VECTOR source
3767 // and find the source register that the index maps to.
3768 Register SrcVec = MI.getOperand(1).getReg();
3769 LLT SrcTy = MRI.getType(SrcVec);
3770 if (!isLegalOrBeforeLegalizer(
3771 {TargetOpcode::G_BUILD_VECTOR, {SrcTy, SrcTy.getElementType()}}))
3772 return false;
3773
3774 auto Cst = getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
3775 if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
3776 return false;
3777
3778 unsigned VecIdx = Cst->Value.getZExtValue();
3779 MachineInstr *BuildVecMI =
3780 getOpcodeDef(TargetOpcode::G_BUILD_VECTOR, SrcVec, MRI);
3781 if (!BuildVecMI) {
3782 BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR_TRUNC, SrcVec, MRI);
3783 if (!BuildVecMI)
3784 return false;
3785 LLT ScalarTy = MRI.getType(BuildVecMI->getOperand(1).getReg());
3786 if (!isLegalOrBeforeLegalizer(
3787 {TargetOpcode::G_BUILD_VECTOR_TRUNC, {SrcTy, ScalarTy}}))
3788 return false;
3789 }
3790
3791 EVT Ty(getMVTForLLT(SrcTy));
3792 if (!MRI.hasOneNonDBGUse(SrcVec) &&
3793 !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
3794 return false;
3795
3796 Reg = BuildVecMI->getOperand(VecIdx + 1).getReg();
3797 return true;
3798 }
3799
3800 void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI,
3801 Register &Reg) {
3802 // Check the type of the register, since it may have come from a
3803 // G_BUILD_VECTOR_TRUNC.
3804 LLT ScalarTy = MRI.getType(Reg);
3805 Register DstReg = MI.getOperand(0).getReg();
3806 LLT DstTy = MRI.getType(DstReg);
3807
3808 Builder.setInstrAndDebugLoc(MI);
3809 if (ScalarTy != DstTy) {
3810 assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
3811 Builder.buildTrunc(DstReg, Reg);
3812 MI.eraseFromParent();
3813 return;
3814 }
3815 replaceSingleDefInstWithReg(MI, Reg);
3816 }
3817
3818 bool CombinerHelper::matchExtractAllEltsFromBuildVector(
3819 MachineInstr &MI,
3820 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
3821 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
3822 // This combine tries to find build_vector's which have every source element
3823 // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
3824 // masked load scalarization are run late in the pipeline. There's already
3825 // a combine for a similar pattern starting from the extract, but that
3826 // doesn't attempt to do it if there are multiple uses of the build_vector,
3827 // which in this case is true. Starting the combine from the build_vector
3828 // feels more natural than trying to find sibling nodes of extracts.
3829 // E.g.
3830 // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
3831 // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
3832 // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
3833 // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
3834 // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
3835 // ==>
3836 // replace ext{1,2,3,4} with %s{1,2,3,4}
3837
3838 Register DstReg = MI.getOperand(0).getReg();
3839 LLT DstTy = MRI.getType(DstReg);
3840 unsigned NumElts = DstTy.getNumElements();
3841
3842 SmallBitVector ExtractedElts(NumElts);
3843 for (auto &II : make_range(MRI.use_instr_nodbg_begin(DstReg),
3844 MRI.use_instr_nodbg_end())) {
3845 if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
3846 return false;
3847 auto Cst = getConstantVRegVal(II.getOperand(2).getReg(), MRI);
3848 if (!Cst)
3849 return false;
3850 unsigned Idx = Cst.getValue().getZExtValue();
3851 if (Idx >= NumElts)
3852 return false; // Out of range.
3853 ExtractedElts.set(Idx);
3854 SrcDstPairs.emplace_back(
3855 std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
3856 }
3857 // Match if every element was extracted.
3858 return ExtractedElts.all();
3859 }
3860
3861 void CombinerHelper::applyExtractAllEltsFromBuildVector(
3862 MachineInstr &MI,
3863 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
3864 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
3865 for (auto &Pair : SrcDstPairs) {
3866 auto *ExtMI = Pair.second;
3867 replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
3868 ExtMI->eraseFromParent();
3869 }
3870 MI.eraseFromParent();
3871 }
3872
3873 bool CombinerHelper::applyBuildFn(
3874 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3875 Builder.setInstrAndDebugLoc(MI);
3876 MatchInfo(Builder);
3877 MI.eraseFromParent();
3878 return true;
3879 }
3880
3881 /// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
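/// E.g. G_FSHL %x, %x, %amt --> G_ROTL %x, %amt.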
3882 bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) {
3883 unsigned Opc = MI.getOpcode();
3884 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
3885 Register X = MI.getOperand(1).getReg();
3886 Register Y = MI.getOperand(2).getReg();
3887 if (X != Y)
3888 return false;
3889 unsigned RotateOpc =
3890 Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
3891 return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
3892 }
3893
3894 void CombinerHelper::applyFunnelShiftToRotate(MachineInstr &MI) {
3895 unsigned Opc = MI.getOpcode();
3896 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
3897 bool IsFSHL = Opc == TargetOpcode::G_FSHL;
3898 Observer.changingInstr(MI);
3899 MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
3900 : TargetOpcode::G_ROTR));
3901 MI.RemoveOperand(2);
3902 Observer.changedInstr(MI);
3903 }
3904
3905 // Fold (rot x, c) -> (rot x, c % BitSize)
3906 bool CombinerHelper::matchRotateOutOfRange(MachineInstr &MI) {
3907 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
3908 MI.getOpcode() == TargetOpcode::G_ROTR);
3909 unsigned Bitsize =
3910 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
3911 Register AmtReg = MI.getOperand(2).getReg();
3912 bool OutOfRange = false;
3913 auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
3914 if (auto *CI = dyn_cast<ConstantInt>(C))
3915 OutOfRange |= CI->getValue().uge(Bitsize);
3916 return true;
3917 };
3918 return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
3919 }
3920
3921 void CombinerHelper::applyRotateOutOfRange(MachineInstr &MI) {
3922 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
3923 MI.getOpcode() == TargetOpcode::G_ROTR);
3924 unsigned Bitsize =
3925 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
3926 Builder.setInstrAndDebugLoc(MI);
3927 Register Amt = MI.getOperand(2).getReg();
3928 LLT AmtTy = MRI.getType(Amt);
3929 auto Bits = Builder.buildConstant(AmtTy, Bitsize);
3930 Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
3931 Observer.changingInstr(MI);
3932 MI.getOperand(2).setReg(Amt);
3933 Observer.changedInstr(MI);
3934 }
3935
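/// Match a G_ICMP whose result is already determined by the known bits of its
/// operands, so it can be replaced with a constant true or false value. E.g.
///   %x:_(s32) = G_ZEXT %c(s8)
///   %cst:_(s32) = G_CONSTANT i32 256
///   %res:_(s1) = G_ICMP intpred(ult), %x(s32), %cst
/// is always true, since the zero-extended value is at most 255.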
3936 bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr &MI,
3937 int64_t &MatchInfo) {
3938 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
3939 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
3940 auto KnownLHS = KB->getKnownBits(MI.getOperand(2).getReg());
3941 auto KnownRHS = KB->getKnownBits(MI.getOperand(3).getReg());
3942 Optional<bool> KnownVal;
3943 switch (Pred) {
3944 default:
3945 llvm_unreachable("Unexpected G_ICMP predicate?");
3946 case CmpInst::ICMP_EQ:
3947 KnownVal = KnownBits::eq(KnownLHS, KnownRHS);
3948 break;
3949 case CmpInst::ICMP_NE:
3950 KnownVal = KnownBits::ne(KnownLHS, KnownRHS);
3951 break;
3952 case CmpInst::ICMP_SGE:
3953 KnownVal = KnownBits::sge(KnownLHS, KnownRHS);
3954 break;
3955 case CmpInst::ICMP_SGT:
3956 KnownVal = KnownBits::sgt(KnownLHS, KnownRHS);
3957 break;
3958 case CmpInst::ICMP_SLE:
3959 KnownVal = KnownBits::sle(KnownLHS, KnownRHS);
3960 break;
3961 case CmpInst::ICMP_SLT:
3962 KnownVal = KnownBits::slt(KnownLHS, KnownRHS);
3963 break;
3964 case CmpInst::ICMP_UGE:
3965 KnownVal = KnownBits::uge(KnownLHS, KnownRHS);
3966 break;
3967 case CmpInst::ICMP_UGT:
3968 KnownVal = KnownBits::ugt(KnownLHS, KnownRHS);
3969 break;
3970 case CmpInst::ICMP_ULE:
3971 KnownVal = KnownBits::ule(KnownLHS, KnownRHS);
3972 break;
3973 case CmpInst::ICMP_ULT:
3974 KnownVal = KnownBits::ult(KnownLHS, KnownRHS);
3975 break;
3976 }
3977 if (!KnownVal)
3978 return false;
3979 MatchInfo =
3980 *KnownVal
3981 ? getICmpTrueVal(getTargetLowering(),
3982 /*IsVector = */
3983 MRI.getType(MI.getOperand(0).getReg()).isVector(),
3984 /* IsFP = */ false)
3985 : 0;
3986 return true;
3987 }
3988
3989 bool CombinerHelper::tryCombine(MachineInstr &MI) {
3990 if (tryCombineCopy(MI))
3991 return true;
3992 if (tryCombineExtendingLoads(MI))
3993 return true;
3994 if (tryCombineIndexedLoadStore(MI))
3995 return true;
3996 return false;
3997 }
3998