1 //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the AArch64 target.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "AArch64MachineFunctionInfo.h"
14 #include "AArch64TargetMachine.h"
15 #include "MCTargetDesc/AArch64AddressingModes.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/CodeGen/ISDOpcodes.h"
18 #include "llvm/CodeGen/SelectionDAGISel.h"
19 #include "llvm/IR/Function.h" // To access function attributes.
20 #include "llvm/IR/GlobalValue.h"
21 #include "llvm/IR/Intrinsics.h"
22 #include "llvm/IR/IntrinsicsAArch64.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include "llvm/Support/KnownBits.h"
26 #include "llvm/Support/MathExtras.h"
27 #include "llvm/Support/raw_ostream.h"
28
29 using namespace llvm;
30
31 #define DEBUG_TYPE "aarch64-isel"
32 #define PASS_NAME "AArch64 Instruction Selection"
33
34 //===--------------------------------------------------------------------===//
35 /// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
36 /// instructions for SelectionDAG operations.
37 ///
38 namespace {
39
40 class AArch64DAGToDAGISel : public SelectionDAGISel {
41
42 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
43 /// make the right decision when generating code for different targets.
44 const AArch64Subtarget *Subtarget;
45
46 public:
47 static char ID;
48
49 AArch64DAGToDAGISel() = delete;
50
51   explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
52 CodeGenOpt::Level OptLevel)
53 : SelectionDAGISel(ID, tm, OptLevel), Subtarget(nullptr) {}
54
55   bool runOnMachineFunction(MachineFunction &MF) override {
56 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
57 return SelectionDAGISel::runOnMachineFunction(MF);
58 }
59
60 void Select(SDNode *Node) override;
61
62 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
63 /// inline asm expressions.
64 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
65 unsigned ConstraintID,
66 std::vector<SDValue> &OutOps) override;
67
68 template <signed Low, signed High, signed Scale>
69 bool SelectRDVLImm(SDValue N, SDValue &Imm);
70
71 bool tryMLAV64LaneV128(SDNode *N);
72 bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
73 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
74 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
75 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
76 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
77   bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
78 return SelectShiftedRegister(N, false, Reg, Shift);
79 }
80   bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
81 return SelectShiftedRegister(N, true, Reg, Shift);
82 }
83   bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
84 return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
85 }
86   bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
87 return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
88 }
89   bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
90 return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
91 }
92   bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
93 return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
94 }
95   bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
96 return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
97 }
98   bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
99 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
100 }
101   bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
102 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
103 }
104   bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
105 return SelectAddrModeIndexed(N, 1, Base, OffImm);
106 }
107   bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
108 return SelectAddrModeIndexed(N, 2, Base, OffImm);
109 }
110   bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
111 return SelectAddrModeIndexed(N, 4, Base, OffImm);
112 }
113   bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
114 return SelectAddrModeIndexed(N, 8, Base, OffImm);
115 }
116   bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
117 return SelectAddrModeIndexed(N, 16, Base, OffImm);
118 }
119   bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
120 return SelectAddrModeUnscaled(N, 1, Base, OffImm);
121 }
122   bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
123 return SelectAddrModeUnscaled(N, 2, Base, OffImm);
124 }
125   bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
126 return SelectAddrModeUnscaled(N, 4, Base, OffImm);
127 }
128   bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
129 return SelectAddrModeUnscaled(N, 8, Base, OffImm);
130 }
131   bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
132 return SelectAddrModeUnscaled(N, 16, Base, OffImm);
133 }
134 template <unsigned Size, unsigned Max>
135   bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
136 // Test if there is an appropriate addressing mode and check if the
137 // immediate fits.
138 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
139 if (Found) {
140 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
141 int64_t C = CI->getSExtValue();
142 if (C <= Max)
143 return true;
144 }
145 }
146
147 // Otherwise, base only, materialize address in register.
148 Base = N;
149 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
150 return true;
151 }
152
153 template<int Width>
154   bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
155 SDValue &SignExtend, SDValue &DoShift) {
156 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
157 }
158
159 template<int Width>
160   bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
161 SDValue &SignExtend, SDValue &DoShift) {
162 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
163 }
164
165   bool SelectExtractHigh(SDValue N, SDValue &Res) {
166 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
167 N = N->getOperand(0);
168 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
169 !isa<ConstantSDNode>(N->getOperand(1)))
170 return false;
171 EVT VT = N->getValueType(0);
172 EVT LVT = N->getOperand(0).getValueType();
173 unsigned Index = N->getConstantOperandVal(1);
174 if (!VT.is64BitVector() || !LVT.is128BitVector() ||
175 Index != VT.getVectorNumElements())
176 return false;
177 Res = N->getOperand(0);
178 return true;
179 }
180
181   bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
182 if (N.getOpcode() != AArch64ISD::VLSHR)
183 return false;
184 SDValue Op = N->getOperand(0);
185 EVT VT = Op.getValueType();
186 unsigned ShtAmt = N->getConstantOperandVal(1);
187 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
188 return false;
189
190 APInt Imm;
191 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
192 Imm = APInt(VT.getScalarSizeInBits(),
193 Op.getOperand(1).getConstantOperandVal(0)
194 << Op.getOperand(1).getConstantOperandVal(1));
195 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
196 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
197 Imm = APInt(VT.getScalarSizeInBits(),
198 Op.getOperand(1).getConstantOperandVal(0));
199 else
200 return false;
201
202 if (Imm != 1ULL << (ShtAmt - 1))
203 return false;
204
205 Res1 = Op.getOperand(0);
206 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
207 return true;
208 }
209
210   bool SelectDupZeroOrUndef(SDValue N) {
211 switch(N->getOpcode()) {
212 case ISD::UNDEF:
213 return true;
214 case AArch64ISD::DUP:
215 case ISD::SPLAT_VECTOR: {
216 auto Opnd0 = N->getOperand(0);
217 if (isNullConstant(Opnd0))
218 return true;
219 if (isNullFPConstant(Opnd0))
220 return true;
221 break;
222 }
223 default:
224 break;
225 }
226
227 return false;
228 }
229
230   bool SelectDupZero(SDValue N) {
231 switch(N->getOpcode()) {
232 case AArch64ISD::DUP:
233 case ISD::SPLAT_VECTOR: {
234 auto Opnd0 = N->getOperand(0);
235 if (isNullConstant(Opnd0))
236 return true;
237 if (isNullFPConstant(Opnd0))
238 return true;
239 break;
240 }
241 }
242
243 return false;
244 }
245
246 template<MVT::SimpleValueType VT>
247   bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
248 return SelectSVEAddSubImm(N, VT, Imm, Shift);
249 }
250
251 template <MVT::SimpleValueType VT>
252   bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
253 return SelectSVECpyDupImm(N, VT, Imm, Shift);
254 }
255
256 template <MVT::SimpleValueType VT, bool Invert = false>
257   bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
258 return SelectSVELogicalImm(N, VT, Imm, Invert);
259 }
260
261 template <MVT::SimpleValueType VT>
262   bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
263 return SelectSVEArithImm(N, VT, Imm);
264 }
265
266 template <unsigned Low, unsigned High, bool AllowSaturation = false>
267   bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
268 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
269 }
270
271   bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
272 if (N->getOpcode() != ISD::SPLAT_VECTOR)
273 return false;
274
275 EVT EltVT = N->getValueType(0).getVectorElementType();
276 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
277 /* High */ EltVT.getFixedSizeInBits(),
278 /* AllowSaturation */ true, Imm);
279 }
280
281 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
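  // Editorial sketch (not part of the original comment): with Scale = 16 and
  // Shift = false, a constant operand of 32 passes the checks as MulImm = 2,
  // which could then be emitted as a "mul #2" multiplier on a CNTB-style
  // counting instruction, assuming Scale = 16 corresponds to byte granularity.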
282 template<signed Min, signed Max, signed Scale, bool Shift>
283   bool SelectCntImm(SDValue N, SDValue &Imm) {
284 if (!isa<ConstantSDNode>(N))
285 return false;
286
287 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
288 if (Shift)
289 MulImm = 1LL << MulImm;
290
291 if ((MulImm % std::abs(Scale)) != 0)
292 return false;
293
294 MulImm /= Scale;
295 if ((MulImm >= Min) && (MulImm <= Max)) {
296 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
297 return true;
298 }
299
300 return false;
301 }
302
303 template <signed Max, signed Scale>
304   bool SelectEXTImm(SDValue N, SDValue &Imm) {
305 if (!isa<ConstantSDNode>(N))
306 return false;
307
308 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
309
310 if (MulImm >= 0 && MulImm <= Max) {
311 MulImm *= Scale;
312 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
313 return true;
314 }
315
316 return false;
317 }
318
319   template <unsigned BaseReg> bool ImmToTile(SDValue N, SDValue &Imm) {
320 if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
321 uint64_t C = CI->getZExtValue();
322 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
323 return true;
324 }
325 return false;
326 }
327
328 /// Form sequences of consecutive 64/128-bit registers for use in NEON
329 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
330   /// between 1 and 4 elements. If it contains a single element, that element
331   /// is returned unchanged; otherwise a REG_SEQUENCE value is returned.
332 SDValue createDTuple(ArrayRef<SDValue> Vecs);
333 SDValue createQTuple(ArrayRef<SDValue> Vecs);
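  // Editorial sketch (not from the original source): for two D registers the
  // generic helper below builds roughly
  //   REG_SEQUENCE DDRegClassID, V0, dsub0, V1, dsub1
  // i.e. one untyped value covering the consecutive pair that ld2/tbl expect.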
334   // Form a sequence of SVE registers for instructions using a list of vectors,
335 // e.g. structured loads and stores (ldN, stN).
336 SDValue createZTuple(ArrayRef<SDValue> Vecs);
337
338 /// Generic helper for the createDTuple/createQTuple
339 /// functions. Those should almost always be called instead.
340 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
341 const unsigned SubRegs[]);
342
343 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
344
345 bool tryIndexedLoad(SDNode *N);
346
347 bool trySelectStackSlotTagP(SDNode *N);
348 void SelectTagP(SDNode *N);
349
350 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
351 unsigned SubRegIdx);
352 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
353 unsigned SubRegIdx);
354 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
355 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
356 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
357 unsigned Opc_rr, unsigned Opc_ri,
358 bool IsIntr = false);
359 void SelectWhilePair(SDNode *N, unsigned Opc);
360 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
361
362 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
363 /// SVE Reg+Imm addressing mode.
364 template <int64_t Min, int64_t Max>
365 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
366 SDValue &OffImm);
367 /// SVE Reg+Reg address mode.
368 template <unsigned Scale>
369   bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
370 return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
371 }
372
373 template <unsigned MaxIdx, unsigned Scale>
374   bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
375 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
376 }
377
378 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
379 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
380 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
381 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
382 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
383 unsigned Opc_rr, unsigned Opc_ri);
384 std::tuple<unsigned, SDValue, SDValue>
385 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
386 const SDValue &OldBase, const SDValue &OldOffset,
387 unsigned Scale);
388
389 bool tryBitfieldExtractOp(SDNode *N);
390 bool tryBitfieldExtractOpFromSExt(SDNode *N);
391 bool tryBitfieldInsertOp(SDNode *N);
392 bool tryBitfieldInsertInZeroOp(SDNode *N);
393 bool tryShiftAmountMod(SDNode *N);
394 bool tryHighFPExt(SDNode *N);
395
396 bool tryReadRegister(SDNode *N);
397 bool tryWriteRegister(SDNode *N);
398
399 // Include the pieces autogenerated from the target description.
400 #include "AArch64GenDAGISel.inc"
401
402 private:
403 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
404 SDValue &Shift);
405 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
406   bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
407 SDValue &OffImm) {
408 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
409 }
410 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
411 unsigned Size, SDValue &Base,
412 SDValue &OffImm);
413 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
414 SDValue &OffImm);
415 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
416 SDValue &OffImm);
417 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
418 SDValue &Offset, SDValue &SignExtend,
419 SDValue &DoShift);
420 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
421 SDValue &Offset, SDValue &SignExtend,
422 SDValue &DoShift);
423 bool isWorthFolding(SDValue V) const;
424 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
425 SDValue &Offset, SDValue &SignExtend);
426
427 template<unsigned RegWidth>
428   bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
429 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
430 }
431
432 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
433
434 bool SelectCMP_SWAP(SDNode *N);
435
436 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
437 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
438 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
439
440 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
441 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
442 bool AllowSaturation, SDValue &Imm);
443
444 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
445 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
446 SDValue &Offset);
447 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
448 SDValue &Offset, unsigned Scale = 1);
449
450 bool SelectAllActivePredicate(SDValue N);
451 };
452 } // end anonymous namespace
453
454 char AArch64DAGToDAGISel::ID = 0;
455
456 INITIALIZE_PASS(AArch64DAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)
457
458 /// isIntImmediate - This method tests to see if the node is a constant
459 /// operand. If so Imm will receive the 32-bit value.
460 static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
461 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
462 Imm = C->getZExtValue();
463 return true;
464 }
465 return false;
466 }
467
468 // isIntImmediate - This method tests to see if N is a constant operand.
469 // If so, Imm will receive the value.
470 static bool isIntImmediate(SDValue N, uint64_t &Imm) {
471 return isIntImmediate(N.getNode(), Imm);
472 }
473
474 // isOpcWithIntImmediate - This method tests to see if the node is a specific
475 // opcode and that it has an immediate integer right operand.
476 // If so, Imm will receive the 32-bit value.
477 static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
478 uint64_t &Imm) {
479 return N->getOpcode() == Opc &&
480 isIntImmediate(N->getOperand(1).getNode(), Imm);
481 }
482
483 // isIntImmediateEq - This method tests to see if N is a constant operand that
484 // is equivalent to 'ImmExpected'.
485 #ifndef NDEBUG
486 static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
487 uint64_t Imm;
488 if (!isIntImmediate(N.getNode(), Imm))
489 return false;
490 return Imm == ImmExpected;
491 }
492 #endif
493
494 bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
495 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
496 switch(ConstraintID) {
497 default:
498 llvm_unreachable("Unexpected asm memory constraint");
499 case InlineAsm::Constraint_m:
500 case InlineAsm::Constraint_o:
501 case InlineAsm::Constraint_Q:
502 // We need to make sure that this one operand does not end up in XZR, thus
503 // require the address to be in a PointerRegClass register.
504 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
505 const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
506 SDLoc dl(Op);
507 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
508 SDValue NewOp =
509 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
510 dl, Op.getValueType(),
511 Op, RC), 0);
512 OutOps.push_back(NewOp);
513 return false;
514 }
515 return true;
516 }
517
518 /// SelectArithImmed - Select an immediate value that can be represented as
519 /// a 12-bit value shifted left by either 0 or 12. If so, return true with
520 /// Val set to the 12-bit value and Shift set to the shifter operand.
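/// As an illustration (editorial note, not in the original comment): 0x123 is
/// selected with Shift = LSL #0, and 0x123000 with Val = 0x123 and
/// Shift = LSL #12, while 0x123001 is rejected because it fits neither form.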
521 bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
522 SDValue &Shift) {
523 // This function is called from the addsub_shifted_imm ComplexPattern,
524   // which lists [imm] as the list of opcodes it's interested in; however,
525 // we still need to check whether the operand is actually an immediate
526 // here because the ComplexPattern opcode list is only used in
527 // root-level opcode matching.
528 if (!isa<ConstantSDNode>(N.getNode()))
529 return false;
530
531 uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
532 unsigned ShiftAmt;
533
534 if (Immed >> 12 == 0) {
535 ShiftAmt = 0;
536 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
537 ShiftAmt = 12;
538 Immed = Immed >> 12;
539 } else
540 return false;
541
542 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
543 SDLoc dl(N);
544 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
545 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
546 return true;
547 }
548
549 /// SelectNegArithImmed - As above, but negates the value before trying to
550 /// select it.
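/// For example (a hedged sketch): an i32 operand of -16 negates to 16, which
/// is a legal arithmetic immediate, so the patterns built on this helper can
/// turn a SUBS with #-16 into an ADDS with #16; an operand of 0 is rejected so
/// that cmp/cmn #0 keep their distinct effect on the C flag.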
551 bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
552 SDValue &Shift) {
553 // This function is called from the addsub_shifted_imm ComplexPattern,
554   // which lists [imm] as the list of opcodes it's interested in; however,
555 // we still need to check whether the operand is actually an immediate
556 // here because the ComplexPattern opcode list is only used in
557 // root-level opcode matching.
558 if (!isa<ConstantSDNode>(N.getNode()))
559 return false;
560
561 // The immediate operand must be a 24-bit zero-extended immediate.
562 uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
563
564 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
565 // have the opposite effect on the C flag, so this pattern mustn't match under
566 // those circumstances.
567 if (Immed == 0)
568 return false;
569
570 if (N.getValueType() == MVT::i32)
571 Immed = ~((uint32_t)Immed) + 1;
572 else
573 Immed = ~Immed + 1ULL;
574 if (Immed & 0xFFFFFFFFFF000000ULL)
575 return false;
576
577 Immed &= 0xFFFFFFULL;
578 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
579 Shift);
580 }
581
582 /// getShiftTypeForNode - Translate a shift node to the corresponding
583 /// ShiftType value.
584 static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
585 switch (N.getOpcode()) {
586 default:
587 return AArch64_AM::InvalidShiftExtend;
588 case ISD::SHL:
589 return AArch64_AM::LSL;
590 case ISD::SRL:
591 return AArch64_AM::LSR;
592 case ISD::SRA:
593 return AArch64_AM::ASR;
594 case ISD::ROTR:
595 return AArch64_AM::ROR;
596 }
597 }
598
599 /// Determine whether it is worth it to fold SHL into the addressing
600 /// mode.
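/// For instance (illustrative only): (load (add x, (shl y, 3))) can become
/// "ldr xN, [x, y, lsl #3]" when the shifted value only feeds memory
/// operations; if the shift is also reused by arithmetic, folding it would
/// just duplicate the work.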
601 static bool isWorthFoldingSHL(SDValue V) {
602 assert(V.getOpcode() == ISD::SHL && "invalid opcode");
603   // It is worth folding a logical shift of up to three places.
604 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
605 if (!CSD)
606 return false;
607 unsigned ShiftVal = CSD->getZExtValue();
608 if (ShiftVal > 3)
609 return false;
610
611 // Check if this particular node is reused in any non-memory related
612 // operation. If yes, do not try to fold this node into the address
613 // computation, since the computation will be kept.
614 const SDNode *Node = V.getNode();
615 for (SDNode *UI : Node->uses())
616 if (!isa<MemSDNode>(*UI))
617 for (SDNode *UII : UI->uses())
618 if (!isa<MemSDNode>(*UII))
619 return false;
620 return true;
621 }
622
623 /// Determine whether it is worth it to fold V into an extended register.
624 bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
625 // Trivial if we are optimizing for code size or if there is only
626 // one use of the value.
627 if (CurDAG->shouldOptForSize() || V.hasOneUse())
628 return true;
629 // If a subtarget has a fastpath LSL we can fold a logical shift into
630 // the addressing mode and save a cycle.
631 if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL &&
632 isWorthFoldingSHL(V))
633 return true;
634 if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) {
635 const SDValue LHS = V.getOperand(0);
636 const SDValue RHS = V.getOperand(1);
637 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
638 return true;
639 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
640 return true;
641 }
642
643 // It hurts otherwise, since the value will be reused.
644 return false;
645 }
646
647 /// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
648 /// to select more shifted register
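/// Editorial illustration (not from the original source): for an i32 value,
/// (and (shl x, 2), 0xFFFFFF00) is re-expressed as (shl (srl x, 6), 8), so the
/// AND/SHL pair collapses into a single UBFM plus an "LSL #8" shifted-register
/// operand on the using instruction.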
649 bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
650 SDValue &Shift) {
651 EVT VT = N.getValueType();
652 if (VT != MVT::i32 && VT != MVT::i64)
653 return false;
654
655 if (N->getOpcode() != ISD::AND || !N->hasOneUse())
656 return false;
657 SDValue LHS = N.getOperand(0);
658 if (!LHS->hasOneUse())
659 return false;
660
661 unsigned LHSOpcode = LHS->getOpcode();
662 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
663 return false;
664
665 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
666 if (!ShiftAmtNode)
667 return false;
668
669 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
670 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
671 if (!RHSC)
672 return false;
673
674 APInt AndMask = RHSC->getAPIntValue();
675 unsigned LowZBits, MaskLen;
676 if (!AndMask.isShiftedMask(LowZBits, MaskLen))
677 return false;
678
679 unsigned BitWidth = N.getValueSizeInBits();
680 SDLoc DL(LHS);
681 uint64_t NewShiftC;
682 unsigned NewShiftOp;
683 if (LHSOpcode == ISD::SHL) {
684 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
685 // BitWidth != LowZBits + MaskLen doesn't match the pattern
686 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
687 return false;
688
689 NewShiftC = LowZBits - ShiftAmtC;
690 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
691 } else {
692 if (LowZBits == 0)
693 return false;
694
695 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
696 NewShiftC = LowZBits + ShiftAmtC;
697 if (NewShiftC >= BitWidth)
698 return false;
699
700     // SRA needs all high bits
701 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
702 return false;
703
704 // SRL high bits can be 0 or 1
705 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
706 return false;
707
708 if (LHSOpcode == ISD::SRL)
709 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
710 else
711 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
712 }
713
714 assert(NewShiftC < BitWidth && "Invalid shift amount");
715 SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
716 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
717 Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
718 NewShiftAmt, BitWidthMinus1),
719 0);
720 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
721 Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
722 return true;
723 }
724
725 /// SelectShiftedRegister - Select a "shifted register" operand. If the value
726 /// is not shifted, set the Shift operand to default of "LSL 0". The logical
727 /// instructions allow the shifted register to be rotated, but the arithmetic
728 /// instructions do not. The AllowROR parameter specifies whether ROR is
729 /// supported.
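/// For example (illustrative): (add x, (shl y, 3)) selects y as Reg with an
/// "LSL #3" Shift, so the user can become "add xN, xM, xK, lsl #3"; a rotate
/// is only accepted when AllowROR is set, i.e. for the logical instructions.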
730 bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
731 SDValue &Reg, SDValue &Shift) {
732 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
733 return true;
734
735 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
736 if (ShType == AArch64_AM::InvalidShiftExtend)
737 return false;
738 if (!AllowROR && ShType == AArch64_AM::ROR)
739 return false;
740
741 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
742 unsigned BitSize = N.getValueSizeInBits();
743 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
744 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
745
746 Reg = N.getOperand(0);
747 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
748 return isWorthFolding(N);
749 }
750
751 return false;
752 }
753
754 /// getExtendTypeForNode - Translate an extend node to the corresponding
755 /// ExtendType value.
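/// A few representative mappings (editorial note): (sign_extend_inreg x, i8)
/// maps to SXTB, a zero_extend from i32 to UXTW, and (and x, 0xffff) to UXTH;
/// when IsLoadStore is set only the 32-bit SXTW/UXTW forms are accepted.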
756 static AArch64_AM::ShiftExtendType
757 getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
758 if (N.getOpcode() == ISD::SIGN_EXTEND ||
759 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
760 EVT SrcVT;
761 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
762 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
763 else
764 SrcVT = N.getOperand(0).getValueType();
765
766 if (!IsLoadStore && SrcVT == MVT::i8)
767 return AArch64_AM::SXTB;
768 else if (!IsLoadStore && SrcVT == MVT::i16)
769 return AArch64_AM::SXTH;
770 else if (SrcVT == MVT::i32)
771 return AArch64_AM::SXTW;
772 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
773
774 return AArch64_AM::InvalidShiftExtend;
775 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
776 N.getOpcode() == ISD::ANY_EXTEND) {
777 EVT SrcVT = N.getOperand(0).getValueType();
778 if (!IsLoadStore && SrcVT == MVT::i8)
779 return AArch64_AM::UXTB;
780 else if (!IsLoadStore && SrcVT == MVT::i16)
781 return AArch64_AM::UXTH;
782 else if (SrcVT == MVT::i32)
783 return AArch64_AM::UXTW;
784 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
785
786 return AArch64_AM::InvalidShiftExtend;
787 } else if (N.getOpcode() == ISD::AND) {
788 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
789 if (!CSD)
790 return AArch64_AM::InvalidShiftExtend;
791 uint64_t AndMask = CSD->getZExtValue();
792
793 switch (AndMask) {
794 default:
795 return AArch64_AM::InvalidShiftExtend;
796 case 0xFF:
797 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
798 case 0xFFFF:
799 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
800 case 0xFFFFFFFF:
801 return AArch64_AM::UXTW;
802 }
803 }
804
805 return AArch64_AM::InvalidShiftExtend;
806 }
807
808 // Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
809 static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
810 if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
811 DL->getOpcode() != AArch64ISD::DUPLANE32)
812 return false;
813
814 SDValue SV = DL->getOperand(0);
815 if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
816 return false;
817
818 SDValue EV = SV.getOperand(1);
819 if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
820 return false;
821
822 ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
823 ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
824 LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
825 LaneOp = EV.getOperand(0);
826
827 return true;
828 }
829
830 // Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
831 // high lane extract.
832 static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
833 SDValue &LaneOp, int &LaneIdx) {
834
835 if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
836 std::swap(Op0, Op1);
837 if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
838 return false;
839 }
840 StdOp = Op1;
841 return true;
842 }
843
844 /// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
845 /// is a lane in the upper half of a 128-bit vector. Recognize and select this
846 /// so that we don't emit unnecessary lane extracts.
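/// For instance (a sketch): "mla v0.4s, v1.4s, v2.s[3]", where the lane
/// operand originally came from the high half of a 128-bit vector, is matched
/// directly as MLAv4i32_indexed instead of first extracting the high half.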
847 bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) {
848 SDLoc dl(N);
849 SDValue Op0 = N->getOperand(0);
850 SDValue Op1 = N->getOperand(1);
851 SDValue MLAOp1; // Will hold ordinary multiplicand for MLA.
852 SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA.
853 int LaneIdx = -1; // Will hold the lane index.
854
855 if (Op1.getOpcode() != ISD::MUL ||
856 !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
857 LaneIdx)) {
858 std::swap(Op0, Op1);
859 if (Op1.getOpcode() != ISD::MUL ||
860 !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
861 LaneIdx))
862 return false;
863 }
864
865 SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
866
867 SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
868
869 unsigned MLAOpc = ~0U;
870
871 switch (N->getSimpleValueType(0).SimpleTy) {
872 default:
873 llvm_unreachable("Unrecognized MLA.");
874 case MVT::v4i16:
875 MLAOpc = AArch64::MLAv4i16_indexed;
876 break;
877 case MVT::v8i16:
878 MLAOpc = AArch64::MLAv8i16_indexed;
879 break;
880 case MVT::v2i32:
881 MLAOpc = AArch64::MLAv2i32_indexed;
882 break;
883 case MVT::v4i32:
884 MLAOpc = AArch64::MLAv4i32_indexed;
885 break;
886 }
887
888 ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops));
889 return true;
890 }
891
892 bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) {
893 SDLoc dl(N);
894 SDValue SMULLOp0;
895 SDValue SMULLOp1;
896 int LaneIdx;
897
898 if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
899 LaneIdx))
900 return false;
901
902 SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
903
904 SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
905
906 unsigned SMULLOpc = ~0U;
907
908 if (IntNo == Intrinsic::aarch64_neon_smull) {
909 switch (N->getSimpleValueType(0).SimpleTy) {
910 default:
911 llvm_unreachable("Unrecognized SMULL.");
912 case MVT::v4i32:
913 SMULLOpc = AArch64::SMULLv4i16_indexed;
914 break;
915 case MVT::v2i64:
916 SMULLOpc = AArch64::SMULLv2i32_indexed;
917 break;
918 }
919 } else if (IntNo == Intrinsic::aarch64_neon_umull) {
920 switch (N->getSimpleValueType(0).SimpleTy) {
921 default:
922 llvm_unreachable("Unrecognized SMULL.");
923 case MVT::v4i32:
924 SMULLOpc = AArch64::UMULLv4i16_indexed;
925 break;
926 case MVT::v2i64:
927 SMULLOpc = AArch64::UMULLv2i32_indexed;
928 break;
929 }
930 } else
931 llvm_unreachable("Unrecognized intrinsic.");
932
933 ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops));
934 return true;
935 }
936
937 /// Instructions that accept extend modifiers like UXTW expect the register
938 /// being extended to be a GPR32, but the incoming DAG might be acting on a
939 /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
940 /// this is the case.
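/// In other words (editorial note): an i64 value reaching an extend-capable
/// operand is wrapped in an "EXTRACT_SUBREG ..., sub_32" so the instruction
/// sees a GPR32, e.g. the wN register that backs "add x0, x1, w2, uxtb".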
941 static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
942 if (N.getValueType() == MVT::i32)
943 return N;
944
945 SDLoc dl(N);
946 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
947 MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
948 dl, MVT::i32, N, SubReg);
949 return SDValue(Node, 0);
950 }
951
952 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
953 template<signed Low, signed High, signed Scale>
954 bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
955 if (!isa<ConstantSDNode>(N))
956 return false;
957
958 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
959 if ((MulImm % std::abs(Scale)) == 0) {
960 int64_t RDVLImm = MulImm / Scale;
961 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
962 Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
963 return true;
964 }
965 }
966
967 return false;
968 }
969
970 /// SelectArithExtendedRegister - Select a "extended register" operand. This
971 /// operand folds in an extend followed by an optional left shift.
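/// For example (illustrative only): (add x, (shl (sext_inreg y, i16), 2))
/// yields Reg = y (narrowed to a GPR32) and Shift = "SXTH #2", matching
/// "add xN, xM, wK, sxth #2".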
972 bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
973 SDValue &Shift) {
974 unsigned ShiftVal = 0;
975 AArch64_AM::ShiftExtendType Ext;
976
977 if (N.getOpcode() == ISD::SHL) {
978 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
979 if (!CSD)
980 return false;
981 ShiftVal = CSD->getZExtValue();
982 if (ShiftVal > 4)
983 return false;
984
985 Ext = getExtendTypeForNode(N.getOperand(0));
986 if (Ext == AArch64_AM::InvalidShiftExtend)
987 return false;
988
989 Reg = N.getOperand(0).getOperand(0);
990 } else {
991 Ext = getExtendTypeForNode(N);
992 if (Ext == AArch64_AM::InvalidShiftExtend)
993 return false;
994
995 Reg = N.getOperand(0);
996
997 // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
998 // isDef32 as a heuristic for when the operand is likely to be a 32bit def.
999 auto isDef32 = [](SDValue N) {
1000 unsigned Opc = N.getOpcode();
1001 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
1002 Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
1003 Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
1004 Opc != ISD::FREEZE;
1005 };
1006 if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
1007 isDef32(Reg))
1008 return false;
1009 }
1010
1011 // AArch64 mandates that the RHS of the operation must use the smallest
1012 // register class that could contain the size being extended from. Thus,
1013 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
1014 // there might not be an actual 32-bit value in the program. We can
1015 // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
1016 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
1017 Reg = narrowIfNeeded(CurDAG, Reg);
1018 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1019 MVT::i32);
1020 return isWorthFolding(N);
1021 }
1022
1023 /// SelectArithUXTXRegister - Select a "UXTX register" operand. This
1024 /// operand is used by instructions that have an SP operand.
1025 bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
1026 SDValue &Shift) {
1027 unsigned ShiftVal = 0;
1028 AArch64_AM::ShiftExtendType Ext;
1029
1030 if (N.getOpcode() != ISD::SHL)
1031 return false;
1032
1033 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1034 if (!CSD)
1035 return false;
1036 ShiftVal = CSD->getZExtValue();
1037 if (ShiftVal > 4)
1038 return false;
1039
1040 Ext = AArch64_AM::UXTX;
1041 Reg = N.getOperand(0);
1042 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1043 MVT::i32);
1044 return isWorthFolding(N);
1045 }
1046
1047 /// If there's a use of this ADDlow that's not itself a load/store then we'll
1048 /// need to create a real ADD instruction from it anyway and there's no point in
1049 /// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1050 /// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1051 /// leads to duplicated ADRP instructions.
1052 static bool isWorthFoldingADDlow(SDValue N) {
1053 for (auto *Use : N->uses()) {
1054 if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
1055 Use->getOpcode() != ISD::ATOMIC_LOAD &&
1056 Use->getOpcode() != ISD::ATOMIC_STORE)
1057 return false;
1058
1059 // ldar and stlr have much more restrictive addressing modes (just a
1060 // register).
1061 if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getSuccessOrdering()))
1062 return false;
1063 }
1064
1065 return true;
1066 }
1067
1068 /// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1069 /// immediate" address. The "Size" argument is the size in bytes of the memory
1070 /// reference, which determines the scale.
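/// For example (editorial sketch): with BW = 7, Size = 8 and a signed
/// immediate, base+offset addresses whose offsets are multiples of 8 in
/// [-512, 504] are accepted, and the offset is emitted pre-scaled (offset/8),
/// the form expected by LDP/STP-style patterns.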
1071 bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1072 unsigned BW, unsigned Size,
1073 SDValue &Base,
1074 SDValue &OffImm) {
1075 SDLoc dl(N);
1076 const DataLayout &DL = CurDAG->getDataLayout();
1077 const TargetLowering *TLI = getTargetLowering();
1078 if (N.getOpcode() == ISD::FrameIndex) {
1079 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1080 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1081 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1082 return true;
1083 }
1084
1085   // As opposed to the (12-bit) Indexed addressing mode below, the 7-/9-bit
1086   // signed mode selected here doesn't support labels/immediates, only base+offset.
1087 if (CurDAG->isBaseWithConstantOffset(N)) {
1088 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1089 if (IsSignedImm) {
1090 int64_t RHSC = RHS->getSExtValue();
1091 unsigned Scale = Log2_32(Size);
1092 int64_t Range = 0x1LL << (BW - 1);
1093
1094 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1095 RHSC < (Range << Scale)) {
1096 Base = N.getOperand(0);
1097 if (Base.getOpcode() == ISD::FrameIndex) {
1098 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1099 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1100 }
1101 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1102 return true;
1103 }
1104 } else {
1105 // unsigned Immediate
1106 uint64_t RHSC = RHS->getZExtValue();
1107 unsigned Scale = Log2_32(Size);
1108 uint64_t Range = 0x1ULL << BW;
1109
1110 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1111 Base = N.getOperand(0);
1112 if (Base.getOpcode() == ISD::FrameIndex) {
1113 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1114 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1115 }
1116 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1117 return true;
1118 }
1119 }
1120 }
1121 }
1122 // Base only. The address will be materialized into a register before
1123 // the memory is accessed.
1124 // add x0, Xbase, #offset
1125 // stp x1, x2, [x0]
1126 Base = N;
1127 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1128 return true;
1129 }
1130
1131 /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1132 /// immediate" address. The "Size" argument is the size in bytes of the memory
1133 /// reference, which determines the scale.
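/// For example (illustrative): with Size = 8, (add x, 32760) is accepted with
/// Base = x and OffImm = 4095 (the byte offset divided by 8), matching
/// "ldr xN, [xM, #32760]"; (add x, 32768) is out of range and falls back.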
1134 bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1135 SDValue &Base, SDValue &OffImm) {
1136 SDLoc dl(N);
1137 const DataLayout &DL = CurDAG->getDataLayout();
1138 const TargetLowering *TLI = getTargetLowering();
1139 if (N.getOpcode() == ISD::FrameIndex) {
1140 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1141 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1142 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1143 return true;
1144 }
1145
1146 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1147 GlobalAddressSDNode *GAN =
1148 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1149 Base = N.getOperand(0);
1150 OffImm = N.getOperand(1);
1151 if (!GAN)
1152 return true;
1153
1154 if (GAN->getOffset() % Size == 0 &&
1155 GAN->getGlobal()->getPointerAlignment(DL) >= Size)
1156 return true;
1157 }
1158
1159 if (CurDAG->isBaseWithConstantOffset(N)) {
1160 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1161 int64_t RHSC = (int64_t)RHS->getZExtValue();
1162 unsigned Scale = Log2_32(Size);
1163 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
1164 Base = N.getOperand(0);
1165 if (Base.getOpcode() == ISD::FrameIndex) {
1166 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1167 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1168 }
1169 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1170 return true;
1171 }
1172 }
1173 }
1174
1175 // Before falling back to our general case, check if the unscaled
1176 // instructions can handle this. If so, that's preferable.
1177 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1178 return false;
1179
1180 // Base only. The address will be materialized into a register before
1181 // the memory is accessed.
1182 // add x0, Xbase, #offset
1183 // ldr x0, [x0]
1184 Base = N;
1185 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1186 return true;
1187 }
1188
1189 /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1190 /// immediate" address. This should only match when there is an offset that
1191 /// is not valid for a scaled immediate addressing mode. The "Size" argument
1192 /// is the size in bytes of the memory reference, which is needed here to know
1193 /// what is valid for a scaled immediate.
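/// For example (illustrative): with Size = 8, an offset of -17 is not a valid
/// scaled immediate but lies within [-256, 255], so it is matched here and
/// becomes the 9-bit immediate of an LDUR/STUR-style access.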
1194 bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1195 SDValue &Base,
1196 SDValue &OffImm) {
1197 if (!CurDAG->isBaseWithConstantOffset(N))
1198 return false;
1199 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1200 int64_t RHSC = RHS->getSExtValue();
1201 // If the offset is valid as a scaled immediate, don't match here.
1202 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
1203 RHSC < (0x1000 << Log2_32(Size)))
1204 return false;
1205 if (RHSC >= -256 && RHSC < 256) {
1206 Base = N.getOperand(0);
1207 if (Base.getOpcode() == ISD::FrameIndex) {
1208 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1209 const TargetLowering *TLI = getTargetLowering();
1210 Base = CurDAG->getTargetFrameIndex(
1211 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1212 }
1213 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1214 return true;
1215 }
1216 }
1217 return false;
1218 }
1219
1220 static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
1221 SDLoc dl(N);
1222 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1223 SDValue ImpDef = SDValue(
1224 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1225 MachineSDNode *Node = CurDAG->getMachineNode(
1226 TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg);
1227 return SDValue(Node, 0);
1228 }
1229
1230 /// Check if the given SHL node (\p N), can be used to form an
1231 /// extended register for an addressing mode.
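/// For example (editorial sketch): with Size = 4, (shl (sign_extend w), 2) can
/// become the "wN, sxtw #2" offset operand of a 32-bit load/store, provided
/// the shift amount matches log2(Size) and folding is judged worthwhile.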
1232 bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1233 bool WantExtend, SDValue &Offset,
1234 SDValue &SignExtend) {
1235 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
1236 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1237 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1238 return false;
1239
1240 SDLoc dl(N);
1241 if (WantExtend) {
1242 AArch64_AM::ShiftExtendType Ext =
1243 getExtendTypeForNode(N.getOperand(0), true);
1244 if (Ext == AArch64_AM::InvalidShiftExtend)
1245 return false;
1246
1247 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1248 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1249 MVT::i32);
1250 } else {
1251 Offset = N.getOperand(0);
1252 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1253 }
1254
1255 unsigned LegalShiftVal = Log2_32(Size);
1256 unsigned ShiftVal = CSD->getZExtValue();
1257
1258 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1259 return false;
1260
1261 return isWorthFolding(N);
1262 }
1263
1264 bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1265 SDValue &Base, SDValue &Offset,
1266 SDValue &SignExtend,
1267 SDValue &DoShift) {
1268 if (N.getOpcode() != ISD::ADD)
1269 return false;
1270 SDValue LHS = N.getOperand(0);
1271 SDValue RHS = N.getOperand(1);
1272 SDLoc dl(N);
1273
1274 // We don't want to match immediate adds here, because they are better lowered
1275 // to the register-immediate addressing modes.
1276 if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
1277 return false;
1278
1279 // Check if this particular node is reused in any non-memory related
1280 // operation. If yes, do not try to fold this node into the address
1281 // computation, since the computation will be kept.
1282 const SDNode *Node = N.getNode();
1283 for (SDNode *UI : Node->uses()) {
1284 if (!isa<MemSDNode>(*UI))
1285 return false;
1286 }
1287
1288 // Remember if it is worth folding N when it produces extended register.
1289 bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
1290
1291 // Try to match a shifted extend on the RHS.
1292 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1293 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1294 Base = LHS;
1295 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1296 return true;
1297 }
1298
1299 // Try to match a shifted extend on the LHS.
1300 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1301 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1302 Base = RHS;
1303 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1304 return true;
1305 }
1306
1307 // There was no shift, whatever else we find.
1308 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1309
1310 AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
1311 // Try to match an unshifted extend on the LHS.
1312 if (IsExtendedRegisterWorthFolding &&
1313 (Ext = getExtendTypeForNode(LHS, true)) !=
1314 AArch64_AM::InvalidShiftExtend) {
1315 Base = RHS;
1316 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1317 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1318 MVT::i32);
1319 if (isWorthFolding(LHS))
1320 return true;
1321 }
1322
1323 // Try to match an unshifted extend on the RHS.
1324 if (IsExtendedRegisterWorthFolding &&
1325 (Ext = getExtendTypeForNode(RHS, true)) !=
1326 AArch64_AM::InvalidShiftExtend) {
1327 Base = LHS;
1328 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1329 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1330 MVT::i32);
1331 if (isWorthFolding(RHS))
1332 return true;
1333 }
1334
1335 return false;
1336 }
1337
1338 // Check if the given immediate is preferred by ADD. If an immediate can be
1339 // encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
1340 // encoded by one MOVZ, return true.
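// For example (illustrative): 0xfff and 0x123000 are both treated as
// ADD-friendly (the latter via "add ..., #0x123, lsl #12"), whereas 0x120000
// is not, because a single MOVZ materializes it just as cheaply.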
1341 static bool isPreferredADD(int64_t ImmOff) {
1342 // Constant in [0x0, 0xfff] can be encoded in ADD.
1343 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
1344 return true;
1345 // Check if it can be encoded in an "ADD LSL #12".
1346 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
1347     // As a single MOVZ is faster than an "ADD ... LSL #12", ignore such constants.
1348 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
1349 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
1350 return false;
1351 }
1352
1353 bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1354 SDValue &Base, SDValue &Offset,
1355 SDValue &SignExtend,
1356 SDValue &DoShift) {
1357 if (N.getOpcode() != ISD::ADD)
1358 return false;
1359 SDValue LHS = N.getOperand(0);
1360 SDValue RHS = N.getOperand(1);
1361 SDLoc DL(N);
1362
1363 // Check if this particular node is reused in any non-memory related
1364 // operation. If yes, do not try to fold this node into the address
1365 // computation, since the computation will be kept.
1366 const SDNode *Node = N.getNode();
1367 for (SDNode *UI : Node->uses()) {
1368 if (!isa<MemSDNode>(*UI))
1369 return false;
1370 }
1371
1372   // Watch out if RHS is a wide immediate: it cannot be selected into the
1373   // [BaseReg+Imm] addressing mode, and it may not be encodable in an ADD/SUB
1374   // either. Instead it will use the [BaseReg + 0] address mode and generate
1375 // instructions like:
1376 // MOV X0, WideImmediate
1377 // ADD X1, BaseReg, X0
1378 // LDR X2, [X1, 0]
1379 // For such situation, using [BaseReg, XReg] addressing mode can save one
1380 // ADD/SUB:
1381 // MOV X0, WideImmediate
1382 // LDR X2, [BaseReg, X0]
1383 if (isa<ConstantSDNode>(RHS)) {
1384 int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
1385 unsigned Scale = Log2_32(Size);
1386     // Skip if the immediate can be selected by the load/store addressing mode,
1387     // or if it can be encoded by a single ADD (SUB is also checked by
1388     // using -ImmOff).
1389 if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
1390 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1391 return false;
1392
1393 SDValue Ops[] = { RHS };
1394 SDNode *MOVI =
1395 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1396 SDValue MOVIV = SDValue(MOVI, 0);
1397 // This ADD of two X register will be selected into [Reg+Reg] mode.
1398 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1399 }
1400
1401 // Remember if it is worth folding N when it produces extended register.
1402 bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
1403
1404 // Try to match a shifted extend on the RHS.
1405 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1406 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1407 Base = LHS;
1408 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1409 return true;
1410 }
1411
1412 // Try to match a shifted extend on the LHS.
1413 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1414 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1415 Base = RHS;
1416 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1417 return true;
1418 }
1419
1420 // Match any non-shifted, non-extend, non-immediate add expression.
1421 Base = LHS;
1422 Offset = RHS;
1423 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1424 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1425 // Reg1 + Reg2 is free: no check needed.
1426 return true;
1427 }
1428
1429 SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1430 static const unsigned RegClassIDs[] = {
1431 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1432 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1433 AArch64::dsub2, AArch64::dsub3};
1434
1435 return createTuple(Regs, RegClassIDs, SubRegs);
1436 }
1437
1438 SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1439 static const unsigned RegClassIDs[] = {
1440 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1441 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1442 AArch64::qsub2, AArch64::qsub3};
1443
1444 return createTuple(Regs, RegClassIDs, SubRegs);
1445 }
1446
1447 SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1448 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1449 AArch64::ZPR3RegClassID,
1450 AArch64::ZPR4RegClassID};
1451 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1452 AArch64::zsub2, AArch64::zsub3};
1453
1454 return createTuple(Regs, RegClassIDs, SubRegs);
1455 }
1456
1457 SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1458 const unsigned RegClassIDs[],
1459 const unsigned SubRegs[]) {
1460 // There's no special register-class for a vector-list of 1 element: it's just
1461 // a vector.
1462 if (Regs.size() == 1)
1463 return Regs[0];
1464
1465 assert(Regs.size() >= 2 && Regs.size() <= 4);
1466
1467 SDLoc DL(Regs[0]);
1468
1469 SmallVector<SDValue, 4> Ops;
1470
1471 // First operand of REG_SEQUENCE is the desired RegClass.
1472 Ops.push_back(
1473 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1474
1475 // Then we get pairs of source & subregister-position for the components.
1476 for (unsigned i = 0; i < Regs.size(); ++i) {
1477 Ops.push_back(Regs[i]);
1478 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1479 }
1480
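// Illustrative sketch (register names are arbitrary): for two Q registers
// this builds roughly
//   REG_SEQUENCE QQRegClassID, Vq0, qsub0, Vq1, qsub1
// which forces the register allocator to assign consecutive registers.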
1481 SDNode *N =
1482 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1483 return SDValue(N, 0);
1484 }
1485
1486 void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1487 bool isExt) {
1488 SDLoc dl(N);
1489 EVT VT = N->getValueType(0);
1490
1491 unsigned ExtOff = isExt;
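// Operand layout of the intrinsic node (descriptive note, inferred from the
// indices used below): operand 0 is the intrinsic ID; for TBX (isExt)
// operand 1 holds the fall-back values; the NumVecs table vectors follow;
// the last operand is the index vector.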
1492
1493 // Form a REG_SEQUENCE to force register allocation.
1494 unsigned Vec0Off = ExtOff + 1;
1495 SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
1496 N->op_begin() + Vec0Off + NumVecs);
1497 SDValue RegSeq = createQTuple(Regs);
1498
1499 SmallVector<SDValue, 6> Ops;
1500 if (isExt)
1501 Ops.push_back(N->getOperand(1));
1502 Ops.push_back(RegSeq);
1503 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1504 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1505 }
1506
1507 bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1508 LoadSDNode *LD = cast<LoadSDNode>(N);
1509 if (LD->isUnindexed())
1510 return false;
1511 EVT VT = LD->getMemoryVT();
1512 EVT DstVT = N->getValueType(0);
1513 ISD::MemIndexedMode AM = LD->getAddressingMode();
1514 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
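// Illustrative sketch: a pre-indexed access looks like "ldr x0, [x1, #16]!"
// (base updated before the access), a post-indexed one like
// "ldr x0, [x1], #16" (base updated after the access).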
1515
1516 // We're not doing validity checking here. That was done when checking
1517 // if we should mark the load as indexed or not. We're just selecting
1518 // the right instruction.
1519 unsigned Opcode = 0;
1520
1521 ISD::LoadExtType ExtType = LD->getExtensionType();
1522 bool InsertTo64 = false;
1523 if (VT == MVT::i64)
1524 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1525 else if (VT == MVT::i32) {
1526 if (ExtType == ISD::NON_EXTLOAD)
1527 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1528 else if (ExtType == ISD::SEXTLOAD)
1529 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1530 else {
1531 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1532 InsertTo64 = true;
1533 // The result of the load is only i32. It's the subreg_to_reg that makes
1534 // it into an i64.
1535 DstVT = MVT::i32;
1536 }
1537 } else if (VT == MVT::i16) {
1538 if (ExtType == ISD::SEXTLOAD) {
1539 if (DstVT == MVT::i64)
1540 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1541 else
1542 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1543 } else {
1544 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1545 InsertTo64 = DstVT == MVT::i64;
1546 // The result of the load is only i32. It's the subreg_to_reg that makes
1547 // it into an i64.
1548 DstVT = MVT::i32;
1549 }
1550 } else if (VT == MVT::i8) {
1551 if (ExtType == ISD::SEXTLOAD) {
1552 if (DstVT == MVT::i64)
1553 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1554 else
1555 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1556 } else {
1557 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1558 InsertTo64 = DstVT == MVT::i64;
1559 // The result of the load is only i32. It's the subreg_to_reg that makes
1560 // it into an i64.
1561 DstVT = MVT::i32;
1562 }
1563 } else if (VT == MVT::f16) {
1564 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1565 } else if (VT == MVT::bf16) {
1566 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1567 } else if (VT == MVT::f32) {
1568 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1569 } else if (VT == MVT::f64 || VT.is64BitVector()) {
1570 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1571 } else if (VT.is128BitVector()) {
1572 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1573 } else
1574 return false;
1575 SDValue Chain = LD->getChain();
1576 SDValue Base = LD->getBasePtr();
1577 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1578 int OffsetVal = (int)OffsetOp->getZExtValue();
1579 SDLoc dl(N);
1580 SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1581 SDValue Ops[] = { Base, Offset, Chain };
1582 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1583 MVT::Other, Ops);
1584
1585 // Transfer memoperands.
1586 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1587 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1588
1589 // Either way, we're replacing the node, so tell the caller that.
1590 SDValue LoadedVal = SDValue(Res, 1);
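// Note: writing a W register implicitly zeroes the upper 32 bits, so a
// zero-extending load to i64 only needs the SUBREG_TO_REG wrapper below and
// no extra instruction.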
1591 if (InsertTo64) {
1592 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1593 LoadedVal =
1594 SDValue(CurDAG->getMachineNode(
1595 AArch64::SUBREG_TO_REG, dl, MVT::i64,
1596 CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
1597 SubReg),
1598 0);
1599 }
1600
1601 ReplaceUses(SDValue(N, 0), LoadedVal);
1602 ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1603 ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1604 CurDAG->RemoveDeadNode(N);
1605 return true;
1606 }
1607
1608 void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1609 unsigned SubRegIdx) {
1610 SDLoc dl(N);
1611 EVT VT = N->getValueType(0);
1612 SDValue Chain = N->getOperand(0);
1613
1614 SDValue Ops[] = {N->getOperand(2), // Mem operand;
1615 Chain};
1616
1617 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1618
1619 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1620 SDValue SuperReg = SDValue(Ld, 0);
1621 for (unsigned i = 0; i < NumVecs; ++i)
1622 ReplaceUses(SDValue(N, i),
1623 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1624
1625 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1626
1627 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1628 // because it's too simple to have needed special treatment during lowering.
1629 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1630 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1631 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1632 }
1633
1634 CurDAG->RemoveDeadNode(N);
1635 }
1636
1637 void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1638 unsigned Opc, unsigned SubRegIdx) {
1639 SDLoc dl(N);
1640 EVT VT = N->getValueType(0);
1641 SDValue Chain = N->getOperand(0);
1642
1643 SDValue Ops[] = {N->getOperand(1), // Mem operand
1644 N->getOperand(2), // Incremental
1645 Chain};
1646
1647 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1648 MVT::Untyped, MVT::Other};
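// Illustrative sketch (register names are arbitrary): for
// "ld2 { v0.4s, v1.4s }, [x0], #32", result 0 is the updated base register,
// result 1 the vector tuple, and result 2 the chain.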
1649
1650 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1651
1652 // Update uses of write back register
1653 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1654
1655 // Update uses of vector list
1656 SDValue SuperReg = SDValue(Ld, 1);
1657 if (NumVecs == 1)
1658 ReplaceUses(SDValue(N, 0), SuperReg);
1659 else
1660 for (unsigned i = 0; i < NumVecs; ++i)
1661 ReplaceUses(SDValue(N, i),
1662 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1663
1664 // Update the chain
1665 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1666 CurDAG->RemoveDeadNode(N);
1667 }
1668
1669 /// Optimize \param OldBase and \param OldOffset selecting the best addressing
1670 /// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1671 /// new Base and an SDValue representing the new offset.
1672 std::tuple<unsigned, SDValue, SDValue>
1673 AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1674 unsigned Opc_ri,
1675 const SDValue &OldBase,
1676 const SDValue &OldOffset,
1677 unsigned Scale) {
1678 SDValue NewBase = OldBase;
1679 SDValue NewOffset = OldOffset;
1680 // Detect a possible Reg+Imm addressing mode.
1681 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1682 N, OldBase, NewBase, NewOffset);
1683
1684 // Detect a possible reg+reg addressing mode, but only if we haven't already
1685 // detected a Reg+Imm one.
1686 const bool IsRegReg =
1687 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
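// Illustrative sketch (register names are arbitrary): the reg+imm form
// corresponds to something like "ld1w { z0.s }, p0/z, [x0, #1, mul vl]",
// the reg+reg form to "ld1w { z0.s }, p0/z, [x0, x1, lsl #2]".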
1688
1689 // Select the instruction.
1690 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1691 }
1692
1693 enum class SelectTypeKind {
1694 Int1 = 0,
1695 };
1696
1697 /// This function selects an opcode from a list of opcodes, which is
1698 /// expected to hold the opcodes for { 8-bit, 16-bit, 32-bit, 64-bit }
1699 /// element types, in this order.
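/// For example (a sketch): nxv4i1 has a minimum of 4 elements, so the
/// 32-bit-element opcode, Opcodes[2], would be returned.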
1700 template <SelectTypeKind Kind>
1701 static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
1702 // Only match scalable vector VTs
1703 if (!VT.isScalableVector())
1704 return 0;
1705
1706 EVT EltVT = VT.getVectorElementType();
1707 switch (Kind) {
1708 case SelectTypeKind::Int1:
1709 if (EltVT != MVT::i1)
1710 return 0;
1711 break;
1712 }
1713
1714 unsigned Offset;
1715 switch (VT.getVectorMinNumElements()) {
1716 case 16: // 8-bit
1717 Offset = 0;
1718 break;
1719 case 8: // 16-bit
1720 Offset = 1;
1721 break;
1722 case 4: // 32-bit
1723 Offset = 2;
1724 break;
1725 case 2: // 64-bit
1726 Offset = 3;
1727 break;
1728 default:
1729 return 0;
1730 }
1731
1732 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
1733 }
1734
1735 void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
1736 SDLoc DL(N);
1737 EVT VT = N->getValueType(0);
1738
1739 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
1740
1741 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
1742 SDValue SuperReg = SDValue(WhilePair, 0);
1743
1744 for (unsigned I = 0; I < 2; ++I)
1745 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
1746 AArch64::psub0 + I, DL, VT, SuperReg));
1747
1748 CurDAG->RemoveDeadNode(N);
1749 }
1750
1751 void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
1752 unsigned Opcode) {
1753 EVT VT = N->getValueType(0);
1754 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1755 SDValue Ops = createZTuple(Regs);
1756 SDLoc DL(N);
1757 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
1758 SDValue SuperReg = SDValue(Intrinsic, 0);
1759 for (unsigned i = 0; i < NumVecs; ++i)
1760 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1761 AArch64::zsub0 + i, DL, VT, SuperReg));
1762
1763 CurDAG->RemoveDeadNode(N);
1764 return;
1765 }
1766
1767 void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
1768 unsigned Scale, unsigned Opc_ri,
1769 unsigned Opc_rr, bool IsIntr) {
1770 assert(Scale < 4 && "Invalid scaling value.");
1771 SDLoc DL(N);
1772 EVT VT = N->getValueType(0);
1773 SDValue Chain = N->getOperand(0);
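// Illustrative sketch (register names are arbitrary): an sve.ld2-style
// intrinsic may end up as "ld2w { z0.s, z1.s }, p0/z, [x0, x1, lsl #2]" when
// the reg+reg form is chosen, or as "[x0, #imm, mul vl]" in the reg+imm form.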
1774
1775 // Optimize addressing mode.
1776 SDValue Base, Offset;
1777 unsigned Opc;
1778 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
1779 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
1780 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
1781
1782 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
1783 Base, // Memory operand
1784 Offset, Chain};
1785
1786 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1787
1788 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
1789 SDValue SuperReg = SDValue(Load, 0);
1790 for (unsigned i = 0; i < NumVecs; ++i)
1791 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
1792 AArch64::zsub0 + i, DL, VT, SuperReg));
1793
1794 // Copy chain
1795 unsigned ChainIdx = NumVecs;
1796 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
1797 CurDAG->RemoveDeadNode(N);
1798 }
1799
1800 void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
1801 unsigned Opc) {
1802 SDLoc dl(N);
1803 EVT VT = N->getOperand(2)->getValueType(0);
1804
1805 // Form a REG_SEQUENCE to force register allocation.
1806 bool Is128Bit = VT.getSizeInBits() == 128;
1807 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1808 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1809
1810 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
1811 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
1812
1813 // Transfer memoperands.
1814 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1815 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
1816
1817 ReplaceNode(N, St);
1818 }
1819
1820 void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
1821 unsigned Scale, unsigned Opc_rr,
1822 unsigned Opc_ri) {
1823 SDLoc dl(N);
1824
1825 // Form a REG_SEQUENCE to force register allocation.
1826 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1827 SDValue RegSeq = createZTuple(Regs);
1828
1829 // Optimize addressing mode.
1830 unsigned Opc;
1831 SDValue Offset, Base;
1832 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
1833 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
1834 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
1835
1836 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
1837 Base, // address
1838 Offset, // offset
1839 N->getOperand(0)}; // chain
1840 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
1841
1842 ReplaceNode(N, St);
1843 }
1844
1845 bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
1846 SDValue &OffImm) {
1847 SDLoc dl(N);
1848 const DataLayout &DL = CurDAG->getDataLayout();
1849 const TargetLowering *TLI = getTargetLowering();
1850
1851 // Try to match it for the frame address
1852 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
1853 int FI = FINode->getIndex();
1854 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1855 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1856 return true;
1857 }
1858
1859 return false;
1860 }
1861
1862 void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
1863 unsigned Opc) {
1864 SDLoc dl(N);
1865 EVT VT = N->getOperand(2)->getValueType(0);
1866 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1867 MVT::Other}; // Type for the Chain
1868
1869 // Form a REG_SEQUENCE to force register allocation.
1870 bool Is128Bit = VT.getSizeInBits() == 128;
1871 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1872 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
1873
1874 SDValue Ops[] = {RegSeq,
1875 N->getOperand(NumVecs + 1), // base register
1876 N->getOperand(NumVecs + 2), // Incremental
1877 N->getOperand(0)}; // Chain
1878 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1879
1880 ReplaceNode(N, St);
1881 }
1882
1883 namespace {
1884 /// WidenVector - Given a value in the V64 register class, produce the
1885 /// equivalent value in the V128 register class.
1886 class WidenVector {
1887 SelectionDAG &DAG;
1888
1889 public:
1890 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
1891
1892 SDValue operator()(SDValue V64Reg) {
1893 EVT VT = V64Reg.getValueType();
1894 unsigned NarrowSize = VT.getVectorNumElements();
1895 MVT EltTy = VT.getVectorElementType().getSimpleVT();
1896 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
1897 SDLoc DL(V64Reg);
1898
1899 SDValue Undef =
1900 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
1901 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
1902 }
1903 };
1904 } // namespace
1905
1906 /// NarrowVector - Given a value in the V128 register class, produce the
1907 /// equivalent value in the V64 register class.
NarrowVector(SDValue V128Reg,SelectionDAG & DAG)1908 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
1909 EVT VT = V128Reg.getValueType();
1910 unsigned WideSize = VT.getVectorNumElements();
1911 MVT EltTy = VT.getVectorElementType().getSimpleVT();
1912 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
1913
1914 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
1915 V128Reg);
1916 }
1917
1918 void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
1919 unsigned Opc) {
1920 SDLoc dl(N);
1921 EVT VT = N->getValueType(0);
1922 bool Narrow = VT.getSizeInBits() == 64;
1923
1924 // Form a REG_SEQUENCE to force register allocation.
1925 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
1926
1927 if (Narrow)
1928 transform(Regs, Regs.begin(),
1929 WidenVector(*CurDAG));
1930
1931 SDValue RegSeq = createQTuple(Regs);
1932
1933 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1934
1935 unsigned LaneNo =
1936 cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
1937
1938 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
1939 N->getOperand(NumVecs + 3), N->getOperand(0)};
1940 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1941 SDValue SuperReg = SDValue(Ld, 0);
1942
1943 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1944 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
1945 AArch64::qsub2, AArch64::qsub3 };
1946 for (unsigned i = 0; i < NumVecs; ++i) {
1947 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
1948 if (Narrow)
1949 NV = NarrowVector(NV, *CurDAG);
1950 ReplaceUses(SDValue(N, i), NV);
1951 }
1952
1953 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1954 CurDAG->RemoveDeadNode(N);
1955 }
1956
1957 void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
1958 unsigned Opc) {
1959 SDLoc dl(N);
1960 EVT VT = N->getValueType(0);
1961 bool Narrow = VT.getSizeInBits() == 64;
1962
1963 // Form a REG_SEQUENCE to force register allocation.
1964 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
1965
1966 if (Narrow)
1967 transform(Regs, Regs.begin(),
1968 WidenVector(*CurDAG));
1969
1970 SDValue RegSeq = createQTuple(Regs);
1971
1972 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1973 RegSeq->getValueType(0), MVT::Other};
1974
1975 unsigned LaneNo =
1976 cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
1977
1978 SDValue Ops[] = {RegSeq,
1979 CurDAG->getTargetConstant(LaneNo, dl,
1980 MVT::i64), // Lane Number
1981 N->getOperand(NumVecs + 2), // Base register
1982 N->getOperand(NumVecs + 3), // Incremental
1983 N->getOperand(0)};
1984 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1985
1986 // Update uses of the write back register
1987 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1988
1989 // Update uses of the vector list
1990 SDValue SuperReg = SDValue(Ld, 1);
1991 if (NumVecs == 1) {
1992 ReplaceUses(SDValue(N, 0),
1993 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
1994 } else {
1995 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
1996 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
1997 AArch64::qsub2, AArch64::qsub3 };
1998 for (unsigned i = 0; i < NumVecs; ++i) {
1999 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
2000 SuperReg);
2001 if (Narrow)
2002 NV = NarrowVector(NV, *CurDAG);
2003 ReplaceUses(SDValue(N, i), NV);
2004 }
2005 }
2006
2007 // Update the Chain
2008 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
2009 CurDAG->RemoveDeadNode(N);
2010 }
2011
2012 void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2013 unsigned Opc) {
2014 SDLoc dl(N);
2015 EVT VT = N->getOperand(2)->getValueType(0);
2016 bool Narrow = VT.getSizeInBits() == 64;
2017
2018 // Form a REG_SEQUENCE to force register allocation.
2019 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
2020
2021 if (Narrow)
2022 transform(Regs, Regs.begin(),
2023 WidenVector(*CurDAG));
2024
2025 SDValue RegSeq = createQTuple(Regs);
2026
2027 unsigned LaneNo =
2028 cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
2029
2030 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2031 N->getOperand(NumVecs + 3), N->getOperand(0)};
2032 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2033
2034 // Transfer memoperands.
2035 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2036 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2037
2038 ReplaceNode(N, St);
2039 }
2040
2041 void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2042 unsigned Opc) {
2043 SDLoc dl(N);
2044 EVT VT = N->getOperand(2)->getValueType(0);
2045 bool Narrow = VT.getSizeInBits() == 64;
2046
2047 // Form a REG_SEQUENCE to force register allocation.
2048 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
2049
2050 if (Narrow)
2051 transform(Regs, Regs.begin(),
2052 WidenVector(*CurDAG));
2053
2054 SDValue RegSeq = createQTuple(Regs);
2055
2056 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2057 MVT::Other};
2058
2059 unsigned LaneNo =
2060 cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
2061
2062 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2063 N->getOperand(NumVecs + 2), // Base Register
2064 N->getOperand(NumVecs + 3), // Incremental
2065 N->getOperand(0)};
2066 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2067
2068 // Transfer memoperands.
2069 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2070 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2071
2072 ReplaceNode(N, St);
2073 }
2074
2075 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
2076 unsigned &Opc, SDValue &Opd0,
2077 unsigned &LSB, unsigned &MSB,
2078 unsigned NumberOfIgnoredLowBits,
2079 bool BiggerPattern) {
2080 assert(N->getOpcode() == ISD::AND &&
2081 "N must be a AND operation to call this function");
2082
2083 EVT VT = N->getValueType(0);
2084
2085 // Here we could test the type of VT and return false when the type does not
2086 // match, but since that check is done prior to this call in the current
2087 // context, we turned it into an assert to avoid redundant code.
2088 assert((VT == MVT::i32 || VT == MVT::i64) &&
2089 "Type checking must have been done before calling this function");
2090
2091 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2092 // changed the AND node to a 32-bit mask operation. We'll have to
2093 // undo that as part of the transform here if we want to catch all
2094 // the opportunities.
2095 // Currently the NumberOfIgnoredLowBits argument helps to recover
2096 // from these situations when matching bigger pattern (bitfield insert).
2097
2098 // For unsigned extracts, check for a shift right and mask
2099 uint64_t AndImm = 0;
2100 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2101 return false;
2102
2103 const SDNode *Op0 = N->getOperand(0).getNode();
2104
2105 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2106 // simplified. Try to undo that
2107 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2108
2109 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2110 if (AndImm & (AndImm + 1))
2111 return false;
2112
2113 bool ClampMSB = false;
2114 uint64_t SrlImm = 0;
2115 // Handle the SRL + ANY_EXTEND case.
2116 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2117 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2118 // Extend the incoming operand of the SRL to 64-bit.
2119 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2120 // Make sure to clamp the MSB so that we preserve the semantics of the
2121 // original operations.
2122 ClampMSB = true;
2123 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2124 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
2125 SrlImm)) {
2126 // If the shift result was truncated, we can still combine them.
2127 Opd0 = Op0->getOperand(0).getOperand(0);
2128
2129 // Use the type of SRL node.
2130 VT = Opd0->getValueType(0);
2131 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2132 Opd0 = Op0->getOperand(0);
2133 ClampMSB = (VT == MVT::i32);
2134 } else if (BiggerPattern) {
2135 // Let's pretend a 0 shift right has been performed.
2136 // The resulting code will be at least as good as the original one
2137 // plus it may expose more opportunities for bitfield insert pattern.
2138 // FIXME: Currently we limit this to the bigger pattern, because
2139 // some optimizations expect AND and not UBFM.
2140 Opd0 = N->getOperand(0);
2141 } else
2142 return false;
2143
2144 // Bail out on large immediates. This happens when no proper
2145 // combining/constant folding was performed.
2146 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2147 LLVM_DEBUG(
2148 (dbgs() << N
2149 << ": Found large shift immediate, this should not happen\n"));
2150 return false;
2151 }
2152
2153 LSB = SrlImm;
2154 MSB = SrlImm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(AndImm)
2155 : countTrailingOnes<uint64_t>(AndImm)) -
2156 1;
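// Worked example (a sketch): for (and (srl x, #2), 0xff) we get SrlImm == 2
// and AndImm == 0xff, so LSB == 2 and MSB == 9, i.e. UBFM x, #2, #9
// (an unsigned extract of 8 bits starting at bit 2).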
2157 if (ClampMSB)
2158 // Since we're moving the extend before the right shift operation, we need
2159 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2160 // the zeros which would get shifted in with the original right shift
2161 // operation.
2162 MSB = MSB > 31 ? 31 : MSB;
2163
2164 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2165 return true;
2166 }
2167
2168 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
2169 SDValue &Opd0, unsigned &Immr,
2170 unsigned &Imms) {
2171 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2172
2173 EVT VT = N->getValueType(0);
2174 unsigned BitWidth = VT.getSizeInBits();
2175 assert((VT == MVT::i32 || VT == MVT::i64) &&
2176 "Type checking must have been done before calling this function");
2177
2178 SDValue Op = N->getOperand(0);
2179 if (Op->getOpcode() == ISD::TRUNCATE) {
2180 Op = Op->getOperand(0);
2181 VT = Op->getValueType(0);
2182 BitWidth = VT.getSizeInBits();
2183 }
2184
2185 uint64_t ShiftImm;
2186 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2187 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2188 return false;
2189
2190 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2191 if (ShiftImm + Width > BitWidth)
2192 return false;
2193
2194 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2195 Opd0 = Op.getOperand(0);
2196 Immr = ShiftImm;
2197 Imms = ShiftImm + Width - 1;
2198 return true;
2199 }
2200
2201 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
2202 SDValue &Opd0, unsigned &LSB,
2203 unsigned &MSB) {
2204 // We are looking for the following pattern, which basically extracts several
2205 // contiguous bits from the source value and places them starting at the LSB
2206 // of the destination value; all other bits of the destination are set to zero:
2207 //
2208 // Value2 = AND Value, MaskImm
2209 // SRL Value2, ShiftImm
2210 //
2211 // where MaskImm >> ShiftImm is inspected to find the bit width.
2212 //
2213 // This gets selected into a single UBFM:
2214 //
2215 // UBFM Value, ShiftImm, findLastSet(MaskImm)
2216 //
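// Worked example (a sketch): (srl (and x, 0xff0), #4) becomes
// UBFM x, #4, #11, i.e. an unsigned extract of 8 bits starting at bit 4.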
2217
2218 if (N->getOpcode() != ISD::SRL)
2219 return false;
2220
2221 uint64_t AndMask = 0;
2222 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2223 return false;
2224
2225 Opd0 = N->getOperand(0).getOperand(0);
2226
2227 uint64_t SrlImm = 0;
2228 if (!isIntImmediate(N->getOperand(1), SrlImm))
2229 return false;
2230
2231 // Check whether we really have several bits extract here.
2232 if (!isMask_64(AndMask >> SrlImm))
2233 return false;
2234
2235 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2236 LSB = SrlImm;
2237 MSB = findLastSet(AndMask, ZB_Undefined);
2238 return true;
2239 }
2240
2241 static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2242 unsigned &Immr, unsigned &Imms,
2243 bool BiggerPattern) {
2244 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2245 "N must be a SHR/SRA operation to call this function");
2246
2247 EVT VT = N->getValueType(0);
2248
2249 // Here we could test the type of VT and return false when the type does not
2250 // match, but since that check is done prior to this call in the current
2251 // context, we turned it into an assert to avoid redundant code.
2252 assert((VT == MVT::i32 || VT == MVT::i64) &&
2253 "Type checking must have been done before calling this function");
2254
2255 // Check for AND + SRL doing several bits extract.
2256 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2257 return true;
2258
2259 // We're looking for a shift of a shift.
2260 uint64_t ShlImm = 0;
2261 uint64_t TruncBits = 0;
2262 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2263 Opd0 = N->getOperand(0).getOperand(0);
2264 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2265 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2266 // We are looking for a shift of truncate. Truncate from i64 to i32 could
2267 // be considered as setting high 32 bits as zero. Our strategy here is to
2268 // always generate 64bit UBFM. This consistency will help the CSE pass
2269 // later find more redundancy.
2270 Opd0 = N->getOperand(0).getOperand(0);
2271 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2272 VT = Opd0.getValueType();
2273 assert(VT == MVT::i64 && "the promoted type should be i64");
2274 } else if (BiggerPattern) {
2275 // Let's pretend a 0 shift left has been performed.
2276 // FIXME: Currently we limit this to the bigger pattern case,
2277 // because some optimizations expect AND and not UBFM
2278 Opd0 = N->getOperand(0);
2279 } else
2280 return false;
2281
2282 // Missing combines/constant folding may have left us with strange
2283 // constants.
2284 if (ShlImm >= VT.getSizeInBits()) {
2285 LLVM_DEBUG(
2286 (dbgs() << N
2287 << ": Found large shift immediate, this should not happen\n"));
2288 return false;
2289 }
2290
2291 uint64_t SrlImm = 0;
2292 if (!isIntImmediate(N->getOperand(1), SrlImm))
2293 return false;
2294
2295 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2296 "bad amount in shift node!");
2297 int immr = SrlImm - ShlImm;
2298 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2299 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
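// Worked example (a sketch): for i32 (srl (shl x, #24), #28) we get Immr == 4
// and Imms == 7, i.e. an unsigned extract of 4 bits starting at bit 4.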
2300 // SRA requires a signed extraction
2301 if (VT == MVT::i32)
2302 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2303 else
2304 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2305 return true;
2306 }
2307
2308 bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2309 assert(N->getOpcode() == ISD::SIGN_EXTEND);
2310
2311 EVT VT = N->getValueType(0);
2312 EVT NarrowVT = N->getOperand(0)->getValueType(0);
2313 if (VT != MVT::i64 || NarrowVT != MVT::i32)
2314 return false;
2315
2316 uint64_t ShiftImm;
2317 SDValue Op = N->getOperand(0);
2318 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2319 return false;
2320
2321 SDLoc dl(N);
2322 // Extend the incoming operand of the shift to 64-bits.
2323 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2324 unsigned Immr = ShiftImm;
2325 unsigned Imms = NarrowVT.getSizeInBits() - 1;
2326 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2327 CurDAG->getTargetConstant(Imms, dl, VT)};
2328 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2329 return true;
2330 }
2331
2332 /// Try to form fcvtl2 instructions from a floating-point extend of a high-half
2333 /// extract of a subvector.
2334 bool AArch64DAGToDAGISel::tryHighFPExt(SDNode *N) {
2335 assert(N->getOpcode() == ISD::FP_EXTEND);
2336
2337 // There are 2 forms of fcvtl2 - extend to double or extend to float.
2338 SDValue Extract = N->getOperand(0);
2339 EVT VT = N->getValueType(0);
2340 EVT NarrowVT = Extract.getValueType();
2341 if ((VT != MVT::v2f64 || NarrowVT != MVT::v2f32) &&
2342 (VT != MVT::v4f32 || NarrowVT != MVT::v4f16))
2343 return false;
2344
2345 // Optionally look past a bitcast.
2346 Extract = peekThroughBitcasts(Extract);
2347 if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
2348 return false;
2349
2350 // Match extract from start of high half index.
2351 // Example: v8i16 -> v4i16 means the extract must begin at index 4.
2352 unsigned ExtractIndex = Extract.getConstantOperandVal(1);
2353 if (ExtractIndex != Extract.getValueType().getVectorNumElements())
2354 return false;
2355
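// Illustrative sketch: (v2f64 (fp_extend (v2f32 (extract_subvector
// (v4f32 V), 2)))) is selected to "fcvtl2 Vd.2d, Vn.4s".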
2356 auto Opcode = VT == MVT::v2f64 ? AArch64::FCVTLv4i32 : AArch64::FCVTLv8i16;
2357 CurDAG->SelectNodeTo(N, Opcode, VT, Extract.getOperand(0));
2358 return true;
2359 }
2360
2361 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2362 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2363 unsigned NumberOfIgnoredLowBits = 0,
2364 bool BiggerPattern = false) {
2365 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2366 return false;
2367
2368 switch (N->getOpcode()) {
2369 default:
2370 if (!N->isMachineOpcode())
2371 return false;
2372 break;
2373 case ISD::AND:
2374 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2375 NumberOfIgnoredLowBits, BiggerPattern);
2376 case ISD::SRL:
2377 case ISD::SRA:
2378 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
2379
2380 case ISD::SIGN_EXTEND_INREG:
2381 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
2382 }
2383
2384 unsigned NOpc = N->getMachineOpcode();
2385 switch (NOpc) {
2386 default:
2387 return false;
2388 case AArch64::SBFMWri:
2389 case AArch64::UBFMWri:
2390 case AArch64::SBFMXri:
2391 case AArch64::UBFMXri:
2392 Opc = NOpc;
2393 Opd0 = N->getOperand(0);
2394 Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
2395 Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
2396 return true;
2397 }
2398 // Unreachable
2399 return false;
2400 }
2401
2402 bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
2403 unsigned Opc, Immr, Imms;
2404 SDValue Opd0;
2405 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
2406 return false;
2407
2408 EVT VT = N->getValueType(0);
2409 SDLoc dl(N);
2410
2411 // If the bit extract operation is 64bit but the original type is 32bit, we
2412 // need to add one EXTRACT_SUBREG.
2413 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
2414 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
2415 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
2416
2417 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
2418 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
2419 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
2420 MVT::i32, SDValue(BFM, 0), SubReg));
2421 return true;
2422 }
2423
2424 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2425 CurDAG->getTargetConstant(Imms, dl, VT)};
2426 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2427 return true;
2428 }
2429
2430 /// Does DstMask form a complementary pair with the mask provided by
2431 /// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
2432 /// this asks whether DstMask zeroes precisely those bits that will be set by
2433 /// the other half.
2434 static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
2435 unsigned NumberOfIgnoredHighBits, EVT VT) {
2436 assert((VT == MVT::i32 || VT == MVT::i64) &&
2437 "i32 or i64 mask type expected!");
2438 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
2439
2440 APInt SignificantDstMask = APInt(BitWidth, DstMask);
2441 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
2442
2443 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
2444 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
2445 }
2446
2447 // Look for bits that will be useful for later uses.
2448 // A bit is considered useless as soon as it is dropped and is never used
2449 // before it has been dropped.
2450 // E.g., looking for useful bit of x
2451 // 1. y = x & 0x7
2452 // 2. z = y >> 2
2453 // After #1, the useful bits of x are 0x7, and they then live through
2454 // y.
2455 // After #2, the useful bits of x are 0x4.
2456 // However, if x is used on an unpredictable instruction, then all its bits
2457 // are useful.
2458 // E.g.
2459 // 1. y = x & 0x7
2460 // 2. z = y >> 2
2461 // 3. str x, [@x]
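// In this second example the store in #3 consumes every bit of x, so all
// bits of x remain useful despite #1 and #2.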
2462 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
2463
2464 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
2465 unsigned Depth) {
2466 uint64_t Imm =
2467 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2468 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
2469 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
2470 getUsefulBits(Op, UsefulBits, Depth + 1);
2471 }
2472
2473 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
2474 uint64_t Imm, uint64_t MSB,
2475 unsigned Depth) {
2476 // inherit the bitwidth value
2477 APInt OpUsefulBits(UsefulBits);
2478 OpUsefulBits = 1;
2479
2480 if (MSB >= Imm) {
2481 OpUsefulBits <<= MSB - Imm + 1;
2482 --OpUsefulBits;
2483 // The interesting part will be in the lower part of the result
2484 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2485 // The interesting part was starting at Imm in the argument
2486 OpUsefulBits <<= Imm;
2487 } else {
2488 OpUsefulBits <<= MSB + 1;
2489 --OpUsefulBits;
2490 // The interesting part will be shifted in the result
2491 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
2492 getUsefulBits(Op, OpUsefulBits, Depth + 1);
2493 // The interesting part was at zero in the argument
2494 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
2495 }
2496
2497 UsefulBits &= OpUsefulBits;
2498 }
2499
2500 static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
2501 unsigned Depth) {
2502 uint64_t Imm =
2503 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
2504 uint64_t MSB =
2505 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2506
2507 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
2508 }
2509
2510 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
2511 unsigned Depth) {
2512 uint64_t ShiftTypeAndValue =
2513 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2514 APInt Mask(UsefulBits);
2515 Mask.clearAllBits();
2516 Mask.flipAllBits();
2517
2518 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
2519 // Shift Left
2520 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2521 Mask <<= ShiftAmt;
2522 getUsefulBits(Op, Mask, Depth + 1);
2523 Mask.lshrInPlace(ShiftAmt);
2524 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
2525 // Shift Right
2526 // We do not handle AArch64_AM::ASR, because the sign will change the
2527 // number of useful bits
2528 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
2529 Mask.lshrInPlace(ShiftAmt);
2530 getUsefulBits(Op, Mask, Depth + 1);
2531 Mask <<= ShiftAmt;
2532 } else
2533 return;
2534
2535 UsefulBits &= Mask;
2536 }
2537
2538 static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
2539 unsigned Depth) {
2540 uint64_t Imm =
2541 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
2542 uint64_t MSB =
2543 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
2544
2545 APInt OpUsefulBits(UsefulBits);
2546 OpUsefulBits = 1;
2547
2548 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
2549 ResultUsefulBits.flipAllBits();
2550 APInt Mask(UsefulBits.getBitWidth(), 0);
2551
2552 getUsefulBits(Op, ResultUsefulBits, Depth + 1);
2553
2554 if (MSB >= Imm) {
2555 // The instruction is a BFXIL.
2556 uint64_t Width = MSB - Imm + 1;
2557 uint64_t LSB = Imm;
2558
2559 OpUsefulBits <<= Width;
2560 --OpUsefulBits;
2561
2562 if (Op.getOperand(1) == Orig) {
2563 // Copy the low bits from the result to bits starting from LSB.
2564 Mask = ResultUsefulBits & OpUsefulBits;
2565 Mask <<= LSB;
2566 }
2567
2568 if (Op.getOperand(0) == Orig)
2569 // Bits starting from LSB in the input contribute to the result.
2570 Mask |= (ResultUsefulBits & ~OpUsefulBits);
2571 } else {
2572 // The instruction is a BFI.
2573 uint64_t Width = MSB + 1;
2574 uint64_t LSB = UsefulBits.getBitWidth() - Imm;
2575
2576 OpUsefulBits <<= Width;
2577 --OpUsefulBits;
2578 OpUsefulBits <<= LSB;
2579
2580 if (Op.getOperand(1) == Orig) {
2581 // Copy the bits from the result to the zero bits.
2582 Mask = ResultUsefulBits & OpUsefulBits;
2583 Mask.lshrInPlace(LSB);
2584 }
2585
2586 if (Op.getOperand(0) == Orig)
2587 Mask |= (ResultUsefulBits & ~OpUsefulBits);
2588 }
2589
2590 UsefulBits &= Mask;
2591 }
2592
2593 static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
2594 SDValue Orig, unsigned Depth) {
2595
2596 // Users of this node should have already been instruction selected
2597 // FIXME: Can we turn that into an assert?
2598 if (!UserNode->isMachineOpcode())
2599 return;
2600
2601 switch (UserNode->getMachineOpcode()) {
2602 default:
2603 return;
2604 case AArch64::ANDSWri:
2605 case AArch64::ANDSXri:
2606 case AArch64::ANDWri:
2607 case AArch64::ANDXri:
2608 // We increment Depth only when we call getUsefulBits.
2609 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
2610 Depth);
2611 case AArch64::UBFMWri:
2612 case AArch64::UBFMXri:
2613 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
2614
2615 case AArch64::ORRWrs:
2616 case AArch64::ORRXrs:
2617 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
2618 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
2619 Depth);
2620 return;
2621 case AArch64::BFMWri:
2622 case AArch64::BFMXri:
2623 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
2624
2625 case AArch64::STRBBui:
2626 case AArch64::STURBBi:
2627 if (UserNode->getOperand(0) != Orig)
2628 return;
2629 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
2630 return;
2631
2632 case AArch64::STRHHui:
2633 case AArch64::STURHHi:
2634 if (UserNode->getOperand(0) != Orig)
2635 return;
2636 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
2637 return;
2638 }
2639 }
2640
2641 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
2642 if (Depth >= SelectionDAG::MaxRecursionDepth)
2643 return;
2644 // Initialize UsefulBits
2645 if (!Depth) {
2646 unsigned Bitwidth = Op.getScalarValueSizeInBits();
2647 // At the beginning, assume every produced bit is useful
2648 UsefulBits = APInt(Bitwidth, 0);
2649 UsefulBits.flipAllBits();
2650 }
2651 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
2652
2653 for (SDNode *Node : Op.getNode()->uses()) {
2654 // A use cannot produce useful bits
2655 APInt UsefulBitsForUse = APInt(UsefulBits);
2656 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
2657 UsersUsefulBits |= UsefulBitsForUse;
2658 }
2659 // UsefulBits contains the produced bits that are meaningful for the
2660 // current definition, thus a user cannot make a bit meaningful at
2661 // this point
2662 UsefulBits &= UsersUsefulBits;
2663 }
2664
2665 /// Create a machine node performing a notional SHL of Op by ShlAmount. If
2666 /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
2667 /// 0, return Op unchanged.
2668 static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
2669 if (ShlAmount == 0)
2670 return Op;
2671
2672 EVT VT = Op.getValueType();
2673 SDLoc dl(Op);
2674 unsigned BitWidth = VT.getSizeInBits();
2675 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2676
2677 SDNode *ShiftNode;
2678 if (ShlAmount > 0) {
2679 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
2680 ShiftNode = CurDAG->getMachineNode(
2681 UBFMOpc, dl, VT, Op,
2682 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
2683 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
2684 } else {
2685 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
2686 assert(ShlAmount < 0 && "expected right shift");
2687 int ShrAmount = -ShlAmount;
2688 ShiftNode = CurDAG->getMachineNode(
2689 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
2690 CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
2691 }
2692
2693 return SDValue(ShiftNode, 0);
2694 }
2695
2696 // For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
2697 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
2698 bool BiggerPattern,
2699 const uint64_t NonZeroBits,
2700 SDValue &Src, int &DstLSB,
2701 int &Width);
2702
2703 // For bit-field-positioning pattern "(shl VAL, N)".
2704 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
2705 bool BiggerPattern,
2706 const uint64_t NonZeroBits,
2707 SDValue &Src, int &DstLSB,
2708 int &Width);
2709
2710 /// Does this tree qualify as an attempt to move a bitfield into position,
2711 /// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
2712 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
2713 bool BiggerPattern, SDValue &Src,
2714 int &DstLSB, int &Width) {
2715 EVT VT = Op.getValueType();
2716 unsigned BitWidth = VT.getSizeInBits();
2717 (void)BitWidth;
2718 assert(BitWidth == 32 || BitWidth == 64);
2719
2720 KnownBits Known = CurDAG->computeKnownBits(Op);
2721
2722 // Non-zero in the sense that they're not provably zero, which is the key
2723 // point if we want to use this value
2724 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
2725 if (!isShiftedMask_64(NonZeroBits))
2726 return false;
2727
2728 switch (Op.getOpcode()) {
2729 default:
2730 break;
2731 case ISD::AND:
2732 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
2733 NonZeroBits, Src, DstLSB, Width);
2734 case ISD::SHL:
2735 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
2736 NonZeroBits, Src, DstLSB, Width);
2737 }
2738
2739 return false;
2740 }
2741
2742 static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
2743 bool BiggerPattern,
2744 const uint64_t NonZeroBits,
2745 SDValue &Src, int &DstLSB,
2746 int &Width) {
2747 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
2748
2749 EVT VT = Op.getValueType();
2750 assert((VT == MVT::i32 || VT == MVT::i64) &&
2751 "Caller guarantees VT is one of i32 or i64");
2752 (void)VT;
2753
2754 uint64_t AndImm;
2755 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
2756 return false;
2757
2758 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
2759 // 1) (AndImm & (1 << POS) == 0)
2760 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
2761 //
2762 // 1) and 2) don't agree so something must be wrong (e.g., in
2763 // 'SelectionDAG::computeKnownBits')
2764 assert((~AndImm & NonZeroBits) == 0 &&
2765 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
2766
2767 SDValue AndOp0 = Op.getOperand(0);
2768
2769 uint64_t ShlImm;
2770 SDValue ShlOp0;
2771 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
2772 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
2773 ShlOp0 = AndOp0.getOperand(0);
2774 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
2775 isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
2776 ShlImm)) {
2777 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
2778
2779 // ShlVal == shl(val, N), which is a left shift on a smaller type.
2780 SDValue ShlVal = AndOp0.getOperand(0);
2781
2782 // Since this is after type legalization and ShlVal is extended to MVT::i64,
2783 // expect VT to be MVT::i32.
2784 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
2785
2786 // Widens 'val' to MVT::i64 as the source of bit field positioning.
2787 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
2788 } else
2789 return false;
2790
2791 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
2792 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
2793 // AndOp0+AND.
2794 if (!BiggerPattern && !AndOp0.hasOneUse())
2795 return false;
2796
2797 DstLSB = countTrailingZeros(NonZeroBits);
2798 Width = countTrailingOnes(NonZeroBits >> DstLSB);
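// Worked example (a sketch): for (and (shl x, #3), 0xf8) the non-zero bits
// are 0xf8, giving DstLSB == 3 and Width == 5, i.e. a UBFIZ x, #3, #5
// candidate.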
2799
2800 // Bail out on large Width. This happens when no proper combining / constant
2801 // folding was performed.
2802 if (Width >= (int)VT.getSizeInBits()) {
2803 // If VT is i64, Width > 64 is not possible since NonZeroBits is uint64_t, and
2804 // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
2805 // "val".
2806 // If VT is i32, Width >= 32 means:
2807 // - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
2808 // demands at least 'Width' bits (after dag-combiner). This together with
2809 // the `any_extend` Op (undefined higher bits) indicates a missed combination
2810 // when lowering the 'and' IR instruction to a machine IR instruction.
2811 LLVM_DEBUG(
2812 dbgs()
2813 << "Found large Width in bit-field-positioning -- this indicates no "
2814 "proper combining / constant folding was performed\n");
2815 return false;
2816 }
2817
2818 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
2819 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
2820 // amount. BiggerPattern is true when this pattern is being matched for BFI,
2821 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
2822 // which case it is not profitable to insert an extra shift.
2823 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
2824 return false;
2825
2826 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
2827 return true;
2828 }
2829
2830 // For node (shl (and val, mask), N)), returns true if the node is equivalent to
2831 // UBFIZ.
2832 static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
2833 SDValue &Src, int &DstLSB,
2834 int &Width) {
2835 // Caller should have verified that N is a left shift with constant shift
2836 // amount; the asserts below check this.
2837 assert(Op.getOpcode() == ISD::SHL &&
2838 "Op.getNode() should be a SHL node to call this function");
2839 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
2840 "Op.getNode() should shift ShlImm to call this function");
2841
2842 uint64_t AndImm = 0;
2843 SDValue Op0 = Op.getOperand(0);
2844 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
2845 return false;
2846
2847 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
2848 if (isMask_64(ShiftedAndImm)) {
2849 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
2850 // should end with that mask, and could be prefixed with arbitrary bits if
2851 // those bits are shifted out.
2852 //
2853 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
2854 // the AND result corresponding to those bits are shifted out, so it's fine
2855 // to not extract them.
2856 Width = countTrailingOnes(ShiftedAndImm);
2857 DstLSB = ShlImm;
2858 Src = Op0.getOperand(0);
2859 return true;
2860 }
2861 return false;
2862 }
2863
2864 static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
2865 bool BiggerPattern,
2866 const uint64_t NonZeroBits,
2867 SDValue &Src, int &DstLSB,
2868 int &Width) {
2869 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
2870
2871 EVT VT = Op.getValueType();
2872 assert((VT == MVT::i32 || VT == MVT::i64) &&
2873 "Caller guarantees that type is i32 or i64");
2874 (void)VT;
2875
2876 uint64_t ShlImm;
2877 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
2878 return false;
2879
2880 if (!BiggerPattern && !Op.hasOneUse())
2881 return false;
2882
2883 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
2884 return true;
2885
2886 DstLSB = countTrailingZeros(NonZeroBits);
2887 Width = countTrailingOnes(NonZeroBits >> DstLSB);
2888
2889 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
2890 return false;
2891
2892 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
2893 return true;
2894 }
2895
2896 static bool isShiftedMask(uint64_t Mask, EVT VT) {
2897 assert(VT == MVT::i32 || VT == MVT::i64);
2898 if (VT == MVT::i32)
2899 return isShiftedMask_32(Mask);
2900 return isShiftedMask_64(Mask);
2901 }
2902
2903 // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
2904 // inserted only sets known zero bits.
2905 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
2906 assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
2907
2908 EVT VT = N->getValueType(0);
2909 if (VT != MVT::i32 && VT != MVT::i64)
2910 return false;
2911
2912 unsigned BitWidth = VT.getSizeInBits();
2913
2914 uint64_t OrImm;
2915 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
2916 return false;
2917
2918 // Skip this transformation if the OR immediate can already be encoded as an
2919 // ORR logical immediate. Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL,
2920 // which is most likely performance neutral.
2921 if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
2922 return false;
2923
2924 uint64_t MaskImm;
2925 SDValue And = N->getOperand(0);
2926 // Must be a single use AND with an immediate operand.
2927 if (!And.hasOneUse() ||
2928 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
2929 return false;
2930
2931 // Compute the Known Zero for the AND as this allows us to catch more general
2932 // cases than just looking for AND with imm.
2933 KnownBits Known = CurDAG->computeKnownBits(And);
2934
2935 // Non-zero in the sense that they're not provably zero, which is the key
2936 // point if we want to use this value.
2937 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
2938
2939 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
2940 if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
2941 return false;
2942
2943 // The bits being inserted must only set those bits that are known to be zero.
2944 if ((OrImm & NotKnownZero) != 0) {
2945 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
2946 // currently handle this case.
2947 return false;
2948 }
2949
2950 // BFI/BFXIL dst, src, #lsb, #width.
2951 int LSB = countTrailingOnes(NotKnownZero);
2952 int Width = BitWidth - APInt(BitWidth, NotKnownZero).countPopulation();
2953
2954 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
2955 unsigned ImmR = (BitWidth - LSB) % BitWidth;
2956 unsigned ImmS = Width - 1;
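// For example, on i32 with Known.Zero == 0x0000FF00, NotKnownZero is
// 0xFFFF00FF, so LSB == 8 and Width == 8, giving ImmR == 24 and ImmS == 7,
// i.e. BFI dst, src, #8, #8 expressed as BFM operands.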
2957
2958 // If we're creating a BFI instruction avoid cases where we need more
2959 // instructions to materialize the BFI constant as compared to the original
2960 // ORR. A BFXIL will use the same constant as the original ORR, so the code
2961 // should be no worse in this case.
2962 bool IsBFI = LSB != 0;
2963 uint64_t BFIImm = OrImm >> LSB;
2964 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
2965 // We have a BFI instruction and we know the constant can't be materialized
2966 // with a ORR-immediate with the zero register.
2967 unsigned OrChunks = 0, BFIChunks = 0;
2968 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
2969 if (((OrImm >> Shift) & 0xFFFF) != 0)
2970 ++OrChunks;
2971 if (((BFIImm >> Shift) & 0xFFFF) != 0)
2972 ++BFIChunks;
2973 }
2974 if (BFIChunks > OrChunks)
2975 return false;
2976 }
2977
2978 // Materialize the constant to be inserted.
2979 SDLoc DL(N);
2980 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
2981 SDNode *MOVI = CurDAG->getMachineNode(
2982 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
2983
2984 // Create the BFI/BFXIL instruction.
2985 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
2986 CurDAG->getTargetConstant(ImmR, DL, VT),
2987 CurDAG->getTargetConstant(ImmS, DL, VT)};
2988 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
2989 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
2990 return true;
2991 }
2992
2993 static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
2994 SDValue &ShiftedOperand,
2995 uint64_t &EncodedShiftImm) {
2996 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
2997 if (!Dst.hasOneUse())
2998 return false;
2999
3000 EVT VT = Dst.getValueType();
3001 assert((VT == MVT::i32 || VT == MVT::i64) &&
3002 "Caller should guarantee that VT is one of i32 or i64");
3003 const unsigned SizeInBits = VT.getSizeInBits();
3004
3005 SDLoc DL(Dst.getNode());
3006 uint64_t AndImm, ShlImm;
3007 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3008 isShiftedMask_64(AndImm)) {
3009 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3010 SDValue DstOp0 = Dst.getOperand(0);
3011 if (!DstOp0.hasOneUse())
3012 return false;
3013
3014 // An example to illustrate the transformation
3015 // From:
3016 // lsr x8, x1, #1
3017 // and x8, x8, #0x3f80
3018 // bfxil x8, x1, #0, #7
3019 // To:
3020 // and x8, x23, #0x7f
3021 // ubfx x9, x23, #8, #7
3022 // orr x23, x8, x9, lsl #7
3023 //
3024 // The number of instructions remains the same, but ORR is faster than BFXIL
3025 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3026 // the dependency chain is improved after the transformation.
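// In the example above, SrlImm == 1 and AndImm == 0x3f80 (7 set bits after 7
// trailing zeros), so the UBFM created below extracts 7 bits starting at bit 8
// (the "ubfx ..., #8, #7") and the ORR then applies LSL #7.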
3027 uint64_t SrlImm;
3028 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3029 uint64_t NumTrailingZeroInShiftedMask = countTrailingZeros(AndImm);
3030 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
3031 unsigned MaskWidth =
3032 countTrailingOnes(AndImm >> NumTrailingZeroInShiftedMask);
3033 unsigned UBFMOpc =
3034 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3035 SDNode *UBFMNode = CurDAG->getMachineNode(
3036 UBFMOpc, DL, VT, DstOp0.getOperand(0),
3037 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3038 VT),
3039 CurDAG->getTargetConstant(
3040 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3041 ShiftedOperand = SDValue(UBFMNode, 0);
3042 EncodedShiftImm = AArch64_AM::getShifterImm(
3043 AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3044 return true;
3045 }
3046 }
3047 return false;
3048 }
3049
3050 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3051 ShiftedOperand = Dst.getOperand(0);
3052 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3053 return true;
3054 }
3055
3056 uint64_t SrlImm;
3057 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3058 ShiftedOperand = Dst.getOperand(0);
3059 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3060 return true;
3061 }
3062 return false;
3063 }
3064
3065 // Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3066 // the operands and select it to AArch64::ORR with shifted registers if
3067 // that's more efficient. Returns true iff selection to AArch64::ORR happens.
3068 static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3069 SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3070 const bool BiggerPattern) {
3071 EVT VT = N->getValueType(0);
3072 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3073 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3074 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3075 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3076 assert((VT == MVT::i32 || VT == MVT::i64) &&
3077 "Expect result type to be i32 or i64 since N is combinable to BFM");
3078 SDLoc DL(N);
3079
3080 // Bail out if BFM simplifies away one node in BFM Dst.
3081 if (OrOpd1 != Dst)
3082 return false;
3083
3084 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3085 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3086 // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
3087 if (BiggerPattern) {
3088 uint64_t SrcAndImm;
3089 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3090 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3091 // OrOpd0 = AND Src, #Mask
3092 // So BFM simplifies away one AND node from Src and doesn't simplify away
3093 // nodes from Dst. If ORR with left-shifted operand also simplifies away
3094 // one node (from Rd), ORR is better since it has higher throughput and
3095 // smaller latency than BFM on many AArch64 processors (and for the rest
3096 // ORR is at least as good as BFM).
3097 SDValue ShiftedOperand;
3098 uint64_t EncodedShiftImm;
3099 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3100 EncodedShiftImm)) {
3101 SDValue Ops[] = {OrOpd0, ShiftedOperand,
3102 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3103 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3104 return true;
3105 }
3106 }
3107 return false;
3108 }
3109
3110 assert((!BiggerPattern) && "BiggerPattern should be handled above");
3111
3112 uint64_t ShlImm;
3113 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
3114 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3115 SDValue Ops[] = {
3116 Dst, Src,
3117 CurDAG->getTargetConstant(
3118 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3119 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3120 return true;
3121 }
3122
3123 // Select the following pattern to left-shifted operand rather than BFI.
3124 // %val1 = op ..
3125 // %val2 = shl %val1, #imm
3126 // %res = or %val1, %val2
3127 //
3128 // If N is selected to be BFI, we know that
3129 // 1) OrOpd0 would be the operand from which bits are extracted (i.e., folded
3130 // into BFI), and 2) OrOpd1 would be the destination operand (i.e., preserved).
3131 //
3132 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
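// For example, "%res = or i32 %val1, (shl i32 %val1, 3)" is selected to
// "orr w0, w1, w1, lsl #3" (register names are illustrative) instead of a BFI.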
3133 if (OrOpd0.getOperand(0) == OrOpd1) {
3134 SDValue Ops[] = {
3135 OrOpd1, OrOpd1,
3136 CurDAG->getTargetConstant(
3137 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
3138 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3139 return true;
3140 }
3141 }
3142
3143 uint64_t SrlImm;
3144 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3145 // Select the following pattern to right-shifted operand rather than BFXIL.
3146 // %val1 = op ..
3147 // %val2 = lshr %val1, #imm
3148 // %res = or %val1, %val2
3149 //
3150 // If N is selected to be BFXIL, we know that
3151 // 1) OrOpd0 would be the operand from which bits are extracted (i.e., folded
3152 // into BFXIL), and 2) OrOpd1 would be the destination operand (i.e., preserved).
3153 //
3154 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3155 if (OrOpd0.getOperand(0) == OrOpd1) {
3156 SDValue Ops[] = {
3157 OrOpd1, OrOpd1,
3158 CurDAG->getTargetConstant(
3159 AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)};
3160 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3161 return true;
3162 }
3163 }
3164
3165 return false;
3166 }
3167
3168 static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3169 SelectionDAG *CurDAG) {
3170 assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
3171
3172 EVT VT = N->getValueType(0);
3173 if (VT != MVT::i32 && VT != MVT::i64)
3174 return false;
3175
3176 unsigned BitWidth = VT.getSizeInBits();
3177
3178 // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3179 // have the expected shape. Try to undo that.
3180
3181 unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
3182 unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
3183
3184 // Given an OR operation, check if we have the following pattern
3185 // ubfm c, b, imm, imm2 (or something that does the same job, see
3186 // isBitfieldExtractOp)
3187 // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
3188 // countTrailingZeros(mask2) == imm2 - imm + 1
3189 // f = d | c
3190 // if yes, replace the OR instruction with:
3191 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
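// For example, with imm = 4 and imm2 = 11 on i32 (values chosen for
// illustration):
// c = ubfm b, #4, #11 ; i.e. ubfx b, #4, #8, extracting bits [11:4] of b
// d = e & 0xFFFFFF00 ; mask2 has 8 trailing zeros (== imm2 - imm + 1)
// f = d | c
// is replaced by "BFM e, b, LSB=4, MSB=11", i.e. a BFXIL that copies 8 bits
// starting at bit 4 of b into the low 8 bits of e.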
3192
3193 // OR is commutative, check all combinations of operand order and values of
3194 // BiggerPattern, i.e.
3195 // Opd0, Opd1, BiggerPattern=false
3196 // Opd1, Opd0, BiggerPattern=false
3197 // Opd0, Opd1, BiggerPattern=true
3198 // Opd1, Opd0, BiggerPattern=true
3199 // Several of these combinations may match, so check with BiggerPattern=false
3200 // first since that will produce better results by matching more instructions
3201 // and/or inserting fewer extra instructions.
3202 for (int I = 0; I < 4; ++I) {
3203
3204 SDValue Dst, Src;
3205 unsigned ImmR, ImmS;
3206 bool BiggerPattern = I / 2;
3207 SDValue OrOpd0Val = N->getOperand(I % 2);
3208 SDNode *OrOpd0 = OrOpd0Val.getNode();
3209 SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
3210 SDNode *OrOpd1 = OrOpd1Val.getNode();
3211
3212 unsigned BFXOpc;
3213 int DstLSB, Width;
3214 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
3215 NumberOfIgnoredLowBits, BiggerPattern)) {
3216 // Check that the returned opcode is compatible with the pattern,
3217 // i.e., same type and zero extended (U and not S)
3218 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3219 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3220 continue;
3221
3222 // Compute the width of the bitfield insertion
3223 DstLSB = 0;
3224 Width = ImmS - ImmR + 1;
3225 // FIXME: This constraint is to catch bitfield insertion; we may
3226 // want to widen the pattern if we want to grab the general bitfield
3227 // move case.
3228 if (Width <= 0)
3229 continue;
3230
3231 // If the mask on the insertee is correct, we have a BFXIL operation. We
3232 // can share the ImmR and ImmS values from the already-computed UBFM.
3233 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
3234 BiggerPattern,
3235 Src, DstLSB, Width)) {
3236 ImmR = (BitWidth - DstLSB) % BitWidth;
3237 ImmS = Width - 1;
3238 } else
3239 continue;
3240
3241 // Check the second part of the pattern
3242 EVT VT = OrOpd1Val.getValueType();
3243 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3244
3245 // Compute the Known Zero for the candidate of the first operand.
3246 // This allows us to catch more general cases than just looking for
3247 // AND with imm. Indeed, simplify-demanded-bits may have removed
3248 // the AND instruction because it proved it was useless.
3249 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
3250
3251 // Check if there is enough room for the second operand to appear
3252 // in the first one
3253 APInt BitsToBeInserted =
3254 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
3255
3256 if ((BitsToBeInserted & ~Known.Zero) != 0)
3257 continue;
3258
3259 // Set the first operand
3260 uint64_t Imm;
3261 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
3262 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3263 // In that case, we can eliminate the AND
3264 Dst = OrOpd1->getOperand(0);
3265 else
3266 // Maybe the AND has been removed by simplify-demanded-bits
3267 // or is useful because it discards more bits
3268 Dst = OrOpd1Val;
3269
3270 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3271 // with shifted operand is more efficient.
3272 if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3273 BiggerPattern))
3274 return true;
3275
3276 // both parts match
3277 SDLoc DL(N);
3278 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
3279 CurDAG->getTargetConstant(ImmS, DL, VT)};
3280 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3281 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3282 return true;
3283 }
3284
3285 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3286 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3287 // mask (e.g., 0x000ffff0).
3288 uint64_t Mask0Imm, Mask1Imm;
3289 SDValue And0 = N->getOperand(0);
3290 SDValue And1 = N->getOperand(1);
3291 if (And0.hasOneUse() && And1.hasOneUse() &&
3292 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
3293 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
3294 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3295 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
3296
3297 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3298 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3299 // bits to be inserted.
3300 if (isShiftedMask(Mask0Imm, VT)) {
3301 std::swap(And0, And1);
3302 std::swap(Mask0Imm, Mask1Imm);
3303 }
3304
3305 SDValue Src = And1->getOperand(0);
3306 SDValue Dst = And0->getOperand(0);
3307 unsigned LSB = countTrailingZeros(Mask1Imm);
3308 int Width = BitWidth - APInt(BitWidth, Mask0Imm).countPopulation();
3309
3310 // The BFXIL inserts the low-order bits from a source register, so right
3311 // shift the needed bits into place.
3312 SDLoc DL(N);
3313 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3314 uint64_t LsrImm = LSB;
3315 if (Src->hasOneUse() &&
3316 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
3317 (LsrImm + LSB) < BitWidth) {
3318 Src = Src->getOperand(0);
3319 LsrImm += LSB;
3320 }
3321
3322 SDNode *LSR = CurDAG->getMachineNode(
3323 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
3324 CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
3325
3326 // BFXIL is an alias of BFM, so translate to BFM operands.
3327 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3328 unsigned ImmS = Width - 1;
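// For example, for "or (and X, 0xFF0000FF), (and Y, 0x00FFFF00)" on i32:
// LSB == 8 and Width == 16, so Y is shifted right by 8 with the UBFM above,
// and the BFM created below uses ImmR == 24 and ImmS == 15, i.e.
// BFI X, (Y >> 8), #8, #16.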
3329
3330 // Create the BFXIL instruction.
3331 SDValue Ops[] = {Dst, SDValue(LSR, 0),
3332 CurDAG->getTargetConstant(ImmR, DL, VT),
3333 CurDAG->getTargetConstant(ImmS, DL, VT)};
3334 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3335 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3336 return true;
3337 }
3338
3339 return false;
3340 }
3341
3342 bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3343 if (N->getOpcode() != ISD::OR)
3344 return false;
3345
3346 APInt NUsefulBits;
3347 getUsefulBits(SDValue(N, 0), NUsefulBits);
3348
3349 // If none of the bits are useful, just return UNDEF.
3350 if (!NUsefulBits) {
3351 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3352 return true;
3353 }
3354
3355 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3356 return true;
3357
3358 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3359 }
3360
3361 /// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3362 /// equivalent of a left shift by a constant amount followed by an and masking
3363 /// out a contiguous set of bits.
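/// For example, "(and (shl x, 4), 0xF0)" on i32 is selected as UBFM with
/// ImmR == 28 and ImmS == 3, i.e. "ubfiz w0, w1, #4, #4" (register names are
/// illustrative).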
3364 bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3365 if (N->getOpcode() != ISD::AND)
3366 return false;
3367
3368 EVT VT = N->getValueType(0);
3369 if (VT != MVT::i32 && VT != MVT::i64)
3370 return false;
3371
3372 SDValue Op0;
3373 int DstLSB, Width;
3374 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
3375 Op0, DstLSB, Width))
3376 return false;
3377
3378 // ImmR is the rotate right amount.
3379 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
3380 // ImmS is the most significant bit of the source to be moved.
3381 unsigned ImmS = Width - 1;
3382
3383 SDLoc DL(N);
3384 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
3385 CurDAG->getTargetConstant(ImmS, DL, VT)};
3386 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3387 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3388 return true;
3389 }
3390
3391 /// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
3392 /// variable shift/rotate instructions.
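/// For example, "(shl i32 x, (and y, 31))" is selected to LSLVWr x, y: LSLV
/// only uses the low 5 bits of the shift amount, so the AND is redundant and
/// can be dropped.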
3393 bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
3394 EVT VT = N->getValueType(0);
3395
3396 unsigned Opc;
3397 switch (N->getOpcode()) {
3398 case ISD::ROTR:
3399 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
3400 break;
3401 case ISD::SHL:
3402 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
3403 break;
3404 case ISD::SRL:
3405 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
3406 break;
3407 case ISD::SRA:
3408 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
3409 break;
3410 default:
3411 return false;
3412 }
3413
3414 uint64_t Size;
3415 uint64_t Bits;
3416 if (VT == MVT::i32) {
3417 Bits = 5;
3418 Size = 32;
3419 } else if (VT == MVT::i64) {
3420 Bits = 6;
3421 Size = 64;
3422 } else
3423 return false;
3424
3425 SDValue ShiftAmt = N->getOperand(1);
3426 SDLoc DL(N);
3427 SDValue NewShiftAmt;
3428
3429 // Skip over an extend of the shift amount.
3430 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
3431 ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
3432 ShiftAmt = ShiftAmt->getOperand(0);
3433
3434 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
3435 SDValue Add0 = ShiftAmt->getOperand(0);
3436 SDValue Add1 = ShiftAmt->getOperand(1);
3437 uint64_t Add0Imm;
3438 uint64_t Add1Imm;
3439 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
3440 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
3441 // to avoid the ADD/SUB.
3442 NewShiftAmt = Add0;
3443 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3444 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
3445 (Add0Imm % Size == 0)) {
3446 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
3447 // to generate a NEG instead of a SUB from a constant.
3448 unsigned NegOpc;
3449 unsigned ZeroReg;
3450 EVT SubVT = ShiftAmt->getValueType(0);
3451 if (SubVT == MVT::i32) {
3452 NegOpc = AArch64::SUBWrr;
3453 ZeroReg = AArch64::WZR;
3454 } else {
3455 assert(SubVT == MVT::i64);
3456 NegOpc = AArch64::SUBXrr;
3457 ZeroReg = AArch64::XZR;
3458 }
3459 SDValue Zero =
3460 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3461 MachineSDNode *Neg =
3462 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
3463 NewShiftAmt = SDValue(Neg, 0);
3464 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
3465 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
3466 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
3467 // to generate a NOT instead of a SUB from a constant.
3468 unsigned NotOpc;
3469 unsigned ZeroReg;
3470 EVT SubVT = ShiftAmt->getValueType(0);
3471 if (SubVT == MVT::i32) {
3472 NotOpc = AArch64::ORNWrr;
3473 ZeroReg = AArch64::WZR;
3474 } else {
3475 assert(SubVT == MVT::i64);
3476 NotOpc = AArch64::ORNXrr;
3477 ZeroReg = AArch64::XZR;
3478 }
3479 SDValue Zero =
3480 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
3481 MachineSDNode *Not =
3482 CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
3483 NewShiftAmt = SDValue(Not, 0);
3484 } else
3485 return false;
3486 } else {
3487 // If the shift amount is masked with an AND, check that the mask covers the
3488 // bits that are implicitly ANDed off by the above opcodes and if so, skip
3489 // the AND.
3490 uint64_t MaskImm;
3491 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
3492 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
3493 return false;
3494
3495 if (countTrailingOnes(MaskImm) < Bits)
3496 return false;
3497
3498 NewShiftAmt = ShiftAmt->getOperand(0);
3499 }
3500
3501 // Narrow/widen the shift amount to match the size of the shift operation.
3502 if (VT == MVT::i32)
3503 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
3504 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
3505 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
3506 MachineSDNode *Ext = CurDAG->getMachineNode(
3507 AArch64::SUBREG_TO_REG, DL, VT,
3508 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
3509 NewShiftAmt = SDValue(Ext, 0);
3510 }
3511
3512 SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
3513 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3514 return true;
3515 }
3516
3517 bool
3518 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
3519 unsigned RegWidth) {
3520 APFloat FVal(0.0);
3521 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
3522 FVal = CN->getValueAPF();
3523 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
3524 // Some otherwise illegal constants are allowed in this case.
3525 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
3526 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
3527 return false;
3528
3529 ConstantPoolSDNode *CN =
3530 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
3531 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
3532 } else
3533 return false;
3534
3535 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
3536 // is between 1 and 32 for a destination w-register, or 1 and 64 for an
3537 // x-register.
3538 //
3539 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
3540 // want THIS_NODE to be 2^fbits. This is much easier to deal with using
3541 // integers.
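// For example, (fp_to_sint (fmul x, 65536.0)) with a w-register destination
// gives FBits == 16, which corresponds to a fixed-point FCVTZS with 16
// fractional bits.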
3542 bool IsExact;
3543
3544 // fbits is between 1 and 64 in the worst-case, which means the fmul
3545 // could have 2^64 as an actual operand. Need 65 bits of precision.
3546 APSInt IntVal(65, true);
3547 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
3548
3549 // N.b. isPowerOf2 also checks for > 0.
3550 if (!IsExact || !IntVal.isPowerOf2()) return false;
3551 unsigned FBits = IntVal.logBase2();
3552
3553 // Checks above should have guaranteed that we haven't lost information in
3554 // finding FBits, but it must still be in range.
3555 if (FBits == 0 || FBits > RegWidth) return false;
3556
3557 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
3558 return true;
3559 }
3560
3561 // Inspects a register string of the form o0:op1:CRn:CRm:op2, extracts the
3562 // integer values of the fields, and combines them into a single value to be
3563 // used in the MRS/MSR instruction.
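// For example (field values chosen for illustration), o0 = 1, op1 = 3,
// CRn = 13, CRm = 0, op2 = 2 combine to
// (1 << 14) | (3 << 11) | (13 << 7) | (0 << 3) | 2 == 24194.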
3564 static int getIntOperandFromRegisterString(StringRef RegString) {
3565 SmallVector<StringRef, 5> Fields;
3566 RegString.split(Fields, ':');
3567
3568 if (Fields.size() == 1)
3569 return -1;
3570
3571 assert(Fields.size() == 5
3572 && "Invalid number of fields in read register string");
3573
3574 SmallVector<int, 5> Ops;
3575 bool AllIntFields = true;
3576
3577 for (StringRef Field : Fields) {
3578 unsigned IntField;
3579 AllIntFields &= !Field.getAsInteger(10, IntField);
3580 Ops.push_back(IntField);
3581 }
3582
3583 assert(AllIntFields &&
3584 "Unexpected non-integer value in special register string.");
3585 (void)AllIntFields;
3586
3587 // Need to combine the integer fields of the string into a single value
3588 // based on the bit encoding of MRS/MSR instruction.
3589 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
3590 (Ops[3] << 3) | (Ops[4]);
3591 }
3592
3593 // Lower the read_register intrinsic to an MRS instruction node if the special
3594 // register string argument is either of the form detailed in the ACLE (the
3595 // form described in getIntOperandFromRegisterString) or is a named register
3596 // known by the MRS SysReg mapper.
3597 bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
3598 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
3599 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
3600 SDLoc DL(N);
3601
3602 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
3603
3604 unsigned Opcode64Bit = AArch64::MRS;
3605 int Imm = getIntOperandFromRegisterString(RegString->getString());
3606 if (Imm == -1) {
3607 // No match, so use the sysreg mapper to map the remaining possible strings to
3608 // the value for the register to be used for the instruction operand.
3609 const auto *TheReg =
3610 AArch64SysReg::lookupSysRegByName(RegString->getString());
3611 if (TheReg && TheReg->Readable &&
3612 TheReg->haveFeatures(Subtarget->getFeatureBits()))
3613 Imm = TheReg->Encoding;
3614 else
3615 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
3616
3617 if (Imm == -1) {
3618 // Still no match, see if this is "pc" or give up.
3619 if (!ReadIs128Bit && RegString->getString() == "pc") {
3620 Opcode64Bit = AArch64::ADR;
3621 Imm = 0;
3622 } else {
3623 return false;
3624 }
3625 }
3626 }
3627
3628 SDValue InChain = N->getOperand(0);
3629 SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
3630 if (!ReadIs128Bit) {
3631 CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
3632 {SysRegImm, InChain});
3633 } else {
3634 SDNode *MRRS = CurDAG->getMachineNode(
3635 AArch64::MRRS, DL,
3636 {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
3637 {SysRegImm, InChain});
3638
3639 // Sysregs are not endian. The even register always contains the low half
3640 // of the register.
3641 SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
3642 SDValue(MRRS, 0));
3643 SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
3644 SDValue(MRRS, 0));
3645 SDValue OutChain = SDValue(MRRS, 1);
3646
3647 ReplaceUses(SDValue(N, 0), Lo);
3648 ReplaceUses(SDValue(N, 1), Hi);
3649 ReplaceUses(SDValue(N, 2), OutChain);
3650 }
3651 return true;
3652 }
3653
3654 // Lower the write_register intrinsic to an MSR instruction node if the special
3655 // register string argument is either of the form detailed in the ACLE (the
3656 // form described in getIntOperandFromRegisterString) or is a named register
3657 // known by the MSR SysReg mapper.
3658 bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
3659 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
3660 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
3661 SDLoc DL(N);
3662
3663 bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
3664
3665 if (!WriteIs128Bit) {
3666 // Check if the register was one of those allowed as the pstatefield value
3667 // in the MSR (immediate) instruction. To accept the values allowed in the
3668 // pstatefield for the MSR (immediate) instruction, we also require that an
3669 // immediate value has been provided as an argument; we know that this is
3670 // the case as it has been ensured by semantic checking.
3671 auto trySelectPState = [&](auto PMapper, unsigned State) {
3672 if (PMapper) {
3673 assert(isa<ConstantSDNode>(N->getOperand(2)) &&
3674 "Expected a constant integer expression.");
3675 unsigned Reg = PMapper->Encoding;
3676 uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
3677 CurDAG->SelectNodeTo(
3678 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
3679 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
3680 return true;
3681 }
3682 return false;
3683 };
3684
3685 if (trySelectPState(
3686 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
3687 AArch64::MSRpstateImm4))
3688 return true;
3689 if (trySelectPState(
3690 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
3691 AArch64::MSRpstateImm1))
3692 return true;
3693 }
3694
3695 int Imm = getIntOperandFromRegisterString(RegString->getString());
3696 if (Imm == -1) {
3697 // Use the sysreg mapper to attempt to map the remaining possible strings
3698 // to the value for the register to be used for the MSR (register)
3699 // instruction operand.
3700 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
3701 if (TheReg && TheReg->Writeable &&
3702 TheReg->haveFeatures(Subtarget->getFeatureBits()))
3703 Imm = TheReg->Encoding;
3704 else
3705 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
3706
3707 if (Imm == -1)
3708 return false;
3709 }
3710
3711 SDValue InChain = N->getOperand(0);
3712 if (!WriteIs128Bit) {
3713 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
3714 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
3715 N->getOperand(2), InChain);
3716 } else {
3717 // No endian swap. The lower half always goes into the even subreg, and the
3718 // higher half always into the odd subreg.
3719 SDNode *Pair = CurDAG->getMachineNode(
3720 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
3721 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
3722 MVT::i32),
3723 N->getOperand(2),
3724 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
3725 N->getOperand(3),
3726 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
3727
3728 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
3729 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
3730 SDValue(Pair, 0), InChain);
3731 }
3732
3733 return true;
3734 }
3735
3736 /// We've got special pseudo-instructions for these
3737 bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
3738 unsigned Opcode;
3739 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
3740
3741 // Leave IR for LSE if subtarget supports it.
3742 if (Subtarget->hasLSE()) return false;
3743
3744 if (MemTy == MVT::i8)
3745 Opcode = AArch64::CMP_SWAP_8;
3746 else if (MemTy == MVT::i16)
3747 Opcode = AArch64::CMP_SWAP_16;
3748 else if (MemTy == MVT::i32)
3749 Opcode = AArch64::CMP_SWAP_32;
3750 else if (MemTy == MVT::i64)
3751 Opcode = AArch64::CMP_SWAP_64;
3752 else
3753 llvm_unreachable("Unknown AtomicCmpSwap type");
3754
3755 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
3756 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
3757 N->getOperand(0)};
3758 SDNode *CmpSwap = CurDAG->getMachineNode(
3759 Opcode, SDLoc(N),
3760 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
3761
3762 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
3763 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
3764
3765 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
3766 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
3767 CurDAG->RemoveDeadNode(N);
3768
3769 return true;
3770 }
3771
3772 bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
3773 SDValue &Shift) {
3774 if (!isa<ConstantSDNode>(N))
3775 return false;
3776
3777 SDLoc DL(N);
3778 uint64_t Val = cast<ConstantSDNode>(N)
3779 ->getAPIntValue()
3780 .trunc(VT.getFixedSizeInBits())
3781 .getZExtValue();
3782
3783 switch (VT.SimpleTy) {
3784 case MVT::i8:
3785 // All immediates are supported.
3786 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
3787 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
3788 return true;
3789 case MVT::i16:
3790 case MVT::i32:
3791 case MVT::i64:
3792 // Support 8bit unsigned immediates.
3793 if (Val <= 255) {
3794 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
3795 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
3796 return true;
3797 }
3798 // Support 16bit unsigned immediates that are a multiple of 256.
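// For example, the immediate 4608 (0x1200) is encoded as #0x12 with a left
// shift of 8.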
3799 if (Val <= 65280 && Val % 256 == 0) {
3800 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
3801 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
3802 return true;
3803 }
3804 break;
3805 default:
3806 break;
3807 }
3808
3809 return false;
3810 }
3811
3812 bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
3813 SDValue &Shift) {
3814 if (!isa<ConstantSDNode>(N))
3815 return false;
3816
3817 SDLoc DL(N);
3818 int64_t Val = cast<ConstantSDNode>(N)
3819 ->getAPIntValue()
3820 .trunc(VT.getFixedSizeInBits())
3821 .getSExtValue();
3822
3823 switch (VT.SimpleTy) {
3824 case MVT::i8:
3825 // All immediates are supported.
3826 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
3827 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
3828 return true;
3829 case MVT::i16:
3830 case MVT::i32:
3831 case MVT::i64:
3832 // Support 8bit signed immediates.
3833 if (Val >= -128 && Val <= 127) {
3834 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
3835 Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
3836 return true;
3837 }
3838 // Support 16bit signed immediates that are a multiple of 256.
3839 if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) {
3840 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
3841 Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32);
3842 return true;
3843 }
3844 break;
3845 default:
3846 break;
3847 }
3848
3849 return false;
3850 }
3851
3852 bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
3853 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
3854 int64_t ImmVal = CNode->getSExtValue();
3855 SDLoc DL(N);
3856 if (ImmVal >= -128 && ImmVal < 128) {
3857 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
3858 return true;
3859 }
3860 }
3861 return false;
3862 }
3863
3864 bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
3865 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
3866 uint64_t ImmVal = CNode->getZExtValue();
3867
3868 switch (VT.SimpleTy) {
3869 case MVT::i8:
3870 ImmVal &= 0xFF;
3871 break;
3872 case MVT::i16:
3873 ImmVal &= 0xFFFF;
3874 break;
3875 case MVT::i32:
3876 ImmVal &= 0xFFFFFFFF;
3877 break;
3878 case MVT::i64:
3879 break;
3880 default:
3881 llvm_unreachable("Unexpected type");
3882 }
3883
3884 if (ImmVal < 256) {
3885 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
3886 return true;
3887 }
3888 }
3889 return false;
3890 }
3891
3892 bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
3893 bool Invert) {
3894 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
3895 uint64_t ImmVal = CNode->getZExtValue();
3896 SDLoc DL(N);
3897
3898 if (Invert)
3899 ImmVal = ~ImmVal;
3900
3901 // Replicate the immediate across 64 bits, depending on the element size.
3902 switch (VT.SimpleTy) {
3903 case MVT::i8:
3904 ImmVal &= 0xFF;
3905 ImmVal |= ImmVal << 8;
3906 ImmVal |= ImmVal << 16;
3907 ImmVal |= ImmVal << 32;
3908 break;
3909 case MVT::i16:
3910 ImmVal &= 0xFFFF;
3911 ImmVal |= ImmVal << 16;
3912 ImmVal |= ImmVal << 32;
3913 break;
3914 case MVT::i32:
3915 ImmVal &= 0xFFFFFFFF;
3916 ImmVal |= ImmVal << 32;
3917 break;
3918 case MVT::i64:
3919 break;
3920 default:
3921 llvm_unreachable("Unexpected type");
3922 }
3923
3924 uint64_t encoding;
3925 if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
3926 Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
3927 return true;
3928 }
3929 }
3930 return false;
3931 }
3932
3933 // SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
3934 // Rather than attempt to normalise everything we can sometimes saturate the
3935 // shift amount during selection. This function also allows for consistent
3936 // isel patterns by ensuring the resulting "Imm" node is of the i32 type
3937 // required by the instructions.
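// For example, with Low == 1, High == 64 and AllowSaturation == true, a
// requested shift amount of 70 is selected as the immediate 64; with
// AllowSaturation == false it would be rejected instead.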
3938 bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
3939 uint64_t High, bool AllowSaturation,
3940 SDValue &Imm) {
3941 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
3942 uint64_t ImmVal = CN->getZExtValue();
3943
3944 // Reject shift amounts that are too small.
3945 if (ImmVal < Low)
3946 return false;
3947
3948 // Reject or saturate shift amounts that are too big.
3949 if (ImmVal > High) {
3950 if (!AllowSaturation)
3951 return false;
3952 ImmVal = High;
3953 }
3954
3955 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
3956 return true;
3957 }
3958
3959 return false;
3960 }
3961
3962 bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
3963 // tagp(FrameIndex, IRGstack, tag_offset):
3964 // since the offset between FrameIndex and IRGstack is a compile-time
3965 // constant, this can be lowered to a single ADDG instruction.
3966 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
3967 return false;
3968 }
3969
3970 SDValue IRG_SP = N->getOperand(2);
3971 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
3972 cast<ConstantSDNode>(IRG_SP->getOperand(1))->getZExtValue() !=
3973 Intrinsic::aarch64_irg_sp) {
3974 return false;
3975 }
3976
3977 const TargetLowering *TLI = getTargetLowering();
3978 SDLoc DL(N);
3979 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
3980 SDValue FiOp = CurDAG->getTargetFrameIndex(
3981 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
3982 int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
3983
3984 SDNode *Out = CurDAG->getMachineNode(
3985 AArch64::TAGPstack, DL, MVT::i64,
3986 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
3987 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
3988 ReplaceNode(N, Out);
3989 return true;
3990 }
3991
3992 void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
3993 assert(isa<ConstantSDNode>(N->getOperand(3)) &&
3994 "llvm.aarch64.tagp third argument must be an immediate");
3995 if (trySelectStackSlotTagP(N))
3996 return;
3997 // FIXME: above applies in any case when offset between Op1 and Op2 is a
3998 // compile-time constant, not just for stack allocations.
3999
4000 // General case for unrelated pointers in Op1 and Op2.
4001 SDLoc DL(N);
4002 int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
4003 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4004 {N->getOperand(1), N->getOperand(2)});
4005 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4006 {SDValue(N1, 0), N->getOperand(2)});
4007 SDNode *N3 = CurDAG->getMachineNode(
4008 AArch64::ADDG, DL, MVT::i64,
4009 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4010 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4011 ReplaceNode(N, N3);
4012 }
4013
4014 // NOTE: We cannot use EXTRACT_SUBREG in all cases because the fixed length
4015 // vector types larger than NEON don't have a matching SubRegIndex.
4016 static SDNode *extractSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
4017 assert(V.getValueType().isScalableVector() &&
4018 V.getValueType().getSizeInBits().getKnownMinValue() ==
4019 AArch64::SVEBitsPerBlock &&
4020 "Expected to extract from a packed scalable vector!");
4021 assert(VT.isFixedLengthVector() &&
4022 "Expected to extract a fixed length vector!");
4023
4024 SDLoc DL(V);
4025 switch (VT.getSizeInBits()) {
4026 case 64: {
4027 auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4028 return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
4029 }
4030 case 128: {
4031 auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4032 return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
4033 }
4034 default: {
4035 auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4036 return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
4037 }
4038 }
4039 }
4040
4041 // NOTE: We cannot use INSERT_SUBREG in all cases because the fixed length
4042 // vector types larger than NEON don't have a matching SubRegIndex.
4043 static SDNode *insertSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
4044 assert(VT.isScalableVector() &&
4045 VT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
4046 "Expected to insert into a packed scalable vector!");
4047 assert(V.getValueType().isFixedLengthVector() &&
4048 "Expected to insert a fixed length vector!");
4049
4050 SDLoc DL(V);
4051 switch (V.getValueType().getSizeInBits()) {
4052 case 64: {
4053 auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4054 auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
4055 return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
4056 SDValue(Container, 0), V, SubReg);
4057 }
4058 case 128: {
4059 auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4060 auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
4061 return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
4062 SDValue(Container, 0), V, SubReg);
4063 }
4064 default: {
4065 auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4066 return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
4067 }
4068 }
4069 }
4070
4071 void AArch64DAGToDAGISel::Select(SDNode *Node) {
4072 // If we have a custom node, we already have selected!
4073 if (Node->isMachineOpcode()) {
4074 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
4075 Node->setNodeId(-1);
4076 return;
4077 }
4078
4079 // A few custom selection cases.
4080 EVT VT = Node->getValueType(0);
4081
4082 switch (Node->getOpcode()) {
4083 default:
4084 break;
4085
4086 case ISD::ATOMIC_CMP_SWAP:
4087 if (SelectCMP_SWAP(Node))
4088 return;
4089 break;
4090
4091 case ISD::READ_REGISTER:
4092 case AArch64ISD::MRRS:
4093 if (tryReadRegister(Node))
4094 return;
4095 break;
4096
4097 case ISD::WRITE_REGISTER:
4098 case AArch64ISD::MSRR:
4099 if (tryWriteRegister(Node))
4100 return;
4101 break;
4102
4103 case ISD::ADD:
4104 if (tryMLAV64LaneV128(Node))
4105 return;
4106 break;
4107
4108 case ISD::LOAD: {
4109 // Try to select as an indexed load. Fall through to normal processing
4110 // if we can't.
4111 if (tryIndexedLoad(Node))
4112 return;
4113 break;
4114 }
4115
4116 case ISD::SRL:
4117 case ISD::AND:
4118 case ISD::SRA:
4119 case ISD::SIGN_EXTEND_INREG:
4120 if (tryBitfieldExtractOp(Node))
4121 return;
4122 if (tryBitfieldInsertInZeroOp(Node))
4123 return;
4124 [[fallthrough]];
4125 case ISD::ROTR:
4126 case ISD::SHL:
4127 if (tryShiftAmountMod(Node))
4128 return;
4129 break;
4130
4131 case ISD::SIGN_EXTEND:
4132 if (tryBitfieldExtractOpFromSExt(Node))
4133 return;
4134 break;
4135
4136 case ISD::FP_EXTEND:
4137 if (tryHighFPExt(Node))
4138 return;
4139 break;
4140
4141 case ISD::OR:
4142 if (tryBitfieldInsertOp(Node))
4143 return;
4144 break;
4145
4146 case ISD::EXTRACT_SUBVECTOR: {
4147 // Bail when not a "cast" like extract_subvector.
4148 if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue() != 0)
4149 break;
4150
4151 // Bail when normal isel can do the job.
4152 EVT InVT = Node->getOperand(0).getValueType();
4153 if (VT.isScalableVector() || InVT.isFixedLengthVector())
4154 break;
4155
4156 // NOTE: We can only get here when doing fixed length SVE code generation.
4157 // We do manual selection because the types involved are not linked to real
4158 // registers (despite being legal) and must be coerced into SVE registers.
4159 //
4160 // NOTE: If the above changes, be aware that selection will still not work
4161 // because the td definition of extract_vector does not support extracting
4162 // a fixed length vector from a scalable vector.
4163
4164 ReplaceNode(Node, extractSubReg(CurDAG, VT, Node->getOperand(0)));
4165 return;
4166 }
4167
4168 case ISD::INSERT_SUBVECTOR: {
4169 // Bail when not a "cast" like insert_subvector.
4170 if (cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue() != 0)
4171 break;
4172 if (!Node->getOperand(0).isUndef())
4173 break;
4174
4175 // Bail when normal isel should do the job.
4176 EVT InVT = Node->getOperand(1).getValueType();
4177 if (VT.isFixedLengthVector() || InVT.isScalableVector())
4178 break;
4179
4180 // NOTE: We can only get here when doing fixed length SVE code generation.
4181 // We do manual selection because the types involved are not linked to real
4182 // registers (despite being legal) and must be coerced into SVE registers.
4183 //
4184 // NOTE: If the above changes, be aware that selection will still not work
4185 // because the td definition of insert_vector does not support inserting a
4186 // fixed length vector into a scalable vector.
4187
4188 ReplaceNode(Node, insertSubReg(CurDAG, VT, Node->getOperand(1)));
4189 return;
4190 }
4191
4192 case ISD::Constant: {
4193 // Materialize zero constants as copies from WZR/XZR. This allows
4194 // the coalescer to propagate these into other instructions.
4195 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
4196 if (ConstNode->isZero()) {
4197 if (VT == MVT::i32) {
4198 SDValue New = CurDAG->getCopyFromReg(
4199 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
4200 ReplaceNode(Node, New.getNode());
4201 return;
4202 } else if (VT == MVT::i64) {
4203 SDValue New = CurDAG->getCopyFromReg(
4204 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
4205 ReplaceNode(Node, New.getNode());
4206 return;
4207 }
4208 }
4209 break;
4210 }
4211
4212 case ISD::FrameIndex: {
4213 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
4214 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
4215 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
4216 const TargetLowering *TLI = getTargetLowering();
4217 SDValue TFI = CurDAG->getTargetFrameIndex(
4218 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4219 SDLoc DL(Node);
4220 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
4221 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
4222 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
4223 return;
4224 }
4225 case ISD::INTRINSIC_W_CHAIN: {
4226 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
4227 switch (IntNo) {
4228 default:
4229 break;
4230 case Intrinsic::aarch64_ldaxp:
4231 case Intrinsic::aarch64_ldxp: {
4232 unsigned Op =
4233 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
4234 SDValue MemAddr = Node->getOperand(2);
4235 SDLoc DL(Node);
4236 SDValue Chain = Node->getOperand(0);
4237
4238 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
4239 MVT::Other, MemAddr, Chain);
4240
4241 // Transfer memoperands.
4242 MachineMemOperand *MemOp =
4243 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4244 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
4245 ReplaceNode(Node, Ld);
4246 return;
4247 }
4248 case Intrinsic::aarch64_stlxp:
4249 case Intrinsic::aarch64_stxp: {
4250 unsigned Op =
4251 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
4252 SDLoc DL(Node);
4253 SDValue Chain = Node->getOperand(0);
4254 SDValue ValLo = Node->getOperand(2);
4255 SDValue ValHi = Node->getOperand(3);
4256 SDValue MemAddr = Node->getOperand(4);
4257
4258 // Place arguments in the right order.
4259 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
4260
4261 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
4262 // Transfer memoperands.
4263 MachineMemOperand *MemOp =
4264 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
4265 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
4266
4267 ReplaceNode(Node, St);
4268 return;
4269 }
4270 case Intrinsic::aarch64_neon_ld1x2:
4271 if (VT == MVT::v8i8) {
4272 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
4273 return;
4274 } else if (VT == MVT::v16i8) {
4275 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
4276 return;
4277 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4278 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
4279 return;
4280 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4281 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
4282 return;
4283 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4284 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
4285 return;
4286 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4287 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
4288 return;
4289 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4290 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4291 return;
4292 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4293 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
4294 return;
4295 }
4296 break;
4297 case Intrinsic::aarch64_neon_ld1x3:
4298 if (VT == MVT::v8i8) {
4299 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
4300 return;
4301 } else if (VT == MVT::v16i8) {
4302 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
4303 return;
4304 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4305 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
4306 return;
4307 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4308 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
4309 return;
4310 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4311 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
4312 return;
4313 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4314 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
4315 return;
4316 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4317 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
4318 return;
4319 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4320 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
4321 return;
4322 }
4323 break;
4324 case Intrinsic::aarch64_neon_ld1x4:
4325 if (VT == MVT::v8i8) {
4326 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
4327 return;
4328 } else if (VT == MVT::v16i8) {
4329 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
4330 return;
4331 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4332 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
4333 return;
4334 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4335 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
4336 return;
4337 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4338 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
4339 return;
4340 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4341 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
4342 return;
4343 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4344 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
4345 return;
4346 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4347 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
4348 return;
4349 }
4350 break;
4351 case Intrinsic::aarch64_neon_ld2:
4352 if (VT == MVT::v8i8) {
4353 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
4354 return;
4355 } else if (VT == MVT::v16i8) {
4356 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
4357 return;
4358 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4359 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
4360 return;
4361 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4362 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
4363 return;
4364 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4365 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
4366 return;
4367 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4368 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
4369 return;
4370 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4371 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
4372 return;
4373 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4374 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
4375 return;
4376 }
4377 break;
4378 case Intrinsic::aarch64_neon_ld3:
4379 if (VT == MVT::v8i8) {
4380 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
4381 return;
4382 } else if (VT == MVT::v16i8) {
4383 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
4384 return;
4385 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4386 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
4387 return;
4388 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4389 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
4390 return;
4391 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4392 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
4393 return;
4394 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4395 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
4396 return;
4397 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4398 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
4399 return;
4400 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4401 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
4402 return;
4403 }
4404 break;
4405 case Intrinsic::aarch64_neon_ld4:
4406 if (VT == MVT::v8i8) {
4407 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
4408 return;
4409 } else if (VT == MVT::v16i8) {
4410 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
4411 return;
4412 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4413 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
4414 return;
4415 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4416 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
4417 return;
4418 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4419 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
4420 return;
4421 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4422 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
4423 return;
4424 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4425 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
4426 return;
4427 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4428 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
4429 return;
4430 }
4431 break;
4432 case Intrinsic::aarch64_neon_ld2r:
4433 if (VT == MVT::v8i8) {
4434 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
4435 return;
4436 } else if (VT == MVT::v16i8) {
4437 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
4438 return;
4439 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4440 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
4441 return;
4442 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4443 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
4444 return;
4445 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4446 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
4447 return;
4448 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4449 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
4450 return;
4451 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4452 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
4453 return;
4454 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4455 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
4456 return;
4457 }
4458 break;
4459 case Intrinsic::aarch64_neon_ld3r:
4460 if (VT == MVT::v8i8) {
4461 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
4462 return;
4463 } else if (VT == MVT::v16i8) {
4464 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
4465 return;
4466 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4467 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
4468 return;
4469 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4470 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
4471 return;
4472 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4473 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
4474 return;
4475 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4476 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
4477 return;
4478 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4479 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
4480 return;
4481 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4482 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
4483 return;
4484 }
4485 break;
4486 case Intrinsic::aarch64_neon_ld4r:
4487 if (VT == MVT::v8i8) {
4488 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
4489 return;
4490 } else if (VT == MVT::v16i8) {
4491 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
4492 return;
4493 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
4494 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
4495 return;
4496 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
4497 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
4498 return;
4499 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4500 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
4501 return;
4502 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4503 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
4504 return;
4505 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4506 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
4507 return;
4508 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4509 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
4510 return;
4511 }
4512 break;
4513 case Intrinsic::aarch64_neon_ld2lane:
4514 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4515 SelectLoadLane(Node, 2, AArch64::LD2i8);
4516 return;
4517 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4518 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4519 SelectLoadLane(Node, 2, AArch64::LD2i16);
4520 return;
4521 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4522 VT == MVT::v2f32) {
4523 SelectLoadLane(Node, 2, AArch64::LD2i32);
4524 return;
4525 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4526 VT == MVT::v1f64) {
4527 SelectLoadLane(Node, 2, AArch64::LD2i64);
4528 return;
4529 }
4530 break;
4531 case Intrinsic::aarch64_neon_ld3lane:
4532 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4533 SelectLoadLane(Node, 3, AArch64::LD3i8);
4534 return;
4535 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4536 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4537 SelectLoadLane(Node, 3, AArch64::LD3i16);
4538 return;
4539 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4540 VT == MVT::v2f32) {
4541 SelectLoadLane(Node, 3, AArch64::LD3i32);
4542 return;
4543 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4544 VT == MVT::v1f64) {
4545 SelectLoadLane(Node, 3, AArch64::LD3i64);
4546 return;
4547 }
4548 break;
4549 case Intrinsic::aarch64_neon_ld4lane:
4550 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4551 SelectLoadLane(Node, 4, AArch64::LD4i8);
4552 return;
4553 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4554 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4555 SelectLoadLane(Node, 4, AArch64::LD4i16);
4556 return;
4557 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4558 VT == MVT::v2f32) {
4559 SelectLoadLane(Node, 4, AArch64::LD4i32);
4560 return;
4561 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4562 VT == MVT::v1f64) {
4563 SelectLoadLane(Node, 4, AArch64::LD4i64);
4564 return;
4565 }
4566 break;
4567 case Intrinsic::aarch64_ld64b:
4568 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
4569 return;
4570 case Intrinsic::aarch64_sve_ld2_sret: {
4571 if (VT == MVT::nxv16i8) {
4572 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
4573 true);
4574 return;
4575 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4576 VT == MVT::nxv8bf16) {
4577 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
4578 true);
4579 return;
4580 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4581 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
4582 true);
4583 return;
4584 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4585 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
4586 true);
4587 return;
4588 }
4589 break;
4590 }
4591 case Intrinsic::aarch64_sve_ld3_sret: {
4592 if (VT == MVT::nxv16i8) {
4593 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
4594 true);
4595 return;
4596 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4597 VT == MVT::nxv8bf16) {
4598 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
4599 true);
4600 return;
4601 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4602 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
4603 true);
4604 return;
4605 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4606 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
4607 true);
4608 return;
4609 }
4610 break;
4611 }
4612 case Intrinsic::aarch64_sve_ld4_sret: {
4613 if (VT == MVT::nxv16i8) {
4614 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
4615 true);
4616 return;
4617 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
4618 VT == MVT::nxv8bf16) {
4619 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
4620 true);
4621 return;
4622 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
4623 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
4624 true);
4625 return;
4626 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
4627 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
4628 true);
4629 return;
4630 }
4631 break;
4632 }
4633 case Intrinsic::swift_async_context_addr: {
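      // Compute FP - 8: the Swift async context slot sits immediately below
      // the saved frame pointer in the extended frame record.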
4634 SDLoc DL(Node);
4635 SDValue Chain = Node->getOperand(0);
4636 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
4637 SDValue Res = SDValue(
4638 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
4639 CurDAG->getTargetConstant(8, DL, MVT::i32),
4640 CurDAG->getTargetConstant(0, DL, MVT::i32)),
4641 0);
4642 ReplaceUses(SDValue(Node, 0), Res);
4643 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
4644 CurDAG->RemoveDeadNode(Node);
4645
4646 auto &MF = CurDAG->getMachineFunction();
4647 MF.getFrameInfo().setFrameAddressIsTaken(true);
4648 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
4649 return;
4650 }
4651 }
4652 } break;
4653 case ISD::INTRINSIC_WO_CHAIN: {
4654 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
4655 switch (IntNo) {
4656 default:
4657 break;
4658 case Intrinsic::aarch64_tagp:
4659 SelectTagP(Node);
4660 return;
4661 case Intrinsic::aarch64_neon_tbl2:
4662 SelectTable(Node, 2,
4663 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
4664 false);
4665 return;
4666 case Intrinsic::aarch64_neon_tbl3:
4667 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
4668 : AArch64::TBLv16i8Three,
4669 false);
4670 return;
4671 case Intrinsic::aarch64_neon_tbl4:
4672 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
4673 : AArch64::TBLv16i8Four,
4674 false);
4675 return;
4676 case Intrinsic::aarch64_neon_tbx2:
4677 SelectTable(Node, 2,
4678 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
4679 true);
4680 return;
4681 case Intrinsic::aarch64_neon_tbx3:
4682 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
4683 : AArch64::TBXv16i8Three,
4684 true);
4685 return;
4686 case Intrinsic::aarch64_neon_tbx4:
4687 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
4688 : AArch64::TBXv16i8Four,
4689 true);
4690 return;
4691 case Intrinsic::aarch64_neon_smull:
4692 case Intrinsic::aarch64_neon_umull:
4693 if (tryMULLV64LaneV128(IntNo, Node))
4694 return;
4695 break;
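    // The SVE while-pair intrinsics produce a pair of predicates; pick the
    // opcode variant (B/H/S/D) matching the element width of the result
    // predicate type.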
4696 case Intrinsic::aarch64_sve_whilege_x2:
4697 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
4698 Node->getValueType(0),
4699 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
4700 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
4701 SelectWhilePair(Node, Op);
4702 return;
4703 case Intrinsic::aarch64_sve_whilegt_x2:
4704 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
4705 Node->getValueType(0),
4706 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
4707 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
4708 SelectWhilePair(Node, Op);
4709 return;
4710 case Intrinsic::aarch64_sve_whilehi_x2:
4711 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
4712 Node->getValueType(0),
4713 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
4714 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
4715 SelectWhilePair(Node, Op);
4716 return;
4717 case Intrinsic::aarch64_sve_whilehs_x2:
4718 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
4719 Node->getValueType(0),
4720 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
4721 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
4722 SelectWhilePair(Node, Op);
4723 return;
4724 case Intrinsic::aarch64_sve_whilele_x2:
4725 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
4726 Node->getValueType(0),
4727 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
4728 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
4729 SelectWhilePair(Node, Op);
4730 return;
4731 case Intrinsic::aarch64_sve_whilelo_x2:
4732 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
4733 Node->getValueType(0),
4734 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
4735 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
4736 SelectWhilePair(Node, Op);
4737 return;
4738 case Intrinsic::aarch64_sve_whilels_x2:
4739 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
4740 Node->getValueType(0),
4741 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
4742 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
4743 SelectWhilePair(Node, Op);
4744 return;
4745 case Intrinsic::aarch64_sve_whilelt_x2:
4746 if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
4747 Node->getValueType(0),
4748 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
4749 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
4750 SelectWhilePair(Node, Op);
4751 return;
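    // The multi-vector convert intrinsics need no type-based dispatch: each
    // one maps to a single fixed two- or four-register opcode.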
4752 case Intrinsic::aarch64_sve_fcvts_x2:
4753 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
4754 return;
4755 case Intrinsic::aarch64_sve_scvtf_x2:
4756 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
4757 return;
4758 case Intrinsic::aarch64_sve_fcvtu_x2:
4759 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
4760 return;
4761 case Intrinsic::aarch64_sve_ucvtf_x2:
4762 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
4763 return;
4764 case Intrinsic::aarch64_sve_fcvts_x4:
4765 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
4766 return;
4767 case Intrinsic::aarch64_sve_scvtf_x4:
4768 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
4769 return;
4770 case Intrinsic::aarch64_sve_fcvtu_x4:
4771 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
4772 return;
4773 case Intrinsic::aarch64_sve_ucvtf_x4:
4774 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
4775 return;
4776 }
4777 break;
4778 }
4779 case ISD::INTRINSIC_VOID: {
4780 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
4781 if (Node->getNumOperands() >= 3)
4782 VT = Node->getOperand(2)->getValueType(0);
4783 switch (IntNo) {
4784 default:
4785 break;
4786 case Intrinsic::aarch64_neon_st1x2: {
4787 if (VT == MVT::v8i8) {
4788 SelectStore(Node, 2, AArch64::ST1Twov8b);
4789 return;
4790 } else if (VT == MVT::v16i8) {
4791 SelectStore(Node, 2, AArch64::ST1Twov16b);
4792 return;
4793 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
4794 VT == MVT::v4bf16) {
4795 SelectStore(Node, 2, AArch64::ST1Twov4h);
4796 return;
4797 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
4798 VT == MVT::v8bf16) {
4799 SelectStore(Node, 2, AArch64::ST1Twov8h);
4800 return;
4801 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4802 SelectStore(Node, 2, AArch64::ST1Twov2s);
4803 return;
4804 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4805 SelectStore(Node, 2, AArch64::ST1Twov4s);
4806 return;
4807 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4808 SelectStore(Node, 2, AArch64::ST1Twov2d);
4809 return;
4810 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4811 SelectStore(Node, 2, AArch64::ST1Twov1d);
4812 return;
4813 }
4814 break;
4815 }
4816 case Intrinsic::aarch64_neon_st1x3: {
4817 if (VT == MVT::v8i8) {
4818 SelectStore(Node, 3, AArch64::ST1Threev8b);
4819 return;
4820 } else if (VT == MVT::v16i8) {
4821 SelectStore(Node, 3, AArch64::ST1Threev16b);
4822 return;
4823 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
4824 VT == MVT::v4bf16) {
4825 SelectStore(Node, 3, AArch64::ST1Threev4h);
4826 return;
4827 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
4828 VT == MVT::v8bf16) {
4829 SelectStore(Node, 3, AArch64::ST1Threev8h);
4830 return;
4831 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4832 SelectStore(Node, 3, AArch64::ST1Threev2s);
4833 return;
4834 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4835 SelectStore(Node, 3, AArch64::ST1Threev4s);
4836 return;
4837 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4838 SelectStore(Node, 3, AArch64::ST1Threev2d);
4839 return;
4840 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4841 SelectStore(Node, 3, AArch64::ST1Threev1d);
4842 return;
4843 }
4844 break;
4845 }
4846 case Intrinsic::aarch64_neon_st1x4: {
4847 if (VT == MVT::v8i8) {
4848 SelectStore(Node, 4, AArch64::ST1Fourv8b);
4849 return;
4850 } else if (VT == MVT::v16i8) {
4851 SelectStore(Node, 4, AArch64::ST1Fourv16b);
4852 return;
4853 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
4854 VT == MVT::v4bf16) {
4855 SelectStore(Node, 4, AArch64::ST1Fourv4h);
4856 return;
4857 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
4858 VT == MVT::v8bf16) {
4859 SelectStore(Node, 4, AArch64::ST1Fourv8h);
4860 return;
4861 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4862 SelectStore(Node, 4, AArch64::ST1Fourv2s);
4863 return;
4864 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4865 SelectStore(Node, 4, AArch64::ST1Fourv4s);
4866 return;
4867 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4868 SelectStore(Node, 4, AArch64::ST1Fourv2d);
4869 return;
4870 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4871 SelectStore(Node, 4, AArch64::ST1Fourv1d);
4872 return;
4873 }
4874 break;
4875 }
4876 case Intrinsic::aarch64_neon_st2: {
4877 if (VT == MVT::v8i8) {
4878 SelectStore(Node, 2, AArch64::ST2Twov8b);
4879 return;
4880 } else if (VT == MVT::v16i8) {
4881 SelectStore(Node, 2, AArch64::ST2Twov16b);
4882 return;
4883 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
4884 VT == MVT::v4bf16) {
4885 SelectStore(Node, 2, AArch64::ST2Twov4h);
4886 return;
4887 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
4888 VT == MVT::v8bf16) {
4889 SelectStore(Node, 2, AArch64::ST2Twov8h);
4890 return;
4891 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4892 SelectStore(Node, 2, AArch64::ST2Twov2s);
4893 return;
4894 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4895 SelectStore(Node, 2, AArch64::ST2Twov4s);
4896 return;
4897 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4898 SelectStore(Node, 2, AArch64::ST2Twov2d);
4899 return;
4900 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4901 SelectStore(Node, 2, AArch64::ST1Twov1d);
4902 return;
4903 }
4904 break;
4905 }
4906 case Intrinsic::aarch64_neon_st3: {
4907 if (VT == MVT::v8i8) {
4908 SelectStore(Node, 3, AArch64::ST3Threev8b);
4909 return;
4910 } else if (VT == MVT::v16i8) {
4911 SelectStore(Node, 3, AArch64::ST3Threev16b);
4912 return;
4913 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
4914 VT == MVT::v4bf16) {
4915 SelectStore(Node, 3, AArch64::ST3Threev4h);
4916 return;
4917 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
4918 VT == MVT::v8bf16) {
4919 SelectStore(Node, 3, AArch64::ST3Threev8h);
4920 return;
4921 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4922 SelectStore(Node, 3, AArch64::ST3Threev2s);
4923 return;
4924 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4925 SelectStore(Node, 3, AArch64::ST3Threev4s);
4926 return;
4927 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4928 SelectStore(Node, 3, AArch64::ST3Threev2d);
4929 return;
4930 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4931 SelectStore(Node, 3, AArch64::ST1Threev1d);
4932 return;
4933 }
4934 break;
4935 }
4936 case Intrinsic::aarch64_neon_st4: {
4937 if (VT == MVT::v8i8) {
4938 SelectStore(Node, 4, AArch64::ST4Fourv8b);
4939 return;
4940 } else if (VT == MVT::v16i8) {
4941 SelectStore(Node, 4, AArch64::ST4Fourv16b);
4942 return;
4943 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
4944 VT == MVT::v4bf16) {
4945 SelectStore(Node, 4, AArch64::ST4Fourv4h);
4946 return;
4947 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
4948 VT == MVT::v8bf16) {
4949 SelectStore(Node, 4, AArch64::ST4Fourv8h);
4950 return;
4951 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
4952 SelectStore(Node, 4, AArch64::ST4Fourv2s);
4953 return;
4954 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
4955 SelectStore(Node, 4, AArch64::ST4Fourv4s);
4956 return;
4957 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
4958 SelectStore(Node, 4, AArch64::ST4Fourv2d);
4959 return;
4960 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
4961 SelectStore(Node, 4, AArch64::ST1Fourv1d);
4962 return;
4963 }
4964 break;
4965 }
4966 case Intrinsic::aarch64_neon_st2lane: {
4967 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4968 SelectStoreLane(Node, 2, AArch64::ST2i8);
4969 return;
4970 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4971 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4972 SelectStoreLane(Node, 2, AArch64::ST2i16);
4973 return;
4974 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4975 VT == MVT::v2f32) {
4976 SelectStoreLane(Node, 2, AArch64::ST2i32);
4977 return;
4978 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4979 VT == MVT::v1f64) {
4980 SelectStoreLane(Node, 2, AArch64::ST2i64);
4981 return;
4982 }
4983 break;
4984 }
4985 case Intrinsic::aarch64_neon_st3lane: {
4986 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
4987 SelectStoreLane(Node, 3, AArch64::ST3i8);
4988 return;
4989 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
4990 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
4991 SelectStoreLane(Node, 3, AArch64::ST3i16);
4992 return;
4993 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
4994 VT == MVT::v2f32) {
4995 SelectStoreLane(Node, 3, AArch64::ST3i32);
4996 return;
4997 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
4998 VT == MVT::v1f64) {
4999 SelectStoreLane(Node, 3, AArch64::ST3i64);
5000 return;
5001 }
5002 break;
5003 }
5004 case Intrinsic::aarch64_neon_st4lane: {
5005 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5006 SelectStoreLane(Node, 4, AArch64::ST4i8);
5007 return;
5008 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5009 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5010 SelectStoreLane(Node, 4, AArch64::ST4i16);
5011 return;
5012 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5013 VT == MVT::v2f32) {
5014 SelectStoreLane(Node, 4, AArch64::ST4i32);
5015 return;
5016 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5017 VT == MVT::v1f64) {
5018 SelectStoreLane(Node, 4, AArch64::ST4i64);
5019 return;
5020 }
5021 break;
5022 }
5023 case Intrinsic::aarch64_sve_st2: {
5024 if (VT == MVT::nxv16i8) {
5025 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
5026 return;
5027 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5028 VT == MVT::nxv8bf16) {
5029 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
5030 return;
5031 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5032 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
5033 return;
5034 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5035 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
5036 return;
5037 }
5038 break;
5039 }
5040 case Intrinsic::aarch64_sve_st3: {
5041 if (VT == MVT::nxv16i8) {
5042 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
5043 return;
5044 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5045 VT == MVT::nxv8bf16) {
5046 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
5047 return;
5048 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5049 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
5050 return;
5051 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5052 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
5053 return;
5054 }
5055 break;
5056 }
5057 case Intrinsic::aarch64_sve_st4: {
5058 if (VT == MVT::nxv16i8) {
5059 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
5060 return;
5061 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5062 VT == MVT::nxv8bf16) {
5063 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
5064 return;
5065 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5066 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
5067 return;
5068 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5069 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
5070 return;
5071 }
5072 break;
5073 }
5074 }
5075 break;
5076 }
5077 case AArch64ISD::LD2post: {
5078 if (VT == MVT::v8i8) {
5079 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
5080 return;
5081 } else if (VT == MVT::v16i8) {
5082 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
5083 return;
5084 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5085 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
5086 return;
5087 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5088 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
5089 return;
5090 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5091 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
5092 return;
5093 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5094 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
5095 return;
5096 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5097 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
5098 return;
5099 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5100 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
5101 return;
5102 }
5103 break;
5104 }
5105 case AArch64ISD::LD3post: {
5106 if (VT == MVT::v8i8) {
5107 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
5108 return;
5109 } else if (VT == MVT::v16i8) {
5110 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
5111 return;
5112 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5113 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
5114 return;
5115 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5116 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
5117 return;
5118 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5119 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
5120 return;
5121 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5122 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
5123 return;
5124 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5125 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
5126 return;
5127 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5128 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
5129 return;
5130 }
5131 break;
5132 }
5133 case AArch64ISD::LD4post: {
5134 if (VT == MVT::v8i8) {
5135 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
5136 return;
5137 } else if (VT == MVT::v16i8) {
5138 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
5139 return;
5140 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5141 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
5142 return;
5143 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5144 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
5145 return;
5146 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5147 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
5148 return;
5149 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5150 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
5151 return;
5152 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5153 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
5154 return;
5155 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5156 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
5157 return;
5158 }
5159 break;
5160 }
5161 case AArch64ISD::LD1x2post: {
5162 if (VT == MVT::v8i8) {
5163 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
5164 return;
5165 } else if (VT == MVT::v16i8) {
5166 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
5167 return;
5168 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5169 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
5170 return;
5171 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5172 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
5173 return;
5174 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5175 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
5176 return;
5177 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5178 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
5179 return;
5180 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5181 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
5182 return;
5183 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5184 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
5185 return;
5186 }
5187 break;
5188 }
5189 case AArch64ISD::LD1x3post: {
5190 if (VT == MVT::v8i8) {
5191 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
5192 return;
5193 } else if (VT == MVT::v16i8) {
5194 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
5195 return;
5196 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5197 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
5198 return;
5199 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5200 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
5201 return;
5202 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5203 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
5204 return;
5205 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5206 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
5207 return;
5208 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5209 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
5210 return;
5211 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5212 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
5213 return;
5214 }
5215 break;
5216 }
5217 case AArch64ISD::LD1x4post: {
5218 if (VT == MVT::v8i8) {
5219 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
5220 return;
5221 } else if (VT == MVT::v16i8) {
5222 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
5223 return;
5224 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5225 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
5226 return;
5227 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5228 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
5229 return;
5230 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5231 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
5232 return;
5233 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5234 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
5235 return;
5236 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5237 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
5238 return;
5239 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5240 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
5241 return;
5242 }
5243 break;
5244 }
5245 case AArch64ISD::LD1DUPpost: {
5246 if (VT == MVT::v8i8) {
5247 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
5248 return;
5249 } else if (VT == MVT::v16i8) {
5250 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
5251 return;
5252 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5253 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
5254 return;
5255 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5256 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
5257 return;
5258 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5259 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
5260 return;
5261 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5262 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
5263 return;
5264 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5265 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
5266 return;
5267 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5268 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
5269 return;
5270 }
5271 break;
5272 }
5273 case AArch64ISD::LD2DUPpost: {
5274 if (VT == MVT::v8i8) {
5275 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
5276 return;
5277 } else if (VT == MVT::v16i8) {
5278 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
5279 return;
5280 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5281 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
5282 return;
5283 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5284 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
5285 return;
5286 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5287 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
5288 return;
5289 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5290 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
5291 return;
5292 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5293 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
5294 return;
5295 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5296 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
5297 return;
5298 }
5299 break;
5300 }
5301 case AArch64ISD::LD3DUPpost: {
5302 if (VT == MVT::v8i8) {
5303 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
5304 return;
5305 } else if (VT == MVT::v16i8) {
5306 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
5307 return;
5308 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5309 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
5310 return;
5311 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5312 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
5313 return;
5314 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5315 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
5316 return;
5317 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5318 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
5319 return;
5320 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5321 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
5322 return;
5323 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5324 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
5325 return;
5326 }
5327 break;
5328 }
5329 case AArch64ISD::LD4DUPpost: {
5330 if (VT == MVT::v8i8) {
5331 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
5332 return;
5333 } else if (VT == MVT::v16i8) {
5334 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
5335 return;
5336 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5337 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
5338 return;
5339 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5340 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
5341 return;
5342 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5343 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
5344 return;
5345 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5346 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
5347 return;
5348 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5349 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
5350 return;
5351 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5352 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
5353 return;
5354 }
5355 break;
5356 }
5357 case AArch64ISD::LD1LANEpost: {
5358 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5359 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
5360 return;
5361 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5362 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5363 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
5364 return;
5365 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5366 VT == MVT::v2f32) {
5367 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
5368 return;
5369 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5370 VT == MVT::v1f64) {
5371 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
5372 return;
5373 }
5374 break;
5375 }
5376 case AArch64ISD::LD2LANEpost: {
5377 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5378 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
5379 return;
5380 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5381 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5382 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
5383 return;
5384 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5385 VT == MVT::v2f32) {
5386 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
5387 return;
5388 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5389 VT == MVT::v1f64) {
5390 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
5391 return;
5392 }
5393 break;
5394 }
5395 case AArch64ISD::LD3LANEpost: {
5396 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5397 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
5398 return;
5399 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5400 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5401 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
5402 return;
5403 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5404 VT == MVT::v2f32) {
5405 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
5406 return;
5407 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5408 VT == MVT::v1f64) {
5409 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
5410 return;
5411 }
5412 break;
5413 }
5414 case AArch64ISD::LD4LANEpost: {
5415 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5416 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
5417 return;
5418 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5419 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5420 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
5421 return;
5422 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5423 VT == MVT::v2f32) {
5424 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
5425 return;
5426 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5427 VT == MVT::v1f64) {
5428 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
5429 return;
5430 }
5431 break;
5432 }
5433 case AArch64ISD::ST2post: {
5434 VT = Node->getOperand(1).getValueType();
5435 if (VT == MVT::v8i8) {
5436 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
5437 return;
5438 } else if (VT == MVT::v16i8) {
5439 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
5440 return;
5441 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5442 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
5443 return;
5444 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5445 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
5446 return;
5447 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5448 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
5449 return;
5450 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5451 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
5452 return;
5453 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5454 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
5455 return;
5456 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5457 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
5458 return;
5459 }
5460 break;
5461 }
5462 case AArch64ISD::ST3post: {
5463 VT = Node->getOperand(1).getValueType();
5464 if (VT == MVT::v8i8) {
5465 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
5466 return;
5467 } else if (VT == MVT::v16i8) {
5468 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
5469 return;
5470 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5471 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
5472 return;
5473 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5474 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
5475 return;
5476 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5477 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
5478 return;
5479 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5480 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
5481 return;
5482 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5483 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
5484 return;
5485 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5486 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
5487 return;
5488 }
5489 break;
5490 }
5491 case AArch64ISD::ST4post: {
5492 VT = Node->getOperand(1).getValueType();
5493 if (VT == MVT::v8i8) {
5494 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
5495 return;
5496 } else if (VT == MVT::v16i8) {
5497 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
5498 return;
5499 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5500 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
5501 return;
5502 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5503 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
5504 return;
5505 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5506 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
5507 return;
5508 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5509 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
5510 return;
5511 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5512 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
5513 return;
5514 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5515 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
5516 return;
5517 }
5518 break;
5519 }
5520 case AArch64ISD::ST1x2post: {
5521 VT = Node->getOperand(1).getValueType();
5522 if (VT == MVT::v8i8) {
5523 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
5524 return;
5525 } else if (VT == MVT::v16i8) {
5526 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
5527 return;
5528 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5529 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
5530 return;
5531 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5532 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
5533 return;
5534 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5535 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
5536 return;
5537 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5538 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
5539 return;
5540 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5541 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
5542 return;
5543 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5544 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
5545 return;
5546 }
5547 break;
5548 }
5549 case AArch64ISD::ST1x3post: {
5550 VT = Node->getOperand(1).getValueType();
5551 if (VT == MVT::v8i8) {
5552 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
5553 return;
5554 } else if (VT == MVT::v16i8) {
5555 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
5556 return;
5557 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5558 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
5559 return;
5560     } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5561 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
5562 return;
5563 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5564 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
5565 return;
5566 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5567 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
5568 return;
5569 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5570 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
5571 return;
5572 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5573 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
5574 return;
5575 }
5576 break;
5577 }
5578 case AArch64ISD::ST1x4post: {
5579 VT = Node->getOperand(1).getValueType();
5580 if (VT == MVT::v8i8) {
5581 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
5582 return;
5583 } else if (VT == MVT::v16i8) {
5584 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
5585 return;
5586 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5587 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
5588 return;
5589 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5590 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
5591 return;
5592 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5593 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
5594 return;
5595 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5596 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
5597 return;
5598 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5599 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
5600 return;
5601 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5602 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
5603 return;
5604 }
5605 break;
5606 }
5607 case AArch64ISD::ST2LANEpost: {
5608 VT = Node->getOperand(1).getValueType();
5609 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5610 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
5611 return;
5612 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5613 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5614 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
5615 return;
5616 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5617 VT == MVT::v2f32) {
5618 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
5619 return;
5620 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5621 VT == MVT::v1f64) {
5622 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
5623 return;
5624 }
5625 break;
5626 }
5627 case AArch64ISD::ST3LANEpost: {
5628 VT = Node->getOperand(1).getValueType();
5629 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5630 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
5631 return;
5632 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5633 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5634 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
5635 return;
5636 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5637 VT == MVT::v2f32) {
5638 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
5639 return;
5640 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5641 VT == MVT::v1f64) {
5642 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
5643 return;
5644 }
5645 break;
5646 }
5647 case AArch64ISD::ST4LANEpost: {
5648 VT = Node->getOperand(1).getValueType();
5649 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5650 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
5651 return;
5652 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5653 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5654 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
5655 return;
5656 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5657 VT == MVT::v2f32) {
5658 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
5659 return;
5660 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5661 VT == MVT::v1f64) {
5662 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
5663 return;
5664 }
5665 break;
5666 }
5667 case AArch64ISD::SVE_LD2_MERGE_ZERO: {
5668 if (VT == MVT::nxv16i8) {
5669 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B);
5670 return;
5671 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5672 VT == MVT::nxv8bf16) {
5673 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H);
5674 return;
5675 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5676 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W);
5677 return;
5678 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5679 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D);
5680 return;
5681 }
5682 break;
5683 }
5684 case AArch64ISD::SVE_LD3_MERGE_ZERO: {
5685 if (VT == MVT::nxv16i8) {
5686 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B);
5687 return;
5688 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5689 VT == MVT::nxv8bf16) {
5690 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H);
5691 return;
5692 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5693 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W);
5694 return;
5695 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5696 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D);
5697 return;
5698 }
5699 break;
5700 }
5701 case AArch64ISD::SVE_LD4_MERGE_ZERO: {
5702 if (VT == MVT::nxv16i8) {
5703 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B);
5704 return;
5705 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5706 VT == MVT::nxv8bf16) {
5707 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H);
5708 return;
5709 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5710 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W);
5711 return;
5712 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5713 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D);
5714 return;
5715 }
5716 break;
5717 }
5718 }
5719
5720 // Select the default instruction
5721 SelectCode(Node);
5722 }
5723
5724 /// createAArch64ISelDag - This pass converts a legalized DAG into an
5725 /// AArch64-specific DAG, ready for instruction scheduling.
5726 FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
5727 CodeGenOpt::Level OptLevel) {
5728 return new AArch64DAGToDAGISel(TM, OptLevel);
5729 }
5730
5731 /// When \p PredVT is a scalable vector predicate in the form
5732 /// MVT::nx<M>xi1, it builds the corresponding scalable vector of
5733 /// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
5734 /// structured vectors (NumVec > 1), the output data type is
5735 /// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
5736 /// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
5737 /// EVT.
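/// For example, PredVT == MVT::nxv4i1 gives MVT::nxv4i32 for NumVec == 1 and
/// MVT::nxv8i32 for NumVec == 2.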
5738 static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
5739 unsigned NumVec) {
5740 assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
5741 if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
5742 return EVT();
5743
5744 if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
5745 PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
5746 return EVT();
5747
5748 ElementCount EC = PredVT.getVectorElementCount();
5749 EVT ScalarVT =
5750 EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
5751 EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
5752
5753 return MemVT;
5754 }
5755
5756 /// Return the EVT of the data associated with a memory operation in \p
5757 /// Root. If such an EVT cannot be retrieved, it returns an invalid EVT.
5758 static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
5759 if (isa<MemSDNode>(Root))
5760 return cast<MemSDNode>(Root)->getMemoryVT();
5761
5762 if (isa<MemIntrinsicSDNode>(Root))
5763 return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();
5764
5765 const unsigned Opcode = Root->getOpcode();
5766 // For custom ISD nodes, we have to look at them individually to extract the
5767 // type of the data moved to/from memory.
5768 switch (Opcode) {
5769 case AArch64ISD::LD1_MERGE_ZERO:
5770 case AArch64ISD::LD1S_MERGE_ZERO:
5771 case AArch64ISD::LDNF1_MERGE_ZERO:
5772 case AArch64ISD::LDNF1S_MERGE_ZERO:
5773 return cast<VTSDNode>(Root->getOperand(3))->getVT();
5774 case AArch64ISD::ST1_PRED:
5775 return cast<VTSDNode>(Root->getOperand(4))->getVT();
5776 case AArch64ISD::SVE_LD2_MERGE_ZERO:
5777 return getPackedVectorTypeFromPredicateType(
5778 Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2);
5779 case AArch64ISD::SVE_LD3_MERGE_ZERO:
5780 return getPackedVectorTypeFromPredicateType(
5781 Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3);
5782 case AArch64ISD::SVE_LD4_MERGE_ZERO:
5783 return getPackedVectorTypeFromPredicateType(
5784 Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4);
5785 default:
5786 break;
5787 }
5788
5789 if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
5790 return EVT();
5791
5792 switch (cast<ConstantSDNode>(Root->getOperand(1))->getZExtValue()) {
5793 default:
5794 return EVT();
5795 case Intrinsic::aarch64_sme_ldr:
5796 case Intrinsic::aarch64_sme_str:
5797 return MVT::nxv16i8;
5798 case Intrinsic::aarch64_sve_prf:
5799 // We are using an SVE prefetch intrinsic. Type must be inferred from the
5800 // width of the predicate.
5801 return getPackedVectorTypeFromPredicateType(
5802 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
5803 case Intrinsic::aarch64_sve_ld2_sret:
5804 return getPackedVectorTypeFromPredicateType(
5805 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
5806 case Intrinsic::aarch64_sve_ld3_sret:
5807 return getPackedVectorTypeFromPredicateType(
5808 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
5809 case Intrinsic::aarch64_sve_ld4_sret:
5810 return getPackedVectorTypeFromPredicateType(
5811 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
5812 }
5813 }
5814
5815 /// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
5816 /// Base + OffImm * sizeof(MemVT), with Min <= OffImm <= Max,
5817 /// where Root is the memory access using N for its address.
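/// For example (illustrative), with MemVT == nxv4i32 (16 bytes per vscale
/// increment), an address of the form (add Base, (vscale * 32)) selects
/// Base with OffImm == #2.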
5818 template <int64_t Min, int64_t Max>
5819 bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
5820 SDValue &Base,
5821 SDValue &OffImm) {
5822 const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
5823 const DataLayout &DL = CurDAG->getDataLayout();
5824 const MachineFrameInfo &MFI = MF->getFrameInfo();
5825
5826 if (N.getOpcode() == ISD::FrameIndex) {
5827 int FI = cast<FrameIndexSDNode>(N)->getIndex();
5828 // We can only encode VL scaled offsets, so only fold in frame indexes
5829 // referencing SVE objects.
5830 if (FI == 0 || MFI.getStackID(FI) == TargetStackID::ScalableVector) {
5831 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
5832 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
5833 return true;
5834 }
5835
5836 return false;
5837 }
5838
5839 if (MemVT == EVT())
5840 return false;
5841
5842 if (N.getOpcode() != ISD::ADD)
5843 return false;
5844
5845 SDValue VScale = N.getOperand(1);
5846 if (VScale.getOpcode() != ISD::VSCALE)
5847 return false;
5848
5849 TypeSize TS = MemVT.getSizeInBits();
5850 int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
5851 int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
5852
5853 if ((MulImm % MemWidthBytes) != 0)
5854 return false;
5855
5856 int64_t Offset = MulImm / MemWidthBytes;
5857 if (Offset < Min || Offset > Max)
5858 return false;
5859
5860 Base = N.getOperand(0);
5861 if (Base.getOpcode() == ISD::FrameIndex) {
5862 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
5863 // We can only encode VL scaled offsets, so only fold in frame indexes
5864 // referencing SVE objects.
5865 if (FI == 0 || MFI.getStackID(FI) == TargetStackID::ScalableVector)
5866 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
5867 }
5868
5869 OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
5870 return true;
5871 }
5872
5873 /// Select register plus register addressing mode for SVE, with scaled
5874 /// offset.
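/// For example (illustrative), with Scale == 2 an address of the form
/// (add Base, (shl Idx, 2)) selects Base and Idx directly, while a constant
/// offset that is a multiple of 4 is divided by 4 and materialised into a
/// register with MOVi64imm.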
5875 bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
5876 SDValue &Base,
5877 SDValue &Offset) {
5878 if (N.getOpcode() != ISD::ADD)
5879 return false;
5880
5881 // Process an ADD node.
5882 const SDValue LHS = N.getOperand(0);
5883 const SDValue RHS = N.getOperand(1);
5884
5885   // 8-bit data is never scaled, so it does not come with an SHL node and is
5886   // treated separately.
5887 if (Scale == 0) {
5888 Base = LHS;
5889 Offset = RHS;
5890 return true;
5891 }
5892
5893 if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
5894 int64_t ImmOff = C->getSExtValue();
5895 unsigned Size = 1 << Scale;
5896
5897 // To use the reg+reg addressing mode, the immediate must be a multiple of
5898 // the vector element's byte size.
5899 if (ImmOff % Size)
5900 return false;
5901
5902 SDLoc DL(N);
5903 Base = LHS;
5904 Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
5905 SDValue Ops[] = {Offset};
5906 SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
5907 Offset = SDValue(MI, 0);
5908 return true;
5909 }
5910
5911 // Check if the RHS is a shift node with a constant.
5912 if (RHS.getOpcode() != ISD::SHL)
5913 return false;
5914
5915 const SDValue ShiftRHS = RHS.getOperand(1);
5916 if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
5917 if (C->getZExtValue() == Scale) {
5918 Base = LHS;
5919 Offset = RHS.getOperand(0);
5920 return true;
5921 }
5922
5923 return false;
5924 }
5925
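/// Returns true if \p N is known to be an SVE predicate with all lanes
/// active, as determined by AArch64TargetLowering::isAllActivePredicate.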
5926 bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
5927 const AArch64TargetLowering *TLI =
5928 static_cast<const AArch64TargetLowering *>(getTargetLowering());
5929
5930 return TLI->isAllActivePredicate(*CurDAG, N);
5931 }
5932
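/// Select a base register and immediate offset for an SME tile slice. A
/// constant offset is only folded when it is a non-negative multiple of
/// \p Scale no larger than \p MaxSize; the selected offset operand is the
/// constant divided by \p Scale.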
5933 bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
5934 SDValue &Base, SDValue &Offset,
5935 unsigned Scale) {
5936 if (N.getOpcode() != ISD::ADD) {
5937 Base = N;
5938 Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
5939 return true;
5940 }
5941
5942 // Process an ADD node.
5943 const SDValue LHS = N.getOperand(0);
5944 const SDValue RHS = N.getOperand(1);
5945
5946 if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
5947 int64_t ImmOff = C->getSExtValue();
5948
5949 if ((ImmOff < 0 || ImmOff > MaxSize) || (ImmOff % Scale != 0))
5950 return false;
5951
5952 Base = LHS;
5953 Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
5954 return true;
5955 }
5956
5957 return false;
5958 }
5959