1 //===-- VEISelLowering.cpp - VE DAG Lowering Implementation ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the interfaces that VE uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "VEISelLowering.h"
15 #include "MCTargetDesc/VEMCExpr.h"
16 #include "VECustomDAG.h"
17 #include "VEInstrBuilder.h"
18 #include "VEMachineFunctionInfo.h"
19 #include "VERegisterInfo.h"
20 #include "VETargetMachine.h"
21 #include "llvm/ADT/StringSwitch.h"
22 #include "llvm/CodeGen/CallingConvLower.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/MachineInstrBuilder.h"
26 #include "llvm/CodeGen/MachineJumpTableInfo.h"
27 #include "llvm/CodeGen/MachineModuleInfo.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/CodeGen/SelectionDAG.h"
30 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
31 #include "llvm/IR/DerivedTypes.h"
32 #include "llvm/IR/Function.h"
33 #include "llvm/IR/IRBuilder.h"
34 #include "llvm/IR/Module.h"
35 #include "llvm/Support/ErrorHandling.h"
36 #include "llvm/Support/KnownBits.h"
37 using namespace llvm;
38
39 #define DEBUG_TYPE "ve-lower"
40
41 //===----------------------------------------------------------------------===//
42 // Calling Convention Implementation
43 //===----------------------------------------------------------------------===//
44
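// VEGenCallingConv.inc is generated by TableGen from the calling-convention
// descriptions and provides the CCAssignFn implementations used below
// (CC_VE_C, CC_VE2, CC_VE_Fast, RetCC_VE_C, and RetCC_VE_Fast).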
45 #include "VEGenCallingConv.inc"
46
47 CCAssignFn *getReturnCC(CallingConv::ID CallConv) {
48 switch (CallConv) {
49 default:
50 return RetCC_VE_C;
51 case CallingConv::Fast:
52 return RetCC_VE_Fast;
53 }
54 }
55
56 CCAssignFn *getParamCC(CallingConv::ID CallConv, bool IsVarArg) {
57 if (IsVarArg)
58 return CC_VE2;
59 switch (CallConv) {
60 default:
61 return CC_VE_C;
62 case CallingConv::Fast:
63 return CC_VE_Fast;
64 }
65 }
66
67 bool VETargetLowering::CanLowerReturn(
68 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
69 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
70 CCAssignFn *RetCC = getReturnCC(CallConv);
71 SmallVector<CCValAssign, 16> RVLocs;
72 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
73 return CCInfo.CheckReturn(Outs, RetCC);
74 }
75
76 static const MVT AllVectorVTs[] = {MVT::v256i32, MVT::v512i32, MVT::v256i64,
77 MVT::v256f32, MVT::v512f32, MVT::v256f64};
78
79 static const MVT AllMaskVTs[] = {MVT::v256i1, MVT::v512i1};
80
81 static const MVT AllPackedVTs[] = {MVT::v512i32, MVT::v512f32};
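// The "packed" vector types carry two 32-bit elements per 64-bit vector
// element, so v512i32/v512f32 still fit in one vector register; their
// element insert/extract is custom-lowered in initVPUActions().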
82
83 void VETargetLowering::initRegisterClasses() {
84 // Set up the register classes.
85 addRegisterClass(MVT::i32, &VE::I32RegClass);
86 addRegisterClass(MVT::i64, &VE::I64RegClass);
87 addRegisterClass(MVT::f32, &VE::F32RegClass);
88 addRegisterClass(MVT::f64, &VE::I64RegClass);
89 addRegisterClass(MVT::f128, &VE::F128RegClass);
90
91 if (Subtarget->enableVPU()) {
92 for (MVT VecVT : AllVectorVTs)
93 addRegisterClass(VecVT, &VE::V64RegClass);
94 addRegisterClass(MVT::v256i1, &VE::VMRegClass);
95 addRegisterClass(MVT::v512i1, &VE::VM512RegClass);
96 }
97 }
98
99 void VETargetLowering::initSPUActions() {
100 const auto &TM = getTargetMachine();
101 /// Load & Store {
102
103 // VE doesn't have an i1 sign-extending load.
104 for (MVT VT : MVT::integer_valuetypes()) {
105 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
106 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
107 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
108 setTruncStoreAction(VT, MVT::i1, Expand);
109 }
110
111 // VE doesn't have floating point extload/truncstore, so expand them.
112 for (MVT FPVT : MVT::fp_valuetypes()) {
113 for (MVT OtherFPVT : MVT::fp_valuetypes()) {
114 setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand);
115 setTruncStoreAction(FPVT, OtherFPVT, Expand);
116 }
117 }
118
119 // VE doesn't have fp128 load/store, so expand them in custom lower.
120 setOperationAction(ISD::LOAD, MVT::f128, Custom);
121 setOperationAction(ISD::STORE, MVT::f128, Custom);
122
123 /// } Load & Store
124
125 // Custom legalize address nodes into LO/HI parts.
126 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
127 setOperationAction(ISD::BlockAddress, PtrVT, Custom);
128 setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
129 setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
130 setOperationAction(ISD::ConstantPool, PtrVT, Custom);
131 setOperationAction(ISD::JumpTable, PtrVT, Custom);
132
133 /// VAARG handling {
134 setOperationAction(ISD::VASTART, MVT::Other, Custom);
135 // VAARG needs to be lowered to accesses with 8-byte alignment.
136 setOperationAction(ISD::VAARG, MVT::Other, Custom);
137 // Use the default implementation.
138 setOperationAction(ISD::VACOPY, MVT::Other, Expand);
139 setOperationAction(ISD::VAEND, MVT::Other, Expand);
140 /// } VAARG handling
141
142 /// Stack {
143 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
144 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
145
146 // Use the default implementation.
147 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
148 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
149 /// } Stack
150
151 /// Branch {
152
153 // VE doesn't have BRCOND
154 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
155
156 // BR_JT is not implemented yet.
157 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
158
159 /// } Branch
160
161 /// Int Ops {
162 for (MVT IntVT : {MVT::i32, MVT::i64}) {
163 // VE has no REM or DIVREM operations.
164 setOperationAction(ISD::UREM, IntVT, Expand);
165 setOperationAction(ISD::SREM, IntVT, Expand);
166 setOperationAction(ISD::SDIVREM, IntVT, Expand);
167 setOperationAction(ISD::UDIVREM, IntVT, Expand);
168
169 // VE has no SHL_PARTS/SRA_PARTS/SRL_PARTS operations.
170 setOperationAction(ISD::SHL_PARTS, IntVT, Expand);
171 setOperationAction(ISD::SRA_PARTS, IntVT, Expand);
172 setOperationAction(ISD::SRL_PARTS, IntVT, Expand);
173
174 // VE has no MULHU/S or U/SMUL_LOHI operations.
175 // TODO: Use MPD instruction to implement SMUL_LOHI for i32 type.
176 setOperationAction(ISD::MULHU, IntVT, Expand);
177 setOperationAction(ISD::MULHS, IntVT, Expand);
178 setOperationAction(ISD::UMUL_LOHI, IntVT, Expand);
179 setOperationAction(ISD::SMUL_LOHI, IntVT, Expand);
180
181 // VE has no CTTZ, ROTL, ROTR operations.
182 setOperationAction(ISD::CTTZ, IntVT, Expand);
183 setOperationAction(ISD::ROTL, IntVT, Expand);
184 setOperationAction(ISD::ROTR, IntVT, Expand);
185
186 // VE has a 64-bit instruction which works as an i64 BSWAP operation. The
187 // same instruction also works as an i32 BSWAP operation with an additional
188 // parameter. Use isel patterns to lower BSWAP.
189 setOperationAction(ISD::BSWAP, IntVT, Legal);
190
191 // VE has only 64-bit instructions which work as i64 BITREVERSE/CTLZ/CTPOP
192 // operations. Use isel patterns for i64, promote for i32.
193 LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal;
194 setOperationAction(ISD::BITREVERSE, IntVT, Act);
195 setOperationAction(ISD::CTLZ, IntVT, Act);
196 setOperationAction(ISD::CTLZ_ZERO_UNDEF, IntVT, Act);
197 setOperationAction(ISD::CTPOP, IntVT, Act);
198
199 // VE has only 64-bit instructions which work as i64 AND/OR/XOR operations.
200 // Use isel patterns for i64, promote for i32.
201 setOperationAction(ISD::AND, IntVT, Act);
202 setOperationAction(ISD::OR, IntVT, Act);
203 setOperationAction(ISD::XOR, IntVT, Act);
204
205 // Legal smax and smin
206 setOperationAction(ISD::SMAX, IntVT, Legal);
207 setOperationAction(ISD::SMIN, IntVT, Legal);
208 }
209 /// } Int Ops
210
211 /// Conversion {
212 // VE doesn't have instructions for fp<->uint conversions, so let LLVM expand them.
213 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64
214 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64
215 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
216 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
217
218 // fp16 not supported
219 for (MVT FPVT : MVT::fp_valuetypes()) {
220 setOperationAction(ISD::FP16_TO_FP, FPVT, Expand);
221 setOperationAction(ISD::FP_TO_FP16, FPVT, Expand);
222 }
223 /// } Conversion
224
225 /// Floating-point Ops {
226 /// Note: Floating-point operations are fneg, fadd, fsub, fmul, fdiv, frem,
227 /// and fcmp.
228
229 // VE doesn't have the following floating-point operations.
230 for (MVT VT : MVT::fp_valuetypes()) {
231 setOperationAction(ISD::FNEG, VT, Expand);
232 setOperationAction(ISD::FREM, VT, Expand);
233 }
234
235 // VE doesn't have fdiv of f128.
236 setOperationAction(ISD::FDIV, MVT::f128, Expand);
237
238 for (MVT FPVT : {MVT::f32, MVT::f64}) {
239 // f32 and f64 use ConstantFP. f128 uses ConstantPool.
240 setOperationAction(ISD::ConstantFP, FPVT, Legal);
241 }
242 /// } Floating-point Ops
243
244 /// Floating-point math functions {
245
246 // VE doesn't have the following floating-point math functions.
247 for (MVT VT : MVT::fp_valuetypes()) {
248 setOperationAction(ISD::FABS, VT, Expand);
249 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
250 setOperationAction(ISD::FCOS, VT, Expand);
251 setOperationAction(ISD::FMA, VT, Expand);
252 setOperationAction(ISD::FPOW, VT, Expand);
253 setOperationAction(ISD::FSIN, VT, Expand);
254 setOperationAction(ISD::FSQRT, VT, Expand);
255 }
256
257 // VE has FMINNUM and FMAXNUM for single and double precision.
258 for (MVT VT : {MVT::f32, MVT::f64}) {
259 setOperationAction({ISD::FMAXNUM, ISD::FMINNUM}, VT, Legal);
260 }
261
262 /// } Floating-point math functions
263
264 /// Atomic instructions {
265
266 setMaxAtomicSizeInBitsSupported(64);
267 setMinCmpXchgSizeInBits(32);
268 setSupportsUnalignedAtomics(false);
269
270 // Use custom inserter for ATOMIC_FENCE.
271 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
272
273 // Other atomic instructions.
274 for (MVT VT : MVT::integer_valuetypes()) {
275 // Support i8/i16 atomic swap.
276 setOperationAction(ISD::ATOMIC_SWAP, VT, Custom);
277
278 // FIXME: Support "atmam" instructions.
279 setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Expand);
280 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Expand);
281 setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Expand);
282 setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Expand);
283
284 // VE doesn't have the following instructions.
285 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand);
286 setOperationAction(ISD::ATOMIC_LOAD_CLR, VT, Expand);
287 setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Expand);
288 setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand);
289 setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand);
290 setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand);
291 setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand);
292 setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand);
293 }
294
295 /// } Atomic instructions
296
297 /// SJLJ instructions {
298 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
299 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
300 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
301 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
302 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
303 /// } SJLJ instructions
304
305 // Intrinsic instructions
306 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
307 }
308
309 void VETargetLowering::initVPUActions() {
310 for (MVT LegalMaskVT : AllMaskVTs)
311 setOperationAction(ISD::BUILD_VECTOR, LegalMaskVT, Custom);
312
313 for (unsigned Opc : {ISD::AND, ISD::OR, ISD::XOR})
314 setOperationAction(Opc, MVT::v512i1, Custom);
315
316 for (MVT LegalVecVT : AllVectorVTs) {
317 setOperationAction(ISD::BUILD_VECTOR, LegalVecVT, Custom);
318 setOperationAction(ISD::INSERT_VECTOR_ELT, LegalVecVT, Legal);
319 setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalVecVT, Legal);
320 // Translate all vector instructions with legal element types to VVP_*
321 // nodes.
322 // TODO We will custom-widen into VVP_* nodes in the future. While we are
323 // building the infrastructure for this, we only do this for legal vector
324 // VTs.
325 #define HANDLE_VP_TO_VVP(VP_OPC, VVP_NAME) \
326 setOperationAction(ISD::VP_OPC, LegalVecVT, Custom);
327 #define ADD_VVP_OP(VVP_NAME, ISD_NAME) \
328 setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom);
329 setOperationAction(ISD::EXPERIMENTAL_VP_STRIDED_LOAD, LegalVecVT, Custom);
330 setOperationAction(ISD::EXPERIMENTAL_VP_STRIDED_STORE, LegalVecVT, Custom);
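// Including VVPNodes.def here expands the two macros above once per VP and
// VVP opcode, marking each of those opcodes Custom for this LegalVecVT.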
331 #include "VVPNodes.def"
332 }
333
334 for (MVT LegalPackedVT : AllPackedVTs) {
335 setOperationAction(ISD::INSERT_VECTOR_ELT, LegalPackedVT, Custom);
336 setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalPackedVT, Custom);
337 }
338
339 // vNt32, vNt64 ops (legal element types)
340 for (MVT VT : MVT::vector_valuetypes()) {
341 MVT ElemVT = VT.getVectorElementType();
342 unsigned ElemBits = ElemVT.getScalarSizeInBits();
343 if (ElemBits != 32 && ElemBits != 64)
344 continue;
345
346 for (unsigned MemOpc : {ISD::MLOAD, ISD::MSTORE, ISD::LOAD, ISD::STORE})
347 setOperationAction(MemOpc, VT, Custom);
348
349 const ISD::NodeType IntReductionOCs[] = {
350 ISD::VECREDUCE_ADD, ISD::VECREDUCE_MUL, ISD::VECREDUCE_AND,
351 ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMIN,
352 ISD::VECREDUCE_SMAX, ISD::VECREDUCE_UMIN, ISD::VECREDUCE_UMAX};
353
354 for (unsigned IntRedOpc : IntReductionOCs)
355 setOperationAction(IntRedOpc, VT, Custom);
356 }
357
358 // v256i1 and v512i1 ops
359 for (MVT MaskVT : AllMaskVTs) {
360 // Custom lower mask ops
361 setOperationAction(ISD::STORE, MaskVT, Custom);
362 setOperationAction(ISD::LOAD, MaskVT, Custom);
363 }
364 }
365
366 SDValue
367 VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
368 bool IsVarArg,
369 const SmallVectorImpl<ISD::OutputArg> &Outs,
370 const SmallVectorImpl<SDValue> &OutVals,
371 const SDLoc &DL, SelectionDAG &DAG) const {
372 // CCValAssign - represent the assignment of the return value to locations.
373 SmallVector<CCValAssign, 16> RVLocs;
374
375 // CCState - Info about the registers and stack slot.
376 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
377 *DAG.getContext());
378
379 // Analyze return values.
380 CCInfo.AnalyzeReturn(Outs, getReturnCC(CallConv));
381
382 SDValue Flag;
383 SmallVector<SDValue, 4> RetOps(1, Chain);
384
385 // Copy the result values into the output registers.
386 for (unsigned i = 0; i != RVLocs.size(); ++i) {
387 CCValAssign &VA = RVLocs[i];
388 assert(VA.isRegLoc() && "Can only return in registers!");
389 assert(!VA.needsCustom() && "Unexpected custom lowering");
390 SDValue OutVal = OutVals[i];
391
392 // Integer return values must be sign or zero extended by the callee.
393 switch (VA.getLocInfo()) {
394 case CCValAssign::Full:
395 break;
396 case CCValAssign::SExt:
397 OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal);
398 break;
399 case CCValAssign::ZExt:
400 OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal);
401 break;
402 case CCValAssign::AExt:
403 OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal);
404 break;
405 case CCValAssign::BCvt: {
406 // Convert a float return value to i64 with padding.
407 // 63 31 0
408 // +------+------+
409 // | float| 0 |
410 // +------+------+
411 assert(VA.getLocVT() == MVT::i64);
412 assert(VA.getValVT() == MVT::f32);
413 SDValue Undef = SDValue(
414 DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);
415 SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
416 OutVal = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
417 MVT::i64, Undef, OutVal, Sub_f32),
418 0);
419 break;
420 }
421 default:
422 llvm_unreachable("Unknown loc info!");
423 }
424
425 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag);
426
427 // Guarantee that all emitted copies are stuck together with flags.
428 Flag = Chain.getValue(1);
429 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
430 }
431
432 RetOps[0] = Chain; // Update chain.
433
434 // Add the flag if we have it.
435 if (Flag.getNode())
436 RetOps.push_back(Flag);
437
438 return DAG.getNode(VEISD::RET_FLAG, DL, MVT::Other, RetOps);
439 }
440
441 SDValue VETargetLowering::LowerFormalArguments(
442 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
443 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
444 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
445 MachineFunction &MF = DAG.getMachineFunction();
446
447 // Get the base offset of the incoming arguments stack space.
448 unsigned ArgsBaseOffset = Subtarget->getRsaSize();
449 // Get the size of the preserved arguments area
450 unsigned ArgsPreserved = 64;
451
452 // Analyze arguments according to CC_VE.
453 SmallVector<CCValAssign, 16> ArgLocs;
454 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
455 *DAG.getContext());
456 // Allocate the preserved area first.
457 CCInfo.AllocateStack(ArgsPreserved, Align(8));
458 // We already allocated the preserved area, so the stack offset computed
459 // by CC_VE would be correct now.
460 CCInfo.AnalyzeFormalArguments(Ins, getParamCC(CallConv, false));
461
462 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
463 CCValAssign &VA = ArgLocs[i];
464 assert(!VA.needsCustom() && "Unexpected custom lowering");
465 if (VA.isRegLoc()) {
466 // This argument is passed in a register.
467 // All integer register arguments are promoted by the caller to i64.
468
469 // Create a virtual register for the promoted live-in value.
470 Register VReg =
471 MF.addLiveIn(VA.getLocReg(), getRegClassFor(VA.getLocVT()));
472 SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());
473
474 // The caller promoted the argument, so insert an Assert?ext SDNode so we
475 // won't promote the value again in this function.
476 switch (VA.getLocInfo()) {
477 case CCValAssign::SExt:
478 Arg = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Arg,
479 DAG.getValueType(VA.getValVT()));
480 break;
481 case CCValAssign::ZExt:
482 Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg,
483 DAG.getValueType(VA.getValVT()));
484 break;
485 case CCValAssign::BCvt: {
486 // Extract a float argument from i64 with padding.
487 // 63 31 0
488 // +------+------+
489 // | float| 0 |
490 // +------+------+
491 assert(VA.getLocVT() == MVT::i64);
492 assert(VA.getValVT() == MVT::f32);
493 SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
494 Arg = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
495 MVT::f32, Arg, Sub_f32),
496 0);
497 break;
498 }
499 default:
500 break;
501 }
502
503 // Truncate the register down to the argument type.
504 if (VA.isExtInLoc())
505 Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
506
507 InVals.push_back(Arg);
508 continue;
509 }
510
511 // The registers are exhausted. This argument was passed on the stack.
512 assert(VA.isMemLoc());
513 // The CC_VE_Full/Half functions compute stack offsets relative to the
514 // beginning of the arguments area at %fp + the size of reserved area.
515 unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset;
516 unsigned ValSize = VA.getValVT().getSizeInBits() / 8;
517
518 // Adjust the offset for a float argument by adding 4, since the argument
519 // is stored in an 8-byte buffer with the layout below. LLVM generates a
520 // 4-byte load instruction, so the offset needs this adjustment. The
521 // adjustment is required only in LowerFormalArguments. In LowerCall,
522 // a float argument is first converted to i64 and stored as 8 bytes of
523 // data, as required by the ABI, so no adjustment is needed there.
524 // 0 4
525 // +------+------+
526 // | empty| float|
527 // +------+------+
528 if (VA.getValVT() == MVT::f32)
529 Offset += 4;
530
531 int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true);
532 InVals.push_back(
533 DAG.getLoad(VA.getValVT(), DL, Chain,
534 DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())),
535 MachinePointerInfo::getFixedStack(MF, FI)));
536 }
537
538 if (!IsVarArg)
539 return Chain;
540
541 // This function takes variable arguments, some of which may have been passed
542 // in registers %s0-%s8.
543 //
544 // The va_start intrinsic needs to know the offset to the first variable
545 // argument.
546 // TODO: need to calculate offset correctly once we support f128.
547 unsigned ArgOffset = ArgLocs.size() * 8;
548 VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
549 // Skip the reserved area at the top of stack.
550 FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset);
551
552 return Chain;
553 }
554
555 // FIXME? Maybe this could be a TableGen attribute on some registers and
556 // this table could be generated automatically from RegInfo.
557 Register VETargetLowering::getRegisterByName(const char *RegName, LLT VT,
558 const MachineFunction &MF) const {
559 Register Reg = StringSwitch<Register>(RegName)
560 .Case("sp", VE::SX11) // Stack pointer
561 .Case("fp", VE::SX9) // Frame pointer
562 .Case("sl", VE::SX8) // Stack limit
563 .Case("lr", VE::SX10) // Link register
564 .Case("tp", VE::SX14) // Thread pointer
565 .Case("outer", VE::SX12) // Outer register
566 .Case("info", VE::SX17) // Info area register
567 .Case("got", VE::SX15) // Global offset table register
568 .Case("plt", VE::SX16) // Procedure linkage table register
569 .Default(0);
570
571 if (Reg)
572 return Reg;
573
574 report_fatal_error("Invalid register name global variable");
575 }
576
577 //===----------------------------------------------------------------------===//
578 // TargetLowering Implementation
579 //===----------------------------------------------------------------------===//
580
581 SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
582 SmallVectorImpl<SDValue> &InVals) const {
583 SelectionDAG &DAG = CLI.DAG;
584 SDLoc DL = CLI.DL;
585 SDValue Chain = CLI.Chain;
586 auto PtrVT = getPointerTy(DAG.getDataLayout());
587
588 // VE target does not yet support tail call optimization.
589 CLI.IsTailCall = false;
590
591 // Get the base offset of the outgoing arguments stack space.
592 unsigned ArgsBaseOffset = Subtarget->getRsaSize();
593 // Get the size of the preserved arguments area
594 unsigned ArgsPreserved = 8 * 8u;
595
596 // Analyze operands of the call, assigning locations to each operand.
597 SmallVector<CCValAssign, 16> ArgLocs;
598 CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs,
599 *DAG.getContext());
600 // Allocate the preserved area first.
601 CCInfo.AllocateStack(ArgsPreserved, Align(8));
602 // We already allocated the preserved area, so the stack offset computed
603 // by CC_VE would be correct now.
604 CCInfo.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, false));
605
606 // VE requires the use of both registers and the stack for varargs or
607 // unprototyped functions.
608 bool UseBoth = CLI.IsVarArg;
609
610 // Analyze operands again if it is required to store BOTH.
611 SmallVector<CCValAssign, 16> ArgLocs2;
612 CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
613 ArgLocs2, *DAG.getContext());
614 if (UseBoth)
615 CCInfo2.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, true));
616
617 // Get the size of the outgoing arguments stack space requirement.
618 unsigned ArgsSize = CCInfo.getNextStackOffset();
619
620 // Keep stack frames 16-byte aligned.
621 ArgsSize = alignTo(ArgsSize, 16);
622
623 // Adjust the stack pointer to make room for the arguments.
624 // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
625 // with more than 6 arguments.
626 Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL);
627
628 // Collect the set of registers to pass to the function and their values.
629 // This will be emitted as a sequence of CopyToReg nodes glued to the call
630 // instruction.
631 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
632
633 // Collect chains from all the memory operations that copy arguments to the
634 // stack. They must follow the stack pointer adjustment above and precede the
635 // call instruction itself.
636 SmallVector<SDValue, 8> MemOpChains;
637
638 // VE needs the address of the callee function in a register,
639 // so prepare to copy it to SX12 here.
640
641 // If the callee is a GlobalAddress node (quite common, every direct call is)
642 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
643 // Likewise ExternalSymbol -> TargetExternalSymbol.
644 SDValue Callee = CLI.Callee;
645
646 bool IsPICCall = isPositionIndependent();
647
648 // PC-relative references to external symbols should go through $stub.
649 // If so, we need to prepare GlobalBaseReg first.
650 const TargetMachine &TM = DAG.getTarget();
651 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
652 const GlobalValue *GV = nullptr;
653 auto *CalleeG = dyn_cast<GlobalAddressSDNode>(Callee);
654 if (CalleeG)
655 GV = CalleeG->getGlobal();
656 bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
657 bool UsePlt = !Local;
658 MachineFunction &MF = DAG.getMachineFunction();
659
660 // Turn GlobalAddress/ExternalSymbol node into a value node
661 // containing the address of them here.
662 if (CalleeG) {
663 if (IsPICCall) {
664 if (UsePlt)
665 Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
666 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
667 Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
668 } else {
669 Callee =
670 makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
671 }
672 } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
673 if (IsPICCall) {
674 if (UsePlt)
675 Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
676 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
677 Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
678 } else {
679 Callee =
680 makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
681 }
682 }
683
684 RegsToPass.push_back(std::make_pair(VE::SX12, Callee));
685
686 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
687 CCValAssign &VA = ArgLocs[i];
688 SDValue Arg = CLI.OutVals[i];
689
690 // Promote the value if needed.
691 switch (VA.getLocInfo()) {
692 default:
693 llvm_unreachable("Unknown location info!");
694 case CCValAssign::Full:
695 break;
696 case CCValAssign::SExt:
697 Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
698 break;
699 case CCValAssign::ZExt:
700 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
701 break;
702 case CCValAssign::AExt:
703 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
704 break;
705 case CCValAssign::BCvt: {
706 // Convert a float argument to i64 with padding.
707 // 63 31 0
708 // +------+------+
709 // | float| 0 |
710 // +------+------+
711 assert(VA.getLocVT() == MVT::i64);
712 assert(VA.getValVT() == MVT::f32);
713 SDValue Undef = SDValue(
714 DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);
715 SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
716 Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
717 MVT::i64, Undef, Arg, Sub_f32),
718 0);
719 break;
720 }
721 }
722
723 if (VA.isRegLoc()) {
724 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
725 if (!UseBoth)
726 continue;
727 VA = ArgLocs2[i];
728 }
729
730 assert(VA.isMemLoc());
731
732 // Create a store off the stack pointer for this argument.
733 SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT);
734 // The argument area starts at %fp/%sp + the size of reserved area.
735 SDValue PtrOff =
736 DAG.getIntPtrConstant(VA.getLocMemOffset() + ArgsBaseOffset, DL);
737 PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
738 MemOpChains.push_back(
739 DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo()));
740 }
741
742 // Emit all stores, make sure they occur before the call.
743 if (!MemOpChains.empty())
744 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
745
746 // Build a sequence of CopyToReg nodes glued together with token chain and
747 // glue operands which copy the outgoing args into registers. The InGlue is
748 // necessary since all emitted instructions must be stuck together in order
749 // to pass the live physical registers.
750 SDValue InGlue;
751 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
752 Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first,
753 RegsToPass[i].second, InGlue);
754 InGlue = Chain.getValue(1);
755 }
756
757 // Build the operands for the call instruction itself.
758 SmallVector<SDValue, 8> Ops;
759 Ops.push_back(Chain);
760 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
761 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
762 RegsToPass[i].second.getValueType()));
763
764 // Add a register mask operand representing the call-preserved registers.
765 const VERegisterInfo *TRI = Subtarget->getRegisterInfo();
766 const uint32_t *Mask =
767 TRI->getCallPreservedMask(DAG.getMachineFunction(), CLI.CallConv);
768 assert(Mask && "Missing call preserved mask for calling convention");
769 Ops.push_back(DAG.getRegisterMask(Mask));
770
771 // Make sure the CopyToReg nodes are glued to the call instruction which
772 // consumes the registers.
773 if (InGlue.getNode())
774 Ops.push_back(InGlue);
775
776 // Now the call itself.
777 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
778 Chain = DAG.getNode(VEISD::CALL, DL, NodeTys, Ops);
779 InGlue = Chain.getValue(1);
780
781 // Revert the stack pointer immediately after the call.
782 Chain = DAG.getCALLSEQ_END(Chain, ArgsSize, 0, InGlue, DL);
783 InGlue = Chain.getValue(1);
784
785 // Now extract the return values. This is more or less the same as
786 // LowerFormalArguments.
787
788 // Assign locations to each value returned by this call.
789 SmallVector<CCValAssign, 16> RVLocs;
790 CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs,
791 *DAG.getContext());
792
793 // Set inreg flag manually for codegen generated library calls that
794 // return float.
795 if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CB)
796 CLI.Ins[0].Flags.setInReg();
797
798 RVInfo.AnalyzeCallResult(CLI.Ins, getReturnCC(CLI.CallConv));
799
800 // Copy all of the result registers out of their specified physreg.
801 for (unsigned i = 0; i != RVLocs.size(); ++i) {
802 CCValAssign &VA = RVLocs[i];
803 assert(!VA.needsCustom() && "Unexpected custom lowering");
804 Register Reg = VA.getLocReg();
805
806 // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can
807 // reside in the same register in the high and low bits. Reuse the
808 // CopyFromReg previous node to avoid duplicate copies.
809 SDValue RV;
810 if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1)))
811 if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)
812 RV = Chain.getValue(0);
813
814 // But usually we'll create a new CopyFromReg for a different register.
815 if (!RV.getNode()) {
816 RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue);
817 Chain = RV.getValue(1);
818 InGlue = Chain.getValue(2);
819 }
820
821 // The callee promoted the return value, so insert an Assert?ext SDNode so
822 // we won't promote the value again in this function.
823 switch (VA.getLocInfo()) {
824 case CCValAssign::SExt:
825 RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV,
826 DAG.getValueType(VA.getValVT()));
827 break;
828 case CCValAssign::ZExt:
829 RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,
830 DAG.getValueType(VA.getValVT()));
831 break;
832 case CCValAssign::BCvt: {
833 // Extract a float return value from i64 with padding.
834 // 63 31 0
835 // +------+------+
836 // | float| 0 |
837 // +------+------+
838 assert(VA.getLocVT() == MVT::i64);
839 assert(VA.getValVT() == MVT::f32);
840 SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
841 RV = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
842 MVT::f32, RV, Sub_f32),
843 0);
844 break;
845 }
846 default:
847 break;
848 }
849
850 // Truncate the register down to the return value type.
851 if (VA.isExtInLoc())
852 RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV);
853
854 InVals.push_back(RV);
855 }
856
857 return Chain;
858 }
859
860 bool VETargetLowering::isOffsetFoldingLegal(
861 const GlobalAddressSDNode *GA) const {
862 // VE uses 64-bit addressing, so we need multiple instructions to generate
863 // an address. Folding an address with an offset increases the number of
864 // instructions, so we disable it here. Offsets will be folded in
865 // the DAG combine later if it is worth doing so.
866 return false;
867 }
868
869 /// isFPImmLegal - Returns true if the target can instruction select the
870 /// specified FP immediate natively. If false, the legalizer will
871 /// materialize the FP immediate as a load from a constant pool.
872 bool VETargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
873 bool ForCodeSize) const {
874 return VT == MVT::f32 || VT == MVT::f64;
875 }
876
877 /// Determine if the target supports unaligned memory accesses.
878 ///
879 /// This function returns true if the target allows unaligned memory accesses
880 /// of the specified type in the given address space. If true, it also returns
881 /// whether the unaligned memory access is "fast" in the last argument by
882 /// reference. This is used, for example, in situations where an array
883 /// copy/move/set is converted to a sequence of store operations. Its use
884 /// helps to ensure that such replacements don't generate code that causes an
885 /// alignment error (trap) on the target machine.
886 bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
887 unsigned AddrSpace,
888 Align A,
889 MachineMemOperand::Flags,
890 unsigned *Fast) const {
891 if (Fast) {
892 // Unaligned accesses are always fast on VE.
893 *Fast = 1;
894 }
895 return true;
896 }
897
898 VETargetLowering::VETargetLowering(const TargetMachine &TM,
899 const VESubtarget &STI)
900 : TargetLowering(TM), Subtarget(&STI) {
901 // Instructions which use registers as conditionals examine all the
902 // bits (as does the pseudo SELECT_CC expansion). I don't think it
903 // matters much whether it's ZeroOrOneBooleanContent, or
904 // ZeroOrNegativeOneBooleanContent, so, arbitrarily choose the
905 // former.
906 setBooleanContents(ZeroOrOneBooleanContent);
907 setBooleanVectorContents(ZeroOrOneBooleanContent);
908
909 initRegisterClasses();
910 initSPUActions();
911 initVPUActions();
912
913 setStackPointerRegisterToSaveRestore(VE::SX11);
914
915 // We have target-specific dag combine patterns for the following nodes:
916 setTargetDAGCombine(ISD::TRUNCATE);
917 setTargetDAGCombine(ISD::SELECT);
918 setTargetDAGCombine(ISD::SELECT_CC);
919
920 // Set function alignment to 16 bytes
921 setMinFunctionAlignment(Align(16));
922
923 // VE stores all arguments with 8-byte alignment.
924 setMinStackArgumentAlignment(Align(8));
925
926 computeRegisterProperties(Subtarget->getRegisterInfo());
927 }
928
929 const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
930 #define TARGET_NODE_CASE(NAME) \
931 case VEISD::NAME: \
932 return "VEISD::" #NAME;
933 switch ((VEISD::NodeType)Opcode) {
934 case VEISD::FIRST_NUMBER:
935 break;
936 TARGET_NODE_CASE(CMPI)
937 TARGET_NODE_CASE(CMPU)
938 TARGET_NODE_CASE(CMPF)
939 TARGET_NODE_CASE(CMPQ)
940 TARGET_NODE_CASE(CMOV)
941 TARGET_NODE_CASE(CALL)
942 TARGET_NODE_CASE(EH_SJLJ_LONGJMP)
943 TARGET_NODE_CASE(EH_SJLJ_SETJMP)
944 TARGET_NODE_CASE(EH_SJLJ_SETUP_DISPATCH)
945 TARGET_NODE_CASE(GETFUNPLT)
946 TARGET_NODE_CASE(GETSTACKTOP)
947 TARGET_NODE_CASE(GETTLSADDR)
948 TARGET_NODE_CASE(GLOBAL_BASE_REG)
949 TARGET_NODE_CASE(Hi)
950 TARGET_NODE_CASE(Lo)
951 TARGET_NODE_CASE(RET_FLAG)
952 TARGET_NODE_CASE(TS1AM)
953 TARGET_NODE_CASE(VEC_UNPACK_LO)
954 TARGET_NODE_CASE(VEC_UNPACK_HI)
955 TARGET_NODE_CASE(VEC_PACK)
956 TARGET_NODE_CASE(VEC_BROADCAST)
957 TARGET_NODE_CASE(REPL_I32)
958 TARGET_NODE_CASE(REPL_F32)
959
960 TARGET_NODE_CASE(LEGALAVL)
961
962 // Register the VVP_* SDNodes.
963 #define ADD_VVP_OP(VVP_NAME, ...) TARGET_NODE_CASE(VVP_NAME)
964 #include "VVPNodes.def"
965 }
966 #undef TARGET_NODE_CASE
967 return nullptr;
968 }
969
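// setcc/compare results are produced as i32 values on VE regardless of the
// compared operand type.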
970 EVT VETargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
971 EVT VT) const {
972 return MVT::i32;
973 }
974
975 // Convert to a target node and set target flags.
976 SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF,
977 SelectionDAG &DAG) const {
978 if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
979 return DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
980 GA->getValueType(0), GA->getOffset(), TF);
981
982 if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op))
983 return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(),
984 0, TF);
985
986 if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op))
987 return DAG.getTargetConstantPool(CP->getConstVal(), CP->getValueType(0),
988 CP->getAlign(), CP->getOffset(), TF);
989
990 if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op))
991 return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
992 TF);
993
994 if (const JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op))
995 return DAG.getTargetJumpTable(JT->getIndex(), JT->getValueType(0), TF);
996
997 llvm_unreachable("Unhandled address SDNode");
998 }
999
1000 // Split Op into high and low parts according to HiTF and LoTF.
1001 // Return an ADD node combining the parts.
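// For example, with the absolute VK_VE_HI32/VK_VE_LO32 flags this typically
// selects to a sequence like (illustrative, mirroring the PIC sequences
// documented in makeAddress() below):
//   lea %reg, sym@lo
//   and %reg, %reg, (32)0
//   lea.sl %reg, sym@hi(, %reg)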
1002 SDValue VETargetLowering::makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
1003 SelectionDAG &DAG) const {
1004 SDLoc DL(Op);
1005 EVT VT = Op.getValueType();
1006 SDValue Hi = DAG.getNode(VEISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG));
1007 SDValue Lo = DAG.getNode(VEISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG));
1008 return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
1009 }
1010
1011 // Build SDNodes for producing an address from a GlobalAddress, ConstantPool,
1012 // or ExternalSymbol SDNode.
1013 SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
1014 SDLoc DL(Op);
1015 EVT PtrVT = Op.getValueType();
1016
1017 // Handle PIC mode first. VE needs a GOT load for every variable.
1018 if (isPositionIndependent()) {
1019 auto GlobalN = dyn_cast<GlobalAddressSDNode>(Op);
1020
1021 if (isa<ConstantPoolSDNode>(Op) || isa<JumpTableSDNode>(Op) ||
1022 (GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) {
1023 // Create the following instructions for local-linkage PIC code.
1024 // lea %reg, label@gotoff_lo
1025 // and %reg, %reg, (32)0
1026 // lea.sl %reg, label@gotoff_hi(%reg, %got)
1027 SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
1028 VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
1029 SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
1030 return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
1031 }
1032 // Create the following instructions for non-local-linkage PIC code.
1033 // lea %reg, label@got_lo
1034 // and %reg, %reg, (32)0
1035 // lea.sl %reg, label@got_hi(%reg)
1036 // ld %reg, (%reg, %got)
1037 SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOT_HI32,
1038 VEMCExpr::VK_VE_GOT_LO32, DAG);
1039 SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
1040 SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
1041 return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), AbsAddr,
1042 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
1043 }
1044
1045 // This is one of the absolute code models.
1046 switch (getTargetMachine().getCodeModel()) {
1047 default:
1048 llvm_unreachable("Unsupported absolute code model");
1049 case CodeModel::Small:
1050 case CodeModel::Medium:
1051 case CodeModel::Large:
1052 // abs64.
1053 return makeHiLoPair(Op, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
1054 }
1055 }
1056
1057 /// Custom Lower {
1058
1059 // The mappings for emitLeadingFence/emitTrailingFence for VE are designed
1060 // following http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
1061 Instruction *VETargetLowering::emitLeadingFence(IRBuilderBase &Builder,
1062 Instruction *Inst,
1063 AtomicOrdering Ord) const {
1064 switch (Ord) {
1065 case AtomicOrdering::NotAtomic:
1066 case AtomicOrdering::Unordered:
1067 llvm_unreachable("Invalid fence: unordered/non-atomic");
1068 case AtomicOrdering::Monotonic:
1069 case AtomicOrdering::Acquire:
1070 return nullptr; // Nothing to do
1071 case AtomicOrdering::Release:
1072 case AtomicOrdering::AcquireRelease:
1073 return Builder.CreateFence(AtomicOrdering::Release);
1074 case AtomicOrdering::SequentiallyConsistent:
1075 if (!Inst->hasAtomicStore())
1076 return nullptr; // Nothing to do
1077 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
1078 }
1079 llvm_unreachable("Unknown fence ordering in emitLeadingFence");
1080 }
1081
1082 Instruction *VETargetLowering::emitTrailingFence(IRBuilderBase &Builder,
1083 Instruction *Inst,
1084 AtomicOrdering Ord) const {
1085 switch (Ord) {
1086 case AtomicOrdering::NotAtomic:
1087 case AtomicOrdering::Unordered:
1088 llvm_unreachable("Invalid fence: unordered/not-atomic");
1089 case AtomicOrdering::Monotonic:
1090 case AtomicOrdering::Release:
1091 return nullptr; // Nothing to do
1092 case AtomicOrdering::Acquire:
1093 case AtomicOrdering::AcquireRelease:
1094 return Builder.CreateFence(AtomicOrdering::Acquire);
1095 case AtomicOrdering::SequentiallyConsistent:
1096 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
1097 }
1098 llvm_unreachable("Unknown fence ordering in emitTrailingFence");
1099 }
1100
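// The lowering below maps cross-thread fences as follows: acquire ->
// "fencem 2", release -> "fencem 1", acq_rel / seq_cst -> "fencem 3";
// weaker orderings and non-system-scoped fences become a compiler-only
// MEMBARRIER.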
1101 SDValue VETargetLowering::lowerATOMIC_FENCE(SDValue Op,
1102 SelectionDAG &DAG) const {
1103 SDLoc DL(Op);
1104 AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
1105 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
1106 SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
1107 cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
1108
1109 // VE uses release consistency, so we need a fence instruction if it is a
1110 // cross-thread fence.
1111 if (FenceSSID == SyncScope::System) {
1112 switch (FenceOrdering) {
1113 case AtomicOrdering::NotAtomic:
1114 case AtomicOrdering::Unordered:
1115 case AtomicOrdering::Monotonic:
1116 // No need to generate fencem instruction here.
1117 break;
1118 case AtomicOrdering::Acquire:
1119 // Generate "fencem 2" as acquire fence.
1120 return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
1121 DAG.getTargetConstant(2, DL, MVT::i32),
1122 Op.getOperand(0)),
1123 0);
1124 case AtomicOrdering::Release:
1125 // Generate "fencem 1" as release fence.
1126 return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
1127 DAG.getTargetConstant(1, DL, MVT::i32),
1128 Op.getOperand(0)),
1129 0);
1130 case AtomicOrdering::AcquireRelease:
1131 case AtomicOrdering::SequentiallyConsistent:
1132 // Generate "fencem 3" as acq_rel and seq_cst fence.
1133 // FIXME: "fencem 3" doesn't wait for PCIe device accesses,
1134 // so seq_cst may require more instructions for them.
1135 return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
1136 DAG.getTargetConstant(3, DL, MVT::i32),
1137 Op.getOperand(0)),
1138 0);
1139 }
1140 }
1141
1142 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
1143 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
1144 }
1145
1146 TargetLowering::AtomicExpansionKind
1147 VETargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
1148 // We have a TS1AM implementation for i8/i16/i32/i64, so use it.
1149 if (AI->getOperation() == AtomicRMWInst::Xchg) {
1150 return AtomicExpansionKind::None;
1151 }
1152 // FIXME: Support "ATMAM" instruction for LOAD_ADD/SUB/AND/OR.
1153
1154 // Otherwise, expand it using a compare-and-exchange instruction to avoid
1155 // calling the __sync_fetch_and_* functions.
1156 return AtomicExpansionKind::CmpXChg;
1157 }
1158
1159 static SDValue prepareTS1AM(SDValue Op, SelectionDAG &DAG, SDValue &Flag,
1160 SDValue &Bits) {
1161 SDLoc DL(Op);
1162 AtomicSDNode *N = cast<AtomicSDNode>(Op);
1163 SDValue Ptr = N->getOperand(1);
1164 SDValue Val = N->getOperand(2);
1165 EVT PtrVT = Ptr.getValueType();
1166 bool Byte = N->getMemoryVT() == MVT::i8;
1167 // Remainder = AND Ptr, 3
1168 // Flag = 1 << Remainder ; If Byte is true (1 byte swap flag)
1169 // Flag = 3 << Remainder ; If Byte is false (2 bytes swap flag)
1170 // Bits = Remainder << 3
1171 // NewVal = Val << Bits
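// Worked example: for an i16 swap where (Ptr & 3) == 2, Remainder = 2, so
// Flag = 3 << 2 = 0xc (selects bytes 2 and 3 of the aligned word),
// Bits = 2 << 3 = 16, and NewVal = Val << 16 moves the value into those
// byte lanes.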
1172 SDValue Const3 = DAG.getConstant(3, DL, PtrVT);
1173 SDValue Remainder = DAG.getNode(ISD::AND, DL, PtrVT, {Ptr, Const3});
1174 SDValue Mask = Byte ? DAG.getConstant(1, DL, MVT::i32)
1175 : DAG.getConstant(3, DL, MVT::i32);
1176 Flag = DAG.getNode(ISD::SHL, DL, MVT::i32, {Mask, Remainder});
1177 Bits = DAG.getNode(ISD::SHL, DL, PtrVT, {Remainder, Const3});
1178 return DAG.getNode(ISD::SHL, DL, Val.getValueType(), {Val, Bits});
1179 }
1180
1181 static SDValue finalizeTS1AM(SDValue Op, SelectionDAG &DAG, SDValue Data,
1182 SDValue Bits) {
1183 SDLoc DL(Op);
1184 EVT VT = Data.getValueType();
1185 bool Byte = cast<AtomicSDNode>(Op)->getMemoryVT() == MVT::i8;
1186 // NewData = Data >> Bits
1187 // Result = NewData & 0xff ; If Byte is true (1 byte)
1188 // Result = NewData & 0xffff ; If Byte is false (2 bytes)
1189
1190 SDValue NewData = DAG.getNode(ISD::SRL, DL, VT, Data, Bits);
1191 return DAG.getNode(ISD::AND, DL, VT,
1192 {NewData, DAG.getConstant(Byte ? 0xff : 0xffff, DL, VT)});
1193 }
1194
1195 SDValue VETargetLowering::lowerATOMIC_SWAP(SDValue Op,
1196 SelectionDAG &DAG) const {
1197 SDLoc DL(Op);
1198 AtomicSDNode *N = cast<AtomicSDNode>(Op);
1199
1200 if (N->getMemoryVT() == MVT::i8) {
1201 // For i8, use "ts1am"
1202 // Input:
1203 // ATOMIC_SWAP Ptr, Val, Order
1204 //
1205 // Output:
1206 // Remainder = AND Ptr, 3
1207 // Flag = 1 << Remainder ; 1 byte swap flag for TS1AM inst.
1208 // Bits = Remainder << 3
1209 // NewVal = Val << Bits
1210 //
1211 // Aligned = AND Ptr, -4
1212 // Data = TS1AM Aligned, Flag, NewVal
1213 //
1214 // NewData = Data >> Bits
1215 // Result = NewData & 0xff ; 1 byte result
1216 SDValue Flag;
1217 SDValue Bits;
1218 SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);
1219
1220 SDValue Ptr = N->getOperand(1);
1221 SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
1222 {Ptr, DAG.getConstant(-4, DL, MVT::i64)});
1223 SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
1224 DAG.getVTList(Op.getNode()->getValueType(0),
1225 Op.getNode()->getValueType(1)),
1226 {N->getChain(), Aligned, Flag, NewVal},
1227 N->getMemOperand());
1228
1229 SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
1230 SDValue Chain = TS1AM.getValue(1);
1231 return DAG.getMergeValues({Result, Chain}, DL);
1232 }
1233 if (N->getMemoryVT() == MVT::i16) {
1234 // For i16, use "ts1am"
1235 SDValue Flag;
1236 SDValue Bits;
1237 SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);
1238
1239 SDValue Ptr = N->getOperand(1);
1240 SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
1241 {Ptr, DAG.getConstant(-4, DL, MVT::i64)});
1242 SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
1243 DAG.getVTList(Op.getNode()->getValueType(0),
1244 Op.getNode()->getValueType(1)),
1245 {N->getChain(), Aligned, Flag, NewVal},
1246 N->getMemOperand());
1247
1248 SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
1249 SDValue Chain = TS1AM.getValue(1);
1250 return DAG.getMergeValues({Result, Chain}, DL);
1251 }
1252 // Otherwise, let llvm legalize it.
1253 return Op;
1254 }
1255
1256 SDValue VETargetLowering::lowerGlobalAddress(SDValue Op,
1257 SelectionDAG &DAG) const {
1258 return makeAddress(Op, DAG);
1259 }
1260
1261 SDValue VETargetLowering::lowerBlockAddress(SDValue Op,
1262 SelectionDAG &DAG) const {
1263 return makeAddress(Op, DAG);
1264 }
1265
1266 SDValue VETargetLowering::lowerConstantPool(SDValue Op,
1267 SelectionDAG &DAG) const {
1268 return makeAddress(Op, DAG);
1269 }
1270
1271 SDValue
1272 VETargetLowering::lowerToTLSGeneralDynamicModel(SDValue Op,
1273 SelectionDAG &DAG) const {
1274 SDLoc DL(Op);
1275
1276 // Generate the following code:
1277 // t1: ch,glue = callseq_start t0, 0, 0
1278 // t2: i64,ch,glue = VEISD::GETTLSADDR t1, label, t1:1
1279 // t3: ch,glue = callseq_end t2, 0, 0, t2:2
1280 // t4: i64,ch,glue = CopyFromReg t3, Register:i64 $sx0, t3:1
1281 SDValue Label = withTargetFlags(Op, 0, DAG);
1282 EVT PtrVT = Op.getValueType();
1283
1284 // Lowering the machine isd will make sure everything is in the right
1285 // location.
1286 SDValue Chain = DAG.getEntryNode();
1287 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1288 const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
1289 DAG.getMachineFunction(), CallingConv::C);
1290 Chain = DAG.getCALLSEQ_START(Chain, 64, 0, DL);
1291 SDValue Args[] = {Chain, Label, DAG.getRegisterMask(Mask), Chain.getValue(1)};
1292 Chain = DAG.getNode(VEISD::GETTLSADDR, DL, NodeTys, Args);
1293 Chain = DAG.getCALLSEQ_END(Chain, 64, 0, Chain.getValue(1), DL);
1294 Chain = DAG.getCopyFromReg(Chain, DL, VE::SX0, PtrVT, Chain.getValue(1));
1295
1296 // GETTLSADDR will be codegen'ed as call. Inform MFI that function has calls.
1297 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
1298 MFI.setHasCalls(true);
1299
1300 // Also generate code to prepare a GOT register if it is PIC.
1301 if (isPositionIndependent()) {
1302 MachineFunction &MF = DAG.getMachineFunction();
1303 Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
1304 }
1305
1306 return Chain;
1307 }
1308
1309 SDValue VETargetLowering::lowerGlobalTLSAddress(SDValue Op,
1310 SelectionDAG &DAG) const {
1311 // The current implementation of nld (2.26) doesn't allow local exec model
1312 // code described in VE-tls_v1.1.pdf (*1) as its input. Instead, we always
1313 // generate the general dynamic model code sequence.
1314 //
1315 // *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf
1316 return lowerToTLSGeneralDynamicModel(Op, DAG);
1317 }
1318
1319 SDValue VETargetLowering::lowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
1320 return makeAddress(Op, DAG);
1321 }
1322
1323 // Lower a f128 load into two f64 loads.
1324 static SDValue lowerLoadF128(SDValue Op, SelectionDAG &DAG) {
1325 SDLoc DL(Op);
1326 LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
1327 assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
1328 Align Alignment = LdNode->getAlign();
1329 if (Alignment > 8)
1330 Alignment = Align(8);
1331
1332 SDValue Lo64 =
1333 DAG.getLoad(MVT::f64, DL, LdNode->getChain(), LdNode->getBasePtr(),
1334 LdNode->getPointerInfo(), Alignment,
1335 LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1336 : MachineMemOperand::MONone);
1337 EVT AddrVT = LdNode->getBasePtr().getValueType();
1338 SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, LdNode->getBasePtr(),
1339 DAG.getConstant(8, DL, AddrVT));
1340 SDValue Hi64 =
1341 DAG.getLoad(MVT::f64, DL, LdNode->getChain(), HiPtr,
1342 LdNode->getPointerInfo(), Alignment,
1343 LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1344 : MachineMemOperand::MONone);
1345
1346 SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
1347 SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);
1348
1349 // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
1350 SDNode *InFP128 =
1351 DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f128);
1352 InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
1353 SDValue(InFP128, 0), Hi64, SubRegEven);
1354 InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
1355 SDValue(InFP128, 0), Lo64, SubRegOdd);
1356 SDValue OutChains[2] = {SDValue(Lo64.getNode(), 1),
1357 SDValue(Hi64.getNode(), 1)};
1358 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1359 SDValue Ops[2] = {SDValue(InFP128, 0), OutChain};
1360 return DAG.getMergeValues(Ops, DL);
1361 }
1362
1363 // Lower a vXi1 load into the following instructions:
1364 // LDrii %1, (,%addr)
1365 // LVMxir %vm, 0, %1
1366 // LDrii %2, 8(,%addr)
1367 // LVMxir %vm, 0, %2
1368 // ...
1369 static SDValue lowerLoadI1(SDValue Op, SelectionDAG &DAG) {
1370 SDLoc DL(Op);
1371 LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
1372 assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
1373
1374 SDValue BasePtr = LdNode->getBasePtr();
1375 Align Alignment = LdNode->getAlign();
1376 if (Alignment > 8)
1377 Alignment = Align(8);
1378
1379 EVT AddrVT = BasePtr.getValueType();
1380 EVT MemVT = LdNode->getMemoryVT();
1381 if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) {
1382 SDValue OutChains[4];
1383 SDNode *VM = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MemVT);
1384 for (int i = 0; i < 4; ++i) {
1385 // Generate load dag and prepare chains.
1386 SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
1387 DAG.getConstant(8 * i, DL, AddrVT));
1388 SDValue Val =
1389 DAG.getLoad(MVT::i64, DL, LdNode->getChain(), Addr,
1390 LdNode->getPointerInfo(), Alignment,
1391 LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1392 : MachineMemOperand::MONone);
1393 OutChains[i] = SDValue(Val.getNode(), 1);
1394
1395 VM = DAG.getMachineNode(VE::LVMir_m, DL, MVT::i64,
1396 DAG.getTargetConstant(i, DL, MVT::i64), Val,
1397 SDValue(VM, 0));
1398 }
1399 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1400 SDValue Ops[2] = {SDValue(VM, 0), OutChain};
1401 return DAG.getMergeValues(Ops, DL);
1402 } else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) {
1403 SDValue OutChains[8];
1404 SDNode *VM = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MemVT);
1405 for (int i = 0; i < 8; ++i) {
1406 // Generate load dag and prepare chains.
1407 SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
1408 DAG.getConstant(8 * i, DL, AddrVT));
1409 SDValue Val =
1410 DAG.getLoad(MVT::i64, DL, LdNode->getChain(), Addr,
1411 LdNode->getPointerInfo(), Alignment,
1412 LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1413 : MachineMemOperand::MONone);
1414 OutChains[i] = SDValue(Val.getNode(), 1);
1415
1416 VM = DAG.getMachineNode(VE::LVMyir_y, DL, MVT::i64,
1417 DAG.getTargetConstant(i, DL, MVT::i64), Val,
1418 SDValue(VM, 0));
1419 }
1420 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1421 SDValue Ops[2] = {SDValue(VM, 0), OutChain};
1422 return DAG.getMergeValues(Ops, DL);
1423 } else {
1424 // Otherwise, ask llvm to expand it.
1425 return SDValue();
1426 }
1427 }
1428
1429 SDValue VETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1430 LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode());
1431
1432 EVT MemVT = LdNode->getMemoryVT();
1433
1434 // Dispatch to vector isel.
1435 if (MemVT.isVector() && !isMaskType(MemVT))
1436 return lowerToVVP(Op, DAG);
1437
1438 SDValue BasePtr = LdNode->getBasePtr();
1439 if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
1440 // Do not expand a load instruction with a frame index here because of
1441 // dependency problems. We expand it later in eliminateFrameIndex().
1442 return Op;
1443 }
1444
1445 if (MemVT == MVT::f128)
1446 return lowerLoadF128(Op, DAG);
1447 if (isMaskType(MemVT))
1448 return lowerLoadI1(Op, DAG);
1449
1450 return Op;
1451 }
1452
1453 // Lower a f128 store into two f64 stores.
1454 static SDValue lowerStoreF128(SDValue Op, SelectionDAG &DAG) {
1455 SDLoc DL(Op);
1456 StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode());
1457 assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1458
1459 SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
1460 SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);
1461
1462 SDNode *Hi64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64,
1463 StNode->getValue(), SubRegEven);
1464 SDNode *Lo64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64,
1465 StNode->getValue(), SubRegOdd);
1466
1467 Align Alignment = StNode->getAlign();
1468 if (Alignment > 8)
1469 Alignment = Align(8);
1470
1471 // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
1472 SDValue OutChains[2];
1473 OutChains[0] =
1474 DAG.getStore(StNode->getChain(), DL, SDValue(Lo64, 0),
1475 StNode->getBasePtr(), MachinePointerInfo(), Alignment,
1476 StNode->isVolatile() ? MachineMemOperand::MOVolatile
1477 : MachineMemOperand::MONone);
1478 EVT AddrVT = StNode->getBasePtr().getValueType();
1479 SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, StNode->getBasePtr(),
1480 DAG.getConstant(8, DL, AddrVT));
1481 OutChains[1] =
1482 DAG.getStore(StNode->getChain(), DL, SDValue(Hi64, 0), HiPtr,
1483 MachinePointerInfo(), Alignment,
1484 StNode->isVolatile() ? MachineMemOperand::MOVolatile
1485 : MachineMemOperand::MONone);
1486 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1487 }
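
// For illustration, a rough sketch of what an f128 store of %val to %addr
// becomes after this lowering (names below are made up for the example):
//   %hi64 = EXTRACT_SUBREG %val, sub_even
//   %lo64 = EXTRACT_SUBREG %val, sub_odd
//   store f64 %lo64, 0(%addr)
//   store f64 %hi64, 8(%addr)
// Both store chains are then merged with a TokenFactor.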
1488
1489 // Lower a vXi1 store into the following instructions:
1490 // SVMi %1, %vm, 0
1491 // STrii %1, (,%addr)
1492 // SVMi %2, %vm, 1
1493 // STrii %2, 8(,%addr)
1494 // ...
1495 static SDValue lowerStoreI1(SDValue Op, SelectionDAG &DAG) {
1496 SDLoc DL(Op);
1497 StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode());
1498 assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1499
1500 SDValue BasePtr = StNode->getBasePtr();
1501 Align Alignment = StNode->getAlign();
1502 if (Alignment > 8)
1503 Alignment = Align(8);
1504 EVT AddrVT = BasePtr.getValueType();
1505 EVT MemVT = StNode->getMemoryVT();
1506 if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) {
1507 SDValue OutChains[4];
1508 for (int i = 0; i < 4; ++i) {
1509 SDNode *V =
1510 DAG.getMachineNode(VE::SVMmi, DL, MVT::i64, StNode->getValue(),
1511 DAG.getTargetConstant(i, DL, MVT::i64));
1512 SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
1513 DAG.getConstant(8 * i, DL, AddrVT));
1514 OutChains[i] =
1515 DAG.getStore(StNode->getChain(), DL, SDValue(V, 0), Addr,
1516 MachinePointerInfo(), Alignment,
1517 StNode->isVolatile() ? MachineMemOperand::MOVolatile
1518 : MachineMemOperand::MONone);
1519 }
1520 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1521 } else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) {
1522 SDValue OutChains[8];
1523 for (int i = 0; i < 8; ++i) {
1524 SDNode *V =
1525 DAG.getMachineNode(VE::SVMyi, DL, MVT::i64, StNode->getValue(),
1526 DAG.getTargetConstant(i, DL, MVT::i64));
1527 SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
1528 DAG.getConstant(8 * i, DL, AddrVT));
1529 OutChains[i] =
1530 DAG.getStore(StNode->getChain(), DL, SDValue(V, 0), Addr,
1531 MachinePointerInfo(), Alignment,
1532 StNode->isVolatile() ? MachineMemOperand::MOVolatile
1533 : MachineMemOperand::MONone);
1534 }
1535 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1536 } else {
1537 // Otherwise, ask llvm to expand it.
1538 return SDValue();
1539 }
1540 }
1541
1542 SDValue VETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1543 StoreSDNode *StNode = cast<StoreSDNode>(Op.getNode());
1544 assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1545
1546 // Always expand non-mask vector stores to VVP.
1547 EVT MemVT = StNode->getMemoryVT();
1548 if (MemVT.isVector() && !isMaskType(MemVT))
1549 return lowerToVVP(Op, DAG);
1550
1551 SDValue BasePtr = StNode->getBasePtr();
1552 if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
1553 // Do not expand store instruction with frame index here because of
1554 // dependency problems. We expand it later in eliminateFrameIndex().
1555 return Op;
1556 }
1557
1558 if (MemVT == MVT::f128)
1559 return lowerStoreF128(Op, DAG);
1560 if (isMaskType(MemVT))
1561 return lowerStoreI1(Op, DAG);
1562
1563 // Otherwise, ask llvm to expand it.
1564 return SDValue();
1565 }
1566
1567 SDValue VETargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
1568 MachineFunction &MF = DAG.getMachineFunction();
1569 VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
1570 auto PtrVT = getPointerTy(DAG.getDataLayout());
1571
1572 // Need frame address to find the address of VarArgsFrameIndex.
1573 MF.getFrameInfo().setFrameAddressIsTaken(true);
1574
1575 // vastart just stores the address of the VarArgsFrameIndex slot into the
1576 // memory location argument.
1577 SDLoc DL(Op);
1578 SDValue Offset =
1579 DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getRegister(VE::SX9, PtrVT),
1580 DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL));
1581 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1582 return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1),
1583 MachinePointerInfo(SV));
1584 }
1585
1586 SDValue VETargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const {
1587 SDNode *Node = Op.getNode();
1588 EVT VT = Node->getValueType(0);
1589 SDValue InChain = Node->getOperand(0);
1590 SDValue VAListPtr = Node->getOperand(1);
1591 EVT PtrVT = VAListPtr.getValueType();
1592 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
1593 SDLoc DL(Node);
1594 SDValue VAList =
1595 DAG.getLoad(PtrVT, DL, InChain, VAListPtr, MachinePointerInfo(SV));
1596 SDValue Chain = VAList.getValue(1);
1597 SDValue NextPtr;
1598
1599 if (VT == MVT::f128) {
1600 // VE f128 values must be stored with 16-byte alignment. We don't
1601 // know the actual alignment of VAList, so we align the pointer
1602 // dynamically.
1603 int Align = 16;
1604 VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
1605 DAG.getConstant(Align - 1, DL, PtrVT));
1606 VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
1607 DAG.getConstant(-Align, DL, PtrVT));
1608 // Increment the pointer, VAList, by 16 to the next vaarg.
1609 NextPtr =
1610 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(16, DL));
1611 } else if (VT == MVT::f32) {
1612 // float --> needs special handling like below.
1613 // 0 4
1614 // +------+------+
1615 // | empty| float|
1616 // +------+------+
1617 // Increment the pointer, VAList, by 8 to the next vaarg.
1618 NextPtr =
1619 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
1620 // Then, adjust VAList.
1621 unsigned InternalOffset = 4;
1622 VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
1623 DAG.getConstant(InternalOffset, DL, PtrVT));
1624 } else {
1625 // Increment the pointer, VAList, by 8 to the next vaarg.
1626 NextPtr =
1627 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
1628 }
1629
1630 // Store the incremented VAList to the legalized pointer.
1631 InChain = DAG.getStore(Chain, DL, NextPtr, VAListPtr, MachinePointerInfo(SV));
1632
1633 // Load the actual argument out of the pointer VAList.
1634 // We can't count on greater alignment than the word size.
1635 return DAG.getLoad(
1636 VT, DL, InChain, VAList, MachinePointerInfo(),
1637 Align(std::min(PtrVT.getSizeInBits(), VT.getSizeInBits()) / 8));
1638 }
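
// Illustrative example (a sketch, not literal output): for
//   float f = va_arg(ap, float);
// this builds roughly
//   %list = load ptr, %ap           ; current va_list pointer
//   store (%list + 8), %ap          ; advance to the next 8-byte slot
//   %f    = load float, 4(%list)    ; the float sits at offset 4 of the slot
// matching the empty/float slot layout drawn above.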
1639
1640 SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
1641 SelectionDAG &DAG) const {
1642 // Generate the following code:
1643 // (void)__ve_grow_stack(size); // or __ve_grow_stack_align when over-aligned
1644 // ret = GETSTACKTOP; // pseudo instruction
1645 SDLoc DL(Op);
1646
1647 // Get the inputs.
1648 SDNode *Node = Op.getNode();
1649 SDValue Chain = Op.getOperand(0);
1650 SDValue Size = Op.getOperand(1);
1651 MaybeAlign Alignment(Op.getConstantOperandVal(2));
1652 EVT VT = Node->getValueType(0);
1653
1654 // Chain the dynamic stack allocation so that it doesn't modify the stack
1655 // pointer when other instructions are using the stack.
1656 Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
1657
1658 const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
1659 Align StackAlign = TFI.getStackAlign();
1660 bool NeedsAlign = Alignment.valueOrOne() > StackAlign;
1661
1662 // Prepare arguments
1663 TargetLowering::ArgListTy Args;
1664 TargetLowering::ArgListEntry Entry;
1665 Entry.Node = Size;
1666 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
1667 Args.push_back(Entry);
1668 if (NeedsAlign) {
1669 Entry.Node = DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT);
1670 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
1671 Args.push_back(Entry);
1672 }
1673 Type *RetTy = Type::getVoidTy(*DAG.getContext());
1674
1675 EVT PtrVT = Op.getValueType();
1676 SDValue Callee;
1677 if (NeedsAlign) {
1678 Callee = DAG.getTargetExternalSymbol("__ve_grow_stack_align", PtrVT, 0);
1679 } else {
1680 Callee = DAG.getTargetExternalSymbol("__ve_grow_stack", PtrVT, 0);
1681 }
1682
1683 TargetLowering::CallLoweringInfo CLI(DAG);
1684 CLI.setDebugLoc(DL)
1685 .setChain(Chain)
1686 .setCallee(CallingConv::PreserveAll, RetTy, Callee, std::move(Args))
1687 .setDiscardResult(true);
1688 std::pair<SDValue, SDValue> pair = LowerCallTo(CLI);
1689 Chain = pair.second;
1690 SDValue Result = DAG.getNode(VEISD::GETSTACKTOP, DL, VT, Chain);
1691 if (NeedsAlign) {
1692 Result = DAG.getNode(ISD::ADD, DL, VT, Result,
1693 DAG.getConstant((Alignment->value() - 1ULL), DL, VT));
1694 Result = DAG.getNode(ISD::AND, DL, VT, Result,
1695 DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT));
1696 }
1697 // Chain = Result.getValue(1);
1698 Chain = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), DL);
1699
1700 SDValue Ops[2] = {Result, Chain};
1701 return DAG.getMergeValues(Ops, DL);
1702 }
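
// Illustrative example (a sketch, not literal output): an over-aligned
//   %p = alloca i8, i64 %n, align 32
// is lowered to roughly
//   callseq_start
//   call __ve_grow_stack_align(%n, -32)   ; runtime helper grows the stack
//   %top = GETSTACKTOP
//   %p   = (%top + 31) & -32              ; re-align the returned stack top
//   callseq_end
// With the default alignment only __ve_grow_stack(%n) is called and the
// masking is skipped.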
1703
1704 SDValue VETargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
1705 SelectionDAG &DAG) const {
1706 SDLoc DL(Op);
1707 return DAG.getNode(VEISD::EH_SJLJ_LONGJMP, DL, MVT::Other, Op.getOperand(0),
1708 Op.getOperand(1));
1709 }
1710
1711 SDValue VETargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
1712 SelectionDAG &DAG) const {
1713 SDLoc DL(Op);
1714 return DAG.getNode(VEISD::EH_SJLJ_SETJMP, DL,
1715 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
1716 Op.getOperand(1));
1717 }
1718
1719 SDValue VETargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
1720 SelectionDAG &DAG) const {
1721 SDLoc DL(Op);
1722 return DAG.getNode(VEISD::EH_SJLJ_SETUP_DISPATCH, DL, MVT::Other,
1723 Op.getOperand(0));
1724 }
1725
1726 static SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG,
1727 const VETargetLowering &TLI,
1728 const VESubtarget *Subtarget) {
1729 SDLoc DL(Op);
1730 MachineFunction &MF = DAG.getMachineFunction();
1731 EVT PtrVT = TLI.getPointerTy(MF.getDataLayout());
1732
1733 MachineFrameInfo &MFI = MF.getFrameInfo();
1734 MFI.setFrameAddressIsTaken(true);
1735
1736 unsigned Depth = Op.getConstantOperandVal(0);
1737 const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
1738 Register FrameReg = RegInfo->getFrameRegister(MF);
1739 SDValue FrameAddr =
1740 DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, PtrVT);
1741 while (Depth--)
1742 FrameAddr = DAG.getLoad(Op.getValueType(), DL, DAG.getEntryNode(),
1743 FrameAddr, MachinePointerInfo());
1744 return FrameAddr;
1745 }
1746
1747 static SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG,
1748 const VETargetLowering &TLI,
1749 const VESubtarget *Subtarget) {
1750 MachineFunction &MF = DAG.getMachineFunction();
1751 MachineFrameInfo &MFI = MF.getFrameInfo();
1752 MFI.setReturnAddressIsTaken(true);
1753
1754 if (TLI.verifyReturnAddressArgumentIsConstant(Op, DAG))
1755 return SDValue();
1756
1757 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG, TLI, Subtarget);
1758
1759 SDLoc DL(Op);
1760 EVT VT = Op.getValueType();
1761 SDValue Offset = DAG.getConstant(8, DL, VT);
1762 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
1763 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
1764 MachinePointerInfo());
1765 }
1766
1767 SDValue VETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
1768 SelectionDAG &DAG) const {
1769 SDLoc DL(Op);
1770 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1771 switch (IntNo) {
1772 default: // Don't custom lower most intrinsics.
1773 return SDValue();
1774 case Intrinsic::eh_sjlj_lsda: {
1775 MachineFunction &MF = DAG.getMachineFunction();
1776 MVT VT = Op.getSimpleValueType();
1777 const VETargetMachine *TM =
1778 static_cast<const VETargetMachine *>(&DAG.getTarget());
1779
1780 // Create the GCC_except_tableXX string. The real symbol for it will be
1781 // generated in EHStreamer::emitExceptionTable() later, so we just
1782 // borrow its name here.
1783 TM->getStrList()->push_back(std::string(
1784 (Twine("GCC_except_table") + Twine(MF.getFunctionNumber())).str()));
1785 SDValue Addr =
1786 DAG.getTargetExternalSymbol(TM->getStrList()->back().c_str(), VT, 0);
1787 if (isPositionIndependent()) {
1788 Addr = makeHiLoPair(Addr, VEMCExpr::VK_VE_GOTOFF_HI32,
1789 VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
1790 SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, VT);
1791 return DAG.getNode(ISD::ADD, DL, VT, GlobalBase, Addr);
1792 }
1793 return makeHiLoPair(Addr, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
1794 }
1795 }
1796 }
1797
1798 static bool getUniqueInsertion(SDNode *N, unsigned &UniqueIdx) {
1799 if (!isa<BuildVectorSDNode>(N))
1800 return false;
1801 const auto *BVN = cast<BuildVectorSDNode>(N);
1802
1803 // Find first non-undef insertion.
1804 unsigned Idx;
1805 for (Idx = 0; Idx < BVN->getNumOperands(); ++Idx) {
1806 auto ElemV = BVN->getOperand(Idx);
1807 if (!ElemV->isUndef())
1808 break;
1809 }
1810 // Catch the (hypothetical) all-undef case.
1811 if (Idx == BVN->getNumOperands())
1812 return false;
1813 // Remember insertion.
1814 UniqueIdx = Idx++;
1815 // Verify that all other insertions are undef.
1816 for (; Idx < BVN->getNumOperands(); ++Idx) {
1817 auto ElemV = BVN->getOperand(Idx);
1818 if (!ElemV->isUndef())
1819 return false;
1820 }
1821 return true;
1822 }
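
// Example: for (build_vector undef, undef, %x, undef, ...) this returns true
// with UniqueIdx == 2, and lowerBUILD_VECTOR below emits
// (insert_vector_elt undef, %x, 2) instead of a broadcast.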
1823
1824 static SDValue getSplatValue(SDNode *N) {
1825 if (auto *BuildVec = dyn_cast<BuildVectorSDNode>(N)) {
1826 return BuildVec->getSplatValue();
1827 }
1828 return SDValue();
1829 }
1830
1831 SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op,
1832 SelectionDAG &DAG) const {
1833 VECustomDAG CDAG(DAG, Op);
1834 MVT ResultVT = Op.getSimpleValueType();
1835
1836 // If there is just one non-undef element, expand to INSERT_VECTOR_ELT.
1837 unsigned UniqueIdx;
1838 if (getUniqueInsertion(Op.getNode(), UniqueIdx)) {
1839 SDValue AccuV = CDAG.getUNDEF(Op.getValueType());
1840 auto ElemV = Op->getOperand(UniqueIdx);
1841 SDValue IdxV = CDAG.getConstant(UniqueIdx, MVT::i64);
1842 return CDAG.getNode(ISD::INSERT_VECTOR_ELT, ResultVT, {AccuV, ElemV, IdxV});
1843 }
1844
1845 // Else emit a broadcast.
1846 if (SDValue ScalarV = getSplatValue(Op.getNode())) {
1847 unsigned NumEls = ResultVT.getVectorNumElements();
1848 auto AVL = CDAG.getConstant(NumEls, MVT::i32);
1849 return CDAG.getBroadcast(ResultVT, ScalarV, AVL);
1850 }
1851
1852 // Expand
1853 return SDValue();
1854 }
1855
1856 TargetLowering::LegalizeAction
1857 VETargetLowering::getCustomOperationAction(SDNode &Op) const {
1858 // Custom legalization on VVP_* and VEC_* opcodes is required to pack-legalize
1859 // these operations (transform nodes such that their AVL parameter refers to
1860 // packs of 64 bits, instead of the number of elements).
1861
1862 // Packing opcodes are created with a pack-legal AVL (LEGALAVL). No need to
1863 // re-visit them.
1864 if (isPackingSupportOpcode(Op.getOpcode()))
1865 return Legal;
1866
1867 // Custom lower to legalize AVL for packed mode.
1868 if (isVVPOrVEC(Op.getOpcode()))
1869 return Custom;
1870 return Legal;
1871 }
1872
1873 SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
1874 LLVM_DEBUG(dbgs() << "::LowerOperation"; Op->print(dbgs()););
1875 unsigned Opcode = Op.getOpcode();
1876
1877 /// Scalar isel.
1878 switch (Opcode) {
1879 case ISD::ATOMIC_FENCE:
1880 return lowerATOMIC_FENCE(Op, DAG);
1881 case ISD::ATOMIC_SWAP:
1882 return lowerATOMIC_SWAP(Op, DAG);
1883 case ISD::BlockAddress:
1884 return lowerBlockAddress(Op, DAG);
1885 case ISD::ConstantPool:
1886 return lowerConstantPool(Op, DAG);
1887 case ISD::DYNAMIC_STACKALLOC:
1888 return lowerDYNAMIC_STACKALLOC(Op, DAG);
1889 case ISD::EH_SJLJ_LONGJMP:
1890 return lowerEH_SJLJ_LONGJMP(Op, DAG);
1891 case ISD::EH_SJLJ_SETJMP:
1892 return lowerEH_SJLJ_SETJMP(Op, DAG);
1893 case ISD::EH_SJLJ_SETUP_DISPATCH:
1894 return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
1895 case ISD::FRAMEADDR:
1896 return lowerFRAMEADDR(Op, DAG, *this, Subtarget);
1897 case ISD::GlobalAddress:
1898 return lowerGlobalAddress(Op, DAG);
1899 case ISD::GlobalTLSAddress:
1900 return lowerGlobalTLSAddress(Op, DAG);
1901 case ISD::INTRINSIC_WO_CHAIN:
1902 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
1903 case ISD::JumpTable:
1904 return lowerJumpTable(Op, DAG);
1905 case ISD::LOAD:
1906 return lowerLOAD(Op, DAG);
1907 case ISD::RETURNADDR:
1908 return lowerRETURNADDR(Op, DAG, *this, Subtarget);
1909 case ISD::BUILD_VECTOR:
1910 return lowerBUILD_VECTOR(Op, DAG);
1911 case ISD::STORE:
1912 return lowerSTORE(Op, DAG);
1913 case ISD::VASTART:
1914 return lowerVASTART(Op, DAG);
1915 case ISD::VAARG:
1916 return lowerVAARG(Op, DAG);
1917
1918 case ISD::INSERT_VECTOR_ELT:
1919 return lowerINSERT_VECTOR_ELT(Op, DAG);
1920 case ISD::EXTRACT_VECTOR_ELT:
1921 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
1922 }
1923
1924 /// Vector isel.
1925 LLVM_DEBUG(dbgs() << "::LowerOperation_VVP"; Op->print(dbgs()););
1926 if (ISD::isVPOpcode(Opcode))
1927 return lowerToVVP(Op, DAG);
1928
1929 switch (Opcode) {
1930 default:
1931 llvm_unreachable("Should not custom lower this!");
1932
1933 // Legalize the AVL of this internal node.
1934 case VEISD::VEC_BROADCAST:
1935 #define ADD_VVP_OP(VVP_NAME, ...) case VEISD::VVP_NAME:
1936 #include "VVPNodes.def"
1937 // AVL already legalized.
1938 if (getAnnotatedNodeAVL(Op).second)
1939 return Op;
1940 return legalizeInternalVectorOp(Op, DAG);
1941
1942 // Translate into a VEC_*/VVP_* layer operation.
1943 case ISD::MLOAD:
1944 case ISD::MSTORE:
1945 #define ADD_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
1946 #include "VVPNodes.def"
1947 if (isMaskArithmetic(Op) && isPackedVectorType(Op.getValueType()))
1948 return splitMaskArithmetic(Op, DAG);
1949 return lowerToVVP(Op, DAG);
1950 }
1951 }
1952 /// } Custom Lower
1953
1954 void VETargetLowering::ReplaceNodeResults(SDNode *N,
1955 SmallVectorImpl<SDValue> &Results,
1956 SelectionDAG &DAG) const {
1957 switch (N->getOpcode()) {
1958 case ISD::ATOMIC_SWAP:
1959 // Let LLVM expand atomic swap instruction through LowerOperation.
1960 return;
1961 default:
1962 LLVM_DEBUG(N->dumpr(&DAG));
1963 llvm_unreachable("Do not know how to custom type legalize this operation!");
1964 }
1965 }
1966
1967 /// JumpTable for VE.
1968 ///
1969 /// VE cannot generate a relocatable symbol in a jump table. VE cannot
1970 /// generate expressions using symbols in both the text segment and the
1971 /// data segment, like the following:
1972 /// .4byte .LBB0_2-.LJTI0_0
1973 /// So, we generate an offset from the top of the function, like the
1974 /// following, as a custom label.
1975 /// .4byte .LBB0_2-<function name>
1976
1977 unsigned VETargetLowering::getJumpTableEncoding() const {
1978 // Use custom label for PIC.
1979 if (isPositionIndependent())
1980 return MachineJumpTableInfo::EK_Custom32;
1981
1982 // Otherwise, use the normal jump table encoding heuristics.
1983 return TargetLowering::getJumpTableEncoding();
1984 }
1985
1986 const MCExpr *VETargetLowering::LowerCustomJumpTableEntry(
1987 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
1988 unsigned Uid, MCContext &Ctx) const {
1989 assert(isPositionIndependent());
1990
1991 // Generate a custom label for PIC like below.
1992 //   .4byte  .LBB0_2-<function name>
1993 const auto *Value = MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
1994 MCSymbol *Sym = Ctx.getOrCreateSymbol(MBB->getParent()->getName().data());
1995 const auto *Base = MCSymbolRefExpr::create(Sym, Ctx);
1996 return MCBinaryExpr::createSub(Value, Base, Ctx);
1997 }
1998
1999 SDValue VETargetLowering::getPICJumpTableRelocBase(SDValue Table,
2000 SelectionDAG &DAG) const {
2001 assert(isPositionIndependent());
2002 SDLoc DL(Table);
2003 Function *Function = &DAG.getMachineFunction().getFunction();
2004 assert(Function != nullptr);
2005 auto PtrTy = getPointerTy(DAG.getDataLayout(), Function->getAddressSpace());
2006
2007 // In the jump table, we have the following values in PIC mode.
2008 //   .4byte  .LBB0_2-<function name>
2009 // We need to add this value and the address of this function to compute the
2010 // address of the .LBB0_2 label correctly under PIC mode. So, we want to
2011 // generate the following instructions:
2012 // lea %reg, fun@gotoff_lo
2013 // and %reg, %reg, (32)0
2014 // lea.sl %reg, fun@gotoff_hi(%reg, %got)
2015 // In order to do so, we need to generate a correctly marked DAG node using
2016 // makeHiLoPair.
2017 SDValue Op = DAG.getGlobalAddress(Function, DL, PtrTy);
2018 SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
2019 VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
2020 SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrTy);
2021 return DAG.getNode(ISD::ADD, DL, PtrTy, GlobalBase, HiLo);
2022 }
2023
2024 Register VETargetLowering::prepareMBB(MachineBasicBlock &MBB,
2025 MachineBasicBlock::iterator I,
2026 MachineBasicBlock *TargetBB,
2027 const DebugLoc &DL) const {
2028 MachineFunction *MF = MBB.getParent();
2029 MachineRegisterInfo &MRI = MF->getRegInfo();
2030 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2031
2032 const TargetRegisterClass *RC = &VE::I64RegClass;
2033 Register Tmp1 = MRI.createVirtualRegister(RC);
2034 Register Tmp2 = MRI.createVirtualRegister(RC);
2035 Register Result = MRI.createVirtualRegister(RC);
2036
2037 if (isPositionIndependent()) {
2038 // Create the following instructions for local linkage PIC code.
2039 // lea %Tmp1, TargetBB@gotoff_lo
2040 // and %Tmp2, %Tmp1, (32)0
2041 // lea.sl %Result, TargetBB@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2042 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2043 .addImm(0)
2044 .addImm(0)
2045 .addMBB(TargetBB, VEMCExpr::VK_VE_GOTOFF_LO32);
2046 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2047 .addReg(Tmp1, getKillRegState(true))
2048 .addImm(M0(32));
2049 BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result)
2050 .addReg(VE::SX15)
2051 .addReg(Tmp2, getKillRegState(true))
2052 .addMBB(TargetBB, VEMCExpr::VK_VE_GOTOFF_HI32);
2053 } else {
2054 // Create the following instructions for non-PIC code.
2055 // lea %Tmp1, TargetBB@lo
2056 // and %Tmp2, %Tmp1, (32)0
2057 // lea.sl %Result, TargetBB@hi(%Tmp2)
2058 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2059 .addImm(0)
2060 .addImm(0)
2061 .addMBB(TargetBB, VEMCExpr::VK_VE_LO32);
2062 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2063 .addReg(Tmp1, getKillRegState(true))
2064 .addImm(M0(32));
2065 BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result)
2066 .addReg(Tmp2, getKillRegState(true))
2067 .addImm(0)
2068 .addMBB(TargetBB, VEMCExpr::VK_VE_HI32);
2069 }
2070 return Result;
2071 }
2072
2073 Register VETargetLowering::prepareSymbol(MachineBasicBlock &MBB,
2074 MachineBasicBlock::iterator I,
2075 StringRef Symbol, const DebugLoc &DL,
2076 bool IsLocal = false,
2077 bool IsCall = false) const {
2078 MachineFunction *MF = MBB.getParent();
2079 MachineRegisterInfo &MRI = MF->getRegInfo();
2080 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2081
2082 const TargetRegisterClass *RC = &VE::I64RegClass;
2083 Register Result = MRI.createVirtualRegister(RC);
2084
2085 if (isPositionIndependent()) {
2086 if (IsCall && !IsLocal) {
2087 // Create the following instructions for non-local linkage PIC code function
2088 // calls. These instructions use the IC and the magic number -24, so we expand
2089 // them in VEAsmPrinter.cpp from the GETFUNPLT pseudo instruction.
2090 // lea %Reg, Symbol@plt_lo(-24)
2091 // and %Reg, %Reg, (32)0
2092 // sic %s16
2093 // lea.sl %Result, Symbol@plt_hi(%Reg, %s16) ; %s16 is PLT
2094 BuildMI(MBB, I, DL, TII->get(VE::GETFUNPLT), Result)
2095 .addExternalSymbol("abort");
2096 } else if (IsLocal) {
2097 Register Tmp1 = MRI.createVirtualRegister(RC);
2098 Register Tmp2 = MRI.createVirtualRegister(RC);
2099 // Create the following instructions for local linkage PIC code.
2100 // lea %Tmp1, Symbol@gotoff_lo
2101 // and %Tmp2, %Tmp1, (32)0
2102 // lea.sl %Result, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2103 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2104 .addImm(0)
2105 .addImm(0)
2106 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOTOFF_LO32);
2107 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2108 .addReg(Tmp1, getKillRegState(true))
2109 .addImm(M0(32));
2110 BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result)
2111 .addReg(VE::SX15)
2112 .addReg(Tmp2, getKillRegState(true))
2113 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOTOFF_HI32);
2114 } else {
2115 Register Tmp1 = MRI.createVirtualRegister(RC);
2116 Register Tmp2 = MRI.createVirtualRegister(RC);
2117 // Create the following instructions for non-local linkage PIC code.
2118 // lea %Tmp1, Symbol@got_lo
2119 // and %Tmp2, %Tmp1, (32)0
2120 // lea.sl %Tmp3, Symbol@got_hi(%Tmp2, %s15) ; %s15 is GOT
2121 // ld %Result, 0(%Tmp3)
2122 Register Tmp3 = MRI.createVirtualRegister(RC);
2123 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2124 .addImm(0)
2125 .addImm(0)
2126 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOT_LO32);
2127 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2128 .addReg(Tmp1, getKillRegState(true))
2129 .addImm(M0(32));
2130 BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Tmp3)
2131 .addReg(VE::SX15)
2132 .addReg(Tmp2, getKillRegState(true))
2133 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOT_HI32);
2134 BuildMI(MBB, I, DL, TII->get(VE::LDrii), Result)
2135 .addReg(Tmp3, getKillRegState(true))
2136 .addImm(0)
2137 .addImm(0);
2138 }
2139 } else {
2140 Register Tmp1 = MRI.createVirtualRegister(RC);
2141 Register Tmp2 = MRI.createVirtualRegister(RC);
2142 // Create the following instructions for non-PIC code.
2143 // lea %Tmp1, Symbol@lo
2144 // and %Tmp2, %Tmp1, (32)0
2145 // lea.sl %Result, Symbol@hi(%Tmp2)
2146 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2147 .addImm(0)
2148 .addImm(0)
2149 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_LO32);
2150 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2151 .addReg(Tmp1, getKillRegState(true))
2152 .addImm(M0(32));
2153 BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result)
2154 .addReg(Tmp2, getKillRegState(true))
2155 .addImm(0)
2156 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_HI32);
2157 }
2158 return Result;
2159 }
2160
2161 void VETargetLowering::setupEntryBlockForSjLj(MachineInstr &MI,
2162 MachineBasicBlock *MBB,
2163 MachineBasicBlock *DispatchBB,
2164 int FI, int Offset) const {
2165 DebugLoc DL = MI.getDebugLoc();
2166 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2167
2168 Register LabelReg =
2169 prepareMBB(*MBB, MachineBasicBlock::iterator(MI), DispatchBB, DL);
2170
2171 // Store the address of DispatchBB into the given jmpbuf[1], which holds the
2172 // next IC referenced by longjmp (throw) later.
2173 MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
2174 addFrameReference(MIB, FI, Offset); // jmpbuf[1]
2175 MIB.addReg(LabelReg, getKillRegState(true));
2176 }
2177
2178 MachineBasicBlock *
2179 VETargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
2180 MachineBasicBlock *MBB) const {
2181 DebugLoc DL = MI.getDebugLoc();
2182 MachineFunction *MF = MBB->getParent();
2183 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2184 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
2185 MachineRegisterInfo &MRI = MF->getRegInfo();
2186
2187 const BasicBlock *BB = MBB->getBasicBlock();
2188 MachineFunction::iterator I = ++MBB->getIterator();
2189
2190 // Memory Reference.
2191 SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
2192 MI.memoperands_end());
2193 Register BufReg = MI.getOperand(1).getReg();
2194
2195 Register DstReg;
2196
2197 DstReg = MI.getOperand(0).getReg();
2198 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
2199 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
2200 (void)TRI;
2201 Register MainDestReg = MRI.createVirtualRegister(RC);
2202 Register RestoreDestReg = MRI.createVirtualRegister(RC);
2203
2204 // For `v = call @llvm.eh.sjlj.setjmp(buf)`, we generate the following
2205 // instructions. SP/FP must be saved in jmpbuf before `llvm.eh.sjlj.setjmp`.
2206 //
2207 // ThisMBB:
2208 // buf[3] = %s17 iff %s17 is used as BP
2209 // buf[1] = RestoreMBB as IC after longjmp
2210 // # SjLjSetup RestoreMBB
2211 //
2212 // MainMBB:
2213 // v_main = 0
2214 //
2215 // SinkMBB:
2216 // v = phi(v_main, MainMBB, v_restore, RestoreMBB)
2217 // ...
2218 //
2219 // RestoreMBB:
2220 // %s17 = buf[3] iff %s17 is used as BP
2221 // v_restore = 1
2222 // goto SinkMBB
2223
2224 MachineBasicBlock *ThisMBB = MBB;
2225 MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);
2226 MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);
2227 MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);
2228 MF->insert(I, MainMBB);
2229 MF->insert(I, SinkMBB);
2230 MF->push_back(RestoreMBB);
2231 RestoreMBB->setMachineBlockAddressTaken();
2232
2233 // Transfer the remainder of BB and its successor edges to SinkMBB.
2234 SinkMBB->splice(SinkMBB->begin(), MBB,
2235 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
2236 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
2237
2238 // ThisMBB:
2239 Register LabelReg =
2240 prepareMBB(*MBB, MachineBasicBlock::iterator(MI), RestoreMBB, DL);
2241
2242 // Store BP in buf[3] iff this function is using BP.
2243 const VEFrameLowering *TFI = Subtarget->getFrameLowering();
2244 if (TFI->hasBP(*MF)) {
2245 MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
2246 MIB.addReg(BufReg);
2247 MIB.addImm(0);
2248 MIB.addImm(24);
2249 MIB.addReg(VE::SX17);
2250 MIB.setMemRefs(MMOs);
2251 }
2252
2253 // Store IP in buf[1].
2254 MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
2255 MIB.add(MI.getOperand(1)); // we can preserve the kill flags here.
2256 MIB.addImm(0);
2257 MIB.addImm(8);
2258 MIB.addReg(LabelReg, getKillRegState(true));
2259 MIB.setMemRefs(MMOs);
2260
2261 // SP/FP are already stored in jmpbuf before `llvm.eh.sjlj.setjmp`.
2262
2263 // Insert setup.
2264 MIB =
2265 BuildMI(*ThisMBB, MI, DL, TII->get(VE::EH_SjLj_Setup)).addMBB(RestoreMBB);
2266
2267 const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
2268 MIB.addRegMask(RegInfo->getNoPreservedMask());
2269 ThisMBB->addSuccessor(MainMBB);
2270 ThisMBB->addSuccessor(RestoreMBB);
2271
2272 // MainMBB:
2273 BuildMI(MainMBB, DL, TII->get(VE::LEAzii), MainDestReg)
2274 .addImm(0)
2275 .addImm(0)
2276 .addImm(0);
2277 MainMBB->addSuccessor(SinkMBB);
2278
2279 // SinkMBB:
2280 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(VE::PHI), DstReg)
2281 .addReg(MainDestReg)
2282 .addMBB(MainMBB)
2283 .addReg(RestoreDestReg)
2284 .addMBB(RestoreMBB);
2285
2286 // RestoreMBB:
2287 // Restore BP from buf[3] iff this function is using BP. The address of
2288 // buf is in SX10.
2289 // FIXME: Better to not use SX10 here
2290 if (TFI->hasBP(*MF)) {
2291 MachineInstrBuilder MIB =
2292 BuildMI(RestoreMBB, DL, TII->get(VE::LDrii), VE::SX17);
2293 MIB.addReg(VE::SX10);
2294 MIB.addImm(0);
2295 MIB.addImm(24);
2296 MIB.setMemRefs(MMOs);
2297 }
2298 BuildMI(RestoreMBB, DL, TII->get(VE::LEAzii), RestoreDestReg)
2299 .addImm(0)
2300 .addImm(0)
2301 .addImm(1);
2302 BuildMI(RestoreMBB, DL, TII->get(VE::BRCFLa_t)).addMBB(SinkMBB);
2303 RestoreMBB->addSuccessor(SinkMBB);
2304
2305 MI.eraseFromParent();
2306 return SinkMBB;
2307 }
2308
2309 MachineBasicBlock *
2310 VETargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
2311 MachineBasicBlock *MBB) const {
2312 DebugLoc DL = MI.getDebugLoc();
2313 MachineFunction *MF = MBB->getParent();
2314 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2315 MachineRegisterInfo &MRI = MF->getRegInfo();
2316
2317 // Memory Reference.
2318 SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
2319 MI.memoperands_end());
2320 Register BufReg = MI.getOperand(0).getReg();
2321
2322 Register Tmp = MRI.createVirtualRegister(&VE::I64RegClass);
2323 // Since FP is only updated here but NOT referenced, it's treated as GPR.
2324 Register FP = VE::SX9;
2325 Register SP = VE::SX11;
2326
2327 MachineInstrBuilder MIB;
2328
2329 MachineBasicBlock *ThisMBB = MBB;
2330
2331 // For `call @llvm.eh.sjlj.longjmp(buf)`, we generate the following instructions.
2332 //
2333 // ThisMBB:
2334 // %fp = load buf[0]
2335 // %jmp = load buf[1]
2336 // %s10 = buf ; Store an address of buf to SX10 for RestoreMBB
2337 // %sp = load buf[2] ; generated by llvm.eh.sjlj.setjmp.
2338 // jmp %jmp
2339
2340 // Reload FP.
2341 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), FP);
2342 MIB.addReg(BufReg);
2343 MIB.addImm(0);
2344 MIB.addImm(0);
2345 MIB.setMemRefs(MMOs);
2346
2347 // Reload IP.
2348 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), Tmp);
2349 MIB.addReg(BufReg);
2350 MIB.addImm(0);
2351 MIB.addImm(8);
2352 MIB.setMemRefs(MMOs);
2353
2354 // Copy BufReg to SX10 for later use in setjmp's RestoreMBB.
2355 // FIXME: Better to not use SX10 here
2356 BuildMI(*ThisMBB, MI, DL, TII->get(VE::ORri), VE::SX10)
2357 .addReg(BufReg)
2358 .addImm(0);
2359
2360 // Reload SP.
2361 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), SP);
2362 MIB.add(MI.getOperand(0)); // we can preserve the kill flags here.
2363 MIB.addImm(0);
2364 MIB.addImm(16);
2365 MIB.setMemRefs(MMOs);
2366
2367 // Jump.
2368 BuildMI(*ThisMBB, MI, DL, TII->get(VE::BCFLari_t))
2369 .addReg(Tmp, getKillRegState(true))
2370 .addImm(0);
2371
2372 MI.eraseFromParent();
2373 return ThisMBB;
2374 }
2375
2376 MachineBasicBlock *
2377 VETargetLowering::emitSjLjDispatchBlock(MachineInstr &MI,
2378 MachineBasicBlock *BB) const {
2379 DebugLoc DL = MI.getDebugLoc();
2380 MachineFunction *MF = BB->getParent();
2381 MachineFrameInfo &MFI = MF->getFrameInfo();
2382 MachineRegisterInfo &MRI = MF->getRegInfo();
2383 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2384 int FI = MFI.getFunctionContextIndex();
2385
2386 // Get a mapping of the call site numbers to all of the landing pads they're
2387 // associated with.
2388 DenseMap<unsigned, SmallVector<MachineBasicBlock *, 2>> CallSiteNumToLPad;
2389 unsigned MaxCSNum = 0;
2390 for (auto &MBB : *MF) {
2391 if (!MBB.isEHPad())
2392 continue;
2393
2394 MCSymbol *Sym = nullptr;
2395 for (const auto &MI : MBB) {
2396 if (MI.isDebugInstr())
2397 continue;
2398
2399 assert(MI.isEHLabel() && "expected EH_LABEL");
2400 Sym = MI.getOperand(0).getMCSymbol();
2401 break;
2402 }
2403
2404 if (!MF->hasCallSiteLandingPad(Sym))
2405 continue;
2406
2407 for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) {
2408 CallSiteNumToLPad[CSI].push_back(&MBB);
2409 MaxCSNum = std::max(MaxCSNum, CSI);
2410 }
2411 }
2412
2413 // Get an ordered list of the machine basic blocks for the jump table.
2414 std::vector<MachineBasicBlock *> LPadList;
2415 SmallPtrSet<MachineBasicBlock *, 32> InvokeBBs;
2416 LPadList.reserve(CallSiteNumToLPad.size());
2417
2418 for (unsigned CSI = 1; CSI <= MaxCSNum; ++CSI) {
2419 for (auto &LP : CallSiteNumToLPad[CSI]) {
2420 LPadList.push_back(LP);
2421 InvokeBBs.insert(LP->pred_begin(), LP->pred_end());
2422 }
2423 }
2424
2425 assert(!LPadList.empty() &&
2426 "No landing pad destinations for the dispatch jump table!");
2427
2428 // The %fn_context is allocated like below (from --print-after=sjljehprepare):
2429 // %fn_context = alloca { i8*, i64, [4 x i64], i8*, i8*, [5 x i8*] }
2430 //
2431 // This `[5 x i8*]` is jmpbuf, so jmpbuf[1] is FI+72.
2432 // First `i64` is callsite, so callsite is FI+8.
2433 static const int OffsetIC = 72;
2434 static const int OffsetCS = 8;
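// Worked layout of %fn_context (every member is 8-byte aligned):
//   i8*        at offset  0
//   i64        at offset  8   <- callsite, hence OffsetCS == 8
//   [4 x i64]  at offset 16
//   i8*        at offset 48
//   i8*        at offset 56
//   [5 x i8*]  at offset 64   <- jmpbuf, hence jmpbuf[1] is at 64 + 8 == OffsetIC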
2435
2436 // Create the MBBs for the dispatch code like the following:
2437 //
2438 // ThisMBB:
2439 // Prepare DispatchBB address and store it to buf[1].
2440 // ...
2441 //
2442 // DispatchBB:
2443 // %s15 = GETGOT iff isPositionIndependent
2444 // %callsite = load callsite
2445 // brgt.l.t #size of callsites, %callsite, DispContBB
2446 //
2447 // TrapBB:
2448 // Call abort.
2449 //
2450 // DispContBB:
2451 // %breg = address of jump table
2452 // %pc = load and calculate next pc from %breg and %callsite
2453 // jmp %pc
2454
2455 // Shove the dispatch's address into the return slot in the function context.
2456 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
2457 DispatchBB->setIsEHPad(true);
2458
2459 // TrapBB causes a trap like `assert(0)`.
2460 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
2461 DispatchBB->addSuccessor(TrapBB);
2462
2463 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
2464 DispatchBB->addSuccessor(DispContBB);
2465
2466 // Insert MBBs.
2467 MF->push_back(DispatchBB);
2468 MF->push_back(DispContBB);
2469 MF->push_back(TrapBB);
2470
2471 // Insert code to call abort in the TrapBB.
2472 Register Abort = prepareSymbol(*TrapBB, TrapBB->end(), "abort", DL,
2473 /* Local */ false, /* Call */ true);
2474 BuildMI(TrapBB, DL, TII->get(VE::BSICrii), VE::SX10)
2475 .addReg(Abort, getKillRegState(true))
2476 .addImm(0)
2477 .addImm(0);
2478
2479 // Insert code into the entry block that creates and registers the function
2480 // context.
2481 setupEntryBlockForSjLj(MI, BB, DispatchBB, FI, OffsetIC);
2482
2483 // Create the jump table and associated information
2484 unsigned JTE = getJumpTableEncoding();
2485 MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(JTE);
2486 unsigned MJTI = JTI->createJumpTableIndex(LPadList);
2487
2488 const VERegisterInfo &RI = TII->getRegisterInfo();
2489 // Add a register mask with no preserved registers. This results in all
2490 // registers being marked as clobbered.
2491 BuildMI(DispatchBB, DL, TII->get(VE::NOP))
2492 .addRegMask(RI.getNoPreservedMask());
2493
2494 if (isPositionIndependent()) {
2495 // Force generation of GETGOT, since the current implementation doesn't
2496 // save the GOT register.
2497 BuildMI(DispatchBB, DL, TII->get(VE::GETGOT), VE::SX15);
2498 }
2499
2500 // IReg is used as an index in a memory operand and therefore can't be SP
2501 const TargetRegisterClass *RC = &VE::I64RegClass;
2502 Register IReg = MRI.createVirtualRegister(RC);
2503 addFrameReference(BuildMI(DispatchBB, DL, TII->get(VE::LDLZXrii), IReg), FI,
2504 OffsetCS);
2505 if (LPadList.size() < 64) {
2506 BuildMI(DispatchBB, DL, TII->get(VE::BRCFLir_t))
2507 .addImm(VECC::CC_ILE)
2508 .addImm(LPadList.size())
2509 .addReg(IReg)
2510 .addMBB(TrapBB);
2511 } else {
2512 assert(LPadList.size() <= 0x7FFFFFFF && "Too large Landing Pad!");
2513 Register TmpReg = MRI.createVirtualRegister(RC);
2514 BuildMI(DispatchBB, DL, TII->get(VE::LEAzii), TmpReg)
2515 .addImm(0)
2516 .addImm(0)
2517 .addImm(LPadList.size());
2518 BuildMI(DispatchBB, DL, TII->get(VE::BRCFLrr_t))
2519 .addImm(VECC::CC_ILE)
2520 .addReg(TmpReg, getKillRegState(true))
2521 .addReg(IReg)
2522 .addMBB(TrapBB);
2523 }
2524
2525 Register BReg = MRI.createVirtualRegister(RC);
2526 Register Tmp1 = MRI.createVirtualRegister(RC);
2527 Register Tmp2 = MRI.createVirtualRegister(RC);
2528
2529 if (isPositionIndependent()) {
2530 // Create the following instructions for local linkage PIC code.
2531 // lea %Tmp1, .LJTI0_0@gotoff_lo
2532 // and %Tmp2, %Tmp1, (32)0
2533 // lea.sl %BReg, .LJTI0_0@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2534 BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1)
2535 .addImm(0)
2536 .addImm(0)
2537 .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_LO32);
2538 BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2)
2539 .addReg(Tmp1, getKillRegState(true))
2540 .addImm(M0(32));
2541 BuildMI(DispContBB, DL, TII->get(VE::LEASLrri), BReg)
2542 .addReg(VE::SX15)
2543 .addReg(Tmp2, getKillRegState(true))
2544 .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_HI32);
2545 } else {
2546 // Create the following instructions for non-PIC code.
2547 // lea %Tmp1, .LJTI0_0@lo
2548 // and %Tmp2, %Tmp1, (32)0
2549 // lea.sl %BReg, .LJTI0_0@hi(%Tmp2)
2550 BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1)
2551 .addImm(0)
2552 .addImm(0)
2553 .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_LO32);
2554 BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2)
2555 .addReg(Tmp1, getKillRegState(true))
2556 .addImm(M0(32));
2557 BuildMI(DispContBB, DL, TII->get(VE::LEASLrii), BReg)
2558 .addReg(Tmp2, getKillRegState(true))
2559 .addImm(0)
2560 .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_HI32);
2561 }
2562
2563 switch (JTE) {
2564 case MachineJumpTableInfo::EK_BlockAddress: {
2565 // Generate simple block address code for the non-PIC model.
2566 // sll %Tmp1, %IReg, 3
2567 // lds %TReg, 0(%Tmp1, %BReg)
2568 // bcfla %TReg
2569
2570 Register TReg = MRI.createVirtualRegister(RC);
2571 Register Tmp1 = MRI.createVirtualRegister(RC);
2572
2573 BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1)
2574 .addReg(IReg, getKillRegState(true))
2575 .addImm(3);
2576 BuildMI(DispContBB, DL, TII->get(VE::LDrri), TReg)
2577 .addReg(BReg, getKillRegState(true))
2578 .addReg(Tmp1, getKillRegState(true))
2579 .addImm(0);
2580 BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t))
2581 .addReg(TReg, getKillRegState(true))
2582 .addImm(0);
2583 break;
2584 }
2585 case MachineJumpTableInfo::EK_Custom32: {
2586 // Generate block address code using differences from the function pointer
2587 // for PIC model.
2588 // sll %Tmp1, %IReg, 2
2589 // ldl.zx %OReg, 0(%Tmp1, %BReg)
2590 // Prepare function address in BReg2.
2591 // adds.l %TReg, %BReg2, %OReg
2592 // bcfla %TReg
2593
2594 assert(isPositionIndependent());
2595 Register OReg = MRI.createVirtualRegister(RC);
2596 Register TReg = MRI.createVirtualRegister(RC);
2597 Register Tmp1 = MRI.createVirtualRegister(RC);
2598
2599 BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1)
2600 .addReg(IReg, getKillRegState(true))
2601 .addImm(2);
2602 BuildMI(DispContBB, DL, TII->get(VE::LDLZXrri), OReg)
2603 .addReg(BReg, getKillRegState(true))
2604 .addReg(Tmp1, getKillRegState(true))
2605 .addImm(0);
2606 Register BReg2 =
2607 prepareSymbol(*DispContBB, DispContBB->end(),
2608 DispContBB->getParent()->getName(), DL, /* Local */ true);
2609 BuildMI(DispContBB, DL, TII->get(VE::ADDSLrr), TReg)
2610 .addReg(OReg, getKillRegState(true))
2611 .addReg(BReg2, getKillRegState(true));
2612 BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t))
2613 .addReg(TReg, getKillRegState(true))
2614 .addImm(0);
2615 break;
2616 }
2617 default:
2618 llvm_unreachable("Unexpected jump table encoding");
2619 }
2620
2621 // Add the jump table entries as successors to the MBB.
2622 SmallPtrSet<MachineBasicBlock *, 8> SeenMBBs;
2623 for (auto &LP : LPadList)
2624 if (SeenMBBs.insert(LP).second)
2625 DispContBB->addSuccessor(LP);
2626
2627 // N.B. the order the invoke BBs are processed in doesn't matter here.
2628 SmallVector<MachineBasicBlock *, 64> MBBLPads;
2629 const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs();
2630 for (MachineBasicBlock *MBB : InvokeBBs) {
2631 // Remove the landing pad successor from the invoke block and replace it
2632 // with the new dispatch block.
2633 // Keep a copy of Successors since it's modified inside the loop.
2634 SmallVector<MachineBasicBlock *, 8> Successors(MBB->succ_rbegin(),
2635 MBB->succ_rend());
2636 // FIXME: Avoid quadratic complexity.
2637 for (auto *MBBS : Successors) {
2638 if (MBBS->isEHPad()) {
2639 MBB->removeSuccessor(MBBS);
2640 MBBLPads.push_back(MBBS);
2641 }
2642 }
2643
2644 MBB->addSuccessor(DispatchBB);
2645
2646 // Find the invoke call and mark all of the callee-saved registers as
2647 // 'implicit defined' so that they're spilled. This prevents code from
2648 // moving instructions to before the EH block, where they will never be
2649 // executed.
2650 for (auto &II : reverse(*MBB)) {
2651 if (!II.isCall())
2652 continue;
2653
2654 DenseMap<Register, bool> DefRegs;
2655 for (auto &MOp : II.operands())
2656 if (MOp.isReg())
2657 DefRegs[MOp.getReg()] = true;
2658
2659 MachineInstrBuilder MIB(*MF, &II);
2660 for (unsigned RI = 0; SavedRegs[RI]; ++RI) {
2661 Register Reg = SavedRegs[RI];
2662 if (!DefRegs[Reg])
2663 MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
2664 }
2665
2666 break;
2667 }
2668 }
2669
2670 // Mark all former landing pads as non-landing pads. The dispatch is the only
2671 // landing pad now.
2672 for (auto &LP : MBBLPads)
2673 LP->setIsEHPad(false);
2674
2675 // The instruction is gone now.
2676 MI.eraseFromParent();
2677 return BB;
2678 }
2679
2680 MachineBasicBlock *
2681 VETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
2682 MachineBasicBlock *BB) const {
2683 switch (MI.getOpcode()) {
2684 default:
2685 llvm_unreachable("Unknown Custom Instruction!");
2686 case VE::EH_SjLj_LongJmp:
2687 return emitEHSjLjLongJmp(MI, BB);
2688 case VE::EH_SjLj_SetJmp:
2689 return emitEHSjLjSetJmp(MI, BB);
2690 case VE::EH_SjLj_Setup_Dispatch:
2691 return emitSjLjDispatchBlock(MI, BB);
2692 }
2693 }
2694
2695 static bool isSimm7(SDValue V) {
2696 EVT VT = V.getValueType();
2697 if (VT.isVector())
2698 return false;
2699
2700 if (VT.isInteger()) {
2701 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(V))
2702 return isInt<7>(C->getSExtValue());
2703 } else if (VT.isFloatingPoint()) {
2704 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(V)) {
2705 if (VT == MVT::f32 || VT == MVT::f64) {
2706 const APInt &Imm = C->getValueAPF().bitcastToAPInt();
2707 uint64_t Val = Imm.getSExtValue();
2708 if (Imm.getBitWidth() == 32)
2709 Val <<= 32; // A float immediate is placed in the higher bits on VE.
2710 return isInt<7>(Val);
2711 }
2712 }
2713 }
2714 return false;
2715 }
2716
2717 static bool isMImm(SDValue V) {
2718 EVT VT = V.getValueType();
2719 if (VT.isVector())
2720 return false;
2721
2722 if (VT.isInteger()) {
2723 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(V))
2724 return isMImmVal(getImmVal(C));
2725 } else if (VT.isFloatingPoint()) {
2726 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(V)) {
2727 if (VT == MVT::f32) {
2728 // A float value is placed in the higher bits, so ignore the lower 32 bits.
2729 return isMImm32Val(getFpImmVal(C) >> 32);
2730 } else if (VT == MVT::f64) {
2731 return isMImmVal(getFpImmVal(C));
2732 }
2733 }
2734 }
2735 return false;
2736 }
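
// Example (illustration only): the f32 constant 1.0f has the bit pattern
// 0x3f800000. VE keeps an f32 immediate in the upper 32 bits of the 64-bit
// value, so isSimm7 tests 0x3f80000000000000 (not a 7-bit value, so false)
// and isMImm tests whether 0x3f800000 is an (m)0/(m)1 style mask immediate
// (also false here).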
2737
2738 static unsigned decideComp(EVT SrcVT, ISD::CondCode CC) {
2739 if (SrcVT.isFloatingPoint()) {
2740 if (SrcVT == MVT::f128)
2741 return VEISD::CMPQ;
2742 return VEISD::CMPF;
2743 }
2744 return isSignedIntSetCC(CC) ? VEISD::CMPI : VEISD::CMPU;
2745 }
2746
2747 static EVT decideCompType(EVT SrcVT) {
2748 if (SrcVT == MVT::f128)
2749 return MVT::f64;
2750 return SrcVT;
2751 }
2752
2753 static bool safeWithoutCompWithNull(EVT SrcVT, ISD::CondCode CC,
2754 bool WithCMov) {
2755 if (SrcVT.isFloatingPoint()) {
2756 // For floating-point setcc, only unordered comparisons, or general
2757 // comparisons with the -enable-no-nans-fp-math option, reach here, so it
2758 // is safe even if values are NaN. Only f128 is not safe since VE uses the
2759 // f64 result of an f128 comparison.
2760 return SrcVT != MVT::f128;
2761 }
2762 if (isIntEqualitySetCC(CC)) {
2763 // For equality and inequality, it is safe to skip the comparison with 0.
2764 return true;
2765 }
2766 if (WithCMov) {
2767 // For integer setcc with cmov, all signed comparisons with 0
2768 // are safe.
2769 return isSignedIntSetCC(CC);
2770 }
2771 // For plain integer setcc, only signed 64-bit comparisons are safe.
2772 // For unsigned, "CMPU 0x80000000, 0" has to be greater than 0, but it reads
2773 // as less than 0 without the CMPU. For 32 bits, the upper half of the
2774 // register is not guaranteed to be defined, so it is not safe either without CMPI.
2775 return isSignedIntSetCC(CC) && SrcVT == MVT::i64;
2776 }
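
// Worked example for the unsigned case above: with LHS == 0x80000000, RHS == 0
// and an unsigned greater-than, "CMPU LHS, 0" yields a positive result, but if
// the compare is skipped and the raw LHS is used as the condition value, its
// sign bit makes it read as negative and the condition flips. That is why only
// signed i64 compares may skip the explicit comparison here.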
2777
2778 static SDValue generateComparison(EVT VT, SDValue LHS, SDValue RHS,
2779 ISD::CondCode CC, bool WithCMov,
2780 const SDLoc &DL, SelectionDAG &DAG) {
2781 // Compare values. If RHS is 0 and it is safe to calculate without
2782 // comparison, we don't generate an instruction for comparison.
2783 EVT CompVT = decideCompType(VT);
2784 if (CompVT == VT && safeWithoutCompWithNull(VT, CC, WithCMov) &&
2785 (isNullConstant(RHS) || isNullFPConstant(RHS))) {
2786 return LHS;
2787 }
2788 return DAG.getNode(decideComp(VT, CC), DL, CompVT, LHS, RHS);
2789 }
2790
2791 SDValue VETargetLowering::combineSelect(SDNode *N,
2792 DAGCombinerInfo &DCI) const {
2793 assert(N->getOpcode() == ISD::SELECT &&
2794 "Should be called with a SELECT node");
2795 ISD::CondCode CC = ISD::CondCode::SETNE;
2796 SDValue Cond = N->getOperand(0);
2797 SDValue True = N->getOperand(1);
2798 SDValue False = N->getOperand(2);
2799
2800 // We handle only scalar SELECT.
2801 EVT VT = N->getValueType(0);
2802 if (VT.isVector())
2803 return SDValue();
2804
2805 // Perform combineSelect after legalizing the DAG.
2806 if (!DCI.isAfterLegalizeDAG())
2807 return SDValue();
2808
2809 EVT VT0 = Cond.getValueType();
2810 if (isMImm(True)) {
2811 // VE's condition move can handle MImm in True clause, so nothing to do.
2812 } else if (isMImm(False)) {
2813 // VE's conditional move can handle MImm only in the True clause, so swap the
2814 // True and False clauses if False has an MImm value, and update the condition code.
2815 std::swap(True, False);
2816 CC = getSetCCInverse(CC, VT0);
2817 }
2818
2819 SDLoc DL(N);
2820 SelectionDAG &DAG = DCI.DAG;
2821 VECC::CondCode VECCVal;
2822 if (VT0.isFloatingPoint()) {
2823 VECCVal = fpCondCode2Fcc(CC);
2824 } else {
2825 VECCVal = intCondCode2Icc(CC);
2826 }
2827 SDValue Ops[] = {Cond, True, False,
2828 DAG.getConstant(VECCVal, DL, MVT::i32)};
2829 return DAG.getNode(VEISD::CMOV, DL, VT, Ops);
2830 }
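
// Illustrative sketch: for (select %cond, %t, 0), assuming the constant 0
// qualifies as an MImm value, True and False are swapped and the condition is
// inverted, yielding roughly (CMOV %cond, 0, %t, eq), i.e. pick 0 when %cond
// is zero and %t otherwise. The actual VECC encoding comes from
// intCondCode2Icc().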
2831
2832 SDValue VETargetLowering::combineSelectCC(SDNode *N,
2833 DAGCombinerInfo &DCI) const {
2834 assert(N->getOpcode() == ISD::SELECT_CC &&
2835 "Should be called with a SELECT_CC node");
2836 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
2837 SDValue LHS = N->getOperand(0);
2838 SDValue RHS = N->getOperand(1);
2839 SDValue True = N->getOperand(2);
2840 SDValue False = N->getOperand(3);
2841
2842 // We handle only scalar SELECT_CC.
2843 EVT VT = N->getValueType(0);
2844 if (VT.isVector())
2845 return SDValue();
2846
2847 // Perform combineSelectCC after legalizing the DAG.
2848 if (!DCI.isAfterLegalizeDAG())
2849 return SDValue();
2850
2851 // We handle only i32/i64/f32/f64/f128 comparisons.
2852 EVT LHSVT = LHS.getValueType();
2853 assert(LHSVT == RHS.getValueType());
2854 switch (LHSVT.getSimpleVT().SimpleTy) {
2855 case MVT::i32:
2856 case MVT::i64:
2857 case MVT::f32:
2858 case MVT::f64:
2859 case MVT::f128:
2860 break;
2861 default:
2862 // Return SDValue to let llvm handle other types.
2863 return SDValue();
2864 }
2865
2866 if (isMImm(RHS)) {
2867 // VE's comparison can handle MImm in RHS, so nothing to do.
2868 } else if (isSimm7(RHS)) {
2869 // VE's comparison can handle Simm7 in LHS, so swap LHS and RHS, and
2870 // update condition code.
2871 std::swap(LHS, RHS);
2872 CC = getSetCCSwappedOperands(CC);
2873 }
2874 if (isMImm(True)) {
2875 // VE's condition move can handle MImm in True clause, so nothing to do.
2876 } else if (isMImm(False)) {
2877 // VE's conditional move can handle MImm only in the True clause, so swap the
2878 // True and False clauses if False has an MImm value, and update the condition code.
2879 std::swap(True, False);
2880 CC = getSetCCInverse(CC, LHSVT);
2881 }
2882
2883 SDLoc DL(N);
2884 SelectionDAG &DAG = DCI.DAG;
2885
2886 bool WithCMov = true;
2887 SDValue CompNode = generateComparison(LHSVT, LHS, RHS, CC, WithCMov, DL, DAG);
2888
2889 VECC::CondCode VECCVal;
2890 if (LHSVT.isFloatingPoint()) {
2891 VECCVal = fpCondCode2Fcc(CC);
2892 } else {
2893 VECCVal = intCondCode2Icc(CC);
2894 }
2895 SDValue Ops[] = {CompNode, True, False,
2896 DAG.getConstant(VECCVal, DL, MVT::i32)};
2897 return DAG.getNode(VEISD::CMOV, DL, VT, Ops);
2898 }
2899
2900 static bool isI32InsnAllUses(const SDNode *User, const SDNode *N);
2901 static bool isI32Insn(const SDNode *User, const SDNode *N) {
2902 switch (User->getOpcode()) {
2903 default:
2904 return false;
2905 case ISD::ADD:
2906 case ISD::SUB:
2907 case ISD::MUL:
2908 case ISD::SDIV:
2909 case ISD::UDIV:
2910 case ISD::SETCC:
2911 case ISD::SMIN:
2912 case ISD::SMAX:
2913 case ISD::SHL:
2914 case ISD::SRA:
2915 case ISD::BSWAP:
2916 case ISD::SINT_TO_FP:
2917 case ISD::UINT_TO_FP:
2918 case ISD::BR_CC:
2919 case ISD::BITCAST:
2920 case ISD::ATOMIC_CMP_SWAP:
2921 case ISD::ATOMIC_SWAP:
2922 case VEISD::CMPU:
2923 case VEISD::CMPI:
2924 return true;
2925 case ISD::SRL:
2926 if (N->getOperand(0).getOpcode() != ISD::SRL)
2927 return true;
2928 // (srl (trunc (srl ...))) may be optimized by combining the srls, so
2929 // don't optimize the trunc for now.
2930 return false;
2931 case ISD::SELECT_CC:
2932 if (User->getOperand(2).getNode() != N &&
2933 User->getOperand(3).getNode() != N)
2934 return true;
2935 return isI32InsnAllUses(User, N);
2936 case VEISD::CMOV:
2937 // CMOV in (cmov (trunc ...), true, false, int-comparison) is safe.
2938 // However, trunc in true or false clauses is not safe.
2939 if (User->getOperand(1).getNode() != N &&
2940 User->getOperand(2).getNode() != N &&
2941 isa<ConstantSDNode>(User->getOperand(3))) {
2942 VECC::CondCode VECCVal = static_cast<VECC::CondCode>(
2943 cast<ConstantSDNode>(User->getOperand(3))->getZExtValue());
2944 return isIntVECondCode(VECCVal);
2945 }
2946 [[fallthrough]];
2947 case ISD::AND:
2948 case ISD::OR:
2949 case ISD::XOR:
2950 case ISD::SELECT:
2951 case ISD::CopyToReg:
2952 // Check all uses of selections, bit operations, and copies. If all of them
2953 // are safe, optimize the truncate to extract_subreg.
2954 return isI32InsnAllUses(User, N);
2955 }
2956 }
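// Illustrative note (added commentary): isI32Insn() asks whether a user of the
// truncated value only observes the low 32 bits. For example, an ADD or SETCC
// user is safe, while (srl (trunc (srl %x, c1)), c2) is deliberately left
// alone because the two shifts may themselves be combined later, and a CMOV
// user is only safe when the trunc feeds the comparison operand rather than
// the True/False clauses.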
2957
2958 static bool isI32InsnAllUses(const SDNode *User, const SDNode *N) {
2959 // Check all uses of the User node. If all of them are safe, optimize the
2960 // truncate to extract_subreg.
2961 for (const SDNode *U : User->uses()) {
2962 switch (U->getOpcode()) {
2963 default:
2964 // If the use is an instruction which treats the source operand as i32,
2965 // it is safe to avoid truncate here.
2966 if (isI32Insn(U, N))
2967 continue;
2968 break;
2969 case ISD::ANY_EXTEND:
2970 case ISD::SIGN_EXTEND:
2971 case ISD::ZERO_EXTEND: {
2972 // Special optimization for the combination of ext and trunc.
2973 // (ext ... (select ... (trunc ...))) is safe to avoid the truncate here
2974 // since the truncate clears the upper 32 bits, which are then refilled
2975 // by one of the ext instructions later.
2976 assert(N->getValueType(0) == MVT::i32 &&
2977 "found truncate to a non-i32 integer");
2978 if (User->getOpcode() == ISD::SELECT_CC ||
2979 User->getOpcode() == ISD::SELECT || User->getOpcode() == VEISD::CMOV)
2980 continue;
2981 break;
2982 }
2983 }
2984 return false;
2985 }
2986 return true;
2987 }
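// Illustrative example (added commentary): in
//   (sign_extend (select %c, (trunc i64 %x to i32), %y))
// the truncate only clears the upper 32 bits and the sign_extend refills them
// afterwards, so ext users reached through the select-like nodes above are
// treated as safe.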
2988
2989 // Optimize TRUNCATE in DAG combining. Optimizing it in custom lowering is
2990 // sometimes too early. Optimizing it in DAG pattern matching in VEInstrInfo.td
2991 // is sometimes too late. So, do it here.
2992 SDValue VETargetLowering::combineTRUNCATE(SDNode *N,
2993 DAGCombinerInfo &DCI) const {
2994 assert(N->getOpcode() == ISD::TRUNCATE &&
2995 "Should be called with a TRUNCATE node");
2996
2997 SelectionDAG &DAG = DCI.DAG;
2998 SDLoc DL(N);
2999 EVT VT = N->getValueType(0);
3000
3001 // We prefer to do this when all types are legal.
3002 if (!DCI.isAfterLegalizeDAG())
3003 return SDValue();
3004
3005 // For now, skip combining TRUNCATE if the operand of TRUNCATE might be a constant.
3006 if (N->getOperand(0)->getOpcode() == ISD::SELECT_CC &&
3007 isa<ConstantSDNode>(N->getOperand(0)->getOperand(0)) &&
3008 isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
3009 return SDValue();
3010
3011 // Check all uses of this TRUNCATE.
3012 for (const SDNode *User : N->uses()) {
3013 // Make sure that we're not going to replace this TRUNCATE for non-i32
3014 // instructions.
3015 //
3016 // FIXME: Although we could sometimes handle this, and it does occur in
3017 // practice that one of the condition inputs to the select is also one of
3018 // the outputs, we currently can't deal with this.
3019 if (isI32Insn(User, N))
3020 continue;
3021
3022 return SDValue();
3023 }
3024
3025 SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
3026 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT,
3027 N->getOperand(0), SubI32),
3028 0);
3029 }
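// Illustrative example (added commentary): when every user passes the checks
// above, a node such as (truncate i64 %x to i32) is replaced with
// (EXTRACT_SUBREG %x, sub_i32), i.e. a register-class view of the same value,
// so no separate truncating instruction needs to be emitted.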
3030
3031 SDValue VETargetLowering::PerformDAGCombine(SDNode *N,
3032 DAGCombinerInfo &DCI) const {
3033 switch (N->getOpcode()) {
3034 default:
3035 break;
3036 case ISD::SELECT:
3037 return combineSelect(N, DCI);
3038 case ISD::SELECT_CC:
3039 return combineSelectCC(N, DCI);
3040 case ISD::TRUNCATE:
3041 return combineTRUNCATE(N, DCI);
3042 }
3043
3044 return SDValue();
3045 }
3046
3047 //===----------------------------------------------------------------------===//
3048 // VE Inline Assembly Support
3049 //===----------------------------------------------------------------------===//
3050
3051 VETargetLowering::ConstraintType
3052 VETargetLowering::getConstraintType(StringRef Constraint) const {
3053 if (Constraint.size() == 1) {
3054 switch (Constraint[0]) {
3055 default:
3056 break;
3057 case 'v': // vector registers
3058 return C_RegisterClass;
3059 }
3060 }
3061 return TargetLowering::getConstraintType(Constraint);
3062 }
3063
3064 std::pair<unsigned, const TargetRegisterClass *>
3065 VETargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
3066 StringRef Constraint,
3067 MVT VT) const {
3068 const TargetRegisterClass *RC = nullptr;
3069 if (Constraint.size() == 1) {
3070 switch (Constraint[0]) {
3071 default:
3072 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3073 case 'r':
3074 RC = &VE::I64RegClass;
3075 break;
3076 case 'v':
3077 RC = &VE::V64RegClass;
3078 break;
3079 }
3080 return std::make_pair(0U, RC);
3081 }
3082
3083 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3084 }
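// Illustrative usage (added commentary, hypothetical source): with the 'v'
// constraint mapped to V64RegClass above, an inline-asm clause such as
//   __asm__("..." : "=v"(out) : "v"(in));
// gets both operands assigned to V64 vector registers, while 'r' operands
// continue to be placed in I64 scalar registers.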
3085
3086 //===----------------------------------------------------------------------===//
3087 // VE Target Optimization Support
3088 //===----------------------------------------------------------------------===//
3089
3090 unsigned VETargetLowering::getMinimumJumpTableEntries() const {
3091 // Specify 8 for the PIC model to mitigate the cost of PIC load instructions.
3092 if (isJumpTableRelative())
3093 return 8;
3094
3095 return TargetLowering::getMinimumJumpTableEntries();
3096 }
3097
3098 bool VETargetLowering::hasAndNot(SDValue Y) const {
3099 EVT VT = Y.getValueType();
3100
3101 // VE doesn't have a vector and-not instruction.
3102 if (VT.isVector())
3103 return false;
3104
3105 // VE allows different immediate values for X and Y in ~X & Y.
3106 // Only simm7 works for X, and only mimm works for Y on VE. However, this
3107 // function is used to check whether an immediate value is OK for the and-not
3108 // instruction as both X and Y. Generating an additional instruction to
3109 // materialize an immediate value is no good, since the purpose of this
3110 // function is to convert a series of 3 instructions into another series of
3111 // 3 instructions with better parallelism. Therefore, we return false
3112 // for all immediate values for now.
3113 // FIXME: Change hasAndNot to take two operands so that it works
3114 // correctly with Aurora VE.
3115 if (isa<ConstantSDNode>(Y))
3116 return false;
3117
3118 // It's ok for generic registers.
3119 return true;
3120 }
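// Illustrative note (added commentary): for (and (xor %x, -1), %y) this hook
// reports whether an and-not form is usable for the operand it is handed.
// Returning false for constants avoids the case where the chosen operand
// position cannot encode the immediate (simm7 for X vs. mimm for Y), which
// would require an extra instruction just to materialize it.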
3121
3122 SDValue VETargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3123 SelectionDAG &DAG) const {
3124 assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
3125 MVT VT = Op.getOperand(0).getSimpleValueType();
3126
3127 // Special treatment for packed V64 types.
3128 assert(VT == MVT::v512i32 || VT == MVT::v512f32);
3129 (void)VT;
3130 // Example of generated code (pseudo-code):
3131 // %packed_v = extractelt %vr, %idx / 2
3132 // %v = %packed_v >> (((%idx % 2) ^ 1) * 32)
3133 // %res = %v & 0xffffffff
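// Worked example (added commentary): for %idx = 2 the element lives in the
// upper half of packed element 1, so HalfIdx = 2 >> 1 = 1 and
// Shift = ((2 & 1) ^ 1) << 5 = 32, giving (%packed_v >> 32) & 0xffffffff.
// For %idx = 3, Shift = 0 and the low 32 bits of the same packed element
// are returned.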
3134
3135 SDValue Vec = Op.getOperand(0);
3136 SDValue Idx = Op.getOperand(1);
3137 SDLoc DL(Op);
3138 SDValue Result = Op;
3139 if (false /* Idx->isConstant() */) {
3140 // TODO: optimized implementation using constant values
3141 } else {
3142 SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
3143 SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
3144 SDValue PackedElt =
3145 SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
3146 SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
3147 SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
3148 SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
3149 Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
3150 PackedElt = DAG.getNode(ISD::SRL, DL, MVT::i64, {PackedElt, Shift});
3151 SDValue Mask = DAG.getConstant(0xFFFFFFFFL, DL, MVT::i64);
3152 PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
3153 SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
3154 Result = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
3155 MVT::i32, PackedElt, SubI32),
3156 0);
3157
3158 if (Op.getSimpleValueType() == MVT::f32) {
3159 Result = DAG.getBitcast(MVT::f32, Result);
3160 } else {
3161 assert(Op.getSimpleValueType() == MVT::i32);
3162 }
3163 }
3164 return Result;
3165 }
3166
3167 SDValue VETargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3168 SelectionDAG &DAG) const {
3169 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
3170 MVT VT = Op.getOperand(0).getSimpleValueType();
3171
3172 // Special treatment for packed V64 types.
3173 assert(VT == MVT::v512i32 || VT == MVT::v512f32);
3174 (void)VT;
3175 // The v512i32 and v512f32 elements start from the upper bits (0..31). These
3176 // "upper bits" require `val << 32` from the C implementation's point of view.
3177 //
3178 // Example of generated code (pseudo-code):
3179 // %packed_elt = extractelt %vr, (%idx >> 1)
3180 // %shift = ((%idx & 1) ^ 1) << 5
3181 // %packed_elt &= 0xffffffff00000000 >> shift
3182 // %packed_elt |= (zext %val) << shift
3183 // %vr = insertelt %vr, %packed_elt, (%idx >> 1)
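// Worked example (added commentary): inserting at %idx = 2 gives
// %shift = ((2 & 1) ^ 1) << 5 = 32, so the mask becomes
// 0xffffffff00000000 >> 32 = 0x00000000ffffffff and keeps the low half of
// the packed element, while (zext %val) << 32 places the new value in the
// upper half before the 64-bit element is written back at index 1.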
3184
3185 SDLoc DL(Op);
3186 SDValue Vec = Op.getOperand(0);
3187 SDValue Val = Op.getOperand(1);
3188 SDValue Idx = Op.getOperand(2);
3189 if (Idx.getSimpleValueType() == MVT::i32)
3190 Idx = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Idx);
3191 if (Val.getSimpleValueType() == MVT::f32)
3192 Val = DAG.getBitcast(MVT::i32, Val);
3193 assert(Val.getSimpleValueType() == MVT::i32);
3194 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
3195
3196 SDValue Result = Op;
3197 if (false /* Idx->isConstant()*/) {
3198 // TODO: optimized implementation using constant values
3199 } else {
3200 SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
3201 SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
3202 SDValue PackedElt =
3203 SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
3204 SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
3205 SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
3206 SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
3207 Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
3208 SDValue Mask = DAG.getConstant(0xFFFFFFFF00000000L, DL, MVT::i64);
3209 Mask = DAG.getNode(ISD::SRL, DL, MVT::i64, {Mask, Shift});
3210 PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
3211 Val = DAG.getNode(ISD::SHL, DL, MVT::i64, {Val, Shift});
3212 PackedElt = DAG.getNode(ISD::OR, DL, MVT::i64, {PackedElt, Val});
3213 Result =
3214 SDValue(DAG.getMachineNode(VE::LSVrr_v, DL, Vec.getSimpleValueType(),
3215 {HalfIdx, PackedElt, Vec}),
3216 0);
3217 }
3218 return Result;
3219 }
3220