xref: /llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp (revision 7fff2527f8a3c3d201136a6051cb9127e45f67a7)
1 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the PPCISelLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPCISelLowering.h"
14 #include "MCTargetDesc/PPCMCTargetDesc.h"
15 #include "MCTargetDesc/PPCPredicates.h"
16 #include "PPC.h"
17 #include "PPCCCState.h"
18 #include "PPCCallingConv.h"
19 #include "PPCFrameLowering.h"
20 #include "PPCInstrInfo.h"
21 #include "PPCMachineFunctionInfo.h"
22 #include "PPCPerfectShuffle.h"
23 #include "PPCRegisterInfo.h"
24 #include "PPCSubtarget.h"
25 #include "PPCTargetMachine.h"
26 #include "llvm/ADT/APFloat.h"
27 #include "llvm/ADT/APInt.h"
28 #include "llvm/ADT/APSInt.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/STLExtras.h"
32 #include "llvm/ADT/SmallPtrSet.h"
33 #include "llvm/ADT/SmallSet.h"
34 #include "llvm/ADT/SmallVector.h"
35 #include "llvm/ADT/Statistic.h"
36 #include "llvm/ADT/StringRef.h"
37 #include "llvm/CodeGen/CallingConvLower.h"
38 #include "llvm/CodeGen/ISDOpcodes.h"
39 #include "llvm/CodeGen/MachineBasicBlock.h"
40 #include "llvm/CodeGen/MachineFrameInfo.h"
41 #include "llvm/CodeGen/MachineFunction.h"
42 #include "llvm/CodeGen/MachineInstr.h"
43 #include "llvm/CodeGen/MachineInstrBuilder.h"
44 #include "llvm/CodeGen/MachineJumpTableInfo.h"
45 #include "llvm/CodeGen/MachineLoopInfo.h"
46 #include "llvm/CodeGen/MachineMemOperand.h"
47 #include "llvm/CodeGen/MachineModuleInfo.h"
48 #include "llvm/CodeGen/MachineOperand.h"
49 #include "llvm/CodeGen/MachineRegisterInfo.h"
50 #include "llvm/CodeGen/SelectionDAG.h"
51 #include "llvm/CodeGen/SelectionDAGNodes.h"
52 #include "llvm/CodeGen/TargetInstrInfo.h"
53 #include "llvm/CodeGen/TargetLowering.h"
54 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
55 #include "llvm/CodeGen/TargetRegisterInfo.h"
56 #include "llvm/CodeGen/ValueTypes.h"
57 #include "llvm/CodeGenTypes/MachineValueType.h"
58 #include "llvm/IR/CallingConv.h"
59 #include "llvm/IR/Constant.h"
60 #include "llvm/IR/Constants.h"
61 #include "llvm/IR/DataLayout.h"
62 #include "llvm/IR/DebugLoc.h"
63 #include "llvm/IR/DerivedTypes.h"
64 #include "llvm/IR/Function.h"
65 #include "llvm/IR/GlobalValue.h"
66 #include "llvm/IR/IRBuilder.h"
67 #include "llvm/IR/Instructions.h"
68 #include "llvm/IR/Intrinsics.h"
69 #include "llvm/IR/IntrinsicsPowerPC.h"
70 #include "llvm/IR/Module.h"
71 #include "llvm/IR/Type.h"
72 #include "llvm/IR/Use.h"
73 #include "llvm/IR/Value.h"
74 #include "llvm/MC/MCContext.h"
75 #include "llvm/MC/MCExpr.h"
76 #include "llvm/MC/MCSectionXCOFF.h"
77 #include "llvm/MC/MCSymbolXCOFF.h"
78 #include "llvm/Support/AtomicOrdering.h"
79 #include "llvm/Support/BranchProbability.h"
80 #include "llvm/Support/Casting.h"
81 #include "llvm/Support/CodeGen.h"
82 #include "llvm/Support/CommandLine.h"
83 #include "llvm/Support/Compiler.h"
84 #include "llvm/Support/Debug.h"
85 #include "llvm/Support/ErrorHandling.h"
86 #include "llvm/Support/Format.h"
87 #include "llvm/Support/KnownBits.h"
88 #include "llvm/Support/MathExtras.h"
89 #include "llvm/Support/raw_ostream.h"
90 #include "llvm/Target/TargetMachine.h"
91 #include "llvm/Target/TargetOptions.h"
92 #include <algorithm>
93 #include <cassert>
94 #include <cstdint>
95 #include <iterator>
96 #include <list>
97 #include <optional>
98 #include <utility>
99 #include <vector>
100 
101 using namespace llvm;
102 
103 #define DEBUG_TYPE "ppc-lowering"
104 
105 static cl::opt<bool> DisableP10StoreForward(
106     "disable-p10-store-forward",
107     cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden,
108     cl::init(false));
109 
110 static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
111     cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
112 
113 static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
114     cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
115 
116 static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
117     cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
118 
119 static cl::opt<bool> DisableSCO("disable-ppc-sco",
120     cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
121 
122 static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
123     cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
124 
125 static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
126     cl::desc("use absolute jump tables on ppc"), cl::Hidden);
127 
128 static cl::opt<bool>
129     DisablePerfectShuffle("ppc-disable-perfect-shuffle",
130                           cl::desc("disable vector permute decomposition"),
131                           cl::init(true), cl::Hidden);
132 
133 cl::opt<bool> DisableAutoPairedVecSt(
134     "disable-auto-paired-vec-st",
135     cl::desc("disable automatically generated 32byte paired vector stores"),
136     cl::init(true), cl::Hidden);
137 
138 static cl::opt<unsigned> PPCMinimumJumpTableEntries(
139     "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
140     cl::desc("Set minimum number of entries to use a jump table on PPC"));
141 
142 static cl::opt<unsigned> PPCGatherAllAliasesMaxDepth(
143     "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
144     cl::desc("max depth when checking alias info in GatherAllAliases()"));
145 
146 static cl::opt<unsigned> PPCAIXTLSModelOptUseIEForLDLimit(
147     "ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
148     cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
149              "function to use initial-exec"));
150 
151 STATISTIC(NumTailCalls, "Number of tail calls");
152 STATISTIC(NumSiblingCalls, "Number of sibling calls");
153 STATISTIC(ShufflesHandledWithVPERM,
154           "Number of shuffles lowered to a VPERM or XXPERM");
155 STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
156 
157 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
158 
159 static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
160 
161 static const char AIXSSPCanaryWordName[] = "__ssp_canary_word";
162 
163 // A faster local-[exec|dynamic] TLS access sequence (enabled with the
164 // -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
165 // variables; consistent with the IBM XL compiler, we apply a max size of
166 // slightly under 32KB.
167 constexpr uint64_t AIXSmallTlsPolicySizeLimit = 32751;
168 
169 // FIXME: Remove this once the bug has been fixed!
170 extern cl::opt<bool> ANDIGlueBug;
171 
172 PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
173                                      const PPCSubtarget &STI)
174     : TargetLowering(TM), Subtarget(STI) {
175   // Initialize the map that relates the PPC addressing modes to the computed
176   // flags of a load/store instruction. The map is used to determine the
177   // optimal addressing mode when selecting loads and stores.
178   initializeAddrModeMap();
179   // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
180   // arguments are at least 4/8 bytes aligned.
181   bool isPPC64 = Subtarget.isPPC64();
182   setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
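  // RegVT is the native integer register type: i64 on 64-bit subtargets and i32 otherwise.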
183   const MVT RegVT = Subtarget.getScalarIntVT();
184 
185   // Set up the register classes.
186   addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
187   if (!useSoftFloat()) {
188     if (hasSPE()) {
189       addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
190       // EFPU2 APU only supports f32
191       if (!Subtarget.hasEFPU2())
192         addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
193     } else {
194       addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
195       addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
196     }
197   }
198 
199   setOperationAction(ISD::UADDO, RegVT, Custom);
200 
201   // On P10, the default lowering generates better code using the
202   // setbc instruction.
203   if (!Subtarget.hasP10Vector()) {
204     setOperationAction(ISD::SSUBO, MVT::i32, Custom);
205     if (isPPC64)
206       setOperationAction(ISD::SSUBO, MVT::i64, Custom);
207   }
208 
209   // Match BITREVERSE to the customized fast code sequence in the .td file.
210   setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
211   setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
212 
213   // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
214   setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
215 
216   // Custom lower inline assembly to check for special registers.
217   setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
218   setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom);
219 
220   // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
221   for (MVT VT : MVT::integer_valuetypes()) {
222     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
223     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
224   }
225 
226   if (Subtarget.isISA3_0()) {
227     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
228     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
229     setTruncStoreAction(MVT::f64, MVT::f16, Legal);
230     setTruncStoreAction(MVT::f32, MVT::f16, Legal);
231   } else {
232     // No extending loads from f16 or HW conversions back and forth.
233     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
234     setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
235     setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
236     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
237     setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
238     setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
239     setTruncStoreAction(MVT::f64, MVT::f16, Expand);
240     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
241   }
242 
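  // A truncating f64 -> f32 store is expanded into an FP_ROUND followed by a normal store.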
243   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
244 
245   // PowerPC has pre-inc loads and stores.
246   setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
247   setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
248   setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
249   setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
250   setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
251   setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
252   setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
253   setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
254   setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
255   setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
256   if (!Subtarget.hasSPE()) {
257     setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
258     setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
259     setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
260     setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
261   }
262 
263   // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
264   const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
265   for (MVT VT : ScalarIntVTs) {
266     setOperationAction(ISD::ADDC, VT, Legal);
267     setOperationAction(ISD::ADDE, VT, Legal);
268     setOperationAction(ISD::SUBC, VT, Legal);
269     setOperationAction(ISD::SUBE, VT, Legal);
270   }
271 
272   if (Subtarget.useCRBits()) {
273     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
274 
275     if (isPPC64 || Subtarget.hasFPCVT()) {
276       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Promote);
277       AddPromotedToType(ISD::STRICT_SINT_TO_FP, MVT::i1, RegVT);
278       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Promote);
279       AddPromotedToType(ISD::STRICT_UINT_TO_FP, MVT::i1, RegVT);
280 
281       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
282       AddPromotedToType(ISD::SINT_TO_FP, MVT::i1, RegVT);
283       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
284       AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, RegVT);
285 
286       setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i1, Promote);
287       AddPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::i1, RegVT);
288       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i1, Promote);
289       AddPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::i1, RegVT);
290 
291       setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
292       AddPromotedToType(ISD::FP_TO_SINT, MVT::i1, RegVT);
293       setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
294       AddPromotedToType(ISD::FP_TO_UINT, MVT::i1, RegVT);
295     } else {
296       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Custom);
297       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Custom);
298       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
299       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
300     }
301 
302     // PowerPC does not support direct load/store of condition registers.
303     setOperationAction(ISD::LOAD, MVT::i1, Custom);
304     setOperationAction(ISD::STORE, MVT::i1, Custom);
305 
306     // FIXME: Remove this once the ANDI glue bug is fixed:
307     if (ANDIGlueBug)
308       setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
309 
310     for (MVT VT : MVT::integer_valuetypes()) {
311       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
312       setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
313       setTruncStoreAction(VT, MVT::i1, Expand);
314     }
315 
316     addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
317   }
318 
319   // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
320   // PPC (the libcall is not available).
321   setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
322   setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);
323   setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom);
324   setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom);
325 
326   // We do not currently implement these libm ops for PowerPC.
327   setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
328   setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
329   setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
330   setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
331   setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
332   setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
333 
334   // PowerPC has no SREM/UREM instructions unless we are on P9
335   // On P9 we may use a hardware instruction to compute the remainder.
336   // When the result of both the remainder and the division is required it is
337   // more efficient to compute the remainder from the result of the division
338   // rather than use the remainder instruction. The instructions are legalized
339   // directly because the DivRemPairsPass performs the transformation at the IR
340   // level.
341   if (Subtarget.isISA3_0()) {
342     setOperationAction(ISD::SREM, MVT::i32, Legal);
343     setOperationAction(ISD::UREM, MVT::i32, Legal);
344     setOperationAction(ISD::SREM, MVT::i64, Legal);
345     setOperationAction(ISD::UREM, MVT::i64, Legal);
346   } else {
347     setOperationAction(ISD::SREM, MVT::i32, Expand);
348     setOperationAction(ISD::UREM, MVT::i32, Expand);
349     setOperationAction(ISD::SREM, MVT::i64, Expand);
350     setOperationAction(ISD::UREM, MVT::i64, Expand);
351   }
352 
353   // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
354   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
355   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
356   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
357   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
358   setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
359   setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
360   setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
361   setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
362 
363   // Handle constrained floating-point operations on scalar types.
364   // TODO: Handle SPE-specific operations.
365   setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
366   setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
367   setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
368   setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
369   setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
370 
371   setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
372   setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
373   setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
374   setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
375 
376   if (!Subtarget.hasSPE()) {
377     setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
378     setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
379   }
380 
381   if (Subtarget.hasVSX()) {
382     setOperationAction(ISD::STRICT_FRINT, MVT::f32, Legal);
383     setOperationAction(ISD::STRICT_FRINT, MVT::f64, Legal);
384   }
385 
386   if (Subtarget.hasFSQRT()) {
387     setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
388     setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
389   }
390 
391   if (Subtarget.hasFPRND()) {
392     setOperationAction(ISD::STRICT_FFLOOR, MVT::f32, Legal);
393     setOperationAction(ISD::STRICT_FCEIL,  MVT::f32, Legal);
394     setOperationAction(ISD::STRICT_FTRUNC, MVT::f32, Legal);
395     setOperationAction(ISD::STRICT_FROUND, MVT::f32, Legal);
396 
397     setOperationAction(ISD::STRICT_FFLOOR, MVT::f64, Legal);
398     setOperationAction(ISD::STRICT_FCEIL,  MVT::f64, Legal);
399     setOperationAction(ISD::STRICT_FTRUNC, MVT::f64, Legal);
400     setOperationAction(ISD::STRICT_FROUND, MVT::f64, Legal);
401   }
402 
403   // We don't support sin/cos/sqrt/fmod/pow
404   setOperationAction(ISD::FSIN , MVT::f64, Expand);
405   setOperationAction(ISD::FCOS , MVT::f64, Expand);
406   setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
407   setOperationAction(ISD::FREM , MVT::f64, Expand);
408   setOperationAction(ISD::FPOW , MVT::f64, Expand);
409   setOperationAction(ISD::FSIN , MVT::f32, Expand);
410   setOperationAction(ISD::FCOS , MVT::f32, Expand);
411   setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
412   setOperationAction(ISD::FREM , MVT::f32, Expand);
413   setOperationAction(ISD::FPOW , MVT::f32, Expand);
414 
415   // MASS transformation for LLVM intrinsics with replicating fast-math flag
416   // to be consistent with the PPCGenScalarMASSEntries pass.
417   if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
418     setOperationAction(ISD::FSIN , MVT::f64, Custom);
419     setOperationAction(ISD::FCOS , MVT::f64, Custom);
420     setOperationAction(ISD::FPOW , MVT::f64, Custom);
421     setOperationAction(ISD::FLOG, MVT::f64, Custom);
422     setOperationAction(ISD::FLOG10, MVT::f64, Custom);
423     setOperationAction(ISD::FEXP, MVT::f64, Custom);
424     setOperationAction(ISD::FSIN , MVT::f32, Custom);
425     setOperationAction(ISD::FCOS , MVT::f32, Custom);
426     setOperationAction(ISD::FPOW , MVT::f32, Custom);
427     setOperationAction(ISD::FLOG, MVT::f32, Custom);
428     setOperationAction(ISD::FLOG10, MVT::f32, Custom);
429     setOperationAction(ISD::FEXP, MVT::f32, Custom);
430   }
431 
432   if (Subtarget.hasSPE()) {
433     setOperationAction(ISD::FMA  , MVT::f64, Expand);
434     setOperationAction(ISD::FMA  , MVT::f32, Expand);
435   } else {
436     setOperationAction(ISD::FMA  , MVT::f64, Legal);
437     setOperationAction(ISD::FMA  , MVT::f32, Legal);
438     setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
439     setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
440   }
441 
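  // SPE has no extending f32 -> f64 load; expand it into a load plus fp_extend.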
442   if (Subtarget.hasSPE())
443     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
444 
445   // Expand FSQRT unless hardware sqrt or an unsafe-math estimate sequence is available.
446   if (!Subtarget.hasFSQRT() &&
447       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
448         Subtarget.hasFRE()))
449     setOperationAction(ISD::FSQRT, MVT::f64, Expand);
450 
451   if (!Subtarget.hasFSQRT() &&
452       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
453         Subtarget.hasFRES()))
454     setOperationAction(ISD::FSQRT, MVT::f32, Expand);
455 
456   if (Subtarget.hasFCPSGN()) {
457     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
458     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
459   } else {
460     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
461     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
462   }
463 
464   if (Subtarget.hasFPRND()) {
465     setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
466     setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
467     setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
468     setOperationAction(ISD::FROUND, MVT::f64, Legal);
469 
470     setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
471     setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
472     setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
473     setOperationAction(ISD::FROUND, MVT::f32, Legal);
474   }
475 
476   // Prior to P10, PowerPC does not have a scalar BSWAP instruction, but we can
477   // use the P9 vector byte-reverse instruction xxbrd to speed up scalar BSWAP64.
478   if (Subtarget.isISA3_1()) {
479     setOperationAction(ISD::BSWAP, MVT::i32, Legal);
480     setOperationAction(ISD::BSWAP, MVT::i64, Legal);
481   } else {
482     setOperationAction(ISD::BSWAP, MVT::i32, Expand);
483     setOperationAction(ISD::BSWAP, MVT::i64,
484                        (Subtarget.hasP9Vector() && isPPC64) ? Custom : Expand);
485   }
486 
487   // CTPOP and CTTZ were introduced in P8 and P9, respectively.
488   if (Subtarget.isISA3_0()) {
489     setOperationAction(ISD::CTTZ , MVT::i32  , Legal);
490     setOperationAction(ISD::CTTZ , MVT::i64  , Legal);
491   } else {
492     setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
493     setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
494   }
495 
496   if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
497     setOperationAction(ISD::CTPOP, MVT::i32  , Legal);
498     setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
499   } else {
500     setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
501     setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
502   }
503 
504   // PowerPC does not have ROTR
505   setOperationAction(ISD::ROTR, MVT::i32   , Expand);
506   setOperationAction(ISD::ROTR, MVT::i64   , Expand);
507 
508   if (!Subtarget.useCRBits()) {
509     // PowerPC does not have Select
510     setOperationAction(ISD::SELECT, MVT::i32, Expand);
511     setOperationAction(ISD::SELECT, MVT::i64, Expand);
512     setOperationAction(ISD::SELECT, MVT::f32, Expand);
513     setOperationAction(ISD::SELECT, MVT::f64, Expand);
514   }
515 
516   // PowerPC wants to turn select_cc of FP into fsel when possible.
517   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
518   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
519 
520   // PowerPC wants to optimize integer setcc a bit
521   if (!Subtarget.useCRBits())
522     setOperationAction(ISD::SETCC, MVT::i32, Custom);
523 
524   if (Subtarget.hasFPU()) {
525     setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
526     setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
527     setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Legal);
528 
529     setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
530     setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
531     setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Legal);
532   }
533 
534   // PowerPC does not have BRCOND, which requires SetCC.
535   if (!Subtarget.useCRBits())
536     setOperationAction(ISD::BRCOND, MVT::Other, Expand);
537 
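  // Expanded BR_JT loads the target from the jump table and uses an indirect branch.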
538   setOperationAction(ISD::BR_JT,  MVT::Other, Expand);
539 
540   if (Subtarget.hasSPE()) {
541     // SPE has built-in conversions
542     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Legal);
543     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Legal);
544     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Legal);
545     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
546     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
547     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
548 
549     // SPE supports signaling compare of f32/f64.
550     setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
551     setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
552   } else {
553     // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
554     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
555     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
556 
557     // PowerPC does not have [U|S]INT_TO_FP
558     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Expand);
559     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Expand);
560     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
561     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
562   }
563 
564   if (Subtarget.hasDirectMove() && isPPC64) {
565     setOperationAction(ISD::BITCAST, MVT::f32, Legal);
566     setOperationAction(ISD::BITCAST, MVT::i32, Legal);
567     setOperationAction(ISD::BITCAST, MVT::i64, Legal);
568     setOperationAction(ISD::BITCAST, MVT::f64, Legal);
569     if (TM.Options.UnsafeFPMath) {
570       setOperationAction(ISD::LRINT, MVT::f64, Legal);
571       setOperationAction(ISD::LRINT, MVT::f32, Legal);
572       setOperationAction(ISD::LLRINT, MVT::f64, Legal);
573       setOperationAction(ISD::LLRINT, MVT::f32, Legal);
574       setOperationAction(ISD::LROUND, MVT::f64, Legal);
575       setOperationAction(ISD::LROUND, MVT::f32, Legal);
576       setOperationAction(ISD::LLROUND, MVT::f64, Legal);
577       setOperationAction(ISD::LLROUND, MVT::f32, Legal);
578     }
579   } else {
580     setOperationAction(ISD::BITCAST, MVT::f32, Expand);
581     setOperationAction(ISD::BITCAST, MVT::i32, Expand);
582     setOperationAction(ISD::BITCAST, MVT::i64, Expand);
583     setOperationAction(ISD::BITCAST, MVT::f64, Expand);
584   }
585 
586   // We cannot sextinreg(i1).  Expand to shifts.
587   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
588 
589   // NOTE: EH_SJLJ_SETJMP/_LONGJMP as supported here is NOT intended to support
590   // SjLj exception handling, but rather to be a light-weight setjmp/longjmp
591   // replacement for continuations, user-level threading, and so on. As a
592   // result, no other SjLj exception interfaces are implemented; please don't
593   // build your own exception handling based on them.
594   // LLVM/Clang supports zero-cost DWARF exception handling.
595   setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
596   setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
597 
598   // We want to legalize GlobalAddress and ConstantPool nodes into the
599   // appropriate instructions to materialize the address.
600   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
601   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
602   setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
603   setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
604   setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
605   setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
606   setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
607   setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
608   setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
609   setOperationAction(ISD::JumpTable,     MVT::i64, Custom);
610 
611   // TRAP is legal.
612   setOperationAction(ISD::TRAP, MVT::Other, Legal);
613 
614   // TRAMPOLINE is custom lowered.
615   setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
616   setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
617 
618   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
619   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
620 
621   if (Subtarget.is64BitELFABI()) {
622     // VAARG always uses double-word chunks, so promote anything smaller.
623     setOperationAction(ISD::VAARG, MVT::i1, Promote);
624     AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
625     setOperationAction(ISD::VAARG, MVT::i8, Promote);
626     AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
627     setOperationAction(ISD::VAARG, MVT::i16, Promote);
628     AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
629     setOperationAction(ISD::VAARG, MVT::i32, Promote);
630     AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
631     setOperationAction(ISD::VAARG, MVT::Other, Expand);
632   } else if (Subtarget.is32BitELFABI()) {
633     // VAARG is custom lowered with the 32-bit SVR4 ABI.
634     setOperationAction(ISD::VAARG, MVT::Other, Custom);
635     setOperationAction(ISD::VAARG, MVT::i64, Custom);
636   } else
637     setOperationAction(ISD::VAARG, MVT::Other, Expand);
638 
639   // VACOPY is custom lowered with the 32-bit SVR4 ABI.
640   if (Subtarget.is32BitELFABI())
641     setOperationAction(ISD::VACOPY            , MVT::Other, Custom);
642   else
643     setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
644 
645   // Use the default implementation.
646   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
647   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
648   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
649   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
650   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);
651   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
652   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
653   setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
654   setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
655 
656   // We want to custom lower some of our intrinsics.
657   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
658   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom);
659   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::ppcf128, Custom);
660   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
661   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f64, Custom);
662 
663   // To handle counter-based loop conditions.
664   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
665 
666   setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
667   setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
668   setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
669   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
670 
671   // Comparisons that require checking two conditions.
672   if (Subtarget.hasSPE()) {
673     setCondCodeAction(ISD::SETO, MVT::f32, Expand);
674     setCondCodeAction(ISD::SETO, MVT::f64, Expand);
675     setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
676     setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
677   }
678   setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
679   setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
680   setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
681   setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
682   setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
683   setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
684   setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
685   setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
686   setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
687   setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
688   setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
689   setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
690 
691   setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
692   setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
693 
694   if (Subtarget.has64BitSupport()) {
695     // They also have instructions for converting between i64 and fp.
696     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
697     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand);
698     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
699     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
700     setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
701     setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
702     setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
703     setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
704     // This is just the low 32 bits of a (signed) fp->i64 conversion.
705     // We cannot do this with Promote because i64 is not a legal type.
706     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
707     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
708 
709     if (Subtarget.hasLFIWAX() || isPPC64) {
710       setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
711       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
712     }
713   } else {
714     // PowerPC does not have FP_TO_UINT on 32-bit implementations.
715     if (Subtarget.hasSPE()) {
716       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal);
717       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
718     } else {
719       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Expand);
720       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
721     }
722   }
723 
724   // With the instructions enabled under FPCVT, we can do everything.
725   if (Subtarget.hasFPCVT()) {
726     if (Subtarget.has64BitSupport()) {
727       setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
728       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
729       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
730       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
731       setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
732       setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
733       setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
734       setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
735     }
736 
737     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
738     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
739     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
740     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
741     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
742     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
743     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
744     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
745   }
746 
747   if (Subtarget.use64BitRegs()) {
748     // 64-bit PowerPC implementations can support i64 types directly
749     addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
750     // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
751     setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
752     // 64-bit PowerPC wants to expand i128 shifts itself.
753     setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
754     setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
755     setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
756   } else {
757     // 32-bit PowerPC wants to expand i64 shifts itself.
758     setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
759     setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
760     setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
761   }
762 
763   // PowerPC has better expansions for funnel shifts than the generic
764   // TargetLowering::expandFunnelShift.
765   if (Subtarget.has64BitSupport()) {
766     setOperationAction(ISD::FSHL, MVT::i64, Custom);
767     setOperationAction(ISD::FSHR, MVT::i64, Custom);
768   }
769   setOperationAction(ISD::FSHL, MVT::i32, Custom);
770   setOperationAction(ISD::FSHR, MVT::i32, Custom);
771 
772   if (Subtarget.hasVSX()) {
773     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
774     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
775     setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
776     setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
777   }
778 
779   if (Subtarget.hasAltivec()) {
780     for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
781       setOperationAction(ISD::SADDSAT, VT, Legal);
782       setOperationAction(ISD::SSUBSAT, VT, Legal);
783       setOperationAction(ISD::UADDSAT, VT, Legal);
784       setOperationAction(ISD::USUBSAT, VT, Legal);
785     }
786     // First set operation action for all vector types to expand. Then we
787     // will selectively turn on ones that can be effectively codegen'd.
788     for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
789       // add/sub are legal for all supported vector VT's.
790       setOperationAction(ISD::ADD, VT, Legal);
791       setOperationAction(ISD::SUB, VT, Legal);
792 
793       // For v2i64, these are only valid with P8Vector. This is corrected after
794       // the loop.
795       if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
796         setOperationAction(ISD::SMAX, VT, Legal);
797         setOperationAction(ISD::SMIN, VT, Legal);
798         setOperationAction(ISD::UMAX, VT, Legal);
799         setOperationAction(ISD::UMIN, VT, Legal);
800       }
801       else {
802         setOperationAction(ISD::SMAX, VT, Expand);
803         setOperationAction(ISD::SMIN, VT, Expand);
804         setOperationAction(ISD::UMAX, VT, Expand);
805         setOperationAction(ISD::UMIN, VT, Expand);
806       }
807 
808       if (Subtarget.hasVSX()) {
809         setOperationAction(ISD::FMAXNUM, VT, Legal);
810         setOperationAction(ISD::FMINNUM, VT, Legal);
811       }
812 
813       // Vector instructions introduced in P8
814       if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
815         setOperationAction(ISD::CTPOP, VT, Legal);
816         setOperationAction(ISD::CTLZ, VT, Legal);
817       }
818       else {
819         setOperationAction(ISD::CTPOP, VT, Expand);
820         setOperationAction(ISD::CTLZ, VT, Expand);
821       }
822 
823       // Vector instructions introduced in P9
824       if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
825         setOperationAction(ISD::CTTZ, VT, Legal);
826       else
827         setOperationAction(ISD::CTTZ, VT, Expand);
828 
829       // We promote all shuffles to v16i8.
830       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
831       AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
832 
833       // We promote all non-typed operations to v4i32.
834       setOperationAction(ISD::AND   , VT, Promote);
835       AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
836       setOperationAction(ISD::OR    , VT, Promote);
837       AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
838       setOperationAction(ISD::XOR   , VT, Promote);
839       AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
840       setOperationAction(ISD::LOAD  , VT, Promote);
841       AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
842       setOperationAction(ISD::SELECT, VT, Promote);
843       AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
844       setOperationAction(ISD::VSELECT, VT, Legal);
845       setOperationAction(ISD::SELECT_CC, VT, Promote);
846       AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
847       setOperationAction(ISD::STORE, VT, Promote);
848       AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
849 
850       // No other operations are legal.
851       setOperationAction(ISD::MUL , VT, Expand);
852       setOperationAction(ISD::SDIV, VT, Expand);
853       setOperationAction(ISD::SREM, VT, Expand);
854       setOperationAction(ISD::UDIV, VT, Expand);
855       setOperationAction(ISD::UREM, VT, Expand);
856       setOperationAction(ISD::FDIV, VT, Expand);
857       setOperationAction(ISD::FREM, VT, Expand);
858       setOperationAction(ISD::FNEG, VT, Expand);
859       setOperationAction(ISD::FSQRT, VT, Expand);
860       setOperationAction(ISD::FLOG, VT, Expand);
861       setOperationAction(ISD::FLOG10, VT, Expand);
862       setOperationAction(ISD::FLOG2, VT, Expand);
863       setOperationAction(ISD::FEXP, VT, Expand);
864       setOperationAction(ISD::FEXP2, VT, Expand);
865       setOperationAction(ISD::FSIN, VT, Expand);
866       setOperationAction(ISD::FCOS, VT, Expand);
867       setOperationAction(ISD::FABS, VT, Expand);
868       setOperationAction(ISD::FFLOOR, VT, Expand);
869       setOperationAction(ISD::FCEIL,  VT, Expand);
870       setOperationAction(ISD::FTRUNC, VT, Expand);
871       setOperationAction(ISD::FRINT,  VT, Expand);
872       setOperationAction(ISD::FLDEXP, VT, Expand);
873       setOperationAction(ISD::FNEARBYINT, VT, Expand);
874       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
875       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
876       setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
877       setOperationAction(ISD::MULHU, VT, Expand);
878       setOperationAction(ISD::MULHS, VT, Expand);
879       setOperationAction(ISD::UMUL_LOHI, VT, Expand);
880       setOperationAction(ISD::SMUL_LOHI, VT, Expand);
881       setOperationAction(ISD::UDIVREM, VT, Expand);
882       setOperationAction(ISD::SDIVREM, VT, Expand);
883       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
884       setOperationAction(ISD::FPOW, VT, Expand);
885       setOperationAction(ISD::BSWAP, VT, Expand);
886       setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
887       setOperationAction(ISD::ROTL, VT, Expand);
888       setOperationAction(ISD::ROTR, VT, Expand);
889 
890       for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
891         setTruncStoreAction(VT, InnerVT, Expand);
892         setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
893         setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
894         setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
895       }
896     }
897     setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand);
898     if (!Subtarget.hasP8Vector()) {
899       setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
900       setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
901       setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
902       setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
903     }
904 
905     // We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle
906     // with merges, splats, etc.
907     setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
908 
909     // Vector truncates to sub-word integers that fit in an Altivec/VSX register
910     // are cheap, so handle them before they get expanded to scalar.
911     setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
912     setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
913     setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
914     setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
915     setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
916 
917     setOperationAction(ISD::AND   , MVT::v4i32, Legal);
918     setOperationAction(ISD::OR    , MVT::v4i32, Legal);
919     setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
920     setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
921     setOperationAction(ISD::SELECT, MVT::v4i32,
922                        Subtarget.useCRBits() ? Legal : Expand);
923     setOperationAction(ISD::STORE , MVT::v4i32, Legal);
924     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
925     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
926     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
927     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
928     setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
929     setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
930     setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
931     setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
932     setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
933     setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
934     setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
935     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
936 
937     // Custom lower ROTL v1i128 to VECTOR_SHUFFLE v16i8.
938     setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
939     // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
940     if (Subtarget.hasAltivec())
941       for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
942         setOperationAction(ISD::ROTL, VT, Legal);
943     // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
944     if (Subtarget.hasP8Altivec())
945       setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
946 
947     addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
948     addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
949     addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
950     addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
951 
952     setOperationAction(ISD::MUL, MVT::v4f32, Legal);
953     setOperationAction(ISD::FMA, MVT::v4f32, Legal);
954 
955     if (Subtarget.hasVSX()) {
956       setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
957       setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
958       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
959     }
960 
961     if (Subtarget.hasP8Altivec())
962       setOperationAction(ISD::MUL, MVT::v4i32, Legal);
963     else
964       setOperationAction(ISD::MUL, MVT::v4i32, Custom);
965 
966     if (Subtarget.isISA3_1()) {
967       setOperationAction(ISD::MUL, MVT::v2i64, Legal);
968       setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
969       setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
970       setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
971       setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
972       setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
973       setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
974       setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
975       setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
976       setOperationAction(ISD::UREM, MVT::v2i64, Legal);
977       setOperationAction(ISD::SREM, MVT::v2i64, Legal);
978       setOperationAction(ISD::UREM, MVT::v4i32, Legal);
979       setOperationAction(ISD::SREM, MVT::v4i32, Legal);
980       setOperationAction(ISD::UREM, MVT::v1i128, Legal);
981       setOperationAction(ISD::SREM, MVT::v1i128, Legal);
982       setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
983       setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
984       setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
985     }
986 
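    // v8i16 multiply maps onto vmladduhm with a zero addend; v16i8 multiply is custom lowered using even/odd multiplies and a permute.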
987     setOperationAction(ISD::MUL, MVT::v8i16, Legal);
988     setOperationAction(ISD::MUL, MVT::v16i8, Custom);
989 
990     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
991     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
992     // On LE (which implies P8+/64-bit), direct moves are supported and these
993     // operations are already legal, so this custom lowering targets BE. It needs
994     // 64-bit since we need a pair of stores to cover a 128-bit load for P10.
995     if (!DisableP10StoreForward && isPPC64 && !Subtarget.isLittleEndian()) {
996       setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Custom);
997       setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
998       setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
999     }
1000 
1001     setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
1002     setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
1003     setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
1004     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
1005 
1006     // Altivec does not contain unordered floating-point compare instructions
1007     setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
1008     setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
1009     setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
1010     setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
1011 
1012     if (Subtarget.hasVSX()) {
1013       setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
1014       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
1015       if (Subtarget.hasP8Vector()) {
1016         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
1017         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
1018       }
1019       if (Subtarget.hasDirectMove() && isPPC64) {
1020         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
1021         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
1022         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
1023         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
1024         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
1025         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
1026         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
1027         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
1028       }
1029       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
1030 
1031       // The nearbyint variants are not allowed to raise the inexact exception
1032       // so we can only code-gen them with unsafe math.
1033       if (TM.Options.UnsafeFPMath) {
1034         setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
1035         setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
1036       }
1037 
1038       setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
1039       setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
1040       setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
1041       setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
1042       setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
1043       setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
1044       setOperationAction(ISD::FROUND, MVT::f64, Legal);
1045       setOperationAction(ISD::FRINT, MVT::f64, Legal);
1046 
1047       setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
1048       setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
1049       setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1050       setOperationAction(ISD::FROUND, MVT::f32, Legal);
1051       setOperationAction(ISD::FRINT, MVT::f32, Legal);
1052 
1053       setOperationAction(ISD::MUL, MVT::v2f64, Legal);
1054       setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1055 
1056       setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
1057       setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
1058 
1059       // Share the Altivec comparison restrictions.
1060       setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
1061       setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
1062       setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
1063       setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
1064 
1065       setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
1066       setOperationAction(ISD::STORE, MVT::v2f64, Legal);
1067 
1068       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
1069 
1070       if (Subtarget.hasP8Vector())
1071         addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1072 
1073       addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1074 
1075       addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1076       addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1077       addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1078 
1079       if (Subtarget.hasP8Altivec()) {
1080         setOperationAction(ISD::SHL, MVT::v2i64, Legal);
1081         setOperationAction(ISD::SRA, MVT::v2i64, Legal);
1082         setOperationAction(ISD::SRL, MVT::v2i64, Legal);
1083 
1084        // 128-bit shifts can be accomplished via 3 instructions for SHL and
1085        // SRL, but not for SRA because of the instructions available:
1086        // VS{RL} and VS{RL}O. However, due to direct move costs, it is not
1087        // worth doing.
1088         setOperationAction(ISD::SHL, MVT::v1i128, Expand);
1089         setOperationAction(ISD::SRL, MVT::v1i128, Expand);
1090         setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1091 
1092         setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
1093       }
1094       else {
1095         setOperationAction(ISD::SHL, MVT::v2i64, Expand);
1096         setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1097         setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1098 
1099         setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1100 
1101         // VSX v2i64 only supports non-arithmetic operations.
1102         setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1103         setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1104       }
1105 
1106       if (Subtarget.isISA3_1())
1107         setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1108       else
1109         setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1110 
1111       setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1112       AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1113       setOperationAction(ISD::STORE, MVT::v2i64, Promote);
1114       AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1115 
1116       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
1117 
1118       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
1119       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
1120       setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
1121       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
1122       setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
1123       setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
1124       setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
1125       setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
1126 
1127       // Custom handling for partial vectors of integers converted to
1128       // floating point. We already have optimal handling for v2i32 through
1129       // the DAG combine, so those aren't necessary.
1130       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i8, Custom);
1131       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i8, Custom);
1132       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i16, Custom);
1133       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i16, Custom);
1134       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i8, Custom);
1135       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i8, Custom);
1136       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i16, Custom);
1137       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i16, Custom);
1138       setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
1139       setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
1140       setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
1141       setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
1142       setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
1143       setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
1144       setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
1145       setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
1146 
1147       setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1148       setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1149       setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1150       setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1151       setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
1152       setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal);
1153 
1154       setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
1155       setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
1156 
1157      // Handle constrained floating-point operations on vectors.
1158      // The predicate is `hasVSX` because Altivec instructions do not raise
1159      // floating-point exceptions but VSX vector instructions do.
1160       setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
1161       setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
1162       setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
1163       setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
1164       setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
1165       setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
1166       setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal);
1167       setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal);
1168       setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
1169       setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
1170       setOperationAction(ISD::STRICT_FCEIL,  MVT::v4f32, Legal);
1171       setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
1172       setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
1173 
1174       setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
1175       setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
1176       setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
1177       setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
1178       setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
1179       setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
1180       setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal);
1181       setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal);
1182       setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
1183       setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
1184       setOperationAction(ISD::STRICT_FCEIL,  MVT::v2f64, Legal);
1185       setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
1186       setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
1187 
1188       addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1189       addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1190 
1191       for (MVT FPT : MVT::fp_valuetypes())
1192         setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1193 
1194       // Expand the SELECT to SELECT_CC
1195       setOperationAction(ISD::SELECT, MVT::f128, Expand);
1196 
1197       setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1198       setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1199 
1200       // No implementation for these ops for PowerPC.
1201       setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
1202       setOperationAction(ISD::FSIN, MVT::f128, Expand);
1203       setOperationAction(ISD::FCOS, MVT::f128, Expand);
1204       setOperationAction(ISD::FPOW, MVT::f128, Expand);
1205       setOperationAction(ISD::FPOWI, MVT::f128, Expand);
1206       setOperationAction(ISD::FREM, MVT::f128, Expand);
1207     }
1208 
1209     if (Subtarget.hasP8Altivec()) {
1210       addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1211       addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1212     }
1213 
1214     if (Subtarget.hasP9Vector()) {
1215       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
1216       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
1217 
1218       // Test data class instructions store results in CR bits.
1219       if (Subtarget.useCRBits()) {
1220         setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom);
1221         setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom);
1222         setOperationAction(ISD::IS_FPCLASS, MVT::f128, Custom);
1223         setOperationAction(ISD::IS_FPCLASS, MVT::ppcf128, Custom);
1224       }
1225 
1226       // 128-bit shifts can be accomplished via 3 instructions for SHL and
1227       // SRL, but not for SRA, because of the instructions available:
1228       // VSL/VSR and VSLO/VSRO.
1229       setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1230       setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1231       setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1232 
1233       setOperationAction(ISD::FADD, MVT::f128, Legal);
1234       setOperationAction(ISD::FSUB, MVT::f128, Legal);
1235       setOperationAction(ISD::FDIV, MVT::f128, Legal);
1236       setOperationAction(ISD::FMUL, MVT::f128, Legal);
1237       setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
1238 
1239       setOperationAction(ISD::FMA, MVT::f128, Legal);
1240       setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
1241       setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
1242       setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
1243       setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
1244       setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
1245       setCondCodeAction(ISD::SETONE, MVT::f128, Expand);
1246 
1247       setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
1248       setOperationAction(ISD::FRINT, MVT::f128, Legal);
1249       setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
1250       setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1251       setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
1252       setOperationAction(ISD::FROUND, MVT::f128, Legal);
1253 
1254       setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
1255       setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
1256       setOperationAction(ISD::BITCAST, MVT::i128, Custom);
1257 
1258       // Handle constrained floating-point operations of fp128
1259       setOperationAction(ISD::STRICT_FADD, MVT::f128, Legal);
1260       setOperationAction(ISD::STRICT_FSUB, MVT::f128, Legal);
1261       setOperationAction(ISD::STRICT_FMUL, MVT::f128, Legal);
1262       setOperationAction(ISD::STRICT_FDIV, MVT::f128, Legal);
1263       setOperationAction(ISD::STRICT_FMA, MVT::f128, Legal);
1264       setOperationAction(ISD::STRICT_FSQRT, MVT::f128, Legal);
1265       setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Legal);
1266       setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
1267       setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
1268       setOperationAction(ISD::STRICT_FRINT, MVT::f128, Legal);
1269       setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f128, Legal);
1270       setOperationAction(ISD::STRICT_FFLOOR, MVT::f128, Legal);
1271       setOperationAction(ISD::STRICT_FCEIL, MVT::f128, Legal);
1272       setOperationAction(ISD::STRICT_FTRUNC, MVT::f128, Legal);
1273       setOperationAction(ISD::STRICT_FROUND, MVT::f128, Legal);
1274       setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1275       setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1276       setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1277       setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1278       setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1279     } else if (Subtarget.hasVSX()) {
1280       setOperationAction(ISD::LOAD, MVT::f128, Promote);
1281       setOperationAction(ISD::STORE, MVT::f128, Promote);
1282 
1283       AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
1284       AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
1285 
1286       // Set FADD/FSUB as libcalls to keep the legalizer from expanding the
1287       // fp_to_uint and int_to_fp.
1288       setOperationAction(ISD::FADD, MVT::f128, LibCall);
1289       setOperationAction(ISD::FSUB, MVT::f128, LibCall);
1290 
1291       setOperationAction(ISD::FMUL, MVT::f128, Expand);
1292       setOperationAction(ISD::FDIV, MVT::f128, Expand);
1293       setOperationAction(ISD::FNEG, MVT::f128, Expand);
1294       setOperationAction(ISD::FABS, MVT::f128, Expand);
1295       setOperationAction(ISD::FSQRT, MVT::f128, Expand);
1296       setOperationAction(ISD::FMA, MVT::f128, Expand);
1297       setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
1298 
1299       // Expand the fp_extend if the target type is fp128.
1300       setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand);
1301       setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Expand);
1302 
1303       // Custom-lower the fp_round if the source type is fp128.
1304       for (MVT VT : {MVT::f32, MVT::f64}) {
1305         setOperationAction(ISD::FP_ROUND, VT, Custom);
1306         setOperationAction(ISD::STRICT_FP_ROUND, VT, Custom);
1307       }
1308 
1309       setOperationAction(ISD::SETCC, MVT::f128, Custom);
1310       setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom);
1311       setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Custom);
1312       setOperationAction(ISD::BR_CC, MVT::f128, Expand);
1313 
1314       // Lower following f128 select_cc pattern:
1315       // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1316       setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
1317 
1318       // We need to handle f128 SELECT_CC with integer result type.
1319       setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
1320       setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
1321     }
1322 
1323     if (Subtarget.hasP9Altivec()) {
1324       if (Subtarget.isISA3_1()) {
1325         setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Legal);
1326         setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Legal);
1327         setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Legal);
1328         setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal);
1329       } else {
1330         setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
1331         setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1332       }
1333       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8,  Legal);
1334       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
1335       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
1336       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8,  Legal);
1337       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
1338       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
1339       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
1340 
1341       setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
1342       setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
1343       setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
1344       setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
1345     }
1346 
1347     if (Subtarget.hasP10Vector()) {
1348       setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
1349     }
1350   }
1351 
1352   if (Subtarget.pairedVectorMemops()) {
1353     addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1354     setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1355     setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1356   }
1357   if (Subtarget.hasMMA()) {
1358     if (Subtarget.isISAFuture())
1359       addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
1360     else
1361       addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1362     setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1363     setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1364     setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom);
1365   }
1366 
1367   if (Subtarget.has64BitSupport())
1368     setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
1369 
1370   if (Subtarget.isISA3_1())
1371     setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1372 
1373   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1374 
1375   if (!isPPC64) {
1376     setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
1377     setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
1378   }
1379 
1380   if (shouldInlineQuadwordAtomics()) {
1381     setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
1382     setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
1383     setOperationAction(ISD::INTRINSIC_VOID, MVT::i128, Custom);
1384   }
1385 
1386   setBooleanContents(ZeroOrOneBooleanContent);
1387 
1388   if (Subtarget.hasAltivec()) {
1389     // Altivec instructions set fields to all zeros or all ones.
1390     setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
1391   }
1392 
1393   if (shouldInlineQuadwordAtomics())
1394     setMaxAtomicSizeInBitsSupported(128);
1395   else if (isPPC64)
1396     setMaxAtomicSizeInBitsSupported(64);
1397   else
1398     setMaxAtomicSizeInBitsSupported(32);
1399 
1400   setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1401 
1402   // We have target-specific dag combine patterns for the following nodes:
1403   setTargetDAGCombine({ISD::AND, ISD::ADD, ISD::SHL, ISD::SRA, ISD::SRL,
1404                        ISD::MUL, ISD::FMA, ISD::SINT_TO_FP, ISD::BUILD_VECTOR});
1405   if (Subtarget.hasFPCVT())
1406     setTargetDAGCombine(ISD::UINT_TO_FP);
1407   setTargetDAGCombine({ISD::LOAD, ISD::STORE, ISD::BR_CC});
1408   if (Subtarget.useCRBits())
1409     setTargetDAGCombine(ISD::BRCOND);
1410   setTargetDAGCombine({ISD::BSWAP, ISD::INTRINSIC_WO_CHAIN,
1411                        ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID});
1412 
1413   setTargetDAGCombine({ISD::SIGN_EXTEND, ISD::ZERO_EXTEND, ISD::ANY_EXTEND});
1414 
1415   setTargetDAGCombine({ISD::TRUNCATE, ISD::VECTOR_SHUFFLE});
1416 
1417   if (Subtarget.useCRBits()) {
1418     setTargetDAGCombine({ISD::TRUNCATE, ISD::SETCC, ISD::SELECT_CC});
1419   }
1420 
1421   setLibcallName(RTLIB::LOG_F128, "logf128");
1422   setLibcallName(RTLIB::LOG2_F128, "log2f128");
1423   setLibcallName(RTLIB::LOG10_F128, "log10f128");
1424   setLibcallName(RTLIB::EXP_F128, "expf128");
1425   setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1426   setLibcallName(RTLIB::SIN_F128, "sinf128");
1427   setLibcallName(RTLIB::COS_F128, "cosf128");
1428   setLibcallName(RTLIB::SINCOS_F128, "sincosf128");
1429   setLibcallName(RTLIB::POW_F128, "powf128");
1430   setLibcallName(RTLIB::FMIN_F128, "fminf128");
1431   setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1432   setLibcallName(RTLIB::REM_F128, "fmodf128");
1433   setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
1434   setLibcallName(RTLIB::CEIL_F128, "ceilf128");
1435   setLibcallName(RTLIB::FLOOR_F128, "floorf128");
1436   setLibcallName(RTLIB::TRUNC_F128, "truncf128");
1437   setLibcallName(RTLIB::ROUND_F128, "roundf128");
1438   setLibcallName(RTLIB::LROUND_F128, "lroundf128");
1439   setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
1440   setLibcallName(RTLIB::RINT_F128, "rintf128");
1441   setLibcallName(RTLIB::LRINT_F128, "lrintf128");
1442   setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
1443   setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
1444   setLibcallName(RTLIB::FMA_F128, "fmaf128");
1445   setLibcallName(RTLIB::FREXP_F128, "frexpf128");
1446 
1447   if (Subtarget.isAIXABI()) {
1448     setLibcallName(RTLIB::MEMCPY, isPPC64 ? "___memmove64" : "___memmove");
1449     setLibcallName(RTLIB::MEMMOVE, isPPC64 ? "___memmove64" : "___memmove");
1450     setLibcallName(RTLIB::MEMSET, isPPC64 ? "___memset64" : "___memset");
1451     setLibcallName(RTLIB::BZERO, isPPC64 ? "___bzero64" : "___bzero");
1452   }
1453 
1454   // With 32 condition bits, we don't need to sink (and duplicate) compares
1455   // aggressively in CodeGenPrep.
1456   if (Subtarget.useCRBits()) {
1457     setHasMultipleConditionRegisters();
1458     setJumpIsExpensive();
1459   }
1460 
1461   // TODO: The default entry number is set to 64. This stops most jump table
1462   // generation on PPC. But it is good for current PPC HWs because the indirect
1463   // branch via mtctr to the jump table may lead to poor branch prediction.
1464   // Re-evaluate this value on future HWs that can do better with mtctr.
1465   setMinimumJumpTableEntries(PPCMinimumJumpTableEntries);
1466 
1467   setMinFunctionAlignment(Align(4));
1468 
1469   switch (Subtarget.getCPUDirective()) {
1470   default: break;
1471   case PPC::DIR_970:
1472   case PPC::DIR_A2:
1473   case PPC::DIR_E500:
1474   case PPC::DIR_E500mc:
1475   case PPC::DIR_E5500:
1476   case PPC::DIR_PWR4:
1477   case PPC::DIR_PWR5:
1478   case PPC::DIR_PWR5X:
1479   case PPC::DIR_PWR6:
1480   case PPC::DIR_PWR6X:
1481   case PPC::DIR_PWR7:
1482   case PPC::DIR_PWR8:
1483   case PPC::DIR_PWR9:
1484   case PPC::DIR_PWR10:
1485   case PPC::DIR_PWR11:
1486   case PPC::DIR_PWR_FUTURE:
1487     setPrefLoopAlignment(Align(16));
1488     setPrefFunctionAlignment(Align(16));
1489     break;
1490   }
1491 
1492   if (Subtarget.enableMachineScheduler())
1493     setSchedulingPreference(Sched::Source);
1494   else
1495     setSchedulingPreference(Sched::Hybrid);
1496 
1497   computeRegisterProperties(STI.getRegisterInfo());
1498 
1499   // The Freescale cores do better with aggressive inlining of memcpy and
1500   // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
1501   if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1502       Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1503     MaxStoresPerMemset = 32;
1504     MaxStoresPerMemsetOptSize = 16;
1505     MaxStoresPerMemcpy = 32;
1506     MaxStoresPerMemcpyOptSize = 8;
1507     MaxStoresPerMemmove = 32;
1508     MaxStoresPerMemmoveOptSize = 8;
1509   } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1510     // The A2 also benefits from (very) aggressive inlining of memcpy and
1511   // friends. The overhead of the function call, even when warm, can be
1512     // over one hundred cycles.
1513     MaxStoresPerMemset = 128;
1514     MaxStoresPerMemcpy = 128;
1515     MaxStoresPerMemmove = 128;
1516     MaxLoadsPerMemcmp = 128;
1517   } else {
1518     MaxLoadsPerMemcmp = 8;
1519     MaxLoadsPerMemcmpOptSize = 4;
1520   }
1521 
1522   IsStrictFPEnabled = true;
1523 
1524   // Let the subtarget (CPU) decide if a predictable select is more expensive
1525   // than the corresponding branch. This information is used in CGP to decide
1526   // when to convert selects into branches.
1527   PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
1528 
1529   GatherAllAliasesMaxDepth = PPCGatherAllAliasesMaxDepth;
1530 }
1531 
1532 // *********************************** NOTE ************************************
1533 // For selecting load and store instructions, the addressing modes are defined
1534 // as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
1535   // patterns to match the load and store instructions.
1536 //
1537 // The TD definitions for the addressing modes correspond to their respective
1538 // Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1539 // on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1540 // address mode flags of a particular node. Afterwards, the computed address
1541 // flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1542 // addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1543 // accordingly, based on the preferred addressing mode.
1544 //
1545 // Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1546 // MemOpFlags contains all the possible flags that can be used to compute the
1547 // optimal addressing mode for load and store instructions.
1548 // AddrMode contains all the possible load and store addressing modes available
1549 // on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1550 //
1551 // When adding new load and store instructions, it is possible that new address
1552 // flags may need to be added into MemOpFlags, and a new addressing mode will
1553 // need to be added to AddrMode. An entry of the new addressing mode (consisting
1554 // of the minimal and main distinguishing address flags for the new load/store
1555 // instructions) will need to be added into initializeAddrModeMap() below.
1556   // Finally, when adding new addressing modes, getAddrModeForFlags() will
1557 // need to be updated to account for selecting the optimal addressing mode.
1558 // *****************************************************************************
1559 /// Initialize the map that relates the different addressing modes of the load
1560 /// and store instructions to a set of flags. This ensures the load/store
1561 /// instruction is correctly matched during instruction selection.
1562 void PPCTargetLowering::initializeAddrModeMap() {
1563   AddrModesMap[PPC::AM_DForm] = {
1564       // LWZ, STW
1565       PPC::MOF_ZExt | PPC::MOF_RPlusSImm16 | PPC::MOF_WordInt,
1566       PPC::MOF_ZExt | PPC::MOF_RPlusLo | PPC::MOF_WordInt,
1567       PPC::MOF_ZExt | PPC::MOF_NotAddNorCst | PPC::MOF_WordInt,
1568       PPC::MOF_ZExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_WordInt,
1569       // LBZ, LHZ, STB, STH
1570       PPC::MOF_ZExt | PPC::MOF_RPlusSImm16 | PPC::MOF_SubWordInt,
1571       PPC::MOF_ZExt | PPC::MOF_RPlusLo | PPC::MOF_SubWordInt,
1572       PPC::MOF_ZExt | PPC::MOF_NotAddNorCst | PPC::MOF_SubWordInt,
1573       PPC::MOF_ZExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubWordInt,
1574       // LHA
1575       PPC::MOF_SExt | PPC::MOF_RPlusSImm16 | PPC::MOF_SubWordInt,
1576       PPC::MOF_SExt | PPC::MOF_RPlusLo | PPC::MOF_SubWordInt,
1577       PPC::MOF_SExt | PPC::MOF_NotAddNorCst | PPC::MOF_SubWordInt,
1578       PPC::MOF_SExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubWordInt,
1579       // LFS, LFD, STFS, STFD
1580       PPC::MOF_RPlusSImm16 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
1581       PPC::MOF_RPlusLo | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
1582       PPC::MOF_NotAddNorCst | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
1583       PPC::MOF_AddrIsSImm32 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
1584   };
1585   AddrModesMap[PPC::AM_DSForm] = {
1586       // LWA
1587       PPC::MOF_SExt | PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_WordInt,
1588       PPC::MOF_SExt | PPC::MOF_NotAddNorCst | PPC::MOF_WordInt,
1589       PPC::MOF_SExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_WordInt,
1590       // LD, STD
1591       PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_DoubleWordInt,
1592       PPC::MOF_NotAddNorCst | PPC::MOF_DoubleWordInt,
1593       PPC::MOF_AddrIsSImm32 | PPC::MOF_DoubleWordInt,
1594       // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1595       PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
1596       PPC::MOF_NotAddNorCst | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
1597       PPC::MOF_AddrIsSImm32 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
1598   };
1599   AddrModesMap[PPC::AM_DQForm] = {
1600       // LXV, STXV
1601       PPC::MOF_RPlusSImm16Mult16 | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
1602       PPC::MOF_NotAddNorCst | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
1603       PPC::MOF_AddrIsSImm32 | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
1604   };
1605   AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
1606                                        PPC::MOF_SubtargetP10};
1607   // TODO: Add mapping for quadword load/store.
1608 }
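// Illustrative sketch only: the mode name PPC::AM_NewForm and the exact flag
// combination below are hypothetical. A new addressing mode added to the
// AddrMode enum would get an entry here built from existing MemOpFlags,
// mirroring the entries above, plus a matching case in getAddrModeForFlags():
//   AddrModesMap[PPC::AM_NewForm] = {
//       PPC::MOF_RPlusSImm16 | PPC::MOF_Vector | PPC::MOF_SubtargetP10};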
1609 
1610 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1611 /// the desired ByVal argument alignment.
1612 static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1613   if (MaxAlign == MaxMaxAlign)
1614     return;
1615   if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1616     if (MaxMaxAlign >= 32 &&
1617         VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1618       MaxAlign = Align(32);
1619     else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1620              MaxAlign < 16)
1621       MaxAlign = Align(16);
1622   } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1623     Align EltAlign;
1624     getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1625     if (EltAlign > MaxAlign)
1626       MaxAlign = EltAlign;
1627   } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1628     for (auto *EltTy : STy->elements()) {
1629       Align EltAlign;
1630       getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1631       if (EltAlign > MaxAlign)
1632         MaxAlign = EltAlign;
1633       if (MaxAlign == MaxMaxAlign)
1634         break;
1635     }
1636   }
1637 }
1638 
1639 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1640 /// function arguments in the caller parameter area.
1641 Align PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1642                                                const DataLayout &DL) const {
1643   // 16-byte and wider vectors are passed on a 16-byte boundary.
1644   // Everything else is passed on an 8-byte (PPC64) or 4-byte (PPC32) boundary.
1645   Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1646   if (Subtarget.hasAltivec())
1647     getMaxByValAlign(Ty, Alignment, Align(16));
1648   return Alignment;
1649 }
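// For example, with Altivec available, a ByVal struct containing a 16-byte
// vector member (e.g. a <4 x i32>) is aligned to 16 bytes, while a struct of
// scalars keeps the default 8-byte (PPC64) or 4-byte (PPC32) alignment.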
1650 
1651 bool PPCTargetLowering::useSoftFloat() const {
1652   return Subtarget.useSoftFloat();
1653 }
1654 
1655 bool PPCTargetLowering::hasSPE() const {
1656   return Subtarget.hasSPE();
1657 }
1658 
1659 bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
1660   return VT.isScalarInteger();
1661 }
1662 
1663 bool PPCTargetLowering::shallExtractConstSplatVectorElementToStore(
1664     Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
1665   if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
1666     return false;
1667 
1668   if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
1669     if (VTy->getScalarType()->isIntegerTy()) {
1670       // 8-/16-bit elements fit in an immediate field and are not needed here.
1671       if (ElemSizeInBits == 32) {
1672         Index = Subtarget.isLittleEndian() ? 2 : 1;
1673         return true;
1674       }
1675       if (ElemSizeInBits == 64) {
1676         Index = Subtarget.isLittleEndian() ? 1 : 0;
1677         return true;
1678       }
1679     }
1680   }
1681   return false;
1682 }
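// For example, on little-endian PPC64 with VSX, a 64-bit splat element is
// extracted from doubleword 1 and a 32-bit element from word 2; on big-endian
// targets the corresponding indices are 0 and 1.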
1683 
1684 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1685   switch ((PPCISD::NodeType)Opcode) {
1686   case PPCISD::FIRST_NUMBER:    break;
1687   case PPCISD::FSEL:            return "PPCISD::FSEL";
1688   case PPCISD::XSMAXC:          return "PPCISD::XSMAXC";
1689   case PPCISD::XSMINC:          return "PPCISD::XSMINC";
1690   case PPCISD::FCFID:           return "PPCISD::FCFID";
1691   case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
1692   case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
1693   case PPCISD::FCFIDUS:         return "PPCISD::FCFIDUS";
1694   case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
1695   case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
1696   case PPCISD::FCTIDUZ:         return "PPCISD::FCTIDUZ";
1697   case PPCISD::FCTIWUZ:         return "PPCISD::FCTIWUZ";
1698   case PPCISD::FRE:             return "PPCISD::FRE";
1699   case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
1700   case PPCISD::FTSQRT:
1701     return "PPCISD::FTSQRT";
1702   case PPCISD::FSQRT:
1703     return "PPCISD::FSQRT";
1704   case PPCISD::STFIWX:          return "PPCISD::STFIWX";
1705   case PPCISD::VPERM:           return "PPCISD::VPERM";
1706   case PPCISD::XXSPLT:          return "PPCISD::XXSPLT";
1707   case PPCISD::XXSPLTI_SP_TO_DP:
1708     return "PPCISD::XXSPLTI_SP_TO_DP";
1709   case PPCISD::XXSPLTI32DX:
1710     return "PPCISD::XXSPLTI32DX";
1711   case PPCISD::VECINSERT:       return "PPCISD::VECINSERT";
1712   case PPCISD::XXPERMDI:        return "PPCISD::XXPERMDI";
1713   case PPCISD::XXPERM:
1714     return "PPCISD::XXPERM";
1715   case PPCISD::VECSHL:          return "PPCISD::VECSHL";
1716   case PPCISD::CMPB:            return "PPCISD::CMPB";
1717   case PPCISD::Hi:              return "PPCISD::Hi";
1718   case PPCISD::Lo:              return "PPCISD::Lo";
1719   case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
1720   case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1721   case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1722   case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
1723   case PPCISD::DYNAREAOFFSET:   return "PPCISD::DYNAREAOFFSET";
1724   case PPCISD::PROBED_ALLOCA:   return "PPCISD::PROBED_ALLOCA";
1725   case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
1726   case PPCISD::SRL:             return "PPCISD::SRL";
1727   case PPCISD::SRA:             return "PPCISD::SRA";
1728   case PPCISD::SHL:             return "PPCISD::SHL";
1729   case PPCISD::SRA_ADDZE:       return "PPCISD::SRA_ADDZE";
1730   case PPCISD::CALL:            return "PPCISD::CALL";
1731   case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
1732   case PPCISD::CALL_NOTOC:      return "PPCISD::CALL_NOTOC";
1733   case PPCISD::CALL_RM:
1734     return "PPCISD::CALL_RM";
1735   case PPCISD::CALL_NOP_RM:
1736     return "PPCISD::CALL_NOP_RM";
1737   case PPCISD::CALL_NOTOC_RM:
1738     return "PPCISD::CALL_NOTOC_RM";
1739   case PPCISD::MTCTR:           return "PPCISD::MTCTR";
1740   case PPCISD::BCTRL:           return "PPCISD::BCTRL";
1741   case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
1742   case PPCISD::BCTRL_RM:
1743     return "PPCISD::BCTRL_RM";
1744   case PPCISD::BCTRL_LOAD_TOC_RM:
1745     return "PPCISD::BCTRL_LOAD_TOC_RM";
1746   case PPCISD::RET_GLUE:        return "PPCISD::RET_GLUE";
1747   case PPCISD::READ_TIME_BASE:  return "PPCISD::READ_TIME_BASE";
1748   case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
1749   case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1750   case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
1751   case PPCISD::MFVSR:           return "PPCISD::MFVSR";
1752   case PPCISD::MTVSRA:          return "PPCISD::MTVSRA";
1753   case PPCISD::MTVSRZ:          return "PPCISD::MTVSRZ";
1754   case PPCISD::SINT_VEC_TO_FP:  return "PPCISD::SINT_VEC_TO_FP";
1755   case PPCISD::UINT_VEC_TO_FP:  return "PPCISD::UINT_VEC_TO_FP";
1756   case PPCISD::SCALAR_TO_VECTOR_PERMUTED:
1757     return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1758   case PPCISD::ANDI_rec_1_EQ_BIT:
1759     return "PPCISD::ANDI_rec_1_EQ_BIT";
1760   case PPCISD::ANDI_rec_1_GT_BIT:
1761     return "PPCISD::ANDI_rec_1_GT_BIT";
1762   case PPCISD::VCMP:            return "PPCISD::VCMP";
1763   case PPCISD::VCMP_rec:        return "PPCISD::VCMP_rec";
1764   case PPCISD::LBRX:            return "PPCISD::LBRX";
1765   case PPCISD::STBRX:           return "PPCISD::STBRX";
1766   case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
1767   case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
1768   case PPCISD::LXSIZX:          return "PPCISD::LXSIZX";
1769   case PPCISD::STXSIX:          return "PPCISD::STXSIX";
1770   case PPCISD::VEXTS:           return "PPCISD::VEXTS";
1771   case PPCISD::LXVD2X:          return "PPCISD::LXVD2X";
1772   case PPCISD::STXVD2X:         return "PPCISD::STXVD2X";
1773   case PPCISD::LOAD_VEC_BE:     return "PPCISD::LOAD_VEC_BE";
1774   case PPCISD::STORE_VEC_BE:    return "PPCISD::STORE_VEC_BE";
1775   case PPCISD::ST_VSR_SCAL_INT:
1776                                 return "PPCISD::ST_VSR_SCAL_INT";
1777   case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
1778   case PPCISD::BDNZ:            return "PPCISD::BDNZ";
1779   case PPCISD::BDZ:             return "PPCISD::BDZ";
1780   case PPCISD::MFFS:            return "PPCISD::MFFS";
1781   case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
1782   case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
1783   case PPCISD::CR6SET:          return "PPCISD::CR6SET";
1784   case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
1785   case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
1786   case PPCISD::PPC32_PICGOT:    return "PPCISD::PPC32_PICGOT";
1787   case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1788   case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
1789   case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
1790   case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
1791   case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
1792   case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
1793   case PPCISD::GET_TLS_MOD_AIX: return "PPCISD::GET_TLS_MOD_AIX";
1794   case PPCISD::GET_TPOINTER:    return "PPCISD::GET_TPOINTER";
1795   case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1796   case PPCISD::TLSGD_AIX:       return "PPCISD::TLSGD_AIX";
1797   case PPCISD::TLSLD_AIX:       return "PPCISD::TLSLD_AIX";
1798   case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
1799   case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
1800   case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
1801   case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1802   case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1803   case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
1804   case PPCISD::PADDI_DTPREL:
1805     return "PPCISD::PADDI_DTPREL";
1806   case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
1807   case PPCISD::SC:              return "PPCISD::SC";
1808   case PPCISD::CLRBHRB:         return "PPCISD::CLRBHRB";
1809   case PPCISD::MFBHRBE:         return "PPCISD::MFBHRBE";
1810   case PPCISD::RFEBB:           return "PPCISD::RFEBB";
1811   case PPCISD::XXSWAPD:         return "PPCISD::XXSWAPD";
1812   case PPCISD::SWAP_NO_CHAIN:   return "PPCISD::SWAP_NO_CHAIN";
1813   case PPCISD::BUILD_FP128:     return "PPCISD::BUILD_FP128";
1814   case PPCISD::BUILD_SPE64:     return "PPCISD::BUILD_SPE64";
1815   case PPCISD::EXTRACT_SPE:     return "PPCISD::EXTRACT_SPE";
1816   case PPCISD::EXTSWSLI:        return "PPCISD::EXTSWSLI";
1817   case PPCISD::LD_VSX_LH:       return "PPCISD::LD_VSX_LH";
1818   case PPCISD::FP_EXTEND_HALF:  return "PPCISD::FP_EXTEND_HALF";
1819   case PPCISD::MAT_PCREL_ADDR:  return "PPCISD::MAT_PCREL_ADDR";
1820   case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR:
1821     return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1822   case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR:
1823     return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1824   case PPCISD::ACC_BUILD:       return "PPCISD::ACC_BUILD";
1825   case PPCISD::PAIR_BUILD:      return "PPCISD::PAIR_BUILD";
1826   case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1827   case PPCISD::XXMFACC:         return "PPCISD::XXMFACC";
1828   case PPCISD::LD_SPLAT:        return "PPCISD::LD_SPLAT";
1829   case PPCISD::ZEXT_LD_SPLAT:   return "PPCISD::ZEXT_LD_SPLAT";
1830   case PPCISD::SEXT_LD_SPLAT:   return "PPCISD::SEXT_LD_SPLAT";
1831   case PPCISD::FNMSUB:          return "PPCISD::FNMSUB";
1832   case PPCISD::STRICT_FADDRTZ:
1833     return "PPCISD::STRICT_FADDRTZ";
1834   case PPCISD::STRICT_FCTIDZ:
1835     return "PPCISD::STRICT_FCTIDZ";
1836   case PPCISD::STRICT_FCTIWZ:
1837     return "PPCISD::STRICT_FCTIWZ";
1838   case PPCISD::STRICT_FCTIDUZ:
1839     return "PPCISD::STRICT_FCTIDUZ";
1840   case PPCISD::STRICT_FCTIWUZ:
1841     return "PPCISD::STRICT_FCTIWUZ";
1842   case PPCISD::STRICT_FCFID:
1843     return "PPCISD::STRICT_FCFID";
1844   case PPCISD::STRICT_FCFIDU:
1845     return "PPCISD::STRICT_FCFIDU";
1846   case PPCISD::STRICT_FCFIDS:
1847     return "PPCISD::STRICT_FCFIDS";
1848   case PPCISD::STRICT_FCFIDUS:
1849     return "PPCISD::STRICT_FCFIDUS";
1850   case PPCISD::LXVRZX:          return "PPCISD::LXVRZX";
1851   case PPCISD::STORE_COND:
1852     return "PPCISD::STORE_COND";
1853   case PPCISD::SETBC:
1854     return "PPCISD::SETBC";
1855   case PPCISD::SETBCR:
1856     return "PPCISD::SETBCR";
1857   }
1858   return nullptr;
1859 }
1860 
1861 EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1862                                           EVT VT) const {
1863   if (!VT.isVector())
1864     return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1865 
1866   return VT.changeVectorElementTypeToInteger();
1867 }
1868 
1869 bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
1870   assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1871   return true;
1872 }
1873 
1874 //===----------------------------------------------------------------------===//
1875 // Node matching predicates, for use by the tblgen matching code.
1876 //===----------------------------------------------------------------------===//
1877 
1878 /// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1879 static bool isFloatingPointZero(SDValue Op) {
1880   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1881     return CFP->getValueAPF().isZero();
1882   else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1883     // Maybe this has already been legalized into the constant pool?
1884     if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1885       if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1886         return CFP->getValueAPF().isZero();
1887   }
1888   return false;
1889 }
1890 
1891 /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
1892 /// true if Op is undef or if it matches the specified value.
1893 static bool isConstantOrUndef(int Op, int Val) {
1894   return Op < 0 || Op == Val;
1895 }
1896 
1897 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1898 /// VPKUHUM instruction.
1899 /// The ShuffleKind distinguishes between big-endian operations with
1900 /// two different inputs (0), either-endian operations with two identical
1901 /// inputs (1), and little-endian operations with two different inputs (2).
1902 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1903 bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1904                                SelectionDAG &DAG) {
1905   bool IsLE = DAG.getDataLayout().isLittleEndian();
1906   if (ShuffleKind == 0) {
1907     if (IsLE)
1908       return false;
1909     for (unsigned i = 0; i != 16; ++i)
1910       if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1911         return false;
1912   } else if (ShuffleKind == 2) {
1913     if (!IsLE)
1914       return false;
1915     for (unsigned i = 0; i != 16; ++i)
1916       if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1917         return false;
1918   } else if (ShuffleKind == 1) {
1919     unsigned j = IsLE ? 0 : 1;
1920     for (unsigned i = 0; i != 8; ++i)
1921       if (!isConstantOrUndef(N->getMaskElt(i),    i*2+j) ||
1922           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j))
1923         return false;
1924   }
1925   return true;
1926 }
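// For example, with ShuffleKind 0 (big-endian, two different inputs) the
// accepted VPKUHUM mask is <1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31>,
// i.e. the odd-numbered bytes of the two concatenated inputs.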
1927 
1928 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1929 /// VPKUWUM instruction.
1930 /// The ShuffleKind distinguishes between big-endian operations with
1931 /// two different inputs (0), either-endian operations with two identical
1932 /// inputs (1), and little-endian operations with two different inputs (2).
1933 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1934 bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1935                                SelectionDAG &DAG) {
1936   bool IsLE = DAG.getDataLayout().isLittleEndian();
1937   if (ShuffleKind == 0) {
1938     if (IsLE)
1939       return false;
1940     for (unsigned i = 0; i != 16; i += 2)
1941       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
1942           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3))
1943         return false;
1944   } else if (ShuffleKind == 2) {
1945     if (!IsLE)
1946       return false;
1947     for (unsigned i = 0; i != 16; i += 2)
1948       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
1949           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1))
1950         return false;
1951   } else if (ShuffleKind == 1) {
1952     unsigned j = IsLE ? 0 : 2;
1953     for (unsigned i = 0; i != 8; i += 2)
1954       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
1955           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
1956           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
1957           !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1))
1958         return false;
1959   }
1960   return true;
1961 }
1962 
1963 /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1964 /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1965 /// current subtarget.
1966 ///
1967 /// The ShuffleKind distinguishes between big-endian operations with
1968 /// two different inputs (0), either-endian operations with two identical
1969 /// inputs (1), and little-endian operations with two different inputs (2).
1970 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1971 bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1972                                SelectionDAG &DAG) {
1973   const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
1974   if (!Subtarget.hasP8Vector())
1975     return false;
1976 
1977   bool IsLE = DAG.getDataLayout().isLittleEndian();
1978   if (ShuffleKind == 0) {
1979     if (IsLE)
1980       return false;
1981     for (unsigned i = 0; i != 16; i += 4)
1982       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+4) ||
1983           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+5) ||
1984           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+6) ||
1985           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+7))
1986         return false;
1987   } else if (ShuffleKind == 2) {
1988     if (!IsLE)
1989       return false;
1990     for (unsigned i = 0; i != 16; i += 4)
1991       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
1992           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1) ||
1993           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+2) ||
1994           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+3))
1995         return false;
1996   } else if (ShuffleKind == 1) {
1997     unsigned j = IsLE ? 0 : 4;
1998     for (unsigned i = 0; i != 8; i += 4)
1999       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
2000           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
2001           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+j+2) ||
2002           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+j+3) ||
2003           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
2004           !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1) ||
2005           !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
2006           !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
2007         return false;
2008   }
2009   return true;
2010 }
2011 
2012 /// isVMerge - Common function, used to match vmrg* shuffles.
2013 ///
2014 static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
2015                      unsigned LHSStart, unsigned RHSStart) {
2016   if (N->getValueType(0) != MVT::v16i8)
2017     return false;
2018   assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
2019          "Unsupported merge size!");
2020 
2021   for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
2022     for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
2023       if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
2024                              LHSStart+j+i*UnitSize) ||
2025           !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
2026                              RHSStart+j+i*UnitSize))
2027         return false;
2028     }
2029   return true;
2030 }
2031 
2032 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
2033 /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
2034 /// The ShuffleKind distinguishes between big-endian merges with two
2035 /// different inputs (0), either-endian merges with two identical inputs (1),
2036 /// and little-endian merges with two different inputs (2).  For the latter,
2037 /// the input operands are swapped (see PPCInstrAltivec.td).
2038 bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
2039                              unsigned ShuffleKind, SelectionDAG &DAG) {
2040   if (DAG.getDataLayout().isLittleEndian()) {
2041     if (ShuffleKind == 1) // unary
2042       return isVMerge(N, UnitSize, 0, 0);
2043     else if (ShuffleKind == 2) // swapped
2044       return isVMerge(N, UnitSize, 0, 16);
2045     else
2046       return false;
2047   } else {
2048     if (ShuffleKind == 1) // unary
2049       return isVMerge(N, UnitSize, 8, 8);
2050     else if (ShuffleKind == 0) // normal
2051       return isVMerge(N, UnitSize, 8, 24);
2052     else
2053       return false;
2054   }
2055 }
2056 
2057 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
2058 /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
2059 /// The ShuffleKind distinguishes between big-endian merges with two
2060 /// different inputs (0), either-endian merges with two identical inputs (1),
2061 /// and little-endian merges with two different inputs (2).  For the latter,
2062 /// the input operands are swapped (see PPCInstrAltivec.td).
2063 bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
2064                              unsigned ShuffleKind, SelectionDAG &DAG) {
2065   if (DAG.getDataLayout().isLittleEndian()) {
2066     if (ShuffleKind == 1) // unary
2067       return isVMerge(N, UnitSize, 8, 8);
2068     else if (ShuffleKind == 2) // swapped
2069       return isVMerge(N, UnitSize, 8, 24);
2070     else
2071       return false;
2072   } else {
2073     if (ShuffleKind == 1) // unary
2074       return isVMerge(N, UnitSize, 0, 0);
2075     else if (ShuffleKind == 0) // normal
2076       return isVMerge(N, UnitSize, 0, 16);
2077     else
2078       return false;
2079   }
2080 }
2081 
2082 /**
2083  * Common function used to match vmrgew and vmrgow shuffles
2084  *
2085  * The indexOffset determines whether to look for even or odd words in
2086  * the shuffle mask. This is based on the endianness of the target
2087  * machine.
2088  *   - Little Endian:
2089  *     - Use offset of 0 to check for odd elements
2090  *     - Use offset of 4 to check for even elements
2091  *   - Big Endian:
2092  *     - Use offset of 0 to check for even elements
2093  *     - Use offset of 4 to check for odd elements
2094  * A detailed description of the vector element ordering for little endian and
2095  * big endian can be found at
2096  * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
2097  * Targeting your applications - what little endian and big endian IBM XL C/C++
2098  * compiler differences mean to you
2099  *
2100  * The mask to the shuffle vector instruction specifies the indices of the
2101  * elements from the two input vectors to place in the result. The elements are
2102  * numbered in array-access order, starting with the first vector. These vectors
2103  * are always of type v16i8, thus each vector will contain 16 elements of 8
2104  * bits each. More info on the shuffle vector instruction can be found in the
2105  * http://llvm.org/docs/LangRef.html#shufflevector-instruction
2106  * Language Reference.
2107  *
2108  * The RHSStartValue indicates whether the same input vectors are used (unary)
2109  * or two different input vectors are used, based on the following:
2110  *   - If the instruction uses the same vector for both inputs, the range of the
2111  *     indices will be 0 to 15. In this case, the RHSStart value passed should
2112  *     be 0.
2113  *   - If the instruction has two different vectors then the range of the
2114  *     indices will be 0 to 31. In this case, the RHSStart value passed should
2115  *     be 16 (indices 0-15 specify elements in the first vector while indices 16
2116  *     to 31 specify elements in the second vector).
2117  *
2118  * \param[in] N The shuffle vector SD Node to analyze
2119  * \param[in] IndexOffset Specifies whether to look for even or odd elements
2120  * \param[in] RHSStartValue Specifies the starting index for the righthand input
2121  * vector to the shuffle_vector instruction
2122  * \return true iff this shuffle vector represents an even or odd word merge
2123  */
2124 static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
2125                      unsigned RHSStartValue) {
2126   if (N->getValueType(0) != MVT::v16i8)
2127     return false;
2128 
2129   for (unsigned i = 0; i < 2; ++i)
2130     for (unsigned j = 0; j < 4; ++j)
2131       if (!isConstantOrUndef(N->getMaskElt(i*4+j),
2132                              i*RHSStartValue+j+IndexOffset) ||
2133           !isConstantOrUndef(N->getMaskElt(i*4+j+8),
2134                              i*RHSStartValue+j+IndexOffset+8))
2135         return false;
2136   return true;
2137 }
2138 
2139 /**
2140  * Determine if the specified shuffle mask is suitable for the vmrgew or
2141  * vmrgow instructions.
2142  *
2143  * \param[in] N The shuffle vector SD Node to analyze
2144  * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
2145  * \param[in] ShuffleKind Identify the type of merge:
2146  *   - 0 = big-endian merge with two different inputs;
2147  *   - 1 = either-endian merge with two identical inputs;
2148  *   - 2 = little-endian merge with two different inputs (inputs are swapped for
2149  *     little-endian merges).
2150  * \param[in] DAG The current SelectionDAG
2151  * \return true iff this shuffle mask
2152  */
2153 bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
2154                               unsigned ShuffleKind, SelectionDAG &DAG) {
2155   if (DAG.getDataLayout().isLittleEndian()) {
2156     unsigned indexOffset = CheckEven ? 4 : 0;
2157     if (ShuffleKind == 1) // Unary
2158       return isVMerge(N, indexOffset, 0);
2159     else if (ShuffleKind == 2) // swapped
2160       return isVMerge(N, indexOffset, 16);
2161     else
2162       return false;
2163   }
2164   else {
2165     unsigned indexOffset = CheckEven ? 0 : 4;
2166     if (ShuffleKind == 1) // Unary
2167       return isVMerge(N, indexOffset, 0);
2168     else if (ShuffleKind == 0) // Normal
2169       return isVMerge(N, indexOffset, 16);
2170     else
2171       return false;
2172   }
2173   return false;
2174 }
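// For example, an even word merge (CheckEven == true) with ShuffleKind 0
// (big-endian, two different inputs) accepts the v16i8 mask
// <0,1,2,3, 16,17,18,19, 8,9,10,11, 24,25,26,27>.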
2175 
2176 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2177 /// amount, otherwise return -1.
2178 /// The ShuffleKind distinguishes between big-endian operations with two
2179 /// different inputs (0), either-endian operations with two identical inputs
2180 /// (1), and little-endian operations with two different inputs (2).  For the
2181 /// latter, the input operands are swapped (see PPCInstrAltivec.td).
2182 int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2183                              SelectionDAG &DAG) {
2184   if (N->getValueType(0) != MVT::v16i8)
2185     return -1;
2186 
2187   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2188 
2189   // Find the first non-undef value in the shuffle mask.
2190   unsigned i;
2191   for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
2192     /*search*/;
2193 
2194   if (i == 16) return -1;  // all undef.
2195 
2196   // Otherwise, check to see if the rest of the elements are consecutively
2197   // numbered from this value.
2198   unsigned ShiftAmt = SVOp->getMaskElt(i);
2199   if (ShiftAmt < i) return -1;
2200 
2201   ShiftAmt -= i;
2202   bool isLE = DAG.getDataLayout().isLittleEndian();
2203 
2204   if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2205     // Check the rest of the elements to see if they are consecutive.
2206     for (++i; i != 16; ++i)
2207       if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2208         return -1;
2209   } else if (ShuffleKind == 1) {
2210     // Check the rest of the elements to see if they are consecutive.
2211     for (++i; i != 16; ++i)
2212       if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
2213         return -1;
2214   } else
2215     return -1;
2216 
2217   if (isLE)
2218     ShiftAmt = 16 - ShiftAmt;
2219 
2220   return ShiftAmt;
2221 }
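// For example, the mask <4,5,6,...,19> yields a shift amount of 4 for a
// big-endian ShuffleKind-0 shuffle, and 12 for the corresponding little-endian
// ShuffleKind-2 shuffle (where the inputs are swapped).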
2222 
2223 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2224 /// specifies a splat of a single element that is suitable for input to
2225 /// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
2226 bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
2227   EVT VT = N->getValueType(0);
2228   if (VT == MVT::v2i64 || VT == MVT::v2f64)
2229     return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
2230 
2231   assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2232          EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2233 
2234   // The consecutive indices need to specify an element, not part of two
2235   // different elements.  So abandon ship early if this isn't the case.
2236   if (N->getMaskElt(0) % EltSize != 0)
2237     return false;
2238 
2239   // This is a splat operation if each element of the permute is the same, and
2240   // if the value doesn't reference the second vector.
2241   unsigned ElementBase = N->getMaskElt(0);
2242 
2243   // FIXME: Handle UNDEF elements too!
2244   if (ElementBase >= 16)
2245     return false;
2246 
2247   // Check that the indices are consecutive, in the case of a multi-byte element
2248   // splatted with a v16i8 mask.
2249   for (unsigned i = 1; i != EltSize; ++i)
2250     if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2251       return false;
2252 
2253   for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2254     if (N->getMaskElt(i) < 0) continue;
2255     for (unsigned j = 0; j != EltSize; ++j)
2256       if (N->getMaskElt(i+j) != N->getMaskElt(j))
2257         return false;
2258   }
2259   return true;
2260 }
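// For example, splatting word 1 of a v4i32 corresponds to EltSize == 4 and the
// v16i8 mask <4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7>.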
2261 
2262 /// Check that the mask is shuffling N byte elements. Within each N byte
2263 /// element of the mask, the indices could be either in increasing or
2264 /// decreasing order as long as they are consecutive.
2265 /// \param[in] N the shuffle vector SD Node to analyze
2266 /// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2267 /// Word/DoubleWord/QuadWord).
2268 /// \param[in] StepLen the delta between consecutive indices within each N byte
2269 /// element; it is 1 if the mask is in increasing order and -1 if decreasing.
2270 /// \return true iff the mask is shuffling N byte elements.
2271 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2272                                    int StepLen) {
2273   assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2274          "Unexpected element width.");
2275   assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
2276 
2277   unsigned NumOfElem = 16 / Width;
2278   unsigned MaskVal[16]; //  Width is never greater than 16
2279   for (unsigned i = 0; i < NumOfElem; ++i) {
2280     MaskVal[0] = N->getMaskElt(i * Width);
2281     if ((StepLen == 1) && (MaskVal[0] % Width)) {
2282       return false;
2283     } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2284       return false;
2285     }
2286 
2287     for (unsigned int j = 1; j < Width; ++j) {
2288       MaskVal[j] = N->getMaskElt(i * Width + j);
2289       if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2290         return false;
2291       }
2292     }
2293   }
2294 
2295   return true;
2296 }
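// For example, with Width == 4 and StepLen == 1 the word-rearranging mask
// <8,9,10,11, 0,1,2,3, 24,25,26,27, 16,17,18,19> is accepted: each group of 4
// bytes starts on a multiple of 4 and increases consecutively.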
2297 
2298 bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2299                           unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2300   if (!isNByteElemShuffleMask(N, 4, 1))
2301     return false;
2302 
2303   // Now we look at mask elements 0,4,8,12
2304   unsigned M0 = N->getMaskElt(0) / 4;
2305   unsigned M1 = N->getMaskElt(4) / 4;
2306   unsigned M2 = N->getMaskElt(8) / 4;
2307   unsigned M3 = N->getMaskElt(12) / 4;
2308   unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2309   unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2310 
2311   // Below, let H and L be arbitrary elements of the shuffle mask
2312   // where H is in the range [4,7] and L is in the range [0,3].
2313   // H, 1, 2, 3 or L, 5, 6, 7
2314   if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2315       (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2316     ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2317     InsertAtByte = IsLE ? 12 : 0;
2318     Swap = M0 < 4;
2319     return true;
2320   }
2321   // 0, H, 2, 3 or 4, L, 6, 7
2322   if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2323       (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2324     ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2325     InsertAtByte = IsLE ? 8 : 4;
2326     Swap = M1 < 4;
2327     return true;
2328   }
2329   // 0, 1, H, 3 or 4, 5, L, 7
2330   if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2331       (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2332     ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2333     InsertAtByte = IsLE ? 4 : 8;
2334     Swap = M2 < 4;
2335     return true;
2336   }
2337   // 0, 1, 2, H or 4, 5, 6, L
2338   if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2339       (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2340     ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2341     InsertAtByte = IsLE ? 0 : 12;
2342     Swap = M3 < 4;
2343     return true;
2344   }
2345 
2346   // If both vector operands for the shuffle are the same vector, the mask will
2347   // contain only elements from the first one and the second one will be undef.
2348   if (N->getOperand(1).isUndef()) {
2349     ShiftElts = 0;
2350     Swap = true;
2351     unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2352     if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2353       InsertAtByte = IsLE ? 12 : 0;
2354       return true;
2355     }
2356     if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2357       InsertAtByte = IsLE ? 8 : 4;
2358       return true;
2359     }
2360     if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2361       InsertAtByte = IsLE ? 4 : 8;
2362       return true;
2363     }
2364     if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2365       InsertAtByte = IsLE ? 0 : 12;
2366       return true;
2367     }
2368   }
2369 
2370   return false;
2371 }
2372 
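2373 /// Check whether the shuffle mask of \p N can be implemented with the XXSLDWI
2373 /// (shift-left-double by word immediate) instruction.  For example
2373 /// (illustration only), with an undef second operand the byte mask
2373 /// <4,5,...,15, 0,1,2,3> is the word mask (1, 2, 3, 0), a one-word rotate,
2373 /// and is accepted.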
2373 bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2374                                bool &Swap, bool IsLE) {
2375   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2376   // Ensure each byte index of the word is consecutive.
2377   if (!isNByteElemShuffleMask(N, 4, 1))
2378     return false;
2379 
2380   // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2381   unsigned M0 = N->getMaskElt(0) / 4;
2382   unsigned M1 = N->getMaskElt(4) / 4;
2383   unsigned M2 = N->getMaskElt(8) / 4;
2384   unsigned M3 = N->getMaskElt(12) / 4;
2385 
2386   // If both vector operands for the shuffle are the same vector, the mask will
2387   // contain only elements from the first one and the second one will be undef.
2388   if (N->getOperand(1).isUndef()) {
2389     assert(M0 < 4 && "Indexing into an undef vector?");
2390     if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2391       return false;
2392 
2393     ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2394     Swap = false;
2395     return true;
2396   }
2397 
2398   // Ensure each word index of the ShuffleVector Mask is consecutive.
2399   if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2400     return false;
2401 
2402   if (IsLE) {
2403     if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2404       // Input vectors don't need to be swapped if the leading element
2405       // of the result is one of the 3 left elements of the second vector
2406       // (or if there is no shift to be done at all).
2407       Swap = false;
2408       ShiftElts = (8 - M0) % 8;
2409     } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2410       // Input vectors need to be swapped if the leading element
2411       // of the result is one of the 3 left elements of the first vector
2412       // (or if we're shifting by 4 - thereby simply swapping the vectors).
2413       Swap = true;
2414       ShiftElts = (4 - M0) % 4;
2415     }
2416 
2417     return true;
2418   } else {                                          // BE
2419     if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2420       // Input vectors don't need to be swapped if the leading element
2421       // of the result is one of the 4 elements of the first vector.
2422       Swap = false;
2423       ShiftElts = M0;
2424     } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2425       // Input vectors need to be swapped if the leading element
2426       // of the result is one of the 4 elements of the right vector.
2427       Swap = true;
2428       ShiftElts = M0 - 4;
2429     }
2430 
2431     return true;
2432   }
2433 }
2434 
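2435 /// Helper for the XXBR[HWDQ] byte-reverse matchers.  For example
2435 /// (illustration only), Width == 4 matches the byte mask
2435 /// <3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12>, i.e. a byte reverse within
2435 /// each word.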
2435 static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2436   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2437 
2438   if (!isNByteElemShuffleMask(N, Width, -1))
2439     return false;
2440 
2441   for (int i = 0; i < 16; i += Width)
2442     if (N->getMaskElt(i) != i + Width - 1)
2443       return false;
2444 
2445   return true;
2446 }
2447 
2448 bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2449   return isXXBRShuffleMaskHelper(N, 2);
2450 }
2451 
2452 bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2453   return isXXBRShuffleMaskHelper(N, 4);
2454 }
2455 
2456 bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2457   return isXXBRShuffleMaskHelper(N, 8);
2458 }
2459 
2460 bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2461   return isXXBRShuffleMaskHelper(N, 16);
2462 }
2463 
2464 /// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2465 /// if the inputs to the instruction should be swapped and set \p DM to the
2466 /// value for the immediate.
2467 /// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2468 /// AND element 0 of the result comes from the first input (LE) or second input
2469 /// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2470 /// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2471 /// mask.
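2472 /// For example (illustration only), on little-endian a two-input mask whose
2472 /// doubleword indices are (2, 1) - element 0 taken from the second input,
2472 /// element 1 from the first - is accepted with \p Swap set to false.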
2472 bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2473                                bool &Swap, bool IsLE) {
2474   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2475 
2476   // Ensure each byte index of the double word is consecutive.
2477   if (!isNByteElemShuffleMask(N, 8, 1))
2478     return false;
2479 
2480   unsigned M0 = N->getMaskElt(0) / 8;
2481   unsigned M1 = N->getMaskElt(8) / 8;
2482   assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2483 
2484   // If both vector operands for the shuffle are the same vector, the mask will
2485   // contain only elements from the first one and the second one will be undef.
2486   if (N->getOperand(1).isUndef()) {
2487     if ((M0 | M1) < 2) {
2488       DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2489       Swap = false;
2490       return true;
2491     } else
2492       return false;
2493   }
2494 
2495   if (IsLE) {
2496     if (M0 > 1 && M1 < 2) {
2497       Swap = false;
2498     } else if (M0 < 2 && M1 > 1) {
2499       M0 = (M0 + 2) % 4;
2500       M1 = (M1 + 2) % 4;
2501       Swap = true;
2502     } else
2503       return false;
2504 
2505     // Note: if control flow reaches this point, Swap was already set above.
2506     DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2507     return true;
2508   } else { // BE
2509     if (M0 < 2 && M1 > 1) {
2510       Swap = false;
2511     } else if (M0 > 1 && M1 < 2) {
2512       M0 = (M0 + 2) % 4;
2513       M1 = (M1 + 2) % 4;
2514       Swap = true;
2515     } else
2516       return false;
2517 
2518     // Note: if control flow reaches this point, Swap was already set above.
2519     DM = (M0 << 1) + (M1 & 1);
2520     return true;
2521   }
2522 }
2523 
2524 
2525 /// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2526 /// appropriate for PPC mnemonics (which have a big endian bias - namely
2527 /// elements are counted from the left of the vector register).
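2528 /// For example, on a little-endian subtarget, EltSize == 4 with a mask that
2528 /// splats element 0 (getMaskElt(0) == 0) yields a PPC splat index of 3.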
2528 unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2529                                          SelectionDAG &DAG) {
2530   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2531   assert(isSplatShuffleMask(SVOp, EltSize));
2532   EVT VT = SVOp->getValueType(0);
2533 
2534   if (VT == MVT::v2i64 || VT == MVT::v2f64)
2535     return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
2536                                                 : SVOp->getMaskElt(0);
2537 
2538   if (DAG.getDataLayout().isLittleEndian())
2539     return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2540   else
2541     return SVOp->getMaskElt(0) / EltSize;
2542 }
2543 
2544 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2545 /// by using a vspltis[bhw] instruction of the specified element size, return
2546 /// the constant being splatted.  The ByteSize field indicates the number of
2547 /// bytes of each element [124] -> [bhw].
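2548 /// For example (illustration only), a v8i16 build_vector whose elements are
2548 /// all 0x0101 can be matched as a "vspltisb 1" when ByteSize == 1.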
2548 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2549   SDValue OpVal;
2550 
2551   // If ByteSize of the splat is bigger than the element size of the
2552   // build_vector, then we have a case where we are checking for a splat where
2553   // multiple elements of the buildvector are folded together into a single
2554   // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2555   unsigned EltSize = 16/N->getNumOperands();
2556   if (EltSize < ByteSize) {
2557     unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
2558     SDValue UniquedVals[4];
2559     assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2560 
2561     // See if all of the elements in the buildvector agree across each chunk.
2562     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2563       if (N->getOperand(i).isUndef()) continue;
2564       // If the element isn't a constant, bail fully out.
2565       if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2566 
2567       if (!UniquedVals[i&(Multiple-1)].getNode())
2568         UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2569       else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2570         return SDValue();  // no match.
2571     }
2572 
2573     // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2574     // either constant or undef values that are identical for each chunk.  See
2575     // if these chunks can form into a larger vspltis*.
2576 
2577     // Check to see if all of the leading entries are either 0 or -1.  If
2578     // neither, then this won't fit into the immediate field.
2579     bool LeadingZero = true;
2580     bool LeadingOnes = true;
2581     for (unsigned i = 0; i != Multiple-1; ++i) {
2582       if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.
2583 
2584       LeadingZero &= isNullConstant(UniquedVals[i]);
2585       LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2586     }
2587     // Finally, check the least significant entry.
2588     if (LeadingZero) {
2589       if (!UniquedVals[Multiple-1].getNode())
2590         return DAG.getTargetConstant(0, SDLoc(N), MVT::i32);  // 0,0,0,undef
2591       int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
2592       if (Val < 16)                                   // 0,0,0,4 -> vspltisw(4)
2593         return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2594     }
2595     if (LeadingOnes) {
2596       if (!UniquedVals[Multiple-1].getNode())
2597         return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2598       int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2599       if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
2600         return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2601     }
2602 
2603     return SDValue();
2604   }
2605 
2606   // Check to see if this buildvec has a single non-undef value in its elements.
2607   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2608     if (N->getOperand(i).isUndef()) continue;
2609     if (!OpVal.getNode())
2610       OpVal = N->getOperand(i);
2611     else if (OpVal != N->getOperand(i))
2612       return SDValue();
2613   }
2614 
2615   if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.
2616 
2617   unsigned ValSizeInBytes = EltSize;
2618   uint64_t Value = 0;
2619   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2620     Value = CN->getZExtValue();
2621   } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2622     assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2623     Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
2624   }
2625 
2626   // If the splat value is larger than the element value, then we can never do
2627   // this splat.  The only case that we could fit the replicated bits into our
2628   // immediate field for would be zero, and we prefer to use vxor for it.
2629   if (ValSizeInBytes < ByteSize) return SDValue();
2630 
2631   // If the element value is larger than the splat value, check if it consists
2632   // of a repeated bit pattern of size ByteSize.
2633   if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2634     return SDValue();
2635 
2636   // Properly sign extend the value.
2637   int MaskVal = SignExtend32(Value, ByteSize * 8);
2638 
2639   // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2640   if (MaskVal == 0) return SDValue();
2641 
2642   // Finally, if this value fits in a 5 bit sext field, return it
2643   if (SignExtend32<5>(MaskVal) == MaskVal)
2644     return DAG.getSignedTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2645   return SDValue();
2646 }
2647 
2648 //===----------------------------------------------------------------------===//
2649 //  Addressing Mode Selection
2650 //===----------------------------------------------------------------------===//
2651 
2652 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2653 /// or 64-bit immediate, and if the value can be accurately represented as a
2654 /// sign extension from a 16-bit value.  If so, this returns true and the
2655 /// immediate.
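2656 /// For example, a constant of 32767 (0x7FFF) qualifies, while 32768 (0x8000)
2656 /// does not, since the latter changes value when sign-extended from 16 bits.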
2656 bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2657   if (!isa<ConstantSDNode>(N))
2658     return false;
2659 
2660   Imm = (int16_t)N->getAsZExtVal();
2661   if (N->getValueType(0) == MVT::i32)
2662     return Imm == (int32_t)N->getAsZExtVal();
2663   else
2664     return Imm == (int64_t)N->getAsZExtVal();
2665 }
2666 bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2667   return isIntS16Immediate(Op.getNode(), Imm);
2668 }
2669 
2670 /// Used when computing address flags for selecting loads and stores.
2671 /// If we have an OR, check if the LHS and RHS are provably disjoint.
2672 /// An OR of two provably disjoint values is equivalent to an ADD.
2673 /// Most PPC load/store instructions compute the effective address as a sum,
2674 /// so doing this conversion is useful.
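2675 /// For example (illustration only), (or (shl X, 4), 3) qualifies: the low four
2675 /// bits of the left-hand side are known zero and all other bits of the
2675 /// constant 3 are zero, so every bit position is known zero on at least one
2675 /// side and the OR can never carry.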
2675 static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2676   if (N.getOpcode() != ISD::OR)
2677     return false;
2678   KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2679   if (!LHSKnown.Zero.getBoolValue())
2680     return false;
2681   KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2682   return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2683 }
2684 
2685 /// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2686 /// be represented as an indexed [r+r] operation.
2687 bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2688                                                SDValue &Index,
2689                                                SelectionDAG &DAG) const {
2690   for (SDNode *U : N->users()) {
2691     if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2692       if (Memop->getMemoryVT() == MVT::f64) {
2693         Base = N.getOperand(0);
2694         Index = N.getOperand(1);
2695         return true;
2696       }
2697     }
2698   }
2699   return false;
2700 }
2701 
2702 /// isIntS34Immediate - This method tests whether the value of the given node
2703 /// can be accurately represented as a sign extension from a 34-bit value.
2704 /// If so, this returns true and the immediate.
2705 bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2706   if (!isa<ConstantSDNode>(N))
2707     return false;
2708 
2709   Imm = (int64_t)cast<ConstantSDNode>(N)->getSExtValue();
2710   return isInt<34>(Imm);
2711 }
2712 bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2713   return isIntS34Immediate(Op.getNode(), Imm);
2714 }
2715 
2716 /// SelectAddressRegReg - Given the specified address, check to see if it
2717 /// can be represented as an indexed [r+r] operation.  Returns false if it
2718 /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2719 /// non-zero and N can be represented by a base register plus a signed 16-bit
2720 /// displacement, make a more precise judgement by checking (displacement % \p
2721 /// EncodingAlignment).
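2722 /// For example (illustration only, assuming no SPE), with an
2722 /// \p EncodingAlignment of 4, (add X, 6) is not treated as r+imm because 6 is
2722 /// not a multiple of 4, so it is selected as [r+r] instead.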
2722 bool PPCTargetLowering::SelectAddressRegReg(
2723     SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2724     MaybeAlign EncodingAlignment) const {
2725   // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2726   // a [pc+imm].
2727   if (SelectAddressPCRel(N, Base))
2728     return false;
2729 
2730   int16_t Imm = 0;
2731   if (N.getOpcode() == ISD::ADD) {
2732     // SPE f64 loads/stores cannot handle a 16-bit offset; they only support
2733     // 8-bit offsets, so check for an EVX [r+r] form first.
2734     if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2735       return true;
2736     if (isIntS16Immediate(N.getOperand(1), Imm) &&
2737         (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2738       return false; // r+i
2739     if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2740       return false;    // r+i
2741 
2742     Base = N.getOperand(0);
2743     Index = N.getOperand(1);
2744     return true;
2745   } else if (N.getOpcode() == ISD::OR) {
2746     if (isIntS16Immediate(N.getOperand(1), Imm) &&
2747         (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2748       return false; // Prefer r+i, which can fold the immediate.
2749 
2750     // If this is an or of disjoint bitfields, we can codegen this as an add
2751     // (for better address arithmetic) if the LHS and RHS of the OR are provably
2752     // disjoint.
2753     KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2754 
2755     if (LHSKnown.Zero.getBoolValue()) {
2756       KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2757       // If all of the bits are known zero on the LHS or RHS, the add won't
2758       // carry.
2759       if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2760         Base = N.getOperand(0);
2761         Index = N.getOperand(1);
2762         return true;
2763       }
2764     }
2765   }
2766 
2767   return false;
2768 }
2769 
2770 // If we happen to be doing an i64 load or store into a stack slot that has
2771 // less than a 4-byte alignment, then the frame-index elimination may need to
2772 // use an indexed load or store instruction (because the offset may not be a
2773 // multiple of 4). The extra register needed to hold the offset comes from the
2774 // register scavenger, and it is possible that the scavenger will need to use
2775 // an emergency spill slot. As a result, we need to make sure that a spill slot
2776 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2777 // stack slot.
2778 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2779   // FIXME: This does not handle the LWA case.
2780   if (VT != MVT::i64)
2781     return;
2782 
2783   // NOTE: We'll exclude negative FIs here, which come from argument
2784   // lowering, because there are no known test cases triggering this problem
2785   // using packed structures (or similar). We can remove this exclusion if
2786   // we find such a test case. The reason why this is so test-case driven is
2787   // because this entire 'fixup' is only to prevent crashes (from the
2788   // register scavenger) on not-really-valid inputs. For example, if we have:
2789   //   %a = alloca i1
2790   //   %b = bitcast i1* %a to i64*
2791   //   store i64 0, i64* %b
2792   // then the store should really be marked as 'align 1', but is not. If it
2793   // were marked as 'align 1' then the indexed form would have been
2794   // instruction-selected initially, and the problem this 'fixup' is preventing
2795   // won't happen regardless.
2796   if (FrameIdx < 0)
2797     return;
2798 
2799   MachineFunction &MF = DAG.getMachineFunction();
2800   MachineFrameInfo &MFI = MF.getFrameInfo();
2801 
2802   if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2803     return;
2804 
2805   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2806   FuncInfo->setHasNonRISpills();
2807 }
2808 
2809 /// Returns true if the address N can be represented by a base register plus
2810 /// a signed 16-bit displacement [r+imm], and if it is not better
2811 /// represented as reg+reg.  If \p EncodingAlignment is non-zero, only accept
2812 /// displacements that are multiples of that value.
2813 bool PPCTargetLowering::SelectAddressRegImm(
2814     SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2815     MaybeAlign EncodingAlignment) const {
2816   // FIXME dl should come from parent load or store, not from address
2817   SDLoc dl(N);
2818 
2819   // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2820   // a [pc+imm].
2821   if (SelectAddressPCRel(N, Base))
2822     return false;
2823 
2824   // If this can be more profitably realized as r+r, fail.
2825   if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2826     return false;
2827 
2828   if (N.getOpcode() == ISD::ADD) {
2829     int16_t imm = 0;
2830     if (isIntS16Immediate(N.getOperand(1), imm) &&
2831         (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2832       Disp = DAG.getSignedTargetConstant(imm, dl, N.getValueType());
2833       if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2834         Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2835         fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2836       } else {
2837         Base = N.getOperand(0);
2838       }
2839       return true; // [r+i]
2840     } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2841       // Match LOAD (ADD (X, Lo(G))).
2842       assert(!N.getOperand(1).getConstantOperandVal(1) &&
2843              "Cannot handle constant offsets yet!");
2844       Disp = N.getOperand(1).getOperand(0);  // The global address.
2845       assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2846              Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2847              Disp.getOpcode() == ISD::TargetConstantPool ||
2848              Disp.getOpcode() == ISD::TargetJumpTable);
2849       Base = N.getOperand(0);
2850       return true;  // [&g+r]
2851     }
2852   } else if (N.getOpcode() == ISD::OR) {
2853     int16_t imm = 0;
2854     if (isIntS16Immediate(N.getOperand(1), imm) &&
2855         (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2856       // If this is an or of disjoint bitfields, we can codegen this as an add
2857       // (for better address arithmetic) if the LHS and RHS of the OR are
2858       // provably disjoint.
2859       KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2860 
2861       if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2862         // If all of the bits are known zero on the LHS or RHS, the add won't
2863         // carry.
2864         if (FrameIndexSDNode *FI =
2865               dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2866           Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2867           fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2868         } else {
2869           Base = N.getOperand(0);
2870         }
2871         Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2872         return true;
2873       }
2874     }
2875   } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2876     // Loading from a constant address.
2877 
2878     // If this address fits entirely in a 16-bit sext immediate field, codegen
2879     // this as "d, 0"
2880     int16_t Imm;
2881     if (isIntS16Immediate(CN, Imm) &&
2882         (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2883       Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2884       Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2885                              CN->getValueType(0));
2886       return true;
2887     }
2888 
2889     // Handle 32-bit sext immediates with LIS + addr mode.
2890     if ((CN->getValueType(0) == MVT::i32 ||
2891          (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2892         (!EncodingAlignment ||
2893          isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2894       int Addr = (int)CN->getZExtValue();
2895 
2896       // Otherwise, break this down into an LIS + disp.
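2897       // For example (illustration only), Addr = 0x12348000 gives
2897       // Disp = -32768 (the sign-extended low 16 bits) and a Base of
2897       // lis 0x1235, since 0x12350000 + (-32768) == 0x12348000.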
2897       Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2898 
2899       Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2900                                    MVT::i32);
2901       unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2902       Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2903       return true;
2904     }
2905   }
2906 
2907   Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2908   if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2909     Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2910     fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2911   } else
2912     Base = N;
2913   return true;      // [r+0]
2914 }
2915 
2916 /// Similar to the 16-bit case but for instructions that take a 34-bit
2917 /// displacement field (prefixed loads/stores).
2918 bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2919                                               SDValue &Base,
2920                                               SelectionDAG &DAG) const {
2921   // Only on 64-bit targets.
2922   if (N.getValueType() != MVT::i64)
2923     return false;
2924 
2925   SDLoc dl(N);
2926   int64_t Imm = 0;
2927 
2928   if (N.getOpcode() == ISD::ADD) {
2929     if (!isIntS34Immediate(N.getOperand(1), Imm))
2930       return false;
2931     Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2932     if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2933       Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2934     else
2935       Base = N.getOperand(0);
2936     return true;
2937   }
2938 
2939   if (N.getOpcode() == ISD::OR) {
2940     if (!isIntS34Immediate(N.getOperand(1), Imm))
2941       return false;
2942     // If this is an or of disjoint bitfields, we can codegen this as an add
2943     // (for better address arithmetic) if the LHS and RHS of the OR are
2944     // provably disjoint.
2945     KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2946     if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2947       return false;
2948     if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2949       Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2950     else
2951       Base = N.getOperand(0);
2952     Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2953     return true;
2954   }
2955 
2956   if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2957     Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2958     Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2959     return true;
2960   }
2961 
2962   return false;
2963 }
2964 
2965 /// SelectAddressRegRegOnly - Given the specified address, force it to be
2966 /// represented as an indexed [r+r] operation.
2967 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2968                                                 SDValue &Index,
2969                                                 SelectionDAG &DAG) const {
2970   // Check to see if we can easily represent this as an [r+r] address.  This
2971   // will fail if it thinks that the address is more profitably represented as
2972   // reg+imm, e.g. where imm = 0.
2973   if (SelectAddressRegReg(N, Base, Index, DAG))
2974     return true;
2975 
2976   // If the address is the result of an add, we will utilize the fact that the
2977   // address calculation includes an implicit add.  However, we can reduce
2978   // register pressure if we do not materialize a constant just for use as the
2979   // index register.  We only get rid of the add if it is not an add of a
2980   // value and a 16-bit signed constant where both operands have a single use.
2981   int16_t imm = 0;
2982   if (N.getOpcode() == ISD::ADD &&
2983       (!isIntS16Immediate(N.getOperand(1), imm) ||
2984        !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2985     Base = N.getOperand(0);
2986     Index = N.getOperand(1);
2987     return true;
2988   }
2989 
2990   // Otherwise, do it the hard way, using R0 as the base register.
2991   Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2992                          N.getValueType());
2993   Index = N;
2994   return true;
2995 }
2996 
2997 template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2998   Ty *PCRelCand = dyn_cast<Ty>(N);
2999   return PCRelCand && (PPCInstrInfo::hasPCRelFlag(PCRelCand->getTargetFlags()));
3000 }
3001 
3002 /// Returns true if this address is a PC Relative address.
3003 /// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
3004 /// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
3005 bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
3006   // This is a materialize PC Relative node. Always select this as PC Relative.
3007   Base = N;
3008   if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
3009     return true;
3010   if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
3011       isValidPCRelNode<GlobalAddressSDNode>(N) ||
3012       isValidPCRelNode<JumpTableSDNode>(N) ||
3013       isValidPCRelNode<BlockAddressSDNode>(N))
3014     return true;
3015   return false;
3016 }
3017 
3018 /// Returns true if we should use a direct load-into-vector instruction (such
3019 /// as lxsd or lfd) instead of a load into a GPR plus a direct-move sequence.
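3020 /// For example (illustration only), (v2i64 (scalar_to_vector (load i64))) can
3020 /// then be selected as a single lfd/lxsd instead of an ld followed by a
3020 /// direct move such as mtvsrd.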
3020 static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
3021 
3022   // If there are any uses other than scalar_to_vector, then we should
3023   // keep it as a scalar load -> direct move pattern to prevent multiple
3024   // loads.
3025   LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
3026   if (!LD)
3027     return false;
3028 
3029   EVT MemVT = LD->getMemoryVT();
3030   if (!MemVT.isSimple())
3031     return false;
3032   switch(MemVT.getSimpleVT().SimpleTy) {
3033   case MVT::i64:
3034     break;
3035   case MVT::i32:
3036     if (!ST.hasP8Vector())
3037       return false;
3038     break;
3039   case MVT::i16:
3040   case MVT::i8:
3041     if (!ST.hasP9Vector())
3042       return false;
3043     break;
3044   default:
3045     return false;
3046   }
3047 
3048   SDValue LoadedVal(N, 0);
3049   if (!LoadedVal.hasOneUse())
3050     return false;
3051 
3052   for (SDUse &Use : LD->uses())
3053     if (Use.getResNo() == 0 &&
3054         Use.getUser()->getOpcode() != ISD::SCALAR_TO_VECTOR &&
3055         Use.getUser()->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
3056       return false;
3057 
3058   return true;
3059 }
3060 
3061 /// getPreIndexedAddressParts - Returns true if the node's address can be
3062 /// legally represented as a pre-indexed load / store address; if so, the
3063 /// base pointer, offset, and addressing mode are returned by reference.
3064 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
3065                                                   SDValue &Offset,
3066                                                   ISD::MemIndexedMode &AM,
3067                                                   SelectionDAG &DAG) const {
3068   if (DisablePPCPreinc) return false;
3069 
3070   bool isLoad = true;
3071   SDValue Ptr;
3072   EVT VT;
3073   Align Alignment;
3074   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3075     Ptr = LD->getBasePtr();
3076     VT = LD->getMemoryVT();
3077     Alignment = LD->getAlign();
3078   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
3079     Ptr = ST->getBasePtr();
3080     VT  = ST->getMemoryVT();
3081     Alignment = ST->getAlign();
3082     isLoad = false;
3083   } else
3084     return false;
3085 
3086   // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
3087   // instructions because we can fold these into a more efficient instruction
3088   // instead (such as LXSD).
3089   if (isLoad && usePartialVectorLoads(N, Subtarget)) {
3090     return false;
3091   }
3092 
3093   // PowerPC doesn't have preinc load/store instructions for vectors
3094   if (VT.isVector())
3095     return false;
3096 
3097   if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
3098     // Common code will reject creating a pre-inc form if the base pointer
3099     // is a frame index, or if N is a store and the base pointer is either
3100     // the same as or a predecessor of the value being stored.  Check for
3101     // those situations here, and try with swapped Base/Offset instead.
3102     bool Swap = false;
3103 
3104     if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
3105       Swap = true;
3106     else if (!isLoad) {
3107       SDValue Val = cast<StoreSDNode>(N)->getValue();
3108       if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
3109         Swap = true;
3110     }
3111 
3112     if (Swap)
3113       std::swap(Base, Offset);
3114 
3115     AM = ISD::PRE_INC;
3116     return true;
3117   }
3118 
3119   // LDU/STU can only handle immediates that are a multiple of 4.
3120   if (VT != MVT::i64) {
3121     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
3122       return false;
3123   } else {
3124     // LDU/STU need an address with at least 4-byte alignment.
3125     if (Alignment < Align(4))
3126       return false;
3127 
3128     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
3129       return false;
3130   }
3131 
3132   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3133     // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
3134     // sext i32 to i64 when addr mode is r+i.
3135     if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
3136         LD->getExtensionType() == ISD::SEXTLOAD &&
3137         isa<ConstantSDNode>(Offset))
3138       return false;
3139   }
3140 
3141   AM = ISD::PRE_INC;
3142   return true;
3143 }
3144 
3145 //===----------------------------------------------------------------------===//
3146 //  LowerOperation implementation
3147 //===----------------------------------------------------------------------===//
3148 
3149 /// Set \p HiOpFlags and \p LoOpFlags to the target MO flags used when
3150 /// referencing labels, selecting the PIC variants when \p IsPIC is true.
3151 static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
3152                                unsigned &HiOpFlags, unsigned &LoOpFlags,
3153                                const GlobalValue *GV = nullptr) {
3154   HiOpFlags = PPCII::MO_HA;
3155   LoOpFlags = PPCII::MO_LO;
3156 
3157   // Don't use the pic base if not in PIC relocation model.
3158   if (IsPIC) {
3159     HiOpFlags = PPCII::MO_PIC_HA_FLAG;
3160     LoOpFlags = PPCII::MO_PIC_LO_FLAG;
3161   }
3162 }
3163 
3164 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3165                              SelectionDAG &DAG) {
3166   SDLoc DL(HiPart);
3167   EVT PtrVT = HiPart.getValueType();
3168   SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3169 
3170   SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3171   SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3172 
3173   // With PIC, the first instruction is actually "GR+hi(&G)".
3174   if (isPIC)
3175     Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3176                      DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3177 
3178   // Generate non-pic code that has direct accesses to the constant pool.
3179   // The address of the global is just (hi(&g)+lo(&g)).
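3180   // As a rough sketch (not taken from this file), the non-PIC form typically
3180   // selects to something like:
3180   //   lis rT, g@ha
3180   //   la  rT, g@l(rT)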
3180   return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3181 }
3182 
3183 static void setUsesTOCBasePtr(MachineFunction &MF) {
3184   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3185   FuncInfo->setUsesTOCBasePtr();
3186 }
3187 
3188 static void setUsesTOCBasePtr(SelectionDAG &DAG) {
3189   setUsesTOCBasePtr(DAG.getMachineFunction());
3190 }
3191 
3192 SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3193                                        SDValue GA) const {
3194   EVT VT = Subtarget.getScalarIntVT();
3195   SDValue Reg = Subtarget.isPPC64() ? DAG.getRegister(PPC::X2, VT)
3196                 : Subtarget.isAIXABI()
3197                     ? DAG.getRegister(PPC::R2, VT)
3198                     : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
3199   SDValue Ops[] = { GA, Reg };
3200   return DAG.getMemIntrinsicNode(
3201       PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
3202       MachinePointerInfo::getGOT(DAG.getMachineFunction()), std::nullopt,
3203       MachineMemOperand::MOLoad);
3204 }
3205 
3206 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3207                                              SelectionDAG &DAG) const {
3208   EVT PtrVT = Op.getValueType();
3209   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3210   const Constant *C = CP->getConstVal();
3211 
3212   // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3213   // The actual address of the GlobalValue is stored in the TOC.
3214   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3215     if (Subtarget.isUsingPCRelativeCalls()) {
3216       SDLoc DL(CP);
3217       EVT Ty = getPointerTy(DAG.getDataLayout());
3218       SDValue ConstPool = DAG.getTargetConstantPool(
3219           C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3220       return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3221     }
3222     setUsesTOCBasePtr(DAG);
3223     SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3224     return getTOCEntry(DAG, SDLoc(CP), GA);
3225   }
3226 
3227   unsigned MOHiFlag, MOLoFlag;
3228   bool IsPIC = isPositionIndependent();
3229   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3230 
3231   if (IsPIC && Subtarget.isSVR4ABI()) {
3232     SDValue GA =
3233         DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
3234     return getTOCEntry(DAG, SDLoc(CP), GA);
3235   }
3236 
3237   SDValue CPIHi =
3238       DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3239   SDValue CPILo =
3240       DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3241   return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3242 }
3243 
3244 // For 64-bit PowerPC, prefer the more compact relative encodings.
3245 // This trades 32 bits per jump table entry for one or two instructions
3246 // on the jump site.
3247 unsigned PPCTargetLowering::getJumpTableEncoding() const {
3248   if (isJumpTableRelative())
3249     return MachineJumpTableInfo::EK_LabelDifference32;
3250 
3251   return TargetLowering::getJumpTableEncoding();
3252 }
3253 
3254 bool PPCTargetLowering::isJumpTableRelative() const {
3255   if (UseAbsoluteJumpTables)
3256     return false;
3257   if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3258     return true;
3259   return TargetLowering::isJumpTableRelative();
3260 }
3261 
3262 SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3263                                                     SelectionDAG &DAG) const {
3264   if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3265     return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3266 
3267   switch (getTargetMachine().getCodeModel()) {
3268   case CodeModel::Small:
3269   case CodeModel::Medium:
3270     return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3271   default:
3272     return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3273                        getPointerTy(DAG.getDataLayout()));
3274   }
3275 }
3276 
3277 const MCExpr *
3278 PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3279                                                 unsigned JTI,
3280                                                 MCContext &Ctx) const {
3281   if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3282     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3283 
3284   switch (getTargetMachine().getCodeModel()) {
3285   case CodeModel::Small:
3286   case CodeModel::Medium:
3287     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3288   default:
3289     return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3290   }
3291 }
3292 
3293 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3294   EVT PtrVT = Op.getValueType();
3295   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3296 
3297   // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3298   if (Subtarget.isUsingPCRelativeCalls()) {
3299     SDLoc DL(JT);
3300     EVT Ty = getPointerTy(DAG.getDataLayout());
3301     SDValue GA =
3302         DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3303     SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3304     return MatAddr;
3305   }
3306 
3307   // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3308   // The actual address of the GlobalValue is stored in the TOC.
3309   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3310     setUsesTOCBasePtr(DAG);
3311     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3312     return getTOCEntry(DAG, SDLoc(JT), GA);
3313   }
3314 
3315   unsigned MOHiFlag, MOLoFlag;
3316   bool IsPIC = isPositionIndependent();
3317   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3318 
3319   if (IsPIC && Subtarget.isSVR4ABI()) {
3320     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3321                                         PPCII::MO_PIC_FLAG);
3322     return getTOCEntry(DAG, SDLoc(GA), GA);
3323   }
3324 
3325   SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3326   SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3327   return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3328 }
3329 
3330 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3331                                              SelectionDAG &DAG) const {
3332   EVT PtrVT = Op.getValueType();
3333   BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3334   const BlockAddress *BA = BASDN->getBlockAddress();
3335 
3336   // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3337   if (Subtarget.isUsingPCRelativeCalls()) {
3338     SDLoc DL(BASDN);
3339     EVT Ty = getPointerTy(DAG.getDataLayout());
3340     SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3341                                            PPCII::MO_PCREL_FLAG);
3342     SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3343     return MatAddr;
3344   }
3345 
3346   // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3347   // The actual BlockAddress is stored in the TOC.
3348   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3349     setUsesTOCBasePtr(DAG);
3350     SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3351     return getTOCEntry(DAG, SDLoc(BASDN), GA);
3352   }
3353 
3354   // 32-bit position-independent ELF stores the BlockAddress in the .got.
3355   if (Subtarget.is32BitELFABI() && isPositionIndependent())
3356     return getTOCEntry(
3357         DAG, SDLoc(BASDN),
3358         DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3359 
3360   unsigned MOHiFlag, MOLoFlag;
3361   bool IsPIC = isPositionIndependent();
3362   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3363   SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3364   SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3365   return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3366 }
3367 
3368 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3369                                               SelectionDAG &DAG) const {
3370   if (Subtarget.isAIXABI())
3371     return LowerGlobalTLSAddressAIX(Op, DAG);
3372 
3373   return LowerGlobalTLSAddressLinux(Op, DAG);
3374 }
3375 
3376 /// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
3377 /// and then apply the update.
3378 static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model,
3379                                          SelectionDAG &DAG,
3380                                          const TargetMachine &TM) {
3381   // Initialize TLS model opt setting lazily:
3382   // (1) Use initial-exec for single TLS var references within current function.
3383   // (2) Use local-dynamic for multiple TLS var references within current
3384   // function.
3385   PPCFunctionInfo *FuncInfo =
3386       DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
3387   if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
3388     SmallPtrSet<const GlobalValue *, 8> TLSGV;
3389     // Iterate over all instructions within current function, collect all TLS
3390     // global variables (global variables taken as the first parameter to
3391     // Intrinsic::threadlocal_address).
3392     const Function &Func = DAG.getMachineFunction().getFunction();
3393     for (const BasicBlock &BB : Func)
3394       for (const Instruction &I : BB)
3395         if (I.getOpcode() == Instruction::Call)
3396           if (const CallInst *CI = dyn_cast<const CallInst>(&I))
3397             if (Function *CF = CI->getCalledFunction())
3398               if (CF->isDeclaration() &&
3399                   CF->getIntrinsicID() == Intrinsic::threadlocal_address)
3400                 if (const GlobalValue *GV =
3401                         dyn_cast<GlobalValue>(I.getOperand(0))) {
3402                   TLSModel::Model GVModel = TM.getTLSModel(GV);
3403                   if (GVModel == TLSModel::LocalDynamic)
3404                     TLSGV.insert(GV);
3405                 }
3406 
3407     unsigned TLSGVCnt = TLSGV.size();
3408     LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
3409     if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
3410       FuncInfo->setAIXFuncUseTLSIEForLD();
3411     FuncInfo->setAIXFuncTLSModelOptInitDone();
3412   }
3413 
3414   if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
3415     LLVM_DEBUG(
3416         dbgs() << DAG.getMachineFunction().getName()
3417                << " function is using the TLS-IE model for TLS-LD access.\n");
3418     Model = TLSModel::InitialExec;
3419   }
3420 }
3421 
3422 SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3423                                                     SelectionDAG &DAG) const {
3424   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3425 
3426   if (DAG.getTarget().useEmulatedTLS())
3427     report_fatal_error("Emulated TLS is not yet supported on AIX");
3428 
3429   SDLoc dl(GA);
3430   const GlobalValue *GV = GA->getGlobal();
3431   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3432   bool Is64Bit = Subtarget.isPPC64();
3433   TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
3434 
3435   // Apply update to the TLS model.
3436   if (Subtarget.hasAIXShLibTLSModelOpt())
3437     updateForAIXShLibTLSModelOpt(Model, DAG, getTargetMachine());
3438 
3439   // TLS variables are accessed through TOC entries.
3440   // To support this, set the DAG to use the TOC base pointer.
3441   setUsesTOCBasePtr(DAG);
3442 
3443   bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
3444 
3445   if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
3446     bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
3447     bool HasAIXSmallTLSGlobalAttr = false;
3448     SDValue VariableOffsetTGA =
3449         DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
3450     SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3451     SDValue TLSReg;
3452 
3453     if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
3454       if (GVar->hasAttribute("aix-small-tls"))
3455         HasAIXSmallTLSGlobalAttr = true;
3456 
3457     if (Is64Bit) {
3458       // For local-exec and initial-exec on AIX (64-bit), the sequence generated
3459       // involves a load of the variable offset (from the TOC), followed by an
3460       // add of the loaded variable offset to R13 (the thread pointer).
3461       // This code sequence looks like:
3462       //    ld reg1,var[TC](2)
3463       //    add reg2, reg1, r13     // r13 contains the thread pointer
3464       TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3465 
3466       // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
3467       // global variable attribute, produce a faster access sequence for
3468       // local-exec TLS variables where the offset from the TLS base is encoded
3469       // as an immediate operand.
3470       //
3471       // We only utilize the faster local-exec access sequence when the TLS
3472       // variable has a size within the policy limit. We treat types that are
3473       // not sized or are empty as being over the policy size limit.
3474       if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
3475           IsTLSLocalExecModel) {
3476         Type *GVType = GV->getValueType();
3477         if (GVType->isSized() && !GVType->isEmptyTy() &&
3478             GV->getDataLayout().getTypeAllocSize(GVType) <=
3479                 AIXSmallTlsPolicySizeLimit)
3480           return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
3481       }
3482     } else {
3483       // For local-exec and initial-exec on AIX (32-bit), the sequence generated
3484       // involves loading the variable offset from the TOC, generating a call to
3485       // .__get_tpointer to get the thread pointer (which will be in R3), and
3486       // adding the two together:
3487       //    lwz reg1,var[TC](2)
3488       //    bla .__get_tpointer
3489       //    add reg2, reg1, r3
3490       TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
3491 
3492       // We do not implement the 32-bit version of the faster access sequence
3493       // for local-exec that is controlled by the -maix-small-local-exec-tls
3494       // option, or the "aix-small-tls" global variable attribute.
3495       if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
3496         report_fatal_error("The small-local-exec TLS access sequence is "
3497                            "currently only supported on AIX (64-bit mode).");
3498     }
3499     return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
3500   }
3501 
3502   if (Model == TLSModel::LocalDynamic) {
3503     bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
3504 
3505     // We do not implement the 32-bit version of the faster access sequence
3506     // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
3507     if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
3508       report_fatal_error("The small-local-dynamic TLS access sequence is "
3509                          "currently only supported on AIX (64-bit mode).");
3510 
3511     // For local-dynamic on AIX, we need to generate one TOC entry for each
3512     // variable offset, and a single module-handle TOC entry for the entire
3513     // file.
3514 
3515     SDValue VariableOffsetTGA =
3516         DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
3517     SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3518 
3519     Module *M = DAG.getMachineFunction().getFunction().getParent();
3520     GlobalVariable *TLSGV =
3521         dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
3522             StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
3523     assert(TLSGV && "Not able to create GV for _$TLSML.");
3524     TLSGV->setThreadLocalMode(GlobalVariable::LocalDynamicTLSModel);
3525     SDValue ModuleHandleTGA =
3526         DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);
3527     SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
3528     SDValue ModuleHandle =
3529         DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
3530 
3531     // With the -maix-small-local-dynamic-tls option, produce a faster access
3532     // sequence for local-dynamic TLS variables where the offset from the
3533     // module-handle is encoded as an immediate operand.
3534     //
3535     // We only utilize the faster local-dynamic access sequence when the TLS
3536     // variable has a size within the policy limit. We treat types that are
3537     // not sized or are empty as being over the policy size limit.
3538     if (HasAIXSmallLocalDynamicTLS) {
3539       Type *GVType = GV->getValueType();
3540       if (GVType->isSized() && !GVType->isEmptyTy() &&
3541           GV->getDataLayout().getTypeAllocSize(GVType) <=
3542               AIXSmallTlsPolicySizeLimit)
3543         return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA,
3544                            ModuleHandle);
3545     }
3546 
3547     return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
3548   }
3549 
3550   // If Local- or Initial-exec or Local-dynamic is not possible or specified,
3551   // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
3552   // need to generate two TOC entries, one for the variable offset, one for the
3553   // region handle. The global address for the TOC entry of the region handle is
3554   // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
3555   // entry of the variable offset is created with MO_TLSGD_FLAG.
3556   SDValue VariableOffsetTGA =
3557       DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3558   SDValue RegionHandleTGA =
3559       DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3560   SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3561   SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3562   return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3563                      RegionHandle);
3564 }
3565 
3566 SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3567                                                       SelectionDAG &DAG) const {
3568   // FIXME: TLS addresses currently use medium model code sequences,
3569   // which is the most useful form.  Eventually support for small and
3570   // large models could be added if users need it, at the cost of
3571   // additional complexity.
3572   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3573   if (DAG.getTarget().useEmulatedTLS())
3574     return LowerToTLSEmulatedModel(GA, DAG);
3575 
3576   SDLoc dl(GA);
3577   const GlobalValue *GV = GA->getGlobal();
3578   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3579   bool is64bit = Subtarget.isPPC64();
3580   const Module *M = DAG.getMachineFunction().getFunction().getParent();
3581   PICLevel::Level picLevel = M->getPICLevel();
3582 
3583   const TargetMachine &TM = getTargetMachine();
3584   TLSModel::Model Model = TM.getTLSModel(GV);
3585 
3586   if (Model == TLSModel::LocalExec) {
3587     if (Subtarget.isUsingPCRelativeCalls()) {
3588       SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3589       SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3590                                                PPCII::MO_TPREL_PCREL_FLAG);
3591       SDValue MatAddr =
3592           DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3593       return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3594     }
3595 
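3596     // Rough sketch of the non-PC-relative local-exec sequence on 64-bit
3596     // targets (illustration only):
3596     //   addis reg, r13, gv@tprel@ha
3596     //   addi  reg, reg, gv@tprel@l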
3596     SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3597                                                PPCII::MO_TPREL_HA);
3598     SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3599                                                PPCII::MO_TPREL_LO);
3600     SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3601                              : DAG.getRegister(PPC::R2, MVT::i32);
3602 
3603     SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3604     return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3605   }
3606 
3607   if (Model == TLSModel::InitialExec) {
3608     bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3609     SDValue TGA = DAG.getTargetGlobalAddress(
3610         GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3611     SDValue TGATLS = DAG.getTargetGlobalAddress(
3612         GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
3613     SDValue TPOffset;
3614     if (IsPCRel) {
3615       SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3616       TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3617                              MachinePointerInfo());
3618     } else {
3619       SDValue GOTPtr;
3620       if (is64bit) {
3621         setUsesTOCBasePtr(DAG);
3622         SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3623         GOTPtr =
3624             DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3625       } else {
3626         if (!TM.isPositionIndependent())
3627           GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3628         else if (picLevel == PICLevel::SmallPIC)
3629           GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3630         else
3631           GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3632       }
3633       TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3634     }
3635     return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3636   }
3637 
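       // Illustrative sketch: the classic 64-bit general-dynamic access calls
       // __tls_get_addr on the variable's GOT slot, roughly
       //   addis r3, r2, sym@got@tlsgd@ha
       //   addi  r3, r3, sym@got@tlsgd@l
       //   bl    __tls_get_addr(sym@tlsgd)
       //   nop
       // ADDI_TLSGD_L_ADDR below keeps the addi and the call together so they
       // can be expanded as a unit later.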
3638   if (Model == TLSModel::GeneralDynamic) {
3639     if (Subtarget.isUsingPCRelativeCalls()) {
3640       SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3641                                                PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3642       return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3643     }
3644 
3645     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3646     SDValue GOTPtr;
3647     if (is64bit) {
3648       setUsesTOCBasePtr(DAG);
3649       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3650       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3651                                    GOTReg, TGA);
3652     } else {
3653       if (picLevel == PICLevel::SmallPIC)
3654         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3655       else
3656         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3657     }
3658     return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3659                        GOTPtr, TGA, TGA);
3660   }
3661 
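       // Illustrative sketch: the classic 64-bit local-dynamic access makes one
       // __tls_get_addr call for the module and then adds the variable's
       // DTP-relative offset, roughly
       //   addis r3, r2, sym@got@tlsld@ha
       //   addi  r3, r3, sym@got@tlsld@l
       //   bl    __tls_get_addr(sym@tlsld)
       //   nop
       //   addis rX, r3, sym@dtprel@ha
       //   addi  rY, rX, sym@dtprel@l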
3662   if (Model == TLSModel::LocalDynamic) {
3663     if (Subtarget.isUsingPCRelativeCalls()) {
3664       SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3665                                                PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3666       SDValue MatPCRel =
3667           DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3668       return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3669     }
3670 
3671     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3672     SDValue GOTPtr;
3673     if (is64bit) {
3674       setUsesTOCBasePtr(DAG);
3675       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3676       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3677                            GOTReg, TGA);
3678     } else {
3679       if (picLevel == PICLevel::SmallPIC)
3680         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3681       else
3682         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3683     }
3684     SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3685                                   PtrVT, GOTPtr, TGA, TGA);
3686     SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3687                                       PtrVT, TLSAddr, TGA);
3688     return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3689   }
3690 
3691   llvm_unreachable("Unknown TLS model!");
3692 }
3693 
3694 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3695                                               SelectionDAG &DAG) const {
3696   EVT PtrVT = Op.getValueType();
3697   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3698   SDLoc DL(GSDN);
3699   const GlobalValue *GV = GSDN->getGlobal();
3700 
3701   // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3702   // The actual address of the GlobalValue is stored in the TOC.
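       // Under the TOC-based ABIs this produces a load of the global's address
       // from its TOC entry; with PC-relative addressing the address is
       // materialized relative to the PC instead, going through the GOT only
       // when isAccessedAsGotIndirect() says the access must be indirect.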
3703   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3704     if (Subtarget.isUsingPCRelativeCalls()) {
3705       EVT Ty = getPointerTy(DAG.getDataLayout());
3706       if (isAccessedAsGotIndirect(Op)) {
3707         SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3708                                                 PPCII::MO_GOT_PCREL_FLAG);
3709         SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3710         SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3711                                    MachinePointerInfo());
3712         return Load;
3713       } else {
3714         SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3715                                                 PPCII::MO_PCREL_FLAG);
3716         return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3717       }
3718     }
3719     setUsesTOCBasePtr(DAG);
3720     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3721     return getTOCEntry(DAG, DL, GA);
3722   }
3723 
3724   unsigned MOHiFlag, MOLoFlag;
3725   bool IsPIC = isPositionIndependent();
3726   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3727 
3728   if (IsPIC && Subtarget.isSVR4ABI()) {
3729     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3730                                             GSDN->getOffset(),
3731                                             PPCII::MO_PIC_FLAG);
3732     return getTOCEntry(DAG, DL, GA);
3733   }
3734 
3735   SDValue GAHi =
3736     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3737   SDValue GALo =
3738     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3739 
3740   return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3741 }
3742 
3743 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3744   bool IsStrict = Op->isStrictFPOpcode();
3745   ISD::CondCode CC =
3746       cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3747   SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3748   SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3749   SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3750   EVT LHSVT = LHS.getValueType();
3751   SDLoc dl(Op);
3752 
3753   // Soften the setcc with a libcall if it is fp128.
3754   if (LHSVT == MVT::f128) {
3755     assert(!Subtarget.hasP9Vector() &&
3756            "SETCC for f128 is already legal under Power9!");
3757     softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3758                         Op->getOpcode() == ISD::STRICT_FSETCCS);
3759     if (RHS.getNode())
3760       LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3761                         DAG.getCondCode(CC));
3762     if (IsStrict)
3763       return DAG.getMergeValues({LHS, Chain}, dl);
3764     return LHS;
3765   }
3766 
3767   assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3768 
3769   if (Op.getValueType() == MVT::v2i64) {
3770     // When the operands themselves are v2i64 values, we need to do something
3771     // special because VSX has no underlying comparison operations for these.
3772     if (LHS.getValueType() == MVT::v2i64) {
3773       // Equality can be handled by casting to the legal type for Altivec
3774       // comparisons, everything else needs to be expanded.
3775       if (CC != ISD::SETEQ && CC != ISD::SETNE)
3776         return SDValue();
3777       SDValue SetCC32 = DAG.getSetCC(
3778           dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3779           DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
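           // Swap the 32-bit halves within each 64-bit lane; ANDing (for SETEQ)
           // or ORing (for SETNE) the swapped result with the original then
           // combines the two half-comparisons into a full 64-bit result.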
3780       int ShuffV[] = {1, 0, 3, 2};
3781       SDValue Shuff =
3782           DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
3783       return DAG.getBitcast(MVT::v2i64,
3784                             DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3785                                         dl, MVT::v4i32, Shuff, SetCC32));
3786     }
3787 
3788     // We handle most of these in the usual way.
3789     return Op;
3790   }
3791 
3792   // If we're comparing for equality to zero, expose the fact that this is
3793   // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3794   // fold the new nodes.
3795   if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3796     return V;
3797 
3798   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3799     // Leave comparisons against 0 and -1 alone for now, since they're usually
3800     // optimized.  FIXME: revisit this when we can custom lower all setcc
3801     // optimizations.
3802     if (C->isAllOnes() || C->isZero())
3803       return SDValue();
3804   }
3805 
3806   // If we have an integer seteq/setne, turn it into a compare against zero
3807   // by xor'ing the rhs with the lhs, which is faster than setting a
3808   // condition register, reading it back out, and masking the correct bit.  The
3809   // usual lowering would use sub for this; we use xor instead because it
3810   // exposes the result to other bit-twiddling opportunities.
3811   if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3812     EVT VT = Op.getValueType();
3813     SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3814     return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3815   }
3816   return SDValue();
3817 }
3818 
3819 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3820   SDNode *Node = Op.getNode();
3821   EVT VT = Node->getValueType(0);
3822   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3823   SDValue InChain = Node->getOperand(0);
3824   SDValue VAListPtr = Node->getOperand(1);
3825   const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3826   SDLoc dl(Node);
3827 
3828   assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3829 
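       // Offsets below follow the 32-bit SVR4 va_list layout (also sketched in
       // LowerVASTART):
       //   byte 0: gpr index        byte 1: fpr index
       //   byte 4: overflow_arg_area pointer
       //   byte 8: reg_save_area pointer (r3-r10 first, f1-f8 at offset 32)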
3830   // gpr_index
3831   SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3832                                     VAListPtr, MachinePointerInfo(SV), MVT::i8);
3833   InChain = GprIndex.getValue(1);
3834 
3835   if (VT == MVT::i64) {
3836     // Check if GprIndex is odd (i64 arguments must start at an even index)
3837     SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3838                                  DAG.getConstant(1, dl, MVT::i32));
3839     SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3840                                 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3841     SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3842                                           DAG.getConstant(1, dl, MVT::i32));
3843     // Align GprIndex to be even if it isn't
3844     GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3845                            GprIndex);
3846   }
3847 
3848   // fpr index is 1 byte after gpr
3849   SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3850                                DAG.getConstant(1, dl, MVT::i32));
3851 
3852   // fpr
3853   SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3854                                     FprPtr, MachinePointerInfo(SV), MVT::i8);
3855   InChain = FprIndex.getValue(1);
3856 
3857   SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3858                                        DAG.getConstant(8, dl, MVT::i32));
3859 
3860   SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3861                                         DAG.getConstant(4, dl, MVT::i32));
3862 
3863   // areas
3864   SDValue OverflowArea =
3865       DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3866   InChain = OverflowArea.getValue(1);
3867 
3868   SDValue RegSaveArea =
3869       DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3870   InChain = RegSaveArea.getValue(1);
3871 
3872   // select overflow_area if index > 8
3873   SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3874                             DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3875 
3876   // adjustment constant gpr_index * 4/8
3877   SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3878                                     VT.isInteger() ? GprIndex : FprIndex,
3879                                     DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3880                                                     MVT::i32));
3881 
3882   // OurReg = RegSaveArea + RegConstant
3883   SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3884                                RegConstant);
3885 
3886   // Floating types are 32 bytes into RegSaveArea
3887   if (VT.isFloatingPoint())
3888     OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3889                          DAG.getConstant(32, dl, MVT::i32));
3890 
3891   // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3892   SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3893                                    VT.isInteger() ? GprIndex : FprIndex,
3894                                    DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3895                                                    MVT::i32));
3896 
3897   InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3898                               VT.isInteger() ? VAListPtr : FprPtr,
3899                               MachinePointerInfo(SV), MVT::i8);
3900 
3901   // determine if we should load from reg_save_area or overflow_area
3902   SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3903 
3904   // increase overflow_area by 4/8 if gpr/fpr > 8
3905   SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3906                                           DAG.getConstant(VT.isInteger() ? 4 : 8,
3907                                           dl, MVT::i32));
3908 
3909   OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3910                              OverflowAreaPlusN);
3911 
3912   InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3913                               MachinePointerInfo(), MVT::i32);
3914 
3915   return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3916 }
3917 
3918 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3919   assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3920 
3921   // We have to copy the entire va_list struct:
3922   // 2*sizeof(char) + 2 bytes of padding + 2*sizeof(char*) = 12 bytes
3923   return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3924                        DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3925                        false, true, /*CI=*/nullptr, std::nullopt,
3926                        MachinePointerInfo(), MachinePointerInfo());
3927 }
3928 
3929 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3930                                                   SelectionDAG &DAG) const {
3931   if (Subtarget.isAIXABI())
3932     report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3933 
3934   return Op.getOperand(0);
3935 }
3936 
3937 SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3938   MachineFunction &MF = DAG.getMachineFunction();
3939   PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3940 
3941   assert((Op.getOpcode() == ISD::INLINEASM ||
3942           Op.getOpcode() == ISD::INLINEASM_BR) &&
3943          "Expecting Inline ASM node.");
3944 
3945   // If an LR store is already known to be required then there is no point in
3946   // checking this ASM as well.
3947   if (MFI.isLRStoreRequired())
3948     return Op;
3949 
3950   // Inline ASM nodes have an optional last operand that is an incoming Flag of
3951   // type MVT::Glue. We want to ignore this last operand if that is the case.
3952   unsigned NumOps = Op.getNumOperands();
3953   if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3954     --NumOps;
3955 
3956   // Check all operands that may contain the LR.
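       // Each group is a flag word encoding the operand kind and register
       // count, followed by that many operands; walk the list group by group.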
3957   for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3958     const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
3959     unsigned NumVals = Flags.getNumOperandRegisters();
3960     ++i; // Skip the ID value.
3961 
3962     switch (Flags.getKind()) {
3963     default:
3964       llvm_unreachable("Bad flags!");
3965     case InlineAsm::Kind::RegUse:
3966     case InlineAsm::Kind::Imm:
3967     case InlineAsm::Kind::Mem:
3968       i += NumVals;
3969       break;
3970     case InlineAsm::Kind::Clobber:
3971     case InlineAsm::Kind::RegDef:
3972     case InlineAsm::Kind::RegDefEarlyClobber: {
3973       for (; NumVals; --NumVals, ++i) {
3974         Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3975         if (Reg != PPC::LR && Reg != PPC::LR8)
3976           continue;
3977         MFI.setLRStoreRequired();
3978         return Op;
3979       }
3980       break;
3981     }
3982     }
3983   }
3984 
3985   return Op;
3986 }
3987 
3988 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3989                                                 SelectionDAG &DAG) const {
3990   if (Subtarget.isAIXABI())
3991     report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3992 
3993   SDValue Chain = Op.getOperand(0);
3994   SDValue Trmp = Op.getOperand(1); // trampoline
3995   SDValue FPtr = Op.getOperand(2); // nested function
3996   SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3997   SDLoc dl(Op);
3998 
3999   EVT PtrVT = getPointerTy(DAG.getDataLayout());
4000   bool isPPC64 = (PtrVT == MVT::i64);
4001   Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
4002 
4003   TargetLowering::ArgListTy Args;
4004   TargetLowering::ArgListEntry Entry;
4005 
4006   Entry.Ty = IntPtrTy;
4007   Entry.Node = Trmp; Args.push_back(Entry);
4008 
4009   // TrampSize == (isPPC64 ? 48 : 40);
4010   Entry.Node =
4011       DAG.getConstant(isPPC64 ? 48 : 40, dl, Subtarget.getScalarIntVT());
4012   Args.push_back(Entry);
4013 
4014   Entry.Node = FPtr; Args.push_back(Entry);
4015   Entry.Node = Nest; Args.push_back(Entry);
4016 
4017   // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
4018   TargetLowering::CallLoweringInfo CLI(DAG);
4019   CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
4020       CallingConv::C, Type::getVoidTy(*DAG.getContext()),
4021       DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
4022 
4023   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
4024   return CallResult.second;
4025 }
4026 
4027 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
4028   MachineFunction &MF = DAG.getMachineFunction();
4029   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4030   EVT PtrVT = getPointerTy(MF.getDataLayout());
4031 
4032   SDLoc dl(Op);
4033 
4034   if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
4035     // vastart just stores the address of the VarArgsFrameIndex slot into the
4036     // memory location argument.
4037     SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4038     const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4039     return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
4040                         MachinePointerInfo(SV));
4041   }
4042 
4043   // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
4044   // We assume the given va_list has already been allocated.
4045   //
4046   // typedef struct {
4047   //  char gpr;     /* index into the array of 8 GPRs
4048   //                 * stored in the register save area
4049   //                 * gpr=0 corresponds to r3,
4050   //                 * gpr=1 to r4, etc.
4051   //                 */
4052   //  char fpr;     /* index into the array of 8 FPRs
4053   //                 * stored in the register save area
4054   //                 * fpr=0 corresponds to f1,
4055   //                 * fpr=1 to f2, etc.
4056   //                 */
4057   //  char *overflow_arg_area;
4058   //                /* location on stack that holds
4059   //                 * the next overflow argument
4060   //                 */
4061   //  char *reg_save_area;
4062   //               /* where r3:r10 and f1:f8 (if saved)
4063   //                * are stored
4064   //                */
4065   // } va_list[1];
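       // With 32-bit pointers this gives byte offsets 0 (gpr), 1 (fpr),
       // 4 (overflow_arg_area) and 8 (reg_save_area), which is what the
       // FPROffset/StackOffset/FrameOffset increments below produce.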
4066 
4067   SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
4068   SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
4069   SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
4070                                             PtrVT);
4071   SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
4072                                  PtrVT);
4073 
4074   uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
4075   SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
4076 
4077   uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
4078   SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
4079 
4080   uint64_t FPROffset = 1;
4081   SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
4082 
4083   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4084 
4085   // Store first byte: number of int regs
4086   SDValue firstStore =
4087       DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
4088                         MachinePointerInfo(SV), MVT::i8);
4089   uint64_t nextOffset = FPROffset;
4090   SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
4091                                   ConstFPROffset);
4092 
4093   // Store second byte: number of float regs
4094   SDValue secondStore =
4095       DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
4096                         MachinePointerInfo(SV, nextOffset), MVT::i8);
4097   nextOffset += StackOffset;
4098   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
4099 
4100   // Store second word: pointer to the overflow (stack) argument area
4101   SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
4102                                     MachinePointerInfo(SV, nextOffset));
4103   nextOffset += FrameOffset;
4104   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
4105 
4106   // Store third word: pointer to the register save area
4107   return DAG.getStore(thirdStore, dl, FR, nextPtr,
4108                       MachinePointerInfo(SV, nextOffset));
4109 }
4110 
4111 /// FPR - The set of FP registers that should be allocated for arguments
4112 /// on Darwin and AIX.
4113 static const MCPhysReg FPR[] = {PPC::F1,  PPC::F2,  PPC::F3, PPC::F4, PPC::F5,
4114                                 PPC::F6,  PPC::F7,  PPC::F8, PPC::F9, PPC::F10,
4115                                 PPC::F11, PPC::F12, PPC::F13};
4116 
4117 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
4118 /// the stack.
4119 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
4120                                        unsigned PtrByteSize) {
4121   unsigned ArgSize = ArgVT.getStoreSize();
4122   if (Flags.isByVal())
4123     ArgSize = Flags.getByValSize();
4124 
4125   // Round up to multiples of the pointer size, except for array members,
4126   // which are always packed.
4127   if (!Flags.isInConsecutiveRegs())
4128     ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4129 
4130   return ArgSize;
4131 }
4132 
4133 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
4134 /// on the stack.
4135 static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
4136                                          ISD::ArgFlagsTy Flags,
4137                                          unsigned PtrByteSize) {
4138   Align Alignment(PtrByteSize);
4139 
4140   // Altivec parameters are padded to a 16 byte boundary.
4141   if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4142       ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4143       ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4144       ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4145     Alignment = Align(16);
4146 
4147   // ByVal parameters are aligned as requested.
4148   if (Flags.isByVal()) {
4149     auto BVAlign = Flags.getNonZeroByValAlign();
4150     if (BVAlign > PtrByteSize) {
4151       if (BVAlign.value() % PtrByteSize != 0)
4152         llvm_unreachable(
4153             "ByVal alignment is not a multiple of the pointer size");
4154 
4155       Alignment = BVAlign;
4156     }
4157   }
4158 
4159   // Array members are always packed to their original alignment.
4160   if (Flags.isInConsecutiveRegs()) {
4161     // If the array member was split into multiple registers, the first
4162     // needs to be aligned to the size of the full type.  (Except for
4163     // ppcf128, which is only aligned as its f64 components.)
4164     if (Flags.isSplit() && OrigVT != MVT::ppcf128)
4165       Alignment = Align(OrigVT.getStoreSize());
4166     else
4167       Alignment = Align(ArgVT.getStoreSize());
4168   }
4169 
4170   return Alignment;
4171 }
4172 
4173 /// CalculateStackSlotUsed - Return whether this argument will use its
4174 /// stack slot (instead of being passed in registers).  ArgOffset,
4175 /// AvailableFPRs, and AvailableVRs must hold the current argument
4176 /// position, and will be updated to account for this argument.
4177 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
4178                                    unsigned PtrByteSize, unsigned LinkageSize,
4179                                    unsigned ParamAreaSize, unsigned &ArgOffset,
4180                                    unsigned &AvailableFPRs,
4181                                    unsigned &AvailableVRs) {
4182   bool UseMemory = false;
4183 
4184   // Respect alignment of argument on the stack.
4185   Align Alignment =
4186       CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4187   ArgOffset = alignTo(ArgOffset, Alignment);
4188   // If there's no space left in the argument save area, we must
4189   // use memory (this check also catches zero-sized arguments).
4190   if (ArgOffset >= LinkageSize + ParamAreaSize)
4191     UseMemory = true;
4192 
4193   // Allocate argument on the stack.
4194   ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4195   if (Flags.isInConsecutiveRegsLast())
4196     ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4197   // If we overran the argument save area, we must use memory
4198   // (this check catches arguments passed partially in memory)
4199   if (ArgOffset > LinkageSize + ParamAreaSize)
4200     UseMemory = true;
4201 
4202   // However, if the argument is actually passed in an FPR or a VR,
4203   // we don't use memory after all.
4204   if (!Flags.isByVal()) {
4205     if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
4206       if (AvailableFPRs > 0) {
4207         --AvailableFPRs;
4208         return false;
4209       }
4210     if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4211         ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4212         ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4213         ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4214       if (AvailableVRs > 0) {
4215         --AvailableVRs;
4216         return false;
4217       }
4218   }
4219 
4220   return UseMemory;
4221 }
4222 
4223 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
4224 /// ensure the minimum alignment required for the target.
4225 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
4226                                      unsigned NumBytes) {
4227   return alignTo(NumBytes, Lowering->getStackAlign());
4228 }
4229 
4230 SDValue PPCTargetLowering::LowerFormalArguments(
4231     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4232     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4233     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4234   if (Subtarget.isAIXABI())
4235     return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
4236                                     InVals);
4237   if (Subtarget.is64BitELFABI())
4238     return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4239                                        InVals);
4240   assert(Subtarget.is32BitELFABI());
4241   return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4242                                      InVals);
4243 }
4244 
4245 SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4246     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4247     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4248     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4249 
4250   // 32-bit SVR4 ABI Stack Frame Layout:
4251   //              +-----------------------------------+
4252   //        +-->  |            Back chain             |
4253   //        |     +-----------------------------------+
4254   //        |     | Floating-point register save area |
4255   //        |     +-----------------------------------+
4256   //        |     |    General register save area     |
4257   //        |     +-----------------------------------+
4258   //        |     |          CR save word             |
4259   //        |     +-----------------------------------+
4260   //        |     |         VRSAVE save word          |
4261   //        |     +-----------------------------------+
4262   //        |     |         Alignment padding         |
4263   //        |     +-----------------------------------+
4264   //        |     |     Vector register save area     |
4265   //        |     +-----------------------------------+
4266   //        |     |       Local variable space        |
4267   //        |     +-----------------------------------+
4268   //        |     |        Parameter list area        |
4269   //        |     +-----------------------------------+
4270   //        |     |           LR save word            |
4271   //        |     +-----------------------------------+
4272   // SP-->  +---  |            Back chain             |
4273   //              +-----------------------------------+
4274   //
4275   // Specifications:
4276   //   System V Application Binary Interface PowerPC Processor Supplement
4277   //   AltiVec Technology Programming Interface Manual
4278 
4279   MachineFunction &MF = DAG.getMachineFunction();
4280   MachineFrameInfo &MFI = MF.getFrameInfo();
4281   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4282 
4283   EVT PtrVT = getPointerTy(MF.getDataLayout());
4284   // Potential tail calls could cause overwriting of argument stack slots.
4285   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4286                        (CallConv == CallingConv::Fast));
4287   const Align PtrAlign(4);
4288 
4289   // Assign locations to all of the incoming arguments.
4290   SmallVector<CCValAssign, 16> ArgLocs;
4291   PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4292                  *DAG.getContext());
4293 
4294   // Reserve space for the linkage area on the stack.
4295   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4296   CCInfo.AllocateStack(LinkageSize, PtrAlign);
4297   if (useSoftFloat())
4298     CCInfo.PreAnalyzeFormalArguments(Ins);
4299 
4300   CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4301   CCInfo.clearWasPPCF128();
4302 
4303   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4304     CCValAssign &VA = ArgLocs[i];
4305 
4306     // Arguments stored in registers.
4307     if (VA.isRegLoc()) {
4308       const TargetRegisterClass *RC;
4309       EVT ValVT = VA.getValVT();
4310 
4311       switch (ValVT.getSimpleVT().SimpleTy) {
4312         default:
4313           llvm_unreachable("ValVT not supported by formal arguments Lowering");
4314         case MVT::i1:
4315         case MVT::i32:
4316           RC = &PPC::GPRCRegClass;
4317           break;
4318         case MVT::f32:
4319           if (Subtarget.hasP8Vector())
4320             RC = &PPC::VSSRCRegClass;
4321           else if (Subtarget.hasSPE())
4322             RC = &PPC::GPRCRegClass;
4323           else
4324             RC = &PPC::F4RCRegClass;
4325           break;
4326         case MVT::f64:
4327           if (Subtarget.hasVSX())
4328             RC = &PPC::VSFRCRegClass;
4329           else if (Subtarget.hasSPE())
4330             // SPE passes doubles in GPR pairs.
4331             RC = &PPC::GPRCRegClass;
4332           else
4333             RC = &PPC::F8RCRegClass;
4334           break;
4335         case MVT::v16i8:
4336         case MVT::v8i16:
4337         case MVT::v4i32:
4338           RC = &PPC::VRRCRegClass;
4339           break;
4340         case MVT::v4f32:
4341           RC = &PPC::VRRCRegClass;
4342           break;
4343         case MVT::v2f64:
4344         case MVT::v2i64:
4345           RC = &PPC::VRRCRegClass;
4346           break;
4347       }
4348 
4349       SDValue ArgValue;
4350       // Transform the arguments stored in physical registers into
4351       // virtual ones.
4352       if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4353         assert(i + 1 < e && "No second half of double precision argument");
4354         Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4355         Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4356         SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4357         SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
4358         if (!Subtarget.isLittleEndian())
4359           std::swap (ArgValueLo, ArgValueHi);
4360         ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4361                                ArgValueHi);
4362       } else {
4363         Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4364         ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4365                                       ValVT == MVT::i1 ? MVT::i32 : ValVT);
4366         if (ValVT == MVT::i1)
4367           ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4368       }
4369 
4370       InVals.push_back(ArgValue);
4371     } else {
4372       // Argument stored in memory.
4373       assert(VA.isMemLoc());
4374 
4375       // Get the extended size of the argument type on the stack
4376       unsigned ArgSize = VA.getLocVT().getStoreSize();
4377       // Get the actual size of the argument type
4378       unsigned ObjSize = VA.getValVT().getStoreSize();
4379       unsigned ArgOffset = VA.getLocMemOffset();
4380       // Stack objects in PPC32 are right justified.
4381       ArgOffset += ArgSize - ObjSize;
4382       int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4383 
4384       // Create load nodes to retrieve arguments from the stack.
4385       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4386       InVals.push_back(
4387           DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4388     }
4389   }
4390 
4391   // Assign locations to all of the incoming aggregate by value arguments.
4392   // Aggregates passed by value are stored in the local variable space of the
4393   // caller's stack frame, right above the parameter list area.
4394   SmallVector<CCValAssign, 16> ByValArgLocs;
4395   CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4396                       ByValArgLocs, *DAG.getContext());
4397 
4398   // Reserve stack space for the allocations in CCInfo.
4399   CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
4400 
4401   CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4402 
4403   // Area that is at least reserved in the caller of this function.
4404   unsigned MinReservedArea = CCByValInfo.getStackSize();
4405   MinReservedArea = std::max(MinReservedArea, LinkageSize);
4406 
4407   // Set the size that is at least reserved in caller of this function.  Tail
4408   // call optimized function's reserved stack space needs to be aligned so that
4409   // taking the difference between two stack areas will result in an aligned
4410   // stack.
4411   MinReservedArea =
4412       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4413   FuncInfo->setMinReservedArea(MinReservedArea);
4414 
4415   SmallVector<SDValue, 8> MemOps;
4416 
4417   // If the function takes a variable number of arguments, make a frame index for
4418   // the start of the first vararg value... for expansion of llvm.va_start.
4419   if (isVarArg) {
4420     static const MCPhysReg GPArgRegs[] = {
4421       PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4422       PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4423     };
4424     const unsigned NumGPArgRegs = std::size(GPArgRegs);
4425 
4426     static const MCPhysReg FPArgRegs[] = {
4427       PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4428       PPC::F8
4429     };
4430     unsigned NumFPArgRegs = std::size(FPArgRegs);
4431 
4432     if (useSoftFloat() || hasSPE())
4433        NumFPArgRegs = 0;
4434 
4435     FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4436     FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4437 
4438     // Make room for NumGPArgRegs and NumFPArgRegs.
4439     int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4440                 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4441 
4442     FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject(
4443         PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));
4444 
4445     FuncInfo->setVarArgsFrameIndex(
4446         MFI.CreateStackObject(Depth, Align(8), false));
4447     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4448 
4449     // The fixed integer arguments of a variadic function are stored to the
4450     // VarArgsFrameIndex on the stack so that they may be loaded by
4451     // dereferencing the result of va_next.
4452     for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
4453       // Get an existing live-in vreg, or add a new one.
4454       Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
4455       if (!VReg)
4456         VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
4457 
4458       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4459       SDValue Store =
4460           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4461       MemOps.push_back(Store);
4462       // Increment the address by four for the next argument to store
4463       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4464       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4465     }
4466 
4467     // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4468     // is set.
4469     // The double arguments are stored to the VarArgsFrameIndex
4470     // on the stack.
4471     for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4472       // Get an existing live-in vreg, or add a new one.
4473       Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4474       if (!VReg)
4475         VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4476 
4477       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4478       SDValue Store =
4479           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4480       MemOps.push_back(Store);
4481       // Increment the address by eight for the next argument to store
4482       SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4483                                          PtrVT);
4484       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4485     }
4486   }
4487 
4488   if (!MemOps.empty())
4489     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4490 
4491   return Chain;
4492 }
4493 
4494 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4495 // value to MVT::i64 and then truncate to the correct register size.
4496 SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4497                                              EVT ObjectVT, SelectionDAG &DAG,
4498                                              SDValue ArgVal,
4499                                              const SDLoc &dl) const {
4500   if (Flags.isSExt())
4501     ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4502                          DAG.getValueType(ObjectVT));
4503   else if (Flags.isZExt())
4504     ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4505                          DAG.getValueType(ObjectVT));
4506 
4507   return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4508 }
4509 
4510 SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4511     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4512     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4513     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4514   // TODO: add description of PPC stack frame format, or at least some docs.
4515   //
4516   bool isELFv2ABI = Subtarget.isELFv2ABI();
4517   bool isLittleEndian = Subtarget.isLittleEndian();
4518   MachineFunction &MF = DAG.getMachineFunction();
4519   MachineFrameInfo &MFI = MF.getFrameInfo();
4520   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4521 
4522   assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4523          "fastcc not supported on varargs functions");
4524 
4525   EVT PtrVT = getPointerTy(MF.getDataLayout());
4526   // Potential tail calls could cause overwriting of argument stack slots.
4527   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4528                        (CallConv == CallingConv::Fast));
4529   unsigned PtrByteSize = 8;
4530   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4531 
4532   static const MCPhysReg GPR[] = {
4533     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4534     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4535   };
4536   static const MCPhysReg VR[] = {
4537     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4538     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4539   };
4540 
4541   const unsigned Num_GPR_Regs = std::size(GPR);
4542   const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4543   const unsigned Num_VR_Regs = std::size(VR);
4544 
4545   // Do a first pass over the arguments to determine whether the ABI
4546   // guarantees that our caller has allocated the parameter save area
4547   // on its stack frame.  In the ELFv1 ABI, this is always the case;
4548   // in the ELFv2 ABI, it is true if this is a vararg function or if
4549   // any parameter is located in a stack slot.
4550 
4551   bool HasParameterArea = !isELFv2ABI || isVarArg;
4552   unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4553   unsigned NumBytes = LinkageSize;
4554   unsigned AvailableFPRs = Num_FPR_Regs;
4555   unsigned AvailableVRs = Num_VR_Regs;
4556   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4557     if (Ins[i].Flags.isNest())
4558       continue;
4559 
4560     if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
4561                                PtrByteSize, LinkageSize, ParamAreaSize,
4562                                NumBytes, AvailableFPRs, AvailableVRs))
4563       HasParameterArea = true;
4564   }
4565 
4566   // Add DAG nodes to load the arguments or copy them out of registers.  On
4567   // entry to a function on PPC, the arguments start after the linkage area,
4568   // although the first ones are often in registers.
4569 
4570   unsigned ArgOffset = LinkageSize;
4571   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4572   SmallVector<SDValue, 8> MemOps;
4573   Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4574   unsigned CurArgIdx = 0;
4575   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4576     SDValue ArgVal;
4577     bool needsLoad = false;
4578     EVT ObjectVT = Ins[ArgNo].VT;
4579     EVT OrigVT = Ins[ArgNo].ArgVT;
4580     unsigned ObjSize = ObjectVT.getStoreSize();
4581     unsigned ArgSize = ObjSize;
4582     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4583     if (Ins[ArgNo].isOrigArg()) {
4584       std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4585       CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4586     }
4587     // We re-align the argument offset for each argument, except under the fast
4588     // calling convention, where we only do so when the argument will actually
4589     // use a stack slot.
4590     unsigned CurArgOffset;
4591     Align Alignment;
4592     auto ComputeArgOffset = [&]() {
4593       /* Respect alignment of argument on the stack.  */
4594       Alignment =
4595           CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4596       ArgOffset = alignTo(ArgOffset, Alignment);
4597       CurArgOffset = ArgOffset;
4598     };
4599 
4600     if (CallConv != CallingConv::Fast) {
4601       ComputeArgOffset();
4602 
4603       /* Compute GPR index associated with argument offset.  */
4604       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4605       GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4606     }
4607 
4608     // FIXME the codegen can be much improved in some cases.
4609     // We do not have to keep everything in memory.
4610     if (Flags.isByVal()) {
4611       assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4612 
4613       if (CallConv == CallingConv::Fast)
4614         ComputeArgOffset();
4615 
4616       // ObjSize is the true size; ArgSize rounds it up to whole registers.
4617       ObjSize = Flags.getByValSize();
4618       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4619       // Empty aggregate parameters do not take up registers.  Examples:
4620       //   struct { } a;
4621       //   union  { } b;
4622       //   int c[0];
4623       // etc.  However, we have to provide a place-holder in InVals, so
4624       // pretend we have an 8-byte item at the current address for that
4625       // purpose.
4626       if (!ObjSize) {
4627         int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4628         SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4629         InVals.push_back(FIN);
4630         continue;
4631       }
4632 
4633       // Create a stack object covering all stack doublewords occupied
4634       // by the argument.  If the argument is (fully or partially) on
4635       // the stack, or if the argument is fully in registers but the
4636       // caller has allocated the parameter save area anyway, we can refer
4637       // directly to the caller's stack frame.  Otherwise, create a
4638       // local copy in our own frame.
4639       int FI;
4640       if (HasParameterArea ||
4641           ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4642         FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4643       else
4644         FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4645       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4646 
4647       // Handle aggregates smaller than 8 bytes.
4648       if (ObjSize < PtrByteSize) {
4649         // The value of the object is its address, which differs from the
4650         // address of the enclosing doubleword on big-endian systems.
4651         SDValue Arg = FIN;
4652         if (!isLittleEndian) {
4653           SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4654           Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4655         }
4656         InVals.push_back(Arg);
4657 
4658         if (GPR_idx != Num_GPR_Regs) {
4659           Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4660           FuncInfo->addLiveInAttr(VReg, Flags);
4661           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4662           EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
4663           SDValue Store =
4664               DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4665                                 MachinePointerInfo(&*FuncArg), ObjType);
4666           MemOps.push_back(Store);
4667         }
4668         // Whether we copied from a register or not, advance the offset
4669         // into the parameter save area by a full doubleword.
4670         ArgOffset += PtrByteSize;
4671         continue;
4672       }
4673 
4674       // The value of the object is its address, which is the address of
4675       // its first stack doubleword.
4676       InVals.push_back(FIN);
4677 
4678       // Store whatever pieces of the object are in registers to memory.
4679       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4680         if (GPR_idx == Num_GPR_Regs)
4681           break;
4682 
4683         Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4684         FuncInfo->addLiveInAttr(VReg, Flags);
4685         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4686         SDValue Addr = FIN;
4687         if (j) {
4688           SDValue Off = DAG.getConstant(j, dl, PtrVT);
4689           Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4690         }
4691         unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
4692         EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
4693         SDValue Store =
4694             DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
4695                               MachinePointerInfo(&*FuncArg, j), ObjType);
4696         MemOps.push_back(Store);
4697         ++GPR_idx;
4698       }
4699       ArgOffset += ArgSize;
4700       continue;
4701     }
4702 
4703     switch (ObjectVT.getSimpleVT().SimpleTy) {
4704     default: llvm_unreachable("Unhandled argument type!");
4705     case MVT::i1:
4706     case MVT::i32:
4707     case MVT::i64:
4708       if (Flags.isNest()) {
4709         // The 'nest' parameter, if any, is passed in R11.
4710         Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4711         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4712 
4713         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4714           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4715 
4716         break;
4717       }
4718 
4719       // These can be scalar arguments or elements of an integer array type
4720       // passed directly.  Clang may use those instead of "byval" aggregate
4721       // types to avoid forcing arguments to memory unnecessarily.
4722       if (GPR_idx != Num_GPR_Regs) {
4723         Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4724         FuncInfo->addLiveInAttr(VReg, Flags);
4725         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4726 
4727         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4728           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4729           // value to MVT::i64 and then truncate to the correct register size.
4730           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4731       } else {
4732         if (CallConv == CallingConv::Fast)
4733           ComputeArgOffset();
4734 
4735         needsLoad = true;
4736         ArgSize = PtrByteSize;
4737       }
4738       if (CallConv != CallingConv::Fast || needsLoad)
4739         ArgOffset += 8;
4740       break;
4741 
4742     case MVT::f32:
4743     case MVT::f64:
4744       // These can be scalar arguments or elements of a float array type
4745       // passed directly.  The latter are used to implement ELFv2 homogeneous
4746       // float aggregates.
4747       if (FPR_idx != Num_FPR_Regs) {
4748         unsigned VReg;
4749 
4750         if (ObjectVT == MVT::f32)
4751           VReg = MF.addLiveIn(FPR[FPR_idx],
4752                               Subtarget.hasP8Vector()
4753                                   ? &PPC::VSSRCRegClass
4754                                   : &PPC::F4RCRegClass);
4755         else
4756           VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4757                                                 ? &PPC::VSFRCRegClass
4758                                                 : &PPC::F8RCRegClass);
4759 
4760         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4761         ++FPR_idx;
4762       } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4763         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4764         // once we support fp <-> gpr moves.
4765 
4766         // This can only ever happen in the presence of f32 array types,
4767         // since otherwise we never run out of FPRs before running out
4768         // of GPRs.
4769         Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4770         FuncInfo->addLiveInAttr(VReg, Flags);
4771         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4772 
4773         if (ObjectVT == MVT::f32) {
4774           if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4775             ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4776                                  DAG.getConstant(32, dl, MVT::i32));
4777           ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4778         }
4779 
4780         ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4781       } else {
4782         if (CallConv == CallingConv::Fast)
4783           ComputeArgOffset();
4784 
4785         needsLoad = true;
4786       }
4787 
4788       // When passing an array of floats, the array occupies consecutive
4789       // space in the argument area; only round up to the next doubleword
4790       // at the end of the array.  Otherwise, each float takes 8 bytes.
4791       if (CallConv != CallingConv::Fast || needsLoad) {
4792         ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4793         ArgOffset += ArgSize;
4794         if (Flags.isInConsecutiveRegsLast())
4795           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4796       }
4797       break;
4798     case MVT::v4f32:
4799     case MVT::v4i32:
4800     case MVT::v8i16:
4801     case MVT::v16i8:
4802     case MVT::v2f64:
4803     case MVT::v2i64:
4804     case MVT::v1i128:
4805     case MVT::f128:
4806       // These can be scalar arguments or elements of a vector array type
4807       // passed directly.  The latter are used to implement ELFv2 homogeneous
4808       // vector aggregates.
4809       if (VR_idx != Num_VR_Regs) {
4810         Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4811         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4812         ++VR_idx;
4813       } else {
4814         if (CallConv == CallingConv::Fast)
4815           ComputeArgOffset();
4816         needsLoad = true;
4817       }
4818       if (CallConv != CallingConv::Fast || needsLoad)
4819         ArgOffset += 16;
4820       break;
4821     }
4822 
4823     // We need to load the argument to a virtual register if we determined
4824     // above that we ran out of physical registers of the appropriate type.
4825     if (needsLoad) {
4826       if (ObjSize < ArgSize && !isLittleEndian)
4827         CurArgOffset += ArgSize - ObjSize;
4828       int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4829       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4830       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4831     }
4832 
4833     InVals.push_back(ArgVal);
4834   }
4835 
4836   // Area that is at least reserved in the caller of this function.
4837   unsigned MinReservedArea;
4838   if (HasParameterArea)
4839     MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4840   else
4841     MinReservedArea = LinkageSize;
4842 
4843   // Set the size that is at least reserved in caller of this function.  Tail
4844   // call optimized functions' reserved stack space needs to be aligned so that
4845   // taking the difference between two stack areas will result in an aligned
4846   // stack.
4847   MinReservedArea =
4848       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4849   FuncInfo->setMinReservedArea(MinReservedArea);
4850 
4851   // If the function takes a variable number of arguments, make a frame index for
4852   // the start of the first vararg value... for expansion of llvm.va_start.
4853   // The ELFv2 ABI spec notes:
4854   // C programs that are intended to be *portable* across different compilers
4855   // and architectures must use the header file <stdarg.h> to deal with variable
4856   // argument lists.
4857   if (isVarArg && MFI.hasVAStart()) {
4858     int Depth = ArgOffset;
4859 
4860     FuncInfo->setVarArgsFrameIndex(
4861       MFI.CreateFixedObject(PtrByteSize, Depth, true));
4862     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4863 
4864     // If this function is vararg, store any remaining integer argument regs
4865     // to their spots on the stack so that they may be loaded by dereferencing
4866     // the result of va_next.
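    // For example (illustrative): with two fixed integer arguments on ELFv2,
    // ArgOffset is LinkageSize + 16, so the loop below starts at GPR_idx == 2
    // and spills r5..r10 into the parameter save area.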
4867     for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4868          GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4869       Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4870       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4871       SDValue Store =
4872           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4873       MemOps.push_back(Store);
4874       // Increment the address by PtrByteSize for the next argument to store
4875       SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4876       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4877     }
4878   }
4879 
4880   if (!MemOps.empty())
4881     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4882 
4883   return Chain;
4884 }
4885 
4886 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4887 /// adjusted to accommodate the arguments for the tailcall.
4888 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4889                                    unsigned ParamSize) {
4890 
4891   if (!isTailCall) return 0;
4892 
4893   PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4894   unsigned CallerMinReservedArea = FI->getMinReservedArea();
4895   int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4896   // Remember only if the new adjustment is bigger.
4897   if (SPDiff < FI->getTailCallSPDelta())
4898     FI->setTailCallSPDelta(SPDiff);
4899 
4900   return SPDiff;
4901 }
4902 
4903 static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4904 
4905 static bool callsShareTOCBase(const Function *Caller,
4906                               const GlobalValue *CalleeGV,
4907                               const TargetMachine &TM) {
4908   // It does not make sense to call callsShareTOCBase() with a caller that
4909   // is PC Relative since PC Relative callers do not have a TOC.
4910 #ifndef NDEBUG
4911   const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4912   assert(!STICaller->isUsingPCRelativeCalls() &&
4913          "PC Relative callers do not have a TOC and cannot share a TOC Base");
4914 #endif
4915 
4916   // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4917   // don't have enough information to determine if the caller and callee share
4918   // the same TOC base, so we have to pessimistically assume they don't for
4919   // correctness.
4920   if (!CalleeGV)
4921     return false;
4922 
4923   // If the callee is preemptable, then the static linker will use a plt-stub
4924   // which saves the toc to the stack, and needs a nop after the call
4925   // instruction to convert to a toc-restore.
4926   if (!TM.shouldAssumeDSOLocal(CalleeGV))
4927     return false;
4928 
4929   // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4930   // We may need a TOC restore in the situation where the caller requires a
4931   // valid TOC but the callee is PC Relative and does not.
4932   const Function *F = dyn_cast<Function>(CalleeGV);
4933   const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);
4934 
4935   // If we have an Alias we can try to get the function from there.
4936   if (Alias) {
4937     const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4938     F = dyn_cast<Function>(GlobalObj);
4939   }
4940 
4941   // If we still have no valid function pointer we do not have enough
4942   // information to determine if the callee uses PC Relative calls so we must
4943   // assume that it does.
4944   if (!F)
4945     return false;
4946 
4947   // If the callee uses PC Relative we cannot guarantee that the callee won't
4948   // clobber the TOC of the caller and so we must assume that the two
4949   // functions do not share a TOC base.
4950   const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4951   if (STICallee->isUsingPCRelativeCalls())
4952     return false;
4953 
4954   // If the GV is not a strong definition then we need to assume it can be
4955   // replaced by another function at link time. The function that replaces
4956   // it may not share the same TOC as the caller since the callee may be
4957   // replaced by a PC Relative version of the same function.
4958   if (!CalleeGV->isStrongDefinitionForLinker())
4959     return false;
4960 
4961   // The medium and large code models are expected to provide a sufficiently
4962   // large TOC to provide all data addressing needs of a module with a
4963   // single TOC.
4964   if (CodeModel::Medium == TM.getCodeModel() ||
4965       CodeModel::Large == TM.getCodeModel())
4966     return true;
4967 
4968   // Any explicitly-specified sections and section prefixes must also match.
4969   // Also, if we're using -ffunction-sections, then each function is always in
4970   // a different section (the same is true for COMDAT functions).
4971   if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
4972       Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
4973     return false;
4974   if (const auto *F = dyn_cast<Function>(CalleeGV)) {
4975     if (F->getSectionPrefix() != Caller->getSectionPrefix())
4976       return false;
4977   }
4978 
4979   return true;
4980 }
4981 
4982 static bool
4983 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4984                             const SmallVectorImpl<ISD::OutputArg> &Outs) {
4985   assert(Subtarget.is64BitELFABI());
4986 
4987   const unsigned PtrByteSize = 8;
4988   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4989 
4990   static const MCPhysReg GPR[] = {
4991     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4992     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4993   };
4994   static const MCPhysReg VR[] = {
4995     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4996     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4997   };
4998 
4999   const unsigned NumGPRs = std::size(GPR);
5000   const unsigned NumFPRs = 13;
5001   const unsigned NumVRs = std::size(VR);
5002   const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
5003 
5004   unsigned NumBytes = LinkageSize;
5005   unsigned AvailableFPRs = NumFPRs;
5006   unsigned AvailableVRs = NumVRs;
5007 
5008   for (const ISD::OutputArg& Param : Outs) {
5009     if (Param.Flags.isNest()) continue;
5010 
5011     if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
5012                                LinkageSize, ParamAreaSize, NumBytes,
5013                                AvailableFPRs, AvailableVRs))
5014       return true;
5015   }
5016   return false;
5017 }
5018 
5019 static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
5020   if (CB.arg_size() != CallerFn->arg_size())
5021     return false;
5022 
5023   auto CalleeArgIter = CB.arg_begin();
5024   auto CalleeArgEnd = CB.arg_end();
5025   Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
5026 
5027   for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
5028     const Value* CalleeArg = *CalleeArgIter;
5029     const Value* CallerArg = &(*CallerArgIter);
5030     if (CalleeArg == CallerArg)
5031       continue;
5032 
5033     // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
5034     //        tail call @callee([4 x i64] undef, [4 x i64] %b)
5035     //      }
5036     // 1st argument of callee is undef and has the same type as caller.
5037     if (CalleeArg->getType() == CallerArg->getType() &&
5038         isa<UndefValue>(CalleeArg))
5039       continue;
5040 
5041     return false;
5042   }
5043 
5044   return true;
5045 }
5046 
5047 // Returns true if TCO is possible between the caller's and callee's
5048 // calling conventions.
5049 static bool
5050 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
5051                                     CallingConv::ID CalleeCC) {
5052   // Tail calls are possible with fastcc and ccc.
5053   auto isTailCallableCC = [](CallingConv::ID CC) {
5054     return CC == CallingConv::C || CC == CallingConv::Fast;
5055   };
5056   if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
5057     return false;
5058 
5059   // We can safely tail call both fastcc and ccc callees from a c calling
5060   // convention caller. If the caller is fastcc, we may have less stack space
5061   // than a non-fastcc caller with the same signature so disable tail-calls in
5062   // that case.
5063   return CallerCC == CallingConv::C || CallerCC == CalleeCC;
5064 }
5065 
5066 bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
5067     const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5068     CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5069     const SmallVectorImpl<ISD::OutputArg> &Outs,
5070     const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5071     bool isCalleeExternalSymbol) const {
5072   bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
5073 
5074   if (DisableSCO && !TailCallOpt) return false;
5075 
5076   // Variadic argument functions are not supported.
5077   if (isVarArg) return false;
5078 
5079   // Check that the calling conventions are compatible for tco.
5080   if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
5081     return false;
5082 
5083   // A caller containing any byval parameter is not supported.
5084   if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5085     return false;
5086 
5087   // A callee containing any byval parameter is not supported either.
5088   // Note: This is a quick workaround, because in some cases, e.g.
5089   // caller's stack size > callee's stack size, we are still able to apply
5090   // sibling call optimization. For example, gcc is able to do SCO for caller1
5091   // in the following example, but not for caller2.
5092   //   struct test {
5093   //     long int a;
5094   //     char ary[56];
5095   //   } gTest;
5096   //   __attribute__((noinline)) int callee(struct test v, struct test *b) {
5097   //     b->a = v.a;
5098   //     return 0;
5099   //   }
5100   //   void caller1(struct test a, struct test c, struct test *b) {
5101   //     callee(gTest, b); }
5102   //   void caller2(struct test *b) { callee(gTest, b); }
5103   if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
5104     return false;
5105 
5106   // If callee and caller use different calling conventions, we cannot pass
5107   // parameters on stack since offsets for the parameter area may be different.
5108   if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
5109     return false;
5110 
5111   // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
5112   // the caller and callee share the same TOC for TCO/SCO. If the caller and
5113   // callee potentially have different TOC bases then we cannot tail call since
5114   // we need to restore the TOC pointer after the call.
5115   // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
5116   // We cannot guarantee this for indirect calls or calls to external functions.
5117   // When PC-Relative addressing is used, the concept of the TOC is no longer
5118   // applicable so this check is not required.
5119   // Check first for indirect calls.
5120   if (!Subtarget.isUsingPCRelativeCalls() &&
5121       !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
5122     return false;
5123 
5124   // Check if we share the TOC base.
5125   if (!Subtarget.isUsingPCRelativeCalls() &&
5126       !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
5127     return false;
5128 
5129   // TCO allows altering callee ABI, so we don't have to check further.
5130   if (CalleeCC == CallingConv::Fast && TailCallOpt)
5131     return true;
5132 
5133   if (DisableSCO) return false;
5134 
5135   // If the callee uses the same argument list as the caller, then we can
5136   // apply SCO in this case. If it does not, then we need to check whether the
5137   // callee needs stack for passing arguments.
5138   // PC Relative tail calls may not have a CallBase.
5139   // If there is no CallBase we cannot verify if we have the same argument
5140   // list so assume that we don't have the same argument list.
5141   if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
5142       needStackSlotPassParameters(Subtarget, Outs))
5143     return false;
5144   else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
5145     return false;
5146 
5147   return true;
5148 }
5149 
5150 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
5151 /// for tail call optimization. Targets which want to do tail call
5152 /// optimization should implement this function.
5153 bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5154     const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5155     CallingConv::ID CallerCC, bool isVarArg,
5156     const SmallVectorImpl<ISD::InputArg> &Ins) const {
5157   if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5158     return false;
5159 
5160   // Variable argument functions are not supported.
5161   if (isVarArg)
5162     return false;
5163 
5164   if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5165     // Functions containing byval parameters are not supported.
5166     if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5167       return false;
5168 
5169     // Non-PIC/GOT tail calls are supported.
5170     if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5171       return true;
5172 
5173     // At the moment we can only do local tail calls (in same module, hidden
5174     // or protected) if we are generating PIC.
5175     if (CalleeGV)
5176       return CalleeGV->hasHiddenVisibility() ||
5177              CalleeGV->hasProtectedVisibility();
5178   }
5179 
5180   return false;
5181 }
5182 
5183 /// isBLACompatibleAddress - Return the immediate to use if the specified
5184 /// 32-bit value is representable in the immediate field of a BxA instruction.
5185 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
5186   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5187   if (!C) return nullptr;
5188 
5189   int Addr = C->getZExtValue();
5190   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
5191       SignExtend32<26>(Addr) != Addr)
5192     return nullptr;  // Top 6 bits have to be sext of immediate.
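  // For illustration: a word-aligned address such as 0x01FFFFFC passes both
  // checks (it sign-extends from 26 bits back to itself) and is returned
  // encoded as 0x007FFFFF, while something like 0x04000000 is rejected.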
5193 
5194   return DAG
5195       .getSignedConstant(
5196           (int)C->getZExtValue() >> 2, SDLoc(Op),
5197           DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
5198       .getNode();
5199 }
5200 
5201 namespace {
5202 
5203 struct TailCallArgumentInfo {
5204   SDValue Arg;
5205   SDValue FrameIdxOp;
5206   int FrameIdx = 0;
5207 
5208   TailCallArgumentInfo() = default;
5209 };
5210 
5211 } // end anonymous namespace
5212 
5213 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5214 static void StoreTailCallArgumentsToStackSlot(
5215     SelectionDAG &DAG, SDValue Chain,
5216     const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5217     SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5218   for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5219     SDValue Arg = TailCallArgs[i].Arg;
5220     SDValue FIN = TailCallArgs[i].FrameIdxOp;
5221     int FI = TailCallArgs[i].FrameIdx;
5222     // Store relative to framepointer.
5223     MemOpChains.push_back(DAG.getStore(
5224         Chain, dl, Arg, FIN,
5225         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
5226   }
5227 }
5228 
5229 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5230 /// the appropriate stack slot for the tail call optimized function call.
5231 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
5232                                              SDValue OldRetAddr, SDValue OldFP,
5233                                              int SPDiff, const SDLoc &dl) {
5234   if (SPDiff) {
5235     // Calculate the new stack slot for the return address.
5236     MachineFunction &MF = DAG.getMachineFunction();
5237     const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5238     const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5239     int SlotSize = Subtarget.isPPC64() ? 8 : 4;
5240     int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5241     int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5242                                                          NewRetAddrLoc, true);
5243     SDValue NewRetAddrFrIdx =
5244         DAG.getFrameIndex(NewRetAddr, Subtarget.getScalarIntVT());
5245     Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5246                          MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5247   }
5248   return Chain;
5249 }
5250 
5251 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5252 /// the position of the argument.
5253 static void CalculateTailCallArgDest(
5254     SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg,
5255     int SPDiff, unsigned ArgOffset,
5256     SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5257   int Offset = ArgOffset + SPDiff;
5258   uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5259   int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5260   EVT VT = IsPPC64 ? MVT::i64 : MVT::i32;
5261   SDValue FIN = DAG.getFrameIndex(FI, VT);
5262   TailCallArgumentInfo Info;
5263   Info.Arg = Arg;
5264   Info.FrameIdxOp = FIN;
5265   Info.FrameIdx = FI;
5266   TailCallArguments.push_back(Info);
5267 }
5268 
5269 /// EmitTailCallLoadFPAndRetAddr - Emit load from frame pointer and return
5270 /// address stack slot. Returns the chain as result and the loaded frame
5271 /// pointers in LROpOut/FPOpOut. Used when tail calling.
5272 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5273     SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5274     SDValue &FPOpOut, const SDLoc &dl) const {
5275   if (SPDiff) {
5276     // Load the LR and FP stack slot for later adjusting.
5277     LROpOut = getReturnAddrFrameIndex(DAG);
5278     LROpOut = DAG.getLoad(Subtarget.getScalarIntVT(), dl, Chain, LROpOut,
5279                           MachinePointerInfo());
5280     Chain = SDValue(LROpOut.getNode(), 1);
5281   }
5282   return Chain;
5283 }
5284 
5285 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5286 /// by "Src" to address "Dst" of size "Size".  Alignment information is
5287 /// specified by the specific parameter attribute. The copy will be passed as
5288 /// a byval function parameter.
5289 /// Sometimes what we are copying is the end of a larger object, the part that
5290 /// does not fit in registers.
5291 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
5292                                          SDValue Chain, ISD::ArgFlagsTy Flags,
5293                                          SelectionDAG &DAG, const SDLoc &dl) {
5294   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5295   return DAG.getMemcpy(
5296       Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(), false, false,
5297       /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
5298 }
5299 
5300 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5301 /// tail calls.
5302 static void LowerMemOpCallTo(
5303     SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5304     SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5305     bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5306     SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5307   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5308   if (!isTailCall) {
5309     if (isVector) {
5310       SDValue StackPtr;
5311       if (isPPC64)
5312         StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5313       else
5314         StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5315       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5316                            DAG.getConstant(ArgOffset, dl, PtrVT));
5317     }
5318     MemOpChains.push_back(
5319         DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5320     // Calculate and remember argument location.
5321   } else
5322     CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5323                              TailCallArguments);
5324 }
5325 
5326 static void
5327 PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain,
5328                 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5329                 SDValue FPOp,
5330                 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5331   // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5332   // might overwrite each other in case of tail call optimization.
5333   SmallVector<SDValue, 8> MemOpChains2;
5334   // Do not flag preceding copytoreg stuff together with the following stuff.
5335   InGlue = SDValue();
5336   StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5337                                     MemOpChains2, dl);
5338   if (!MemOpChains2.empty())
5339     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5340 
5341   // Store the return address to the appropriate stack slot.
5342   Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5343 
5344   // Emit callseq_end just before tailcall node.
5345   Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
5346   InGlue = Chain.getValue(1);
5347 }
5348 
5349 // Is this global address that of a function that can be called by name? (as
5350 // opposed to something that must hold a descriptor for an indirect call).
5351 static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5352   if (GV) {
5353     if (GV->isThreadLocal())
5354       return false;
5355 
5356     return GV->getValueType()->isFunctionTy();
5357   }
5358 
5359   return false;
5360 }
5361 
5362 SDValue PPCTargetLowering::LowerCallResult(
5363     SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
5364     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5365     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5366   SmallVector<CCValAssign, 16> RVLocs;
5367   CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5368                     *DAG.getContext());
5369 
5370   CCRetInfo.AnalyzeCallResult(
5371       Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5372                ? RetCC_PPC_Cold
5373                : RetCC_PPC);
5374 
5375   // Copy all of the result registers out of their specified physreg.
5376   for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5377     CCValAssign &VA = RVLocs[i];
5378     assert(VA.isRegLoc() && "Can only return in registers!");
5379 
5380     SDValue Val;
5381 
5382     if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5383       SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5384                                       InGlue);
5385       Chain = Lo.getValue(1);
5386       InGlue = Lo.getValue(2);
5387       VA = RVLocs[++i]; // skip ahead to next loc
5388       SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5389                                       InGlue);
5390       Chain = Hi.getValue(1);
5391       InGlue = Hi.getValue(2);
5392       if (!Subtarget.isLittleEndian())
5393         std::swap(Lo, Hi);
5394       Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5395     } else {
5396       Val = DAG.getCopyFromReg(Chain, dl,
5397                                VA.getLocReg(), VA.getLocVT(), InGlue);
5398       Chain = Val.getValue(1);
5399       InGlue = Val.getValue(2);
5400     }
5401 
5402     switch (VA.getLocInfo()) {
5403     default: llvm_unreachable("Unknown loc info!");
5404     case CCValAssign::Full: break;
5405     case CCValAssign::AExt:
5406       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5407       break;
5408     case CCValAssign::ZExt:
5409       Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5410                         DAG.getValueType(VA.getValVT()));
5411       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5412       break;
5413     case CCValAssign::SExt:
5414       Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5415                         DAG.getValueType(VA.getValVT()));
5416       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5417       break;
5418     }
5419 
5420     InVals.push_back(Val);
5421   }
5422 
5423   return Chain;
5424 }
5425 
5426 static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5427                            const PPCSubtarget &Subtarget, bool isPatchPoint) {
5428   auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5429   const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5430 
5431   // PatchPoint calls are not indirect.
5432   if (isPatchPoint)
5433     return false;
5434 
5435   if (isFunctionGlobalAddress(GV) || isa<ExternalSymbolSDNode>(Callee))
5436     return false;
5437 
5438   // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot
5439   // because the immediate function pointer points to a descriptor instead of
5440   // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5441   // pointer immediate points to the global entry point, while the BLA would
5442   // need to jump to the local entry point (see rL211174).
5443   if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5444       isBLACompatibleAddress(Callee, DAG))
5445     return false;
5446 
5447   return true;
5448 }
5449 
5450 // AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5451 static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5452   return Subtarget.isAIXABI() ||
5453          (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5454 }
5455 
5456 static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5457                               const Function &Caller, const SDValue &Callee,
5458                               const PPCSubtarget &Subtarget,
5459                               const TargetMachine &TM,
5460                               bool IsStrictFPCall = false) {
5461   if (CFlags.IsTailCall)
5462     return PPCISD::TC_RETURN;
5463 
5464   unsigned RetOpc = 0;
5465   // This is a call through a function pointer.
5466   if (CFlags.IsIndirect) {
5467     // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5468     // indirect calls. The save of the caller's TOC pointer to the stack will be
5469     // inserted into the DAG as part of call lowering. The restore of the TOC
5470     // pointer is modeled by using a pseudo instruction for the call opcode that
5471     // represents the 2 instruction sequence of an indirect branch and link,
5472     // immediately followed by a load of the TOC pointer from the stack save
5473     // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5474     // as it is not saved or used.
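    // Roughly, on the TOC-based ABIs the pseudo later expands to a sequence
    // like:
    //   bctrl
    //   ld 2, <TOC save offset>(1)
    // with the offset taken from PPCFrameLowering::getTOCSaveOffset().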
5475     RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5476                                                  : PPCISD::BCTRL;
5477   } else if (Subtarget.isUsingPCRelativeCalls()) {
5478     assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5479     RetOpc = PPCISD::CALL_NOTOC;
5480   } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
5481     // The ABIs that maintain a TOC pointer across calls need to have a nop
5482     // immediately following the call instruction if the caller and callee may
5483     // have different TOC bases. At link time if the linker determines the calls
5484     // may not share a TOC base, the call is redirected to a trampoline inserted
5485     // by the linker. The trampoline will (among other things) save the caller's
5486     // TOC pointer at an ABI designated offset in the linkage area and the
5487     // linker will rewrite the nop to be a load of the TOC pointer from the
5488     // linkage area into gpr2.
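    // Illustrative sequence for such a call:
    //   bl callee
    //   nop
    // where the linker may rewrite the nop to a TOC restore, e.g.
    //   ld 2, <TOC save offset>(1)
    // if it routes the call through a TOC-saving stub.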
5489     auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5490     const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5491     RetOpc =
5492         callsShareTOCBase(&Caller, GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
5493   } else
5494     RetOpc = PPCISD::CALL;
5495   if (IsStrictFPCall) {
5496     switch (RetOpc) {
5497     default:
5498       llvm_unreachable("Unknown call opcode");
5499     case PPCISD::BCTRL_LOAD_TOC:
5500       RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
5501       break;
5502     case PPCISD::BCTRL:
5503       RetOpc = PPCISD::BCTRL_RM;
5504       break;
5505     case PPCISD::CALL_NOTOC:
5506       RetOpc = PPCISD::CALL_NOTOC_RM;
5507       break;
5508     case PPCISD::CALL:
5509       RetOpc = PPCISD::CALL_RM;
5510       break;
5511     case PPCISD::CALL_NOP:
5512       RetOpc = PPCISD::CALL_NOP_RM;
5513       break;
5514     }
5515   }
5516   return RetOpc;
5517 }
5518 
5519 static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5520                                const SDLoc &dl, const PPCSubtarget &Subtarget) {
5521   if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5522     if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5523       return SDValue(Dest, 0);
5524 
5525   // Returns true if the callee is local, and false otherwise.
5526   auto isLocalCallee = [&]() {
5527     const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5528     const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5529 
5530     return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
5531            !isa_and_nonnull<GlobalIFunc>(GV);
5532   };
5533 
5534   // The PLT is only used in 32-bit ELF PIC mode.  Attempting to use the PLT in
5535   // a static relocation model causes some versions of GNU LD (2.17.50, at
5536   // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5537   // built with secure-PLT.
5538   bool UsePlt =
5539       Subtarget.is32BitELFABI() && !isLocalCallee() &&
5540       Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5541 
5542   const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5543     const TargetMachine &TM = Subtarget.getTargetMachine();
5544     const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5545     MCSymbolXCOFF *S =
5546         cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5547 
5548     MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5549     return DAG.getMCSymbol(S, PtrVT);
5550   };
5551 
5552   auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5553   const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5554   if (isFunctionGlobalAddress(GV)) {
5555     const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5556 
5557     if (Subtarget.isAIXABI()) {
5558       assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5559       return getAIXFuncEntryPointSymbolSDNode(GV);
5560     }
5561     return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5562                                       UsePlt ? PPCII::MO_PLT : 0);
5563   }
5564 
5565   if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5566     const char *SymName = S->getSymbol();
5567     if (Subtarget.isAIXABI()) {
5568       // If there exists a user-declared function whose name is the same as the
5569       // ExternalSymbol's, then we pick up the user-declared version.
5570       const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5571       if (const Function *F =
5572               dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5573         return getAIXFuncEntryPointSymbolSDNode(F);
5574 
5575       // On AIX, direct function calls reference the symbol for the function's
5576       // entry point, which is named by prepending a "." before the function's
5577       // C-linkage name. A Qualname is returned here because an external
5578       // function entry point is a csect with XTY_ER property.
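      // For example, an external symbol "foo" is referenced through the
      // qualified name symbol of the ".foo" csect created below.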
5579       const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5580         auto &Context = DAG.getMachineFunction().getContext();
5581         MCSectionXCOFF *Sec = Context.getXCOFFSection(
5582             (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
5583             XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER));
5584         return Sec->getQualNameSymbol();
5585       };
5586 
5587       SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5588     }
5589     return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5590                                        UsePlt ? PPCII::MO_PLT : 0);
5591   }
5592 
5593   // No transformation needed.
5594   assert(Callee.getNode() && "What no callee?");
5595   return Callee;
5596 }
5597 
5598 static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5599   assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5600          "Expected a CALLSEQ_START SDNode.");
5601 
5602   // The last result value is the chain, except when the node has glue. If the
5603   // node has glue, then the last value is the glue and the chain is the second
5604   // to last value.
5605   SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5606   if (LastValue.getValueType() != MVT::Glue)
5607     return LastValue;
5608 
5609   return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5610 }
5611 
5612 // Creates the node that moves a function's address into the count register
5613 // to prepare for an indirect call instruction.
5614 static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5615                                 SDValue &Glue, SDValue &Chain,
5616                                 const SDLoc &dl) {
5617   SDValue MTCTROps[] = {Chain, Callee, Glue};
5618   EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5619   Chain = DAG.getNode(PPCISD::MTCTR, dl, ReturnTypes,
5620                       ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5621   // The glue is the second value produced.
5622   Glue = Chain.getValue(1);
5623 }
5624 
5625 static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5626                                           SDValue &Glue, SDValue &Chain,
5627                                           SDValue CallSeqStart,
5628                                           const CallBase *CB, const SDLoc &dl,
5629                                           bool hasNest,
5630                                           const PPCSubtarget &Subtarget) {
5631   // Function pointers in the 64-bit SVR4 ABI do not point to the function
5632   // entry point, but to the function descriptor (the function entry point
5633   // address is part of the function descriptor though).
5634   // The function descriptor is a three doubleword structure with the
5635   // following fields: function entry point, TOC base address and
5636   // environment pointer.
5637   // Thus for a call through a function pointer, the following actions need
5638   // to be performed:
5639   //   1. Save the TOC of the caller in the TOC save area of its stack
5640   //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5641   //   2. Load the address of the function entry point from the function
5642   //      descriptor.
5643   //   3. Load the TOC of the callee from the function descriptor into r2.
5644   //   4. Load the environment pointer from the function descriptor into
5645   //      r11.
5646   //   5. Branch to the function entry point address.
5647   //   6. On return of the callee, the TOC of the caller needs to be
5648   //      restored (this is done in FinishCall()).
5649   //
5650   // The loads are scheduled at the beginning of the call sequence, and the
5651   // register copies are flagged together to ensure that no other
5652   // operations can be scheduled in between. E.g. without flagging the
5653   // copies together, a TOC access in the caller could be scheduled between
5654   // the assignment of the callee TOC and the branch to the callee, which leads
5655   // to incorrect code.
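  //
  // As an illustrative sketch, a 64-bit descriptor at address D is laid out as
  // (matching the subtarget offsets queried below):
  //   D + 0  : function entry point
  //   D + 8  : TOC base        (descriptorTOCAnchorOffset())
  //   D + 16 : environment ptr (descriptorEnvironmentPointerOffset())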
5656 
5657   // Start by loading the function address from the descriptor.
5658   SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5659   auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5660                       ? (MachineMemOperand::MODereferenceable |
5661                          MachineMemOperand::MOInvariant)
5662                       : MachineMemOperand::MONone;
5663 
5664   MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5665 
5666   // Registers used in building the DAG.
5667   const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5668   const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5669 
5670   // Offsets of descriptor members.
5671   const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5672   const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5673 
5674   const MVT RegVT = Subtarget.getScalarIntVT();
5675   const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
5676   // One load for the function's entry point address.
5677   // One load for the functions entry point address.
5678   SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5679                                     Alignment, MMOFlags);
5680 
5681   // One for loading the TOC anchor for the module that contains the called
5682   // function.
5683   SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5684   SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5685   SDValue TOCPtr =
5686       DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5687                   MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5688 
5689   // One for loading the environment pointer.
5690   SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5691   SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5692   SDValue LoadEnvPtr =
5693       DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5694                   MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5695 
5696 
5697   // Then copy the newly loaded TOC anchor to the TOC pointer.
5698   SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5699   Chain = TOCVal.getValue(0);
5700   Glue = TOCVal.getValue(1);
5701 
5702   // If the function call has an explicit 'nest' parameter, it takes the
5703   // place of the environment pointer.
5704   assert((!hasNest || !Subtarget.isAIXABI()) &&
5705          "Nest parameter is not supported on AIX.");
5706   if (!hasNest) {
5707     SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5708     Chain = EnvVal.getValue(0);
5709     Glue = EnvVal.getValue(1);
5710   }
5711 
5712   // The rest of the indirect call sequence is the same as the non-descriptor
5713   // DAG.
5714   prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5715 }
5716 
5717 static void
5718 buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5719                   PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5720                   SelectionDAG &DAG,
5721                   SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5722                   SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5723                   const PPCSubtarget &Subtarget) {
5724   const bool IsPPC64 = Subtarget.isPPC64();
5725   // MVT for a general purpose register.
5726   const MVT RegVT = Subtarget.getScalarIntVT();
5727 
5728   // First operand is always the chain.
5729   Ops.push_back(Chain);
5730 
5731   // If it's a direct call, pass the callee as the second operand.
5732   if (!CFlags.IsIndirect)
5733     Ops.push_back(Callee);
5734   else {
5735     assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5736 
5737     // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5738     // on the stack (this would have been done in `LowerCall_64SVR4` or
5739     // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5740     // represents both the indirect branch and a load that restores the TOC
5741     // pointer from the linkage area. The operand for the TOC restore is an add
5742     // of the TOC save offset to the stack pointer. This must be the second
5743     // operand: after the chain input but before any other variadic arguments.
5744     // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5745     // saved or used.
5746     if (isTOCSaveRestoreRequired(Subtarget)) {
5747       const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5748 
5749       SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5750       unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5751       SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5752       SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5753       Ops.push_back(AddTOC);
5754     }
5755 
5756     // Add the register used for the environment pointer.
5757     if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5758       Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5759                                     RegVT));
5760 
5761 
5762     // Add CTR register as callee so a bctr can be emitted later.
5763     if (CFlags.IsTailCall)
5764       Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5765   }
5766 
5767   // If this is a tail call add stack pointer delta.
5768   if (CFlags.IsTailCall)
5769     Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5770 
5771   // Add argument registers to the end of the list so that they are known live
5772   // into the call.
5773   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5774     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5775                                   RegsToPass[i].second.getValueType()));
5776 
5777   // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5778   // no way to mark dependencies as implicit here.
5779   // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5780   if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5781        !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5782     Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5783 
5784   // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5785   if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5786     Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5787 
5788   // Add a register mask operand representing the call-preserved registers.
5789   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5790   const uint32_t *Mask =
5791       TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5792   assert(Mask && "Missing call preserved mask for calling convention");
5793   Ops.push_back(DAG.getRegisterMask(Mask));
5794 
5795   // If the glue is valid, it is the last operand.
5796   if (Glue.getNode())
5797     Ops.push_back(Glue);
5798 }
5799 
5800 SDValue PPCTargetLowering::FinishCall(
5801     CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5802     SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5803     SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5804     unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5805     SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5806 
5807   if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5808       Subtarget.isAIXABI())
5809     setUsesTOCBasePtr(DAG);
5810 
5811   unsigned CallOpc =
5812       getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5813                     Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
5814 
5815   if (!CFlags.IsIndirect)
5816     Callee = transformCallee(Callee, DAG, dl, Subtarget);
5817   else if (Subtarget.usesFunctionDescriptors())
5818     prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5819                                   dl, CFlags.HasNest, Subtarget);
5820   else
5821     prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5822 
5823   // Build the operand list for the call instruction.
5824   SmallVector<SDValue, 8> Ops;
5825   buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5826                     SPDiff, Subtarget);
5827 
5828   // Emit tail call.
5829   if (CFlags.IsTailCall) {
5830     // Indirect tail calls when using PC Relative calls do not have the same
5831     // constraints.
5832     assert(((Callee.getOpcode() == ISD::Register &&
5833              cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5834             Callee.getOpcode() == ISD::TargetExternalSymbol ||
5835             Callee.getOpcode() == ISD::TargetGlobalAddress ||
5836             isa<ConstantSDNode>(Callee) ||
5837             (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5838            "Expecting a global address, external symbol, absolute value, "
5839            "register or an indirect tail call when PC Relative calls are "
5840            "used.");
5841     // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5842     assert(CallOpc == PPCISD::TC_RETURN &&
5843            "Unexpected call opcode for a tail call.");
5844     DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5845     SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5846     DAG.addNoMergeSiteInfo(Ret.getNode(), CFlags.NoMerge);
5847     return Ret;
5848   }
5849 
5850   std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5851   Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5852   DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5853   Glue = Chain.getValue(1);
5854 
5855   // When performing tail call optimization the callee pops its arguments off
5856   // the stack. Account for this here so these bytes can be pushed back on in
5857   // PPCFrameLowering::eliminateCallFramePseudoInstr.
5858   int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5859                          getTargetMachine().Options.GuaranteedTailCallOpt)
5860                             ? NumBytes
5861                             : 0;
5862 
5863   Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
5864   Glue = Chain.getValue(1);
5865 
5866   return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5867                          DAG, InVals);
5868 }
5869 
5870 bool PPCTargetLowering::supportsTailCallFor(const CallBase *CB) const {
5871   CallingConv::ID CalleeCC = CB->getCallingConv();
5872   const Function *CallerFunc = CB->getCaller();
5873   CallingConv::ID CallerCC = CallerFunc->getCallingConv();
5874   const Function *CalleeFunc = CB->getCalledFunction();
5875   if (!CalleeFunc)
5876     return false;
5877   const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);
5878 
5879   SmallVector<ISD::OutputArg, 2> Outs;
5880   SmallVector<ISD::InputArg, 2> Ins;
5881 
5882   GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
5883                 CalleeFunc->getAttributes(), Outs, *this,
5884                 CalleeFunc->getDataLayout());
5885 
5886   return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
5887                           CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
5888                           false /*isCalleeExternalSymbol*/);
5889 }
5890 
5891 bool PPCTargetLowering::isEligibleForTCO(
5892     const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5893     CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5894     const SmallVectorImpl<ISD::OutputArg> &Outs,
5895     const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5896     bool isCalleeExternalSymbol) const {
5897   if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5898     return false;
5899 
5900   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5901     return IsEligibleForTailCallOptimization_64SVR4(
5902         CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
5903         isCalleeExternalSymbol);
5904   else
5905     return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
5906                                              isVarArg, Ins);
5907 }
5908 
5909 SDValue
5910 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5911                              SmallVectorImpl<SDValue> &InVals) const {
5912   SelectionDAG &DAG                     = CLI.DAG;
5913   SDLoc &dl                             = CLI.DL;
5914   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5915   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
5916   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
5917   SDValue Chain                         = CLI.Chain;
5918   SDValue Callee                        = CLI.Callee;
5919   bool &isTailCall                      = CLI.IsTailCall;
5920   CallingConv::ID CallConv              = CLI.CallConv;
5921   bool isVarArg                         = CLI.IsVarArg;
5922   bool isPatchPoint                     = CLI.IsPatchPoint;
5923   const CallBase *CB                    = CLI.CB;
5924 
5925   if (isTailCall) {
5926     MachineFunction &MF = DAG.getMachineFunction();
5927     CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
5928     auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5929     const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5930     bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Callee);
5931 
5932     isTailCall =
5933         isEligibleForTCO(GV, CallConv, CallerCC, CB, isVarArg, Outs, Ins,
5934                          &(MF.getFunction()), IsCalleeExternalSymbol);
5935     if (isTailCall) {
5936       ++NumTailCalls;
5937       if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5938         ++NumSiblingCalls;
5939 
5940       // PC Relative calls no longer guarantee that the callee is a Global
5941       // Address Node. The callee could be an indirect tail call in which
5942       // case the SDValue for the callee could be a load (to load the address
5943       // of a function pointer) or it may be a register copy (to move the
5944       // address of the callee from a function parameter into a virtual
5945       // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
5946       assert((Subtarget.isUsingPCRelativeCalls() ||
5947               isa<GlobalAddressSDNode>(Callee)) &&
5948              "Callee should be an llvm::Function object.");
5949 
5950       LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5951                         << "\nTCO callee: ");
5952       LLVM_DEBUG(Callee.dump());
5953     }
5954   }
5955 
5956   if (!isTailCall && CB && CB->isMustTailCall())
5957     report_fatal_error("failed to perform tail call elimination on a call "
5958                        "site marked musttail");
5959 
5960   // When long calls (i.e. indirect calls) are always used, calls are always
5961   // made via function pointer. If we have a function name, first translate it
5962   // into a pointer.
5963   if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5964       !isTailCall)
5965     Callee = LowerGlobalAddress(Callee, DAG);
5966 
5967   CallFlags CFlags(
5968       CallConv, isTailCall, isVarArg, isPatchPoint,
5969       isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5970       // hasNest
5971       Subtarget.is64BitELFABI() &&
5972           any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5973       CLI.NoMerge);
5974 
5975   if (Subtarget.isAIXABI())
5976     return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5977                          InVals, CB);
5978 
5979   assert(Subtarget.isSVR4ABI());
5980   if (Subtarget.isPPC64())
5981     return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5982                             InVals, CB);
5983   return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5984                           InVals, CB);
5985 }
5986 
5987 SDValue PPCTargetLowering::LowerCall_32SVR4(
5988     SDValue Chain, SDValue Callee, CallFlags CFlags,
5989     const SmallVectorImpl<ISD::OutputArg> &Outs,
5990     const SmallVectorImpl<SDValue> &OutVals,
5991     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5992     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5993     const CallBase *CB) const {
5994   // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5995   // of the 32-bit SVR4 ABI stack frame layout.
5996 
5997   const CallingConv::ID CallConv = CFlags.CallConv;
5998   const bool IsVarArg = CFlags.IsVarArg;
5999   const bool IsTailCall = CFlags.IsTailCall;
6000 
6001   assert((CallConv == CallingConv::C ||
6002           CallConv == CallingConv::Cold ||
6003           CallConv == CallingConv::Fast) && "Unknown calling convention!");
6004 
6005   const Align PtrAlign(4);
6006 
6007   MachineFunction &MF = DAG.getMachineFunction();
6008 
6009   // Mark this function as potentially containing a tail call. As a consequence,
6010   // the frame pointer will be used for dynamic stack allocation and for
6011   // restoring the caller's stack pointer in this function's epilogue. This is
6012   // done because, by tail calling, the called function might overwrite the
6013   // value in this function's (MF) stack pointer stack slot 0(SP).
6014   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6015       CallConv == CallingConv::Fast)
6016     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6017 
6018   // Count how many bytes are to be pushed on the stack, including the linkage
6019   // area, parameter list area and the part of the local variable space which
6020   // contains copies of aggregates which are passed by value.
6021 
6022   // Assign locations to all of the outgoing arguments.
6023   SmallVector<CCValAssign, 16> ArgLocs;
6024   PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
6025 
6026   // Reserve space for the linkage area on the stack.
6027   CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
6028                        PtrAlign);
6029   if (useSoftFloat())
6030     CCInfo.PreAnalyzeCallOperands(Outs);
6031 
6032   if (IsVarArg) {
6033     // Handle fixed and variable vector arguments differently.
6034     // Fixed vector arguments go into registers as long as registers are
6035     // available. Variable vector arguments always go into memory.
6036     unsigned NumArgs = Outs.size();
6037 
6038     for (unsigned i = 0; i != NumArgs; ++i) {
6039       MVT ArgVT = Outs[i].VT;
6040       ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
6041       bool Result;
6042 
6043       if (Outs[i].IsFixed) {
6044         Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
6045                                CCInfo);
6046       } else {
6047         Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
6048                                       ArgFlags, CCInfo);
6049       }
6050 
6051       if (Result) {
6052 #ifndef NDEBUG
6053         errs() << "Call operand #" << i << " has unhandled type "
6054                << ArgVT << "\n";
6055 #endif
6056         llvm_unreachable(nullptr);
6057       }
6058     }
6059   } else {
6060     // All arguments are treated the same.
6061     CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
6062   }
6063   CCInfo.clearWasPPCF128();
6064 
6065   // Assign locations to all of the outgoing aggregate by value arguments.
6066   SmallVector<CCValAssign, 16> ByValArgLocs;
6067   CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
6068 
6069   // Reserve stack space for the allocations in CCInfo.
6070   CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
6071 
6072   CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
6073 
6074   // Size of the linkage area, parameter list area, and the part of the local
6075   // variable space where copies of aggregates which are passed by value are
6076   // stored.
6077   unsigned NumBytes = CCByValInfo.getStackSize();
6078 
6079   // Calculate by how many bytes the stack has to be adjusted in case of tail
6080   // call optimization.
6081   int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
6082 
6083   // Adjust the stack pointer for the new arguments...
6084   // These operations are automatically eliminated by the prolog/epilog pass
6085   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6086   SDValue CallSeqStart = Chain;
6087 
6088   // Load the return address and frame pointer so they can be moved somewhere
6089   // else later.
6090   SDValue LROp, FPOp;
6091   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6092 
6093   // Set up a copy of the stack pointer for use loading and storing any
6094   // arguments that may not fit in the registers available for argument
6095   // passing.
6096   SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6097 
6098   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6099   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6100   SmallVector<SDValue, 8> MemOpChains;
6101 
6102   bool seenFloatArg = false;
6103   // Walk the register/memloc assignments, inserting copies/loads.
6104   // i - Tracks the index into the list of registers allocated for the call
6105   // RealArgIdx - Tracks the index into the list of actual function arguments
6106   // j - Tracks the index into the list of byval arguments
6107   for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
6108        i != e;
6109        ++i, ++RealArgIdx) {
6110     CCValAssign &VA = ArgLocs[i];
6111     SDValue Arg = OutVals[RealArgIdx];
6112     ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
6113 
6114     if (Flags.isByVal()) {
6115       // Argument is an aggregate which is passed by value, thus we need to
6116       // create a copy of it in the local variable space of the current stack
6117       // frame (which is the stack frame of the caller) and pass the address of
6118       // this copy to the callee.
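           // (Illustrative example, not from the source: for 'void f(struct S s)',
           // the code below memcpys 's' into the caller's frame and forwards only
           // the address of that copy.)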
6119       assert((j < ByValArgLocs.size()) && "Index out of bounds!");
6120       CCValAssign &ByValVA = ByValArgLocs[j++];
6121       assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
6122 
6123       // Memory reserved in the local variable space of the caller's stack frame.
6124       unsigned LocMemOffset = ByValVA.getLocMemOffset();
6125 
6126       SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6127       PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6128                            StackPtr, PtrOff);
6129 
6130       // Create a copy of the argument in the local area of the current
6131       // stack frame.
6132       SDValue MemcpyCall =
6133         CreateCopyOfByValArgument(Arg, PtrOff,
6134                                   CallSeqStart.getNode()->getOperand(0),
6135                                   Flags, DAG, dl);
6136 
6137       // This must go outside the CALLSEQ_START..END.
6138       SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
6139                                                      SDLoc(MemcpyCall));
6140       DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6141                              NewCallSeqStart.getNode());
6142       Chain = CallSeqStart = NewCallSeqStart;
6143 
6144       // Pass the address of the aggregate copy on the stack either in a
6145       // physical register or in the parameter list area of the current stack
6146       // frame to the callee.
6147       Arg = PtrOff;
6148     }
6149 
6150     // When useCRBits() is true, there can be i1 arguments.
6151     // This is because getRegisterType(MVT::i1) => MVT::i1,
6152     // while for other integer types getRegisterType() => MVT::i32.
6153     // Extend i1 values so the callee always receives an i32.
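         // For example, a C 'bool' lowered as an i1 argument is widened here to
         // an i32 (sign- or zero-extended per its flags) so the argument
         // registers always carry a full word.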
6154     if (Arg.getValueType() == MVT::i1)
6155       Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
6156                         dl, MVT::i32, Arg);
6157 
6158     if (VA.isRegLoc()) {
6159       seenFloatArg |= VA.getLocVT().isFloatingPoint();
6160       // Put argument in a physical register.
6161       if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
6162         bool IsLE = Subtarget.isLittleEndian();
6163         SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6164                         DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
6165         RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
6166         SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6167                            DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
6168         RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
6169                              SVal.getValue(0)));
6170       } else
6171         RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
6172     } else {
6173       // Put argument in the parameter list area of the current stack frame.
6174       assert(VA.isMemLoc());
6175       unsigned LocMemOffset = VA.getLocMemOffset();
6176 
6177       if (!IsTailCall) {
6178         SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6179         PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6180                              StackPtr, PtrOff);
6181 
6182         MemOpChains.push_back(
6183             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
6184       } else {
6185         // Calculate and remember argument location.
6186         CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
6187                                  TailCallArguments);
6188       }
6189     }
6190   }
6191 
6192   if (!MemOpChains.empty())
6193     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6194 
6195   // Build a sequence of copy-to-reg nodes chained together with token chain
6196   // and flag operands which copy the outgoing args into the appropriate regs.
6197   SDValue InGlue;
6198   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6199     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6200                              RegsToPass[i].second, InGlue);
6201     InGlue = Chain.getValue(1);
6202   }
6203 
6204   // Set CR bit 6 to true if this is a vararg call with floating args passed in
6205   // registers.
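       // (Background, as generally understood for the 32-bit SVR4 ABI: CR bit 6
       // tells a varargs callee whether any FPRs carry arguments, so it can skip
       // saving the FPR argument registers in its prologue when the bit is
       // clear.)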
6206   if (IsVarArg) {
6207     SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
6208     SDValue Ops[] = { Chain, InGlue };
6209 
6210     Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
6211                         VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));
6212 
6213     InGlue = Chain.getValue(1);
6214   }
6215 
6216   if (IsTailCall)
6217     PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6218                     TailCallArguments);
6219 
6220   return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6221                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
6222 }
6223 
6224 // Copy an argument into memory, being careful to do this outside the
6225 // call sequence for the call to which the argument belongs.
6226 SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6227     SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6228     SelectionDAG &DAG, const SDLoc &dl) const {
6229   SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
6230                         CallSeqStart.getNode()->getOperand(0),
6231                         Flags, DAG, dl);
6232   // The MEMCPY must go outside the CALLSEQ_START..END.
6233   int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6234   SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6235                                                  SDLoc(MemcpyCall));
6236   DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6237                          NewCallSeqStart.getNode());
6238   return NewCallSeqStart;
6239 }
6240 
6241 SDValue PPCTargetLowering::LowerCall_64SVR4(
6242     SDValue Chain, SDValue Callee, CallFlags CFlags,
6243     const SmallVectorImpl<ISD::OutputArg> &Outs,
6244     const SmallVectorImpl<SDValue> &OutVals,
6245     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6246     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6247     const CallBase *CB) const {
6248   bool isELFv2ABI = Subtarget.isELFv2ABI();
6249   bool isLittleEndian = Subtarget.isLittleEndian();
6250   unsigned NumOps = Outs.size();
6251   bool IsSibCall = false;
6252   bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6253 
6254   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6255   unsigned PtrByteSize = 8;
6256 
6257   MachineFunction &MF = DAG.getMachineFunction();
6258 
6259   if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6260     IsSibCall = true;
6261 
6262   // Mark this function as potentially containing a tail call. As a
6263   // consequence, the frame pointer will be used for dynamic stack allocation
6264   // and for restoring the caller's stack pointer in this function's epilog.
6265   // This is done because a tail-called function might overwrite the value in
6266   // this function's (MF) stack pointer stack slot 0(SP).
6267   if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6268     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6269 
6270   assert(!(IsFastCall && CFlags.IsVarArg) &&
6271          "fastcc not supported on varargs functions");
6272 
6273   // Count how many bytes are to be pushed on the stack, including the linkage
6274   // area, and parameter passing area.  On ELFv1, the linkage area is 48 bytes
6275   // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6276   // area is 32 bytes reserved space for [SP][CR][LR][TOC].
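       // A sketch of the layout implied by those sizes (not quoted verbatim from
       // the ABI documents): ELFv1 keeps the back chain at 0(SP), CR save at 8,
       // LR save at 16, two reserved doublewords at 24 and 32, and the TOC save
       // at 40 (48 bytes total); ELFv2 drops the reserved doublewords, so the
       // TOC save sits at 24 (32 bytes total).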
6277   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6278   unsigned NumBytes = LinkageSize;
6279   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6280 
6281   static const MCPhysReg GPR[] = {
6282     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6283     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6284   };
6285   static const MCPhysReg VR[] = {
6286     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6287     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6288   };
6289 
6290   const unsigned NumGPRs = std::size(GPR);
6291   const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6292   const unsigned NumVRs = std::size(VR);
6293 
6294   // On ELFv2, we can avoid allocating the parameter area if all the arguments
6295   // can be passed to the callee in registers.
6296   // For the fast calling convention, there is another check below.
6297   // Note: This should be kept consistent with LowerFormalArguments_64SVR4().
6298   bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6299   if (!HasParameterArea) {
6300     unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6301     unsigned AvailableFPRs = NumFPRs;
6302     unsigned AvailableVRs = NumVRs;
6303     unsigned NumBytesTmp = NumBytes;
6304     for (unsigned i = 0; i != NumOps; ++i) {
6305       if (Outs[i].Flags.isNest()) continue;
6306       if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6307                                  PtrByteSize, LinkageSize, ParamAreaSize,
6308                                  NumBytesTmp, AvailableFPRs, AvailableVRs))
6309         HasParameterArea = true;
6310     }
6311   }
6312 
6313   // When using the fast calling convention, we don't provide backing for
6314   // arguments that will be in registers.
6315   unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6316 
6317   // Avoid allocating the parameter area for fastcc functions if all the
6318   // arguments can be passed in registers.
6319   if (IsFastCall)
6320     HasParameterArea = false;
6321 
6322   // Add up all the space actually used.
6323   for (unsigned i = 0; i != NumOps; ++i) {
6324     ISD::ArgFlagsTy Flags = Outs[i].Flags;
6325     EVT ArgVT = Outs[i].VT;
6326     EVT OrigVT = Outs[i].ArgVT;
6327 
6328     if (Flags.isNest())
6329       continue;
6330 
6331     if (IsFastCall) {
6332       if (Flags.isByVal()) {
6333         NumGPRsUsed += (Flags.getByValSize()+7)/8;
6334         if (NumGPRsUsed > NumGPRs)
6335           HasParameterArea = true;
6336       } else {
6337         switch (ArgVT.getSimpleVT().SimpleTy) {
6338         default: llvm_unreachable("Unexpected ValueType for argument!");
6339         case MVT::i1:
6340         case MVT::i32:
6341         case MVT::i64:
6342           if (++NumGPRsUsed <= NumGPRs)
6343             continue;
6344           break;
6345         case MVT::v4i32:
6346         case MVT::v8i16:
6347         case MVT::v16i8:
6348         case MVT::v2f64:
6349         case MVT::v2i64:
6350         case MVT::v1i128:
6351         case MVT::f128:
6352           if (++NumVRsUsed <= NumVRs)
6353             continue;
6354           break;
6355         case MVT::v4f32:
6356           if (++NumVRsUsed <= NumVRs)
6357             continue;
6358           break;
6359         case MVT::f32:
6360         case MVT::f64:
6361           if (++NumFPRsUsed <= NumFPRs)
6362             continue;
6363           break;
6364         }
6365         HasParameterArea = true;
6366       }
6367     }
6368 
6369     /* Respect alignment of argument on the stack.  */
6370     auto Alignment =
6371         CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6372     NumBytes = alignTo(NumBytes, Alignment);
6373 
6374     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6375     if (Flags.isInConsecutiveRegsLast())
6376       NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6377   }
6378 
6379   unsigned NumBytesActuallyUsed = NumBytes;
6380 
6381   // In the old ELFv1 ABI, the prolog code of the callee may store up to 8
6382   // GPR argument registers to the stack, allowing va_start to index over
6383   // them in memory if the callee is varargs.  Because we cannot tell if this
6384   // is needed on the caller side, we have to conservatively assume that it
6385   // is.  As such, make sure we have at least enough stack space for the
6386   // caller to store the 8 GPRs.
6387   // In the ELFv2 ABI, we allocate the parameter area iff a callee
6388   // really requires memory operands, e.g. a vararg function.
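       // For instance, with the ELFv1 LinkageSize of 48 and PtrByteSize of 8,
       // the minimum reservation whenever a parameter area exists is
       // 48 + 8 * 8 = 112 bytes.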
6389   if (HasParameterArea)
6390     NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6391   else
6392     NumBytes = LinkageSize;
6393 
6394   // Tail call needs the stack to be aligned.
6395   if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6396     NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6397 
6398   int SPDiff = 0;
6399 
6400   // Calculate by how many bytes the stack has to be adjusted in case of tail
6401   // call optimization.
6402   if (!IsSibCall)
6403     SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6404 
6405   // To protect arguments on the stack from being clobbered in a tail call,
6406   // force all the loads to happen before doing any other lowering.
6407   if (CFlags.IsTailCall)
6408     Chain = DAG.getStackArgumentTokenFactor(Chain);
6409 
6410   // Adjust the stack pointer for the new arguments...
6411   // These operations are automatically eliminated by the prolog/epilog pass
6412   if (!IsSibCall)
6413     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6414   SDValue CallSeqStart = Chain;
6415 
6416   // Load the return address and frame pointer so they can be moved somewhere
6417   // else later.
6418   SDValue LROp, FPOp;
6419   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6420 
6421   // Set up a copy of the stack pointer for use loading and storing any
6422   // arguments that may not fit in the registers available for argument
6423   // passing.
6424   SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6425 
6426   // Figure out which arguments are going to go in registers, and which in
6427   // memory.  Also, if this is a vararg function, floating point arguments
6428   // must be stored to our stack, and loaded into integer regs as well, if
6429   // any integer regs are available for argument passing.
6430   unsigned ArgOffset = LinkageSize;
6431 
6432   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6433   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6434 
6435   SmallVector<SDValue, 8> MemOpChains;
6436   for (unsigned i = 0; i != NumOps; ++i) {
6437     SDValue Arg = OutVals[i];
6438     ISD::ArgFlagsTy Flags = Outs[i].Flags;
6439     EVT ArgVT = Outs[i].VT;
6440     EVT OrigVT = Outs[i].ArgVT;
6441 
6442     // PtrOff will be used to store the current argument to the stack if a
6443     // register cannot be found for it.
6444     SDValue PtrOff;
6445 
6446     // We re-align the argument offset for each argument, except under the fast
6447     // calling convention, where we must make sure we do so only when the
6448     // argument will actually use a stack slot.
6449     auto ComputePtrOff = [&]() {
6450       /* Respect alignment of argument on the stack.  */
6451       auto Alignment =
6452           CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6453       ArgOffset = alignTo(ArgOffset, Alignment);
6454 
6455       PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6456 
6457       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6458     };
6459 
6460     if (!IsFastCall) {
6461       ComputePtrOff();
6462 
6463       /* Compute GPR index associated with argument offset.  */
6464       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6465       GPR_idx = std::min(GPR_idx, NumGPRs);
6466     }
6467 
6468     // Promote integers to 64-bit values.
6469     if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6470       // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6471       unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6472       Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6473     }
6474 
6475     // FIXME memcpy is used way more than necessary.  Correctness first.
6476     // Note: "by value" is code for passing a structure by value, not
6477     // basic types.
6478     if (Flags.isByVal()) {
6479       // Note: Size includes alignment padding, so
6480       //   struct x { short a; char b; }
6481       // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
6482       // These are the proper values we need for right-justifying the
6483       // aggregate in a parameter register.
6484       unsigned Size = Flags.getByValSize();
6485 
6486       // An empty aggregate parameter takes up no storage and no
6487       // registers.
6488       if (Size == 0)
6489         continue;
6490 
6491       if (IsFastCall)
6492         ComputePtrOff();
6493 
6494       // All aggregates smaller than 8 bytes must be passed right-justified.
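           // (E.g. on big-endian targets a 4-byte aggregate passed in a GPR must
           // end up in the low-order 4 bytes of the register, which is what the
           // extending load below produces.)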
6495       if (Size==1 || Size==2 || Size==4) {
6496         EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6497         if (GPR_idx != NumGPRs) {
6498           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6499                                         MachinePointerInfo(), VT);
6500           MemOpChains.push_back(Load.getValue(1));
6501           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6502 
6503           ArgOffset += PtrByteSize;
6504           continue;
6505         }
6506       }
6507 
6508       if (GPR_idx == NumGPRs && Size < 8) {
6509         SDValue AddPtr = PtrOff;
6510         if (!isLittleEndian) {
6511           SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6512                                           PtrOff.getValueType());
6513           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6514         }
6515         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6516                                                           CallSeqStart,
6517                                                           Flags, DAG, dl);
6518         ArgOffset += PtrByteSize;
6519         continue;
6520       }
6521       // Copy the object to the parameter save area if it cannot be entirely
6522       // passed in registers.
6523       // FIXME: we only need to copy the parts which need to be passed in the
6524       // parameter save area. For the parts passed in registers, we don't need
6525       // to copy them to the stack, although we still need to allocate space
6526       // for them in the parameter save area.
6527       if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6528         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6529                                                           CallSeqStart,
6530                                                           Flags, DAG, dl);
6531 
6532       // When a register is available, pass a small aggregate right-justified.
6533       if (Size < 8 && GPR_idx != NumGPRs) {
6534         // The easiest way to get this right-justified in a register
6535         // is to copy the structure into the rightmost portion of a
6536         // local variable slot, then load the whole slot into the
6537         // register.
6538         // FIXME: The memcpy seems to produce pretty awful code for
6539         // small aggregates, particularly for packed ones.
6540         // FIXME: It would be preferable to use the slot in the
6541         // parameter save area instead of a new local variable.
6542         SDValue AddPtr = PtrOff;
6543         if (!isLittleEndian) {
6544           SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6545           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6546         }
6547         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6548                                                           CallSeqStart,
6549                                                           Flags, DAG, dl);
6550 
6551         // Load the slot into the register.
6552         SDValue Load =
6553             DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6554         MemOpChains.push_back(Load.getValue(1));
6555         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6556 
6557         // Done with this argument.
6558         ArgOffset += PtrByteSize;
6559         continue;
6560       }
6561 
6562       // For aggregates larger than PtrByteSize, copy the pieces of the
6563       // object that fit into registers from the parameter save area.
6564       for (unsigned j=0; j<Size; j+=PtrByteSize) {
6565         SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6566         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6567         if (GPR_idx != NumGPRs) {
6568           unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
6569           EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
6570           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
6571                                         MachinePointerInfo(), ObjType);
6572 
6573           MemOpChains.push_back(Load.getValue(1));
6574           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6575           ArgOffset += PtrByteSize;
6576         } else {
6577           ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6578           break;
6579         }
6580       }
6581       continue;
6582     }
6583 
6584     switch (Arg.getSimpleValueType().SimpleTy) {
6585     default: llvm_unreachable("Unexpected ValueType for argument!");
6586     case MVT::i1:
6587     case MVT::i32:
6588     case MVT::i64:
6589       if (Flags.isNest()) {
6590         // The 'nest' parameter, if any, is passed in R11.
6591         RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6592         break;
6593       }
6594 
6595       // These can be scalar arguments or elements of an integer array type
6596       // passed directly.  Clang may use those instead of "byval" aggregate
6597       // types to avoid forcing arguments to memory unnecessarily.
6598       if (GPR_idx != NumGPRs) {
6599         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6600       } else {
6601         if (IsFastCall)
6602           ComputePtrOff();
6603 
6604         assert(HasParameterArea &&
6605                "Parameter area must exist to pass an argument in memory.");
6606         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6607                          true, CFlags.IsTailCall, false, MemOpChains,
6608                          TailCallArguments, dl);
6609         if (IsFastCall)
6610           ArgOffset += PtrByteSize;
6611       }
6612       if (!IsFastCall)
6613         ArgOffset += PtrByteSize;
6614       break;
6615     case MVT::f32:
6616     case MVT::f64: {
6617       // These can be scalar arguments or elements of a float array type
6618       // passed directly.  The latter are used to implement ELFv2 homogeneous
6619       // float aggregates.
6620 
6621       // Named arguments go into FPRs first, and once they overflow, the
6622       // remaining arguments go into GPRs and then the parameter save area.
6623       // Unnamed arguments for vararg functions always go to GPRs and
6624       // then the parameter save area.  For now, put all arguments to vararg
6625       // routines always in both locations (FPR *and* GPR or stack slot).
6626       bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6627       bool NeededLoad = false;
6628 
6629       // First load the argument into the next available FPR.
6630       if (FPR_idx != NumFPRs)
6631         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6632 
6633       // Next, load the argument into GPR or stack slot if needed.
6634       if (!NeedGPROrStack)
6635         ;
6636       else if (GPR_idx != NumGPRs && !IsFastCall) {
6637         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6638         // once we support fp <-> gpr moves.
6639 
6640         // In the non-vararg case, this can only ever happen in the
6641         // presence of f32 array types, since otherwise we never run
6642         // out of FPRs before running out of GPRs.
6643         SDValue ArgVal;
6644 
6645         // Double values are always passed in a single GPR.
6646         if (Arg.getValueType() != MVT::f32) {
6647           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6648 
6649         // Non-array float values are extended and passed in a GPR.
6650         } else if (!Flags.isInConsecutiveRegs()) {
6651           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6652           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6653 
6654         // If we have an array of floats, we collect every odd element
6655         // together with its predecessor into one GPR.
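             // (Illustration: for an f32 array {a0, a1, a2, a3} that has spilled
             // out of FPRs, a0/a1 are packed into one GPR when a1 is reached
             // here, and a2/a3 into the next.)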
6656         } else if (ArgOffset % PtrByteSize != 0) {
6657           SDValue Lo, Hi;
6658           Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6659           Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6660           if (!isLittleEndian)
6661             std::swap(Lo, Hi);
6662           ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6663 
6664         // The final element, if even, goes into the first half of a GPR.
6665         } else if (Flags.isInConsecutiveRegsLast()) {
6666           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6667           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6668           if (!isLittleEndian)
6669             ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6670                                  DAG.getConstant(32, dl, MVT::i32));
6671 
6672         // Non-final even elements are skipped; they will be handled
6673         // together with the subsequent argument on the next go-around.
6674         } else
6675           ArgVal = SDValue();
6676 
6677         if (ArgVal.getNode())
6678           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6679       } else {
6680         if (IsFastCall)
6681           ComputePtrOff();
6682 
6683         // Single-precision floating-point values are mapped to the
6684         // second (rightmost) word of the stack doubleword.
6685         if (Arg.getValueType() == MVT::f32 &&
6686             !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6687           SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6688           PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6689         }
6690 
6691         assert(HasParameterArea &&
6692                "Parameter area must exist to pass an argument in memory.");
6693         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6694                          true, CFlags.IsTailCall, false, MemOpChains,
6695                          TailCallArguments, dl);
6696 
6697         NeededLoad = true;
6698       }
6699       // When passing an array of floats, the array occupies consecutive
6700       // space in the argument area; only round up to the next doubleword
6701       // at the end of the array.  Otherwise, each float takes 8 bytes.
6702       if (!IsFastCall || NeededLoad) {
6703         ArgOffset += (Arg.getValueType() == MVT::f32 &&
6704                       Flags.isInConsecutiveRegs()) ? 4 : 8;
6705         if (Flags.isInConsecutiveRegsLast())
6706           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6707       }
6708       break;
6709     }
6710     case MVT::v4f32:
6711     case MVT::v4i32:
6712     case MVT::v8i16:
6713     case MVT::v16i8:
6714     case MVT::v2f64:
6715     case MVT::v2i64:
6716     case MVT::v1i128:
6717     case MVT::f128:
6718       // These can be scalar arguments or elements of a vector array type
6719       // passed directly.  The latter are used to implement ELFv2 homogeneous
6720       // vector aggregates.
6721 
6722       // For a varargs call, named arguments go into VRs or on the stack as
6723       // usual; unnamed arguments always go to the stack or the corresponding
6724       // GPRs when within range.  For now, we always put the value in both
6725       // locations (or even all three).
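           // (The store/reload sequence below keeps those copies consistent: the
           // value is first stored to the parameter save area, then reloaded into
           // a VR and/or GPRs if any remain.)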
6726       if (CFlags.IsVarArg) {
6727         assert(HasParameterArea &&
6728                "Parameter area must exist if we have a varargs call.");
6729         // We could elide this store in the case where the object fits
6730         // entirely in R registers.  Maybe later.
6731         SDValue Store =
6732             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6733         MemOpChains.push_back(Store);
6734         if (VR_idx != NumVRs) {
6735           SDValue Load =
6736               DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6737           MemOpChains.push_back(Load.getValue(1));
6738           RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6739         }
6740         ArgOffset += 16;
6741         for (unsigned i=0; i<16; i+=PtrByteSize) {
6742           if (GPR_idx == NumGPRs)
6743             break;
6744           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6745                                    DAG.getConstant(i, dl, PtrVT));
6746           SDValue Load =
6747               DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6748           MemOpChains.push_back(Load.getValue(1));
6749           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6750         }
6751         break;
6752       }
6753 
6754       // Non-varargs Altivec params go into VRs or on the stack.
6755       if (VR_idx != NumVRs) {
6756         RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6757       } else {
6758         if (IsFastCall)
6759           ComputePtrOff();
6760 
6761         assert(HasParameterArea &&
6762                "Parameter area must exist to pass an argument in memory.");
6763         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6764                          true, CFlags.IsTailCall, true, MemOpChains,
6765                          TailCallArguments, dl);
6766         if (IsFastCall)
6767           ArgOffset += 16;
6768       }
6769 
6770       if (!IsFastCall)
6771         ArgOffset += 16;
6772       break;
6773     }
6774   }
6775 
6776   assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6777          "mismatch in size of parameter area");
6778   (void)NumBytesActuallyUsed;
6779 
6780   if (!MemOpChains.empty())
6781     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6782 
6783   // Check if this is an indirect call (MTCTR/BCTRL).
6784   // See prepareDescriptorIndirectCall and buildCallOperands for more
6785   // information about calls through function pointers in the 64-bit SVR4 ABI.
6786   if (CFlags.IsIndirect) {
6787     // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6788     // caller in the TOC save area.
6789     if (isTOCSaveRestoreRequired(Subtarget)) {
6790       assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
6791       // Load r2 into a virtual register and store it to the TOC save area.
6792       setUsesTOCBasePtr(DAG);
6793       SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6794       // TOC save area offset.
6795       unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6796       SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6797       SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6798       Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6799                            MachinePointerInfo::getStack(
6800                                DAG.getMachineFunction(), TOCSaveOffset));
6801     }
6802     // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6803     // This does not mean the MTCTR instruction must use R12; it's easier
6804     // to model this as an extra parameter, so do that.
6805     if (isELFv2ABI && !CFlags.IsPatchPoint)
6806       RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6807   }
6808 
6809   // Build a sequence of copy-to-reg nodes chained together with token chain
6810   // and flag operands which copy the outgoing args into the appropriate regs.
6811   SDValue InGlue;
6812   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6813     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6814                              RegsToPass[i].second, InGlue);
6815     InGlue = Chain.getValue(1);
6816   }
6817 
6818   if (CFlags.IsTailCall && !IsSibCall)
6819     PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6820                     TailCallArguments);
6821 
6822   return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6823                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
6824 }
6825 
6826 // Returns true when the shadow of a general purpose argument register
6827 // in the parameter save area is aligned to at least 'RequiredAlign'.
6828 static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6829   assert(RequiredAlign.value() <= 16 &&
6830          "Required alignment greater than stack alignment.");
6831   switch (Reg) {
6832   default:
6833     report_fatal_error("called on invalid register.");
6834   case PPC::R5:
6835   case PPC::R9:
6836   case PPC::X3:
6837   case PPC::X5:
6838   case PPC::X7:
6839   case PPC::X9:
6840     // These registers are 16-byte aligned, which is the strictest alignment
6841     // we can support.
6842     return true;
6843   case PPC::R3:
6844   case PPC::R7:
6845   case PPC::X4:
6846   case PPC::X6:
6847   case PPC::X8:
6848   case PPC::X10:
6849     // The shadow of these registers in the PSA is 8 byte aligned.
6850     return RequiredAlign <= 8;
6851   case PPC::R4:
6852   case PPC::R6:
6853   case PPC::R8:
6854   case PPC::R10:
6855     return RequiredAlign <= 4;
6856   }
6857 }
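     // For example, under these rules a 16-byte-aligned vararg vector can only
     // begin its GPR shadow at R5/R9 (32-bit) or X3/X5/X7/X9 (64-bit); CC_AIX
     // below burns underaligned registers until such a register is reached.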
6858 
6859 static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6860                    CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6861                    CCState &S) {
6862   AIXCCState &State = static_cast<AIXCCState &>(S);
6863   const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6864       State.getMachineFunction().getSubtarget());
6865   const bool IsPPC64 = Subtarget.isPPC64();
6866   const unsigned PtrSize = IsPPC64 ? 8 : 4;
6867   const Align PtrAlign(PtrSize);
6868   const Align StackAlign(16);
6869   const MVT RegVT = Subtarget.getScalarIntVT();
6870 
6871   if (ValVT == MVT::f128)
6872     report_fatal_error("f128 is unimplemented on AIX.");
6873 
6874   if (ArgFlags.isNest())
6875     report_fatal_error("Nest arguments are unimplemented.");
6876 
6877   static const MCPhysReg GPR_32[] = {// 32-bit registers.
6878                                      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6879                                      PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6880   static const MCPhysReg GPR_64[] = {// 64-bit registers.
6881                                      PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6882                                      PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6883 
6884   static const MCPhysReg VR[] = {// Vector registers.
6885                                  PPC::V2,  PPC::V3,  PPC::V4,  PPC::V5,
6886                                  PPC::V6,  PPC::V7,  PPC::V8,  PPC::V9,
6887                                  PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6888 
6889   const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6890 
6891   if (ArgFlags.isByVal()) {
6892     const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
6893     if (ByValAlign > StackAlign)
6894       report_fatal_error("Pass-by-value arguments with alignment greater than "
6895                          "16 are not supported.");
6896 
6897     const unsigned ByValSize = ArgFlags.getByValSize();
6898     const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;
6899 
6900     // An empty aggregate parameter takes up no storage and no registers,
6901     // but needs a MemLoc for a stack slot for the formal arguments side.
6902     if (ByValSize == 0) {
6903       State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6904                                        State.getStackSize(), RegVT, LocInfo));
6905       return false;
6906     }
6907 
6908     // Shadow allocate any registers that are not properly aligned.
6909     unsigned NextReg = State.getFirstUnallocated(GPRs);
6910     while (NextReg != GPRs.size() &&
6911            !isGPRShadowAligned(GPRs[NextReg], ObjAlign)) {
6912       // Shadow allocate the next register since its alignment is not strict enough.
6913       MCRegister Reg = State.AllocateReg(GPRs);
6914       // Allocate the stack space shadowed by said register.
6915       State.AllocateStack(PtrSize, PtrAlign);
6916       assert(Reg && "Allocating register unexpectedly failed.");
6917       (void)Reg;
6918       NextReg = State.getFirstUnallocated(GPRs);
6919     }
6920 
6921     const unsigned StackSize = alignTo(ByValSize, ObjAlign);
6922     unsigned Offset = State.AllocateStack(StackSize, ObjAlign);
6923     for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
6924       if (MCRegister Reg = State.AllocateReg(GPRs))
6925         State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6926       else {
6927         State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6928                                          Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
6929                                          LocInfo));
6930         break;
6931       }
6932     }
6933     return false;
6934   }
6935 
6936   // Arguments always reserve space in the parameter save area.
6937   switch (ValVT.SimpleTy) {
6938   default:
6939     report_fatal_error("Unhandled value type for argument.");
6940   case MVT::i64:
6941     // i64 arguments should have been split to i32 for PPC32.
6942     assert(IsPPC64 && "PPC32 should have split i64 values.");
6943     [[fallthrough]];
6944   case MVT::i1:
6945   case MVT::i32: {
6946     const unsigned Offset = State.AllocateStack(PtrSize, PtrAlign);
6947     // AIX integer arguments are always passed in register width.
6948     if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6949       LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6950                                   : CCValAssign::LocInfo::ZExt;
6951     if (MCRegister Reg = State.AllocateReg(GPRs))
6952       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6953     else
6954       State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6955 
6956     return false;
6957   }
6958   case MVT::f32:
6959   case MVT::f64: {
6960     // Parameter save area (PSA) is reserved even if the float passes in fpr.
6961     const unsigned StoreSize = LocVT.getStoreSize();
6962     // Floats are always 4-byte aligned in the PSA on AIX.
6963     // This includes f64 in 64-bit mode for ABI compatibility.
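         // In 64-bit mode each float argument therefore reserves a full 8-byte
         // PSA slot (even f32); in 32-bit mode the slot matches the store size
         // (4 bytes for f32, 8 for f64).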
6964     const unsigned Offset =
6965         State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6966     MCRegister FReg = State.AllocateReg(FPR);
6967     if (FReg)
6968       State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6969 
6970     // Reserve and initialize GPRs or initialize the PSA as required.
6971     for (unsigned I = 0; I < StoreSize; I += PtrSize) {
6972       if (MCRegister Reg = State.AllocateReg(GPRs)) {
6973         assert(FReg && "An FPR should be available when a GPR is reserved.");
6974         if (State.isVarArg()) {
6975           // Successfully reserved GPRs are only initialized for vararg calls.
6976           // Custom handling is required for:
6977           //   f64 in PPC32 needs to be split into 2 GPRs.
6978           //   f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6979           State.addLoc(
6980               CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6981         }
6982       } else {
6983         // If there are insufficient GPRs, the PSA needs to be initialized.
6984         // Initialization occurs even if an FPR was initialized for
6985         // compatibility with the AIX XL compiler. The full memory for the
6986         // argument will be initialized even if a prior word is saved in GPR.
6987         // A custom memLoc is used when the argument also passes in FPR so
6988         // that the callee handling can skip over it easily.
6989         State.addLoc(
6990             FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6991                                              LocInfo)
6992                  : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6993         break;
6994       }
6995     }
6996 
6997     return false;
6998   }
6999   case MVT::v4f32:
7000   case MVT::v4i32:
7001   case MVT::v8i16:
7002   case MVT::v16i8:
7003   case MVT::v2i64:
7004   case MVT::v2f64:
7005   case MVT::v1i128: {
7006     const unsigned VecSize = 16;
7007     const Align VecAlign(VecSize);
7008 
7009     if (!State.isVarArg()) {
7010       // If there are vector registers remaining we don't consume any stack
7011       // space.
7012       if (MCRegister VReg = State.AllocateReg(VR)) {
7013         State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
7014         return false;
7015       }
7016       // Vectors passed on the stack do not shadow GPRs or FPRs even though they
7017       // might be allocated in the portion of the PSA that is shadowed by the
7018       // GPRs.
7019       const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7020       State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7021       return false;
7022     }
7023 
7024     unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
7025     // Burn any underaligned registers and their shadowed stack space until
7026     // we reach the required alignment.
7027     while (NextRegIndex != GPRs.size() &&
7028            !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
7029       // Shadow allocate register and its stack shadow.
7030       MCRegister Reg = State.AllocateReg(GPRs);
7031       State.AllocateStack(PtrSize, PtrAlign);
7032       assert(Reg && "Allocating register unexpectedly failed.");
7033       (void)Reg;
7034       NextRegIndex = State.getFirstUnallocated(GPRs);
7035     }
7036 
7037     // Vectors that are passed as fixed arguments are handled differently.
7038     // They are passed in VRs if any are available (unlike arguments passed
7039     // through an ellipsis) and shadow GPRs (unlike arguments to non-vararg
7040     // functions).
7041     if (State.isFixed(ValNo)) {
7042       if (MCRegister VReg = State.AllocateReg(VR)) {
7043         State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
7044         // Shadow allocate GPRs and stack space even though we pass in a VR.
7045         for (unsigned I = 0; I != VecSize; I += PtrSize)
7046           State.AllocateReg(GPRs);
7047         State.AllocateStack(VecSize, VecAlign);
7048         return false;
7049       }
7050       // No vector registers remain so pass on the stack.
7051       const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7052       State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7053       return false;
7054     }
7055 
7056     // If all GPRs are consumed then we pass the argument fully on the stack.
7057     if (NextRegIndex == GPRs.size()) {
7058       const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7059       State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7060       return false;
7061     }
7062 
7063     // Corner case for 32-bit codegen. We have 2 registers to pass the first
7064     // half of the argument, and then need to pass the remaining half on the
7065     // stack.
7066     if (GPRs[NextRegIndex] == PPC::R9) {
7067       const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7068       State.addLoc(
7069           CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7070 
7071       const MCRegister FirstReg = State.AllocateReg(PPC::R9);
7072       const MCRegister SecondReg = State.AllocateReg(PPC::R10);
7073       assert(FirstReg && SecondReg &&
7074              "Allocating R9 or R10 unexpectedly failed.");
7075       State.addLoc(
7076           CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
7077       State.addLoc(
7078           CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
7079       return false;
7080     }
7081 
7082     // We have enough GPRs to fully pass the vector argument, and we have
7083     // already consumed any underaligned registers. Start with the custom
7084     // MemLoc and then the custom RegLocs.
7085     const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7086     State.addLoc(
7087         CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7088     for (unsigned I = 0; I != VecSize; I += PtrSize) {
7089       const MCRegister Reg = State.AllocateReg(GPRs);
7090       assert(Reg && "Failed to allocate register for vararg vector argument");
7091       State.addLoc(
7092           CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7093     }
7094     return false;
7095   }
7096   }
7097   return true;
7098 }
7099 
7100 // So far, this function is only used by LowerFormalArguments_AIX()
7101 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
7102                                                     bool IsPPC64,
7103                                                     bool HasP8Vector,
7104                                                     bool HasVSX) {
7105   assert((IsPPC64 || SVT != MVT::i64) &&
7106          "i64 should have been split for 32-bit codegen.");
7107 
7108   switch (SVT) {
7109   default:
7110     report_fatal_error("Unexpected value type for formal argument");
7111   case MVT::i1:
7112   case MVT::i32:
7113   case MVT::i64:
7114     return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7115   case MVT::f32:
7116     return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
7117   case MVT::f64:
7118     return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
7119   case MVT::v4f32:
7120   case MVT::v4i32:
7121   case MVT::v8i16:
7122   case MVT::v16i8:
7123   case MVT::v2i64:
7124   case MVT::v2f64:
7125   case MVT::v1i128:
7126     return &PPC::VRRCRegClass;
7127   }
7128 }
7129 
7130 static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
7131                                         SelectionDAG &DAG, SDValue ArgValue,
7132                                         MVT LocVT, const SDLoc &dl) {
7133   assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7134   assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7135 
7136   if (Flags.isSExt())
7137     ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7138                            DAG.getValueType(ValVT));
7139   else if (Flags.isZExt())
7140     ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7141                            DAG.getValueType(ValVT));
7142 
7143   return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7144 }
7145 
7146 static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7147   const unsigned LASize = FL->getLinkageSize();
7148 
7149   if (PPC::GPRCRegClass.contains(Reg)) {
7150     assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7151            "Reg must be a valid argument register!");
7152     return LASize + 4 * (Reg - PPC::R3);
7153   }
7154 
7155   if (PPC::G8RCRegClass.contains(Reg)) {
7156     assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7157            "Reg must be a valid argument register!");
7158     return LASize + 8 * (Reg - PPC::X3);
7159   }
7160 
7161   llvm_unreachable("Only general purpose registers expected.");
7162 }
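     // Worked example (assuming the usual AIX linkage sizes of 24 bytes in
     // 32-bit mode and 48 bytes in 64-bit mode, which this file does not spell
     // out): R5 maps to 24 + 4 * (R5 - R3) = 32, and X5 maps to
     // 48 + 8 * (X5 - X3) = 64.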
7163 
7164 //   AIX ABI Stack Frame Layout:
7165 //
7166 //   Low Memory +--------------------------------------------+
7167 //   SP   +---> | Back chain                                 | ---+
7168 //        |     +--------------------------------------------+    |
7169 //        |     | Saved Condition Register                   |    |
7170 //        |     +--------------------------------------------+    |
7171 //        |     | Saved Linkage Register                     |    |
7172 //        |     +--------------------------------------------+    | Linkage Area
7173 //        |     | Reserved for compilers                     |    |
7174 //        |     +--------------------------------------------+    |
7175 //        |     | Reserved for binders                       |    |
7176 //        |     +--------------------------------------------+    |
7177 //        |     | Saved TOC pointer                          | ---+
7178 //        |     +--------------------------------------------+
7179 //        |     | Parameter save area                        |
7180 //        |     +--------------------------------------------+
7181 //        |     | Alloca space                               |
7182 //        |     +--------------------------------------------+
7183 //        |     | Local variable space                       |
7184 //        |     +--------------------------------------------+
7185 //        |     | Float/int conversion temporary             |
7186 //        |     +--------------------------------------------+
7187 //        |     | Save area for AltiVec registers            |
7188 //        |     +--------------------------------------------+
7189 //        |     | AltiVec alignment padding                  |
7190 //        |     +--------------------------------------------+
7191 //        |     | Save area for VRSAVE register              |
7192 //        |     +--------------------------------------------+
7193 //        |     | Save area for General Purpose registers    |
7194 //        |     +--------------------------------------------+
7195 //        |     | Save area for Floating Point registers     |
7196 //        |     +--------------------------------------------+
7197 //        +---- | Back chain                                 |
7198 // High Memory  +--------------------------------------------+
7199 //
7200 //  Specifications:
7201 //  AIX 7.2 Assembler Language Reference
7202 //  Subroutine linkage convention
7203 
7204 SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7205     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7206     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7207     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7208 
7209   assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7210           CallConv == CallingConv::Fast) &&
7211          "Unexpected calling convention!");
7212 
7213   if (getTargetMachine().Options.GuaranteedTailCallOpt)
7214     report_fatal_error("Tail call support is unimplemented on AIX.");
7215 
7216   if (useSoftFloat())
7217     report_fatal_error("Soft float support is unimplemented on AIX.");
7218 
7219   const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7220 
7221   const bool IsPPC64 = Subtarget.isPPC64();
7222   const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7223 
7224   // Assign locations to all of the incoming arguments.
7225   SmallVector<CCValAssign, 16> ArgLocs;
7226   MachineFunction &MF = DAG.getMachineFunction();
7227   MachineFrameInfo &MFI = MF.getFrameInfo();
7228   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7229   AIXCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7230 
7231   const EVT PtrVT = getPointerTy(MF.getDataLayout());
7232   // Reserve space for the linkage area on the stack.
7233   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7234   CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7235   uint64_t SaveStackPos = CCInfo.getStackSize();
7236   bool SaveParams = MF.getFunction().hasFnAttribute("save-reg-params");
7237   CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7238 
7239   SmallVector<SDValue, 8> MemOps;
7240 
7241   for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7242     CCValAssign &VA = ArgLocs[I++];
7243     MVT LocVT = VA.getLocVT();
7244     MVT ValVT = VA.getValVT();
7245     ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7246 
7247     EVT ArgVT = Ins[VA.getValNo()].ArgVT;
7248     bool ArgSignExt = Ins[VA.getValNo()].Flags.isSExt();
7249     // For compatibility with the AIX XL compiler, the float args in the
7250     // parameter save area are initialized even if the argument is available
7251     // in a register.  The caller is required to initialize both the register
7252     // and memory; the callee, however, can choose to expect it in either.
7253     // The memloc is dismissed here because the argument is retrieved from
7254     // the register.
7255     if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
7256       continue;
7257 
7258     if (SaveParams && VA.isRegLoc() && !Flags.isByVal() && !VA.needsCustom()) {
7259       const TargetRegisterClass *RegClass = getRegClassForSVT(
7260           LocVT.SimpleTy, IsPPC64, Subtarget.hasP8Vector(), Subtarget.hasVSX());
7261       // On PPC64, the debugger assumes extended 8-byte values are stored from a GPR.
7262       MVT SaveVT = RegClass == &PPC::G8RCRegClass ? MVT::i64 : LocVT;
7263       const Register VReg = MF.addLiveIn(VA.getLocReg(), RegClass);
7264       SDValue Parm = DAG.getCopyFromReg(Chain, dl, VReg, SaveVT);
7265       int FI = MFI.CreateFixedObject(SaveVT.getStoreSize(), SaveStackPos, true);
7266       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7267       SDValue StoreReg = DAG.getStore(Chain, dl, Parm, FIN,
7268                                       MachinePointerInfo(), Align(PtrByteSize));
7269       SaveStackPos = alignTo(SaveStackPos + SaveVT.getStoreSize(), PtrByteSize);
7270       MemOps.push_back(StoreReg);
7271     }
7272 
7273     if (SaveParams && (VA.isMemLoc() || Flags.isByVal()) && !VA.needsCustom()) {
7274       unsigned StoreSize =
7275           Flags.isByVal() ? Flags.getByValSize() : LocVT.getStoreSize();
7276       SaveStackPos = alignTo(SaveStackPos + StoreSize, PtrByteSize);
7277     }
7278 
7279     auto HandleMemLoc = [&]() {
7280       const unsigned LocSize = LocVT.getStoreSize();
7281       const unsigned ValSize = ValVT.getStoreSize();
7282       assert((ValSize <= LocSize) &&
7283              "Object size is larger than size of MemLoc");
7284       int CurArgOffset = VA.getLocMemOffset();
7285       // Objects are right-justified because AIX is big-endian.
7286       if (LocSize > ValSize)
7287         CurArgOffset += LocSize - ValSize;
7288       // Potential tail calls could cause overwriting of argument stack slots.
7289       const bool IsImmutable =
7290           !(getTargetMachine().Options.GuaranteedTailCallOpt &&
7291             (CallConv == CallingConv::Fast));
7292       int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7293       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7294       SDValue ArgValue =
7295           DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7296 
7297       // While the ABI specifies the argument type is (sign or zero) extended
7298       // out to register width, not all code is compliant. We truncate and
7299       // re-extend to be more forgiving of these callers when the argument type
7300       // is smaller than register width.
7301       if (!ArgVT.isVector() && !ValVT.isVector() && ArgVT.isInteger() &&
7302           ValVT.isInteger() &&
7303           ArgVT.getScalarSizeInBits() < ValVT.getScalarSizeInBits()) {
7304         SDValue ArgValueTrunc = DAG.getNode(
7305             ISD::TRUNCATE, dl, ArgVT.getSimpleVT() == MVT::i1 ? MVT::i8 : ArgVT,
7306             ArgValue);
7307         SDValue ArgValueExt =
7308             ArgSignExt ? DAG.getSExtOrTrunc(ArgValueTrunc, dl, ValVT)
7309                        : DAG.getZExtOrTrunc(ArgValueTrunc, dl, ValVT);
7310         InVals.push_back(ArgValueExt);
7311       } else {
7312         InVals.push_back(ArgValue);
7313       }
7314     };
7315 
7316     // Vector arguments to VarArg functions are passed both on the stack and
7317     // in any available GPRs. Load the value from the stack and add the GPRs
7318     // as live ins.
7319     if (VA.isMemLoc() && VA.needsCustom()) {
7320       assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
7321       assert(isVarArg && "Only use custom memloc for vararg.");
7322       // Remember the ValNo of the custom MemLoc so we can compare it to the
7323       // ValNo of the matching custom RegLocs.
7324       const unsigned OriginalValNo = VA.getValNo();
7325       (void)OriginalValNo;
7326 
7327       auto HandleCustomVecRegLoc = [&]() {
7328         assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7329                "Missing custom RegLoc.");
7330         VA = ArgLocs[I++];
7331         assert(VA.getValVT().isVector() &&
7332                "Unexpected Val type for custom RegLoc.");
7333         assert(VA.getValNo() == OriginalValNo &&
7334                "ValNo mismatch between custom MemLoc and RegLoc.");
7335         MVT::SimpleValueType SVT = VA.getLocVT().SimpleTy;
7336         MF.addLiveIn(VA.getLocReg(),
7337                      getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7338                                        Subtarget.hasVSX()));
7339       };
7340 
7341       HandleMemLoc();
7342       // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7343       // 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7344       // R10.
7345       HandleCustomVecRegLoc();
7346       HandleCustomVecRegLoc();
7347 
7348       // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7349       // we passed the vector in R5, R6, R7 and R8.
7350       if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7351         assert(!IsPPC64 &&
7352                "Only 2 custom RegLocs expected for 64-bit codegen.");
7353         HandleCustomVecRegLoc();
7354         HandleCustomVecRegLoc();
7355       }
7356 
7357       continue;
7358     }
7359 
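    // Record the kind of each register argument in PPCFunctionInfo; on AIX
    // this information feeds the traceback table emitted for the function.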
7360     if (VA.isRegLoc()) {
7361       if (VA.getValVT().isScalarInteger())
7362         FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
7363       else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7364         switch (VA.getValVT().SimpleTy) {
7365         default:
7366           report_fatal_error("Unhandled value type for argument.");
7367         case MVT::f32:
7368           FuncInfo->appendParameterType(PPCFunctionInfo::ShortFloatingPoint);
7369           break;
7370         case MVT::f64:
7371           FuncInfo->appendParameterType(PPCFunctionInfo::LongFloatingPoint);
7372           break;
7373         }
7374       } else if (VA.getValVT().isVector()) {
7375         switch (VA.getValVT().SimpleTy) {
7376         default:
7377           report_fatal_error("Unhandled value type for argument.");
7378         case MVT::v16i8:
7379           FuncInfo->appendParameterType(PPCFunctionInfo::VectorChar);
7380           break;
7381         case MVT::v8i16:
7382           FuncInfo->appendParameterType(PPCFunctionInfo::VectorShort);
7383           break;
7384         case MVT::v4i32:
7385         case MVT::v2i64:
7386         case MVT::v1i128:
7387           FuncInfo->appendParameterType(PPCFunctionInfo::VectorInt);
7388           break;
7389         case MVT::v4f32:
7390         case MVT::v2f64:
7391           FuncInfo->appendParameterType(PPCFunctionInfo::VectorFloat);
7392           break;
7393         }
7394       }
7395     }
7396 
7397     if (Flags.isByVal() && VA.isMemLoc()) {
7398       const unsigned Size =
7399           alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7400                   PtrByteSize);
7401       const int FI = MF.getFrameInfo().CreateFixedObject(
7402           Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7403           /* IsAliased */ true);
7404       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7405       InVals.push_back(FIN);
7406 
7407       continue;
7408     }
7409 
7410     if (Flags.isByVal()) {
7411       assert(VA.isRegLoc() && "MemLocs should already be handled.");
7412 
7413       const MCPhysReg ArgReg = VA.getLocReg();
7414       const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7415 
7416       const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7417       const int FI = MF.getFrameInfo().CreateFixedObject(
7418           StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7419           /* IsAliased */ true);
7420       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7421       InVals.push_back(FIN);
7422 
7423       // Add live ins for all the RegLocs for the same ByVal.
7424       const TargetRegisterClass *RegClass =
7425           IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7426 
7427       auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7428                                                unsigned Offset) {
7429         const Register VReg = MF.addLiveIn(PhysReg, RegClass);
7430         // Since the caller's side has left-justified the aggregate in the
7431         // register, we can simply store the entire register into the stack
7432         // slot.
7433         SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7434         // The store to the fixed stack object is needed because accessing a
7435         // field of the ByVal will use a gep and load. Ideally we will optimize
7436         // to extract the value from the register directly, and elide the
7437         // stores when the argument's address is not taken, but that will need
7438         // to be future work.
7439         SDValue Store = DAG.getStore(
7440             CopyFrom.getValue(1), dl, CopyFrom,
7441             DAG.getObjectPtrOffset(dl, FIN, TypeSize::getFixed(Offset)),
7442             MachinePointerInfo::getFixedStack(MF, FI, Offset));
7443 
7444         MemOps.push_back(Store);
7445       };
7446 
7447       unsigned Offset = 0;
7448       HandleRegLoc(VA.getLocReg(), Offset);
7449       Offset += PtrByteSize;
7450       for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7451            Offset += PtrByteSize) {
7452         assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7453                "RegLocs should be for ByVal argument.");
7454 
7455         const CCValAssign RL = ArgLocs[I++];
7456         HandleRegLoc(RL.getLocReg(), Offset);
7457         FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
7458       }
7459 
7460       if (Offset != StackSize) {
7461         assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7462                "Expected MemLoc for remaining bytes.");
7463         assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7464         // Consume the MemLoc. The InVal has already been emitted, so nothing
7465         // more needs to be done.
7466         ++I;
7467       }
7468 
7469       continue;
7470     }
7471 
7472     if (VA.isRegLoc() && !VA.needsCustom()) {
7473       MVT::SimpleValueType SVT = ValVT.SimpleTy;
7474       Register VReg =
7475           MF.addLiveIn(VA.getLocReg(),
7476                        getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7477                                          Subtarget.hasVSX()));
7478       SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7479       if (ValVT.isScalarInteger() &&
7480           (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7481         ArgValue =
7482             truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7483       }
7484       InVals.push_back(ArgValue);
7485       continue;
7486     }
7487     if (VA.isMemLoc()) {
7488       HandleMemLoc();
7489       continue;
7490     }
7491   }
7492 
7493   // On AIX a minimum of 8 words is saved to the parameter save area.
7494   const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7495   // Area that is at least reserved in the caller of this function.
7496   unsigned CallerReservedArea = std::max<unsigned>(
7497       CCInfo.getStackSize(), LinkageSize + MinParameterSaveArea);
7498 
7499   // Set the size that is at least reserved in the caller of this function. A
7500   // tail-call-optimized function's reserved stack space needs to be aligned so
7501   // that taking the difference between two stack areas will result in an
7502   // aligned stack.
7503   CallerReservedArea =
7504       EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7505   FuncInfo->setMinReservedArea(CallerReservedArea);
7506 
7507   if (isVarArg) {
7508     FuncInfo->setVarArgsFrameIndex(
7509         MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(), true));
7510     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
7511 
7512     static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7513                                        PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7514 
7515     static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7516                                        PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7517     const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7518 
7519     // The fixed integer arguments of a variadic function are stored to the
7520     // VarArgsFrameIndex on the stack so that they may be loaded by
7521     // dereferencing the result of va_next.
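    // The starting GPR index skips the registers already claimed by named
    // arguments: each parameter save area slot consumed past the linkage area
    // corresponds to one GPR.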
7522     for (unsigned GPRIndex =
7523              (CCInfo.getStackSize() - LinkageSize) / PtrByteSize;
7524          GPRIndex < NumGPArgRegs; ++GPRIndex) {
7525 
7526       const Register VReg =
7527           IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7528                   : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7529 
7530       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7531       SDValue Store =
7532           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
7533       MemOps.push_back(Store);
7534       // Increment the address for the next argument to store.
7535       SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7536       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7537     }
7538   }
7539 
7540   if (!MemOps.empty())
7541     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7542 
7543   return Chain;
7544 }
7545 
7546 SDValue PPCTargetLowering::LowerCall_AIX(
7547     SDValue Chain, SDValue Callee, CallFlags CFlags,
7548     const SmallVectorImpl<ISD::OutputArg> &Outs,
7549     const SmallVectorImpl<SDValue> &OutVals,
7550     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7551     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7552     const CallBase *CB) const {
7553   // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7554   // AIX ABI stack frame layout.
7555 
7556   assert((CFlags.CallConv == CallingConv::C ||
7557           CFlags.CallConv == CallingConv::Cold ||
7558           CFlags.CallConv == CallingConv::Fast) &&
7559          "Unexpected calling convention!");
7560 
7561   if (CFlags.IsPatchPoint)
7562     report_fatal_error("This call type is unimplemented on AIX.");
7563 
7564   const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7565 
7566   MachineFunction &MF = DAG.getMachineFunction();
7567   SmallVector<CCValAssign, 16> ArgLocs;
7568   AIXCCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7569                     *DAG.getContext());
7570 
7571   // Reserve space for the linkage save area (LSA) on the stack.
7572   // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7573   //   [SP][CR][LR][2 x reserved][TOC].
7574   // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7575   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7576   const bool IsPPC64 = Subtarget.isPPC64();
7577   const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7578   const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7579   CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7580   CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7581 
7582   // The prolog code of the callee may store up to 8 GPR argument registers to
7583   // the stack, allowing va_start to index over them in memory if the callee
7584   // is variadic.
7585   // Because we cannot tell if this is needed on the caller side, we have to
7586   // conservatively assume that it is needed.  As such, make sure we have at
7587   // least enough stack space for the caller to store the 8 GPRs.
7588   const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7589   const unsigned NumBytes = std::max<unsigned>(
7590       LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
7591 
7592   // Adjust the stack pointer for the new arguments...
7593   // These operations are automatically eliminated by the prolog/epilog pass.
7594   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7595   SDValue CallSeqStart = Chain;
7596 
7597   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7598   SmallVector<SDValue, 8> MemOpChains;
7599 
7600   // Set up a copy of the stack pointer for loading and storing any
7601   // arguments that may not fit in the registers available for argument
7602   // passing.
7603   const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7604                                    : DAG.getRegister(PPC::R1, MVT::i32);
7605 
7606   for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7607     const unsigned ValNo = ArgLocs[I].getValNo();
7608     SDValue Arg = OutVals[ValNo];
7609     ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7610 
7611     if (Flags.isByVal()) {
7612       const unsigned ByValSize = Flags.getByValSize();
7613 
7614       // Nothing to do for zero-sized ByVals on the caller side.
7615       if (!ByValSize) {
7616         ++I;
7617         continue;
7618       }
7619 
7620       auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7621         return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
7622                               (LoadOffset != 0)
7623                                   ? DAG.getObjectPtrOffset(
7624                                         dl, Arg, TypeSize::getFixed(LoadOffset))
7625                                   : Arg,
7626                               MachinePointerInfo(), VT);
7627       };
7628 
7629       unsigned LoadOffset = 0;
7630 
7631       // Initialize the registers that are fully occupied by the by-val argument.
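      // For example, on 64-bit a 12-byte by-val fills one full GPR here and
      // leaves a 4-byte residue for the code below.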
7632       while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7633         SDValue Load = GetLoad(PtrVT, LoadOffset);
7634         MemOpChains.push_back(Load.getValue(1));
7635         LoadOffset += PtrByteSize;
7636         const CCValAssign &ByValVA = ArgLocs[I++];
7637         assert(ByValVA.getValNo() == ValNo &&
7638                "Unexpected location for pass-by-value argument.");
7639         RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7640       }
7641 
7642       if (LoadOffset == ByValSize)
7643         continue;
7644 
7645       // There must be one more loc to handle the remainder.
7646       assert(ArgLocs[I].getValNo() == ValNo &&
7647              "Expected additional location for by-value argument.");
7648 
7649       if (ArgLocs[I].isMemLoc()) {
7650         assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7651         const CCValAssign &ByValVA = ArgLocs[I++];
7652         ISD::ArgFlagsTy MemcpyFlags = Flags;
7653         // Only memcpy the bytes that are not passed in registers.
7654         MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7655         Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7656             (LoadOffset != 0) ? DAG.getObjectPtrOffset(
7657                                     dl, Arg, TypeSize::getFixed(LoadOffset))
7658                               : Arg,
7659             DAG.getObjectPtrOffset(
7660                 dl, StackPtr, TypeSize::getFixed(ByValVA.getLocMemOffset())),
7661             CallSeqStart, MemcpyFlags, DAG, dl);
7662         continue;
7663       }
7664 
7665       // Initialize the final register residue.
7666       // Any residue that occupies the final by-val arg register must be
7667       // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7668       // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7669       // 2 and 1 byte loads.
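      // On 64-bit that means the i32, i16 and i8 loads are shifted left by 32,
      // 16 and 8 bits respectively and OR'ed together, leaving the 7 residue
      // bytes left-justified in the register.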
7670       const unsigned ResidueBytes = ByValSize % PtrByteSize;
7671       assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7672              "Unexpected register residue for by-value argument.");
7673       SDValue ResidueVal;
7674       for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7675         const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
7676         const MVT VT =
7677             N == 1 ? MVT::i8
7678                    : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7679         SDValue Load = GetLoad(VT, LoadOffset);
7680         MemOpChains.push_back(Load.getValue(1));
7681         LoadOffset += N;
7682         Bytes += N;
7683 
7684         // By-val arguments are passed left-justified in registers.
7685         // Every load here needs to be shifted, otherwise a full register load
7686         // should have been used.
7687         assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7688                "Unexpected load emitted during handling of pass-by-value "
7689                "argument.");
7690         unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7691         EVT ShiftAmountTy =
7692             getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7693         SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7694         SDValue ShiftedLoad =
7695             DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7696         ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7697                                               ShiftedLoad)
7698                                 : ShiftedLoad;
7699       }
7700 
7701       const CCValAssign &ByValVA = ArgLocs[I++];
7702       RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7703       continue;
7704     }
7705 
7706     CCValAssign &VA = ArgLocs[I++];
7707     const MVT LocVT = VA.getLocVT();
7708     const MVT ValVT = VA.getValVT();
7709 
7710     switch (VA.getLocInfo()) {
7711     default:
7712       report_fatal_error("Unexpected argument extension type.");
7713     case CCValAssign::Full:
7714       break;
7715     case CCValAssign::ZExt:
7716       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7717       break;
7718     case CCValAssign::SExt:
7719       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7720       break;
7721     }
7722 
7723     if (VA.isRegLoc() && !VA.needsCustom()) {
7724       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7725       continue;
7726     }
7727 
7728     // Vector arguments passed to VarArg functions need custom handling when
7729     // they are passed (at least partially) in GPRs.
7730     if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7731       assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7732       // Store value to its stack slot.
7733       SDValue PtrOff =
7734           DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7735       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7736       SDValue Store =
7737           DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
7738       MemOpChains.push_back(Store);
7739       const unsigned OriginalValNo = VA.getValNo();
7740       // Then load the GPRs from the stack
7741       unsigned LoadOffset = 0;
7742       auto HandleCustomVecRegLoc = [&]() {
7743         assert(I != E && "Unexpected end of CCvalAssigns.");
7744         assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7745                "Expected custom RegLoc.");
7746         CCValAssign RegVA = ArgLocs[I++];
7747         assert(RegVA.getValNo() == OriginalValNo &&
7748                "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7749         SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
7750                                   DAG.getConstant(LoadOffset, dl, PtrVT));
7751         SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
7752         MemOpChains.push_back(Load.getValue(1));
7753         RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
7754         LoadOffset += PtrByteSize;
7755       };
7756 
7757       // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7758       // 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7759       // R10.
7760       HandleCustomVecRegLoc();
7761       HandleCustomVecRegLoc();
7762 
7763       if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7764           ArgLocs[I].getValNo() == OriginalValNo) {
7765         assert(!IsPPC64 &&
7766                "Only 2 custom RegLocs expected for 64-bit codegen.");
7767         HandleCustomVecRegLoc();
7768         HandleCustomVecRegLoc();
7769       }
7770 
7771       continue;
7772     }
7773 
7774     if (VA.isMemLoc()) {
7775       SDValue PtrOff =
7776           DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7777       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7778       MemOpChains.push_back(
7779           DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
7780 
7781       continue;
7782     }
7783 
7784     if (!ValVT.isFloatingPoint())
7785       report_fatal_error(
7786           "Unexpected register handling for calling convention.");
7787 
7788     // Custom handling is used for GPR initializations for vararg float
7789     // arguments.
7790     assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7791            LocVT.isInteger() &&
7792            "Custom register handling only expected for VarArg.");
7793 
7794     SDValue ArgAsInt =
7795         DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7796 
7797     if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7798       // f32 in 32-bit GPR
7799       // f64 in 64-bit GPR
7800       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7801     else if (Arg.getValueType().getFixedSizeInBits() <
7802              LocVT.getFixedSizeInBits())
7803       // f32 in 64-bit GPR.
7804       RegsToPass.push_back(std::make_pair(
7805           VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7806     else {
7807       // f64 in two 32-bit GPRs
7808       // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
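      // The most-significant word of the f64 goes in the first GPR and the
      // least-significant word in the second, matching the big-endian memory
      // layout used on AIX.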
7809       assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7810              "Unexpected custom register for argument!");
7811       CCValAssign &GPR1 = VA;
7812       SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7813                                      DAG.getConstant(32, dl, MVT::i8));
7814       RegsToPass.push_back(std::make_pair(
7815           GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7816 
7817       if (I != E) {
7818         // If only 1 GPR was available, there will only be one custom GPR and
7819         // the argument will also be passed in memory.
7820         CCValAssign &PeekArg = ArgLocs[I];
7821         if (PeekArg.isRegLoc() && PeekArg.getValNo() == VA.getValNo()) {
7822           assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7823           CCValAssign &GPR2 = ArgLocs[I++];
7824           RegsToPass.push_back(std::make_pair(
7825               GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7826         }
7827       }
7828     }
7829   }
7830 
7831   if (!MemOpChains.empty())
7832     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7833 
7834   // For indirect calls, we need to save the TOC base to the stack for
7835   // restoration after the call.
7836   if (CFlags.IsIndirect) {
7837     assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7838     const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7839     const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7840     const MVT PtrVT = Subtarget.getScalarIntVT();
7841     const unsigned TOCSaveOffset =
7842         Subtarget.getFrameLowering()->getTOCSaveOffset();
7843 
7844     setUsesTOCBasePtr(DAG);
7845     SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7846     SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7847     SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7848     SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7849     Chain = DAG.getStore(
7850         Val.getValue(1), dl, Val, AddPtr,
7851         MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7852   }
7853 
7854   // Build a sequence of copy-to-reg nodes chained together with token chain
7855   // and flag operands which copy the outgoing args into the appropriate regs.
7856   SDValue InGlue;
7857   for (auto Reg : RegsToPass) {
7858     Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
7859     InGlue = Chain.getValue(1);
7860   }
7861 
7862   const int SPDiff = 0;
7863   return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
7864                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
7865 }
7866 
7867 bool
7868 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7869                                   MachineFunction &MF, bool isVarArg,
7870                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
7871                                   LLVMContext &Context,
7872                                   const Type *RetTy) const {
7873   SmallVector<CCValAssign, 16> RVLocs;
7874   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7875   return CCInfo.CheckReturn(
7876       Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7877                 ? RetCC_PPC_Cold
7878                 : RetCC_PPC);
7879 }
7880 
7881 SDValue
7882 PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7883                                bool isVarArg,
7884                                const SmallVectorImpl<ISD::OutputArg> &Outs,
7885                                const SmallVectorImpl<SDValue> &OutVals,
7886                                const SDLoc &dl, SelectionDAG &DAG) const {
7887   SmallVector<CCValAssign, 16> RVLocs;
7888   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7889                  *DAG.getContext());
7890   CCInfo.AnalyzeReturn(Outs,
7891                        (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7892                            ? RetCC_PPC_Cold
7893                            : RetCC_PPC);
7894 
7895   SDValue Glue;
7896   SmallVector<SDValue, 4> RetOps(1, Chain);
7897 
7898   // Copy the result values into the output registers.
7899   for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7900     CCValAssign &VA = RVLocs[i];
7901     assert(VA.isRegLoc() && "Can only return in registers!");
7902 
7903     SDValue Arg = OutVals[RealResIdx];
7904 
7905     switch (VA.getLocInfo()) {
7906     default: llvm_unreachable("Unknown loc info!");
7907     case CCValAssign::Full: break;
7908     case CCValAssign::AExt:
7909       Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7910       break;
7911     case CCValAssign::ZExt:
7912       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7913       break;
7914     case CCValAssign::SExt:
7915       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7916       break;
7917     }
7918     if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7919       bool isLittleEndian = Subtarget.isLittleEndian();
7920       // Legalize ret f64 -> ret 2 x i32.
7921       SDValue SVal =
7922           DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7923                       DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7924       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7925       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7926       SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7927                          DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7928       Glue = Chain.getValue(1);
7929       VA = RVLocs[++i]; // skip ahead to next loc
7930       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7931     } else
7932       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
7933     Glue = Chain.getValue(1);
7934     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7935   }
7936 
7937   RetOps[0] = Chain;  // Update chain.
7938 
7939   // Add the glue if we have it.
7940   if (Glue.getNode())
7941     RetOps.push_back(Glue);
7942 
7943   return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
7944 }
7945 
7946 SDValue
7947 PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7948                                                 SelectionDAG &DAG) const {
7949   SDLoc dl(Op);
7950 
7951   // Get the correct type for integers.
7952   EVT IntVT = Op.getValueType();
7953 
7954   // Get the inputs.
7955   SDValue Chain = Op.getOperand(0);
7956   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7957   // Build a DYNAREAOFFSET node.
7958   SDValue Ops[2] = {Chain, FPSIdx};
7959   SDVTList VTs = DAG.getVTList(IntVT);
7960   return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7961 }
7962 
7963 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7964                                              SelectionDAG &DAG) const {
7965   // When we pop the dynamic allocation we need to restore the SP link.
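  // The word at the current stack pointer holds the back chain (a pointer to
  // the previous frame).  Reload it before moving the stack pointer, then
  // store it at the new top of stack so the chain stays intact.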
7966   SDLoc dl(Op);
7967 
7968   // Get the correct type for pointers.
7969   EVT PtrVT = getPointerTy(DAG.getDataLayout());
7970 
7971   // Construct the stack pointer operand.
7972   bool isPPC64 = Subtarget.isPPC64();
7973   unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7974   SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7975 
7976   // Get the operands for the STACKRESTORE.
7977   SDValue Chain = Op.getOperand(0);
7978   SDValue SaveSP = Op.getOperand(1);
7979 
7980   // Load the old link SP.
7981   SDValue LoadLinkSP =
7982       DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7983 
7984   // Restore the stack pointer.
7985   Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7986 
7987   // Store the old link SP.
7988   return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7989 }
7990 
7991 SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7992   MachineFunction &MF = DAG.getMachineFunction();
7993   bool isPPC64 = Subtarget.isPPC64();
7994   EVT PtrVT = getPointerTy(MF.getDataLayout());
7995 
7996   // Get the current return address save index.  (This mirrors
7997   // getFramePointerFrameIndex below, but for the link register save slot.)
7998   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7999   int RASI = FI->getReturnAddrSaveIndex();
8000 
8001   // If the return address save index hasn't been defined yet.
8002   if (!RASI) {
8003     // Find out the fixed offset of the return address save area.
8004     int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
8005     // Allocate the frame index for the return address save area.
8006     RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
8007     // Save the result.
8008     FI->setReturnAddrSaveIndex(RASI);
8009   }
8010   return DAG.getFrameIndex(RASI, PtrVT);
8011 }
8012 
8013 SDValue
8014 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
8015   MachineFunction &MF = DAG.getMachineFunction();
8016   bool isPPC64 = Subtarget.isPPC64();
8017   EVT PtrVT = getPointerTy(MF.getDataLayout());
8018 
8019   // Get current frame pointer save index.  The users of this index will be
8020   // primarily DYNALLOC instructions.
8021   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
8022   int FPSI = FI->getFramePointerSaveIndex();
8023 
8024   // If the frame pointer save index hasn't been defined yet.
8025   if (!FPSI) {
8026     // Find out the fixed offset of the frame pointer save area.
8027     int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
8028     // Allocate the frame index for the frame pointer save area.
8029     FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
8030     // Save the result.
8031     FI->setFramePointerSaveIndex(FPSI);
8032   }
8033   return DAG.getFrameIndex(FPSI, PtrVT);
8034 }
8035 
8036 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
8037                                                    SelectionDAG &DAG) const {
8038   MachineFunction &MF = DAG.getMachineFunction();
8039   // Get the inputs.
8040   SDValue Chain = Op.getOperand(0);
8041   SDValue Size  = Op.getOperand(1);
8042   SDLoc dl(Op);
8043 
8044   // Get the correct type for pointers.
8045   EVT PtrVT = getPointerTy(DAG.getDataLayout());
8046   // Negate the size.
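  // Passing the negated size lets the expansion of the allocation node adjust
  // the stack pointer with a simple add.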
8047   SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
8048                                 DAG.getConstant(0, dl, PtrVT), Size);
8049   // Construct a node for the frame pointer save index.
8050   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
8051   SDValue Ops[3] = { Chain, NegSize, FPSIdx };
8052   SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
8053   if (hasInlineStackProbe(MF))
8054     return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
8055   return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
8056 }
8057 
8058 SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
8059                                                      SelectionDAG &DAG) const {
8060   MachineFunction &MF = DAG.getMachineFunction();
8061 
8062   bool isPPC64 = Subtarget.isPPC64();
8063   EVT PtrVT = getPointerTy(DAG.getDataLayout());
8064 
8065   int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
8066   return DAG.getFrameIndex(FI, PtrVT);
8067 }
8068 
8069 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
8070                                                SelectionDAG &DAG) const {
8071   SDLoc DL(Op);
8072   return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
8073                      DAG.getVTList(MVT::i32, MVT::Other),
8074                      Op.getOperand(0), Op.getOperand(1));
8075 }
8076 
8077 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
8078                                                 SelectionDAG &DAG) const {
8079   SDLoc DL(Op);
8080   return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
8081                      Op.getOperand(0), Op.getOperand(1));
8082 }
8083 
8084 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
8085   if (Op.getValueType().isVector())
8086     return LowerVectorLoad(Op, DAG);
8087 
8088   assert(Op.getValueType() == MVT::i1 &&
8089          "Custom lowering only for i1 loads");
8090 
8091   // First, load 8 bits into 32 bits, then truncate to 1 bit.
8092 
8093   SDLoc dl(Op);
8094   LoadSDNode *LD = cast<LoadSDNode>(Op);
8095 
8096   SDValue Chain = LD->getChain();
8097   SDValue BasePtr = LD->getBasePtr();
8098   MachineMemOperand *MMO = LD->getMemOperand();
8099 
8100   SDValue NewLD =
8101       DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
8102                      BasePtr, MVT::i8, MMO);
8103   SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
8104 
8105   SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
8106   return DAG.getMergeValues(Ops, dl);
8107 }
8108 
8109 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
8110   if (Op.getOperand(1).getValueType().isVector())
8111     return LowerVectorStore(Op, DAG);
8112 
8113   assert(Op.getOperand(1).getValueType() == MVT::i1 &&
8114          "Custom lowering only for i1 stores");
8115 
8116   // First, zero extend to 32 bits, then use a truncating store to 8 bits.
8117 
8118   SDLoc dl(Op);
8119   StoreSDNode *ST = cast<StoreSDNode>(Op);
8120 
8121   SDValue Chain = ST->getChain();
8122   SDValue BasePtr = ST->getBasePtr();
8123   SDValue Value = ST->getValue();
8124   MachineMemOperand *MMO = ST->getMemOperand();
8125 
8126   Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
8127                       Value);
8128   return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
8129 }
8130 
8131 // FIXME: Remove this once the ANDI glue bug is fixed:
8132 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8133   assert(Op.getValueType() == MVT::i1 &&
8134          "Custom lowering only for i1 results");
8135 
8136   SDLoc DL(Op);
8137   return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
8138 }
8139 
8140 SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
8141                                                SelectionDAG &DAG) const {
8142 
8143   // Implements a vector truncate that fits in a vector register as a shuffle.
8144   // We want to legalize vector truncates down to where the source fits in
8145   // a vector register (and target is therefore smaller than vector register
8146   // size).  At that point legalization will try to custom lower the sub-legal
8147   // result and get here - where we can contain the truncate as a single target
8148   // operation.
8149 
8150   // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
8151   //   <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
8152   //
8153   // We will implement it for big-endian ordering as this (where x denotes
8154   // undefined):
8155   //   < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
8156   //   < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
8157   //
8158   // The same operation in little-endian ordering will be:
8159   //   <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
8160   //   <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
8161 
8162   EVT TrgVT = Op.getValueType();
8163   assert(TrgVT.isVector() && "Vector type expected.");
8164   unsigned TrgNumElts = TrgVT.getVectorNumElements();
8165   EVT EltVT = TrgVT.getVectorElementType();
8166   if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
8167       TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
8168       !llvm::has_single_bit<uint32_t>(EltVT.getSizeInBits()))
8169     return SDValue();
8170 
8171   SDValue N1 = Op.getOperand(0);
8172   EVT SrcVT = N1.getValueType();
8173   unsigned SrcSize = SrcVT.getSizeInBits();
8174   if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
8175       !llvm::has_single_bit<uint32_t>(
8176           SrcVT.getVectorElementType().getSizeInBits()))
8177     return SDValue();
8178   if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
8179     return SDValue();
8180 
8181   unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8182   EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8183 
8184   SDLoc DL(Op);
8185   SDValue Op1, Op2;
8186   if (SrcSize == 256) {
8187     EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
8188     EVT SplitVT =
8189         N1.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());
8190     unsigned SplitNumElts = SplitVT.getVectorNumElements();
8191     Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8192                       DAG.getConstant(0, DL, VecIdxTy));
8193     Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8194                       DAG.getConstant(SplitNumElts, DL, VecIdxTy));
8195   }
8196   else {
8197     Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
8198     Op2 = DAG.getUNDEF(WideVT);
8199   }
8200 
8201   // First list the elements we want to keep.
8202   unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
8203   SmallVector<int, 16> ShuffV;
8204   if (Subtarget.isLittleEndian())
8205     for (unsigned i = 0; i < TrgNumElts; ++i)
8206       ShuffV.push_back(i * SizeMult);
8207   else
8208     for (unsigned i = 1; i <= TrgNumElts; ++i)
8209       ShuffV.push_back(i * SizeMult - 1);
8210 
8211   // Populate the remaining elements with undefs.
8212   for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
8214     ShuffV.push_back(WideNumElts + 1);
8215 
8216   Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
8217   Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
8218   return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
8219 }
8220 
8221 /// LowerSELECT_CC - Lower floating point select_cc's into an fsel instruction
8222 /// when possible.
8223 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
8224   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
8225   EVT ResVT = Op.getValueType();
8226   EVT CmpVT = Op.getOperand(0).getValueType();
8227   SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8228   SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
8229   SDLoc dl(Op);
8230 
8231   // Without power9-vector, we don't have a native instruction for f128 comparison.
8232   // The following transformation to a setcc libcall is needed:
8233   // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, lhs, rhs), 0, tv, fv, NE
8234   if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
8235     SDValue Z = DAG.getSetCC(
8236         dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
8237         LHS, RHS, CC);
8238     SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
8239     return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
8240   }
8241 
8242   // Not FP, or using SPE? Not a fsel.
8243   if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
8244       Subtarget.hasSPE())
8245     return Op;
8246 
8247   SDNodeFlags Flags = Op.getNode()->getFlags();
8248 
8249   // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
8250   // presence of infinities.
8251   if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
8252     switch (CC) {
8253     default:
8254       break;
8255     case ISD::SETOGT:
8256     case ISD::SETGT:
8257       return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
8258     case ISD::SETOLT:
8259     case ISD::SETLT:
8260       return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
8261     }
8262   }
8263 
8264   // We might be able to do better than this under some circumstances, but in
8265   // general, fsel-based lowering of select is a finite-math-only optimization.
8266   // For more information, see section F.3 of the 2.06 ISA specification.
8267   // With ISA 3.0, f128 selects are likewise not lowered to fsel here.
8268   if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
8269       (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()) ||
8270       ResVT == MVT::f128)
8271     return Op;
8272 
8273   // If the RHS of the comparison is a 0.0, we don't need to do the
8274   // subtraction at all.
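  // For example, select_cc lhs, +0.0, tv, fv, SETGE lowers directly to
  // fsel(lhs, tv, fv): fsel yields tv when lhs >= 0.0 and fv otherwise.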
8275   SDValue Sel1;
8276   if (isFloatingPointZero(RHS))
8277     switch (CC) {
8278     default: break;       // SETUO etc aren't handled by fsel.
8279     case ISD::SETNE:
8280       std::swap(TV, FV);
8281       [[fallthrough]];
8282     case ISD::SETEQ:
8283       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
8284         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8285       Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8286       if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
8287         Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8288       return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8289                          DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8290     case ISD::SETULT:
8291     case ISD::SETLT:
8292       std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
8293       [[fallthrough]];
8294     case ISD::SETOGE:
8295     case ISD::SETGE:
8296       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
8297         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8298       return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8299     case ISD::SETUGT:
8300     case ISD::SETGT:
8301       std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
8302       [[fallthrough]];
8303     case ISD::SETOLE:
8304     case ISD::SETLE:
8305       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
8306         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8307       return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8308                          DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8309     }
8310 
8311   SDValue Cmp;
8312   switch (CC) {
8313   default: break;       // SETUO etc aren't handled by fsel.
8314   case ISD::SETNE:
8315     std::swap(TV, FV);
8316     [[fallthrough]];
8317   case ISD::SETEQ:
8318     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8319     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8320       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8321     Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8322     if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
8323       Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8324     return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8325                        DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8326   case ISD::SETULT:
8327   case ISD::SETLT:
8328     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8329     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8330       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8331     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8332   case ISD::SETOGE:
8333   case ISD::SETGE:
8334     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8335     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8336       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8337     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8338   case ISD::SETUGT:
8339   case ISD::SETGT:
8340     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8341     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8342       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8343     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8344   case ISD::SETOLE:
8345   case ISD::SETLE:
8346     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8347     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8348       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8349     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8350   }
8351   return Op;
8352 }
8353 
8354 static unsigned getPPCStrictOpcode(unsigned Opc) {
8355   switch (Opc) {
8356   default:
8357     llvm_unreachable("No strict version of this opcode!");
8358   case PPCISD::FCTIDZ:
8359     return PPCISD::STRICT_FCTIDZ;
8360   case PPCISD::FCTIWZ:
8361     return PPCISD::STRICT_FCTIWZ;
8362   case PPCISD::FCTIDUZ:
8363     return PPCISD::STRICT_FCTIDUZ;
8364   case PPCISD::FCTIWUZ:
8365     return PPCISD::STRICT_FCTIWUZ;
8366   case PPCISD::FCFID:
8367     return PPCISD::STRICT_FCFID;
8368   case PPCISD::FCFIDU:
8369     return PPCISD::STRICT_FCFIDU;
8370   case PPCISD::FCFIDS:
8371     return PPCISD::STRICT_FCFIDS;
8372   case PPCISD::FCFIDUS:
8373     return PPCISD::STRICT_FCFIDUS;
8374   }
8375 }
8376 
8377 static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8378                               const PPCSubtarget &Subtarget) {
8379   SDLoc dl(Op);
8380   bool IsStrict = Op->isStrictFPOpcode();
8381   bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8382                   Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8383 
8384   // TODO: Any other flags to propagate?
8385   SDNodeFlags Flags;
8386   Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8387 
8388   // For strict nodes, source is the second operand.
8389   SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8390   SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8391   MVT DestTy = Op.getSimpleValueType();
8392   assert(Src.getValueType().isFloatingPoint() &&
8393          (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
8394           DestTy == MVT::i64) &&
8395          "Invalid FP_TO_INT types");
8396   if (Src.getValueType() == MVT::f32) {
8397     if (IsStrict) {
8398       Src =
8399           DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
8400                       DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8401       Chain = Src.getValue(1);
8402     } else
8403       Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8404   }
8405   if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
8406     DestTy = Subtarget.getScalarIntVT();
8407   unsigned Opc = ISD::DELETED_NODE;
8408   switch (DestTy.SimpleTy) {
8409   default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8410   case MVT::i32:
8411     Opc = IsSigned ? PPCISD::FCTIWZ
8412                    : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8413     break;
8414   case MVT::i64:
8415     assert((IsSigned || Subtarget.hasFPCVT()) &&
8416            "i64 FP_TO_UINT is supported only with FPCVT");
8417     Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8418   }
8419   EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
8420   SDValue Conv;
8421   if (IsStrict) {
8422     Opc = getPPCStrictOpcode(Opc);
8423     Conv = DAG.getNode(Opc, dl, DAG.getVTList(ConvTy, MVT::Other), {Chain, Src},
8424                        Flags);
8425   } else {
8426     Conv = DAG.getNode(Opc, dl, ConvTy, Src);
8427   }
8428   return Conv;
8429 }
8430 
8431 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8432                                                SelectionDAG &DAG,
8433                                                const SDLoc &dl) const {
8434   SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8435   bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8436                   Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8437   bool IsStrict = Op->isStrictFPOpcode();
8438 
8439   // Convert the FP value to an int value through memory.
8440   bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8441                   (IsSigned || Subtarget.hasFPCVT());
8442   SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8443   int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8444   MachinePointerInfo MPI =
8445       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8446 
8447   // Emit a store to the stack slot.
8448   SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8449   Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8450   if (i32Stack) {
8451     MachineFunction &MF = DAG.getMachineFunction();
8452     Alignment = Align(4);
8453     MachineMemOperand *MMO =
8454         MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8455     SDValue Ops[] = { Chain, Tmp, FIPtr };
8456     Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8457               DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8458   } else
8459     Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8460 
8461   // Result is a load from the stack slot.  If loading 4 bytes, make sure to
8462   // add in a bias on big endian.
8463   if (Op.getValueType() == MVT::i32 && !i32Stack) {
8464     FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8465                         DAG.getConstant(4, dl, FIPtr.getValueType()));
8466     MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
8467   }
8468 
8469   RLI.Chain = Chain;
8470   RLI.Ptr = FIPtr;
8471   RLI.MPI = MPI;
8472   RLI.Alignment = Alignment;
8473 }
8474 
8475 /// Custom lowers floating point to integer conversions to use
8476 /// the direct move instructions available in ISA 2.07 to avoid the
8477 /// need for load/store combinations.
8478 SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8479                                                     SelectionDAG &DAG,
8480                                                     const SDLoc &dl) const {
8481   SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8482   SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8483   if (Op->isStrictFPOpcode())
8484     return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8485   else
8486     return Mov;
8487 }
8488 
8489 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8490                                           const SDLoc &dl) const {
8491   bool IsStrict = Op->isStrictFPOpcode();
8492   bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8493                   Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8494   SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8495   EVT SrcVT = Src.getValueType();
8496   EVT DstVT = Op.getValueType();
8497 
8498   // FP to INT conversions are legal for f128.
8499   if (SrcVT == MVT::f128)
8500     return Subtarget.hasP9Vector() ? Op : SDValue();
8501 
8502   // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8503   // PPC (the libcall is not available).
8504   if (SrcVT == MVT::ppcf128) {
8505     if (DstVT == MVT::i32) {
8506       // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8507       // set other fast-math flags to FP operations in both strict and
8508       // non-strict cases. (FP_TO_SINT, FSUB)
8509       SDNodeFlags Flags;
8510       Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8511 
8512       if (IsSigned) {
8513         SDValue Lo, Hi;
8514         std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::f64, MVT::f64);
8515 
8516         // Add the two halves of the long double in round-to-zero mode, and use
8517         // a smaller FP_TO_SINT.
8518         if (IsStrict) {
8519           SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8520                                     DAG.getVTList(MVT::f64, MVT::Other),
8521                                     {Op.getOperand(0), Lo, Hi}, Flags);
8522           return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8523                              DAG.getVTList(MVT::i32, MVT::Other),
8524                              {Res.getValue(1), Res}, Flags);
8525         } else {
8526           SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8527           return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8528         }
8529       } else {
8530         const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8531         APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8532         SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8533         SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8534         if (IsStrict) {
8535           // Sel = Src < 0x80000000
8536           // FltOfs = select Sel, 0.0, 0x80000000
8537           // IntOfs = select Sel, 0, 0x80000000
8538           // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8539           SDValue Chain = Op.getOperand(0);
8540           EVT SetCCVT =
8541               getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8542           EVT DstSetCCVT =
8543               getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8544           SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8545                                      Chain, true);
8546           Chain = Sel.getValue(1);
8547 
8548           SDValue FltOfs = DAG.getSelect(
8549               dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8550           Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8551 
8552           SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8553                                     DAG.getVTList(SrcVT, MVT::Other),
8554                                     {Chain, Src, FltOfs}, Flags);
8555           Chain = Val.getValue(1);
8556           SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8557                                      DAG.getVTList(DstVT, MVT::Other),
8558                                      {Chain, Val}, Flags);
8559           Chain = SInt.getValue(1);
8560           SDValue IntOfs = DAG.getSelect(
8561               dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8562           SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8563           return DAG.getMergeValues({Result, Chain}, dl);
8564         } else {
8565           // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8566           // FIXME: generated code sucks.
8567           SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8568           True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8569           True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8570           SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8571           return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8572         }
8573       }
8574     }
8575 
8576     return SDValue();
8577   }
8578 
8579   if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8580     return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8581 
8582   ReuseLoadInfo RLI;
8583   LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8584 
8585   return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8586                      RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8587 }
8588 
8589 // We're trying to insert a regular store, S, and then a load, L. If the
8590 // incoming value, O, is a load, we might just be able to have our load use the
8591 // address used by O. However, we don't know if anything else will store to
8592 // that address before we can load from it. To prevent this situation, we need
8593 // to insert our load, L, into the chain as a peer of O. To do this, we give L
8594 // the same chain operand as O, we create a token factor from the chain results
8595 // of O and L, and we replace all uses of O's chain result with that token
8596 // factor (this last part is handled by makeEquivalentMemoryOrdering).
8597 bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8598                                             ReuseLoadInfo &RLI,
8599                                             SelectionDAG &DAG,
8600                                             ISD::LoadExtType ET) const {
8601   // Conservatively skip reusing for constrained FP nodes.
8602   if (Op->isStrictFPOpcode())
8603     return false;
8604 
8605   SDLoc dl(Op);
8606   bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8607                        (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8608   if (ET == ISD::NON_EXTLOAD &&
8609       (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8610       isOperationLegalOrCustom(Op.getOpcode(),
8611                                Op.getOperand(0).getValueType())) {
8612 
8613     LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8614     return true;
8615   }
8616 
8617   LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8618   if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8619       LD->isNonTemporal())
8620     return false;
8621   if (LD->getMemoryVT() != MemVT)
8622     return false;
8623 
8624   // If the result of the load is an illegal type, then we can't build a
8625   // valid chain for reuse since the legalised loads and token factor node that
8626   // ties the legalised loads together uses a different output chain than the
8627   // illegal load.
8628   if (!isTypeLegal(LD->getValueType(0)))
8629     return false;
8630 
8631   RLI.Ptr = LD->getBasePtr();
8632   if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8633     assert(LD->getAddressingMode() == ISD::PRE_INC &&
8634            "Non-pre-inc AM on PPC?");
8635     RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8636                           LD->getOffset());
8637   }
8638 
8639   RLI.Chain = LD->getChain();
8640   RLI.MPI = LD->getPointerInfo();
8641   RLI.IsDereferenceable = LD->isDereferenceable();
8642   RLI.IsInvariant = LD->isInvariant();
8643   RLI.Alignment = LD->getAlign();
8644   RLI.AAInfo = LD->getAAInfo();
8645   RLI.Ranges = LD->getRanges();
8646 
8647   RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8648   return true;
8649 }
8650 
8651 /// Analyze the profitability of a direct move.
8652 /// Prefer a float load over an int load plus direct move
8653 /// when there is no integer use of the loaded value.
8654 bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8655   SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
8656   if (Origin->getOpcode() != ISD::LOAD)
8657     return true;
8658 
8659   // If there is no LXSIBZX/LXSIHZX (as on Power8),
8660   // prefer a direct move if the memory size is 1 or 2 bytes.
8661   MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8662   if (!Subtarget.hasP9Vector() &&
8663       (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
8664     return true;
8665 
8666   for (SDUse &Use : Origin->uses()) {
8667 
8668     // Only look at the users of the loaded value.
8669     if (Use.getResNo() != 0)
8670       continue;
8671 
8672     SDNode *User = Use.getUser();
8673     if (User->getOpcode() != ISD::SINT_TO_FP &&
8674         User->getOpcode() != ISD::UINT_TO_FP &&
8675         User->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8676         User->getOpcode() != ISD::STRICT_UINT_TO_FP)
8677       return true;
8678   }
8679 
8680   return false;
8681 }
8682 
8683 static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8684                               const PPCSubtarget &Subtarget,
8685                               SDValue Chain = SDValue()) {
8686   bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8687                   Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8688   SDLoc dl(Op);
8689 
8690   // TODO: Any other flags to propagate?
8691   SDNodeFlags Flags;
8692   Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8693 
8694   // If we have FCFIDS, then use it when converting to single-precision.
8695   // Otherwise, convert to double-precision and then round.
8696   bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8697   unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8698                               : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8699   EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8700   if (Op->isStrictFPOpcode()) {
8701     if (!Chain)
8702       Chain = Op.getOperand(0);
8703     return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8704                        DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8705   } else
8706     return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8707 }
8708 
8709 /// Custom lowers integer to floating point conversions to use
8710 /// the direct move instructions available in ISA 2.07 to avoid the
8711 /// need for load/store combinations.
8712 SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8713                                                     SelectionDAG &DAG,
8714                                                     const SDLoc &dl) const {
8715   assert((Op.getValueType() == MVT::f32 ||
8716           Op.getValueType() == MVT::f64) &&
8717          "Invalid floating point type as target of conversion");
8718   assert(Subtarget.hasFPCVT() &&
8719          "Int to FP conversions with direct moves require FPCVT");
8720   SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8721   bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8722   bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8723                 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8724   unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8725   SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8726   return convertIntToFP(Op, Mov, DAG, Subtarget);
8727 }
8728 
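// Widen a vector narrower than 128 bits to a full 128-bit vector by
// concatenating it with enough undef copies of itself. For example, a v4i16
// input (16-bit elements) gives WideNumElts = 128 / 16 = 8 and
// NumConcat = 8 / 4 = 2, producing concat_vectors(Vec, undef) : v8i16.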
8729 static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8730 
8731   EVT VecVT = Vec.getValueType();
8732   assert(VecVT.isVector() && "Expected a vector type.");
8733   assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8734 
8735   EVT EltVT = VecVT.getVectorElementType();
8736   unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8737   EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8738 
8739   unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8740   SmallVector<SDValue, 16> Ops(NumConcat);
8741   Ops[0] = Vec;
8742   SDValue UndefVec = DAG.getUNDEF(VecVT);
8743   for (unsigned i = 1; i < NumConcat; ++i)
8744     Ops[i] = UndefVec;
8745 
8746   return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8747 }
8748 
8749 SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8750                                                 const SDLoc &dl) const {
8751   bool IsStrict = Op->isStrictFPOpcode();
8752   unsigned Opc = Op.getOpcode();
8753   SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8754   assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
8755           Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
8756          "Unexpected conversion type");
8757   assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8758          "Supports conversions to v2f64/v4f32 only.");
8759 
8760   // TODO: Any other flags to propagate?
8761   SDNodeFlags Flags;
8762   Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8763 
8764   bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8765   bool FourEltRes = Op.getValueType() == MVT::v4f32;
8766 
8767   SDValue Wide = widenVec(DAG, Src, dl);
8768   EVT WideVT = Wide.getValueType();
8769   unsigned WideNumElts = WideVT.getVectorNumElements();
8770   MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8771 
8772   SmallVector<int, 16> ShuffV;
8773   for (unsigned i = 0; i < WideNumElts; ++i)
8774     ShuffV.push_back(i + WideNumElts);
8775 
8776   int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8777   int SaveElts = FourEltRes ? 4 : 2;
8778   if (Subtarget.isLittleEndian())
8779     for (int i = 0; i < SaveElts; i++)
8780       ShuffV[i * Stride] = i;
8781   else
8782     for (int i = 1; i <= SaveElts; i++)
8783       ShuffV[i * Stride - 1] = i - 1;
8784 
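  // For example, an unsigned v4i16 -> v4f32 conversion on a little-endian
  // target: Wide is v8i16 <e0,e1,e2,e3,u,u,u,u>, ShuffleSrc2 is an all-zero
  // v8i16, Stride is 2, and ShuffV becomes <0,9,1,11,2,13,3,15>, so the
  // shuffle yields <e0,0,e1,0,e2,0,e3,0>; bitcast to v4i32 this is the
  // zero-extension of each element, ready for the vector conversion below.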
8785   SDValue ShuffleSrc2 =
8786       SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8787   SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8788 
8789   SDValue Extend;
8790   if (SignedConv) {
8791     Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8792     EVT ExtVT = Src.getValueType();
8793     if (Subtarget.hasP9Altivec())
8794       ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8795                                IntermediateVT.getVectorNumElements());
8796 
8797     Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8798                          DAG.getValueType(ExtVT));
8799   } else
8800     Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8801 
8802   if (IsStrict)
8803     return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8804                        {Op.getOperand(0), Extend}, Flags);
8805 
8806   return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8807 }
8808 
8809 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8810                                           SelectionDAG &DAG) const {
8811   SDLoc dl(Op);
8812   bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8813                   Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8814   bool IsStrict = Op->isStrictFPOpcode();
8815   SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8816   SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8817 
8818   // TODO: Any other flags to propagate?
8819   SDNodeFlags Flags;
8820   Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8821 
8822   EVT InVT = Src.getValueType();
8823   EVT OutVT = Op.getValueType();
8824   if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8825       isOperationCustom(Op.getOpcode(), InVT))
8826     return LowerINT_TO_FPVector(Op, DAG, dl);
8827 
8828   // Conversions to f128 are legal.
8829   if (Op.getValueType() == MVT::f128)
8830     return Subtarget.hasP9Vector() ? Op : SDValue();
8831 
8832   // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8833   if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8834     return SDValue();
8835 
8836   if (Src.getValueType() == MVT::i1) {
8837     SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8838                               DAG.getConstantFP(1.0, dl, Op.getValueType()),
8839                               DAG.getConstantFP(0.0, dl, Op.getValueType()));
8840     if (IsStrict)
8841       return DAG.getMergeValues({Sel, Chain}, dl);
8842     else
8843       return Sel;
8844   }
8845 
8846   // If we have direct moves, we can do the entire conversion and skip the
8847   // store/load; however, without FPCVT we can't do most conversions.
8848   if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8849       Subtarget.isPPC64() && Subtarget.hasFPCVT())
8850     return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8851 
8852   assert((IsSigned || Subtarget.hasFPCVT()) &&
8853          "UINT_TO_FP is supported only with FPCVT");
8854 
8855   if (Src.getValueType() == MVT::i64) {
8856     SDValue SINT = Src;
8857     // When converting to single-precision, we actually need to convert
8858     // to double-precision first and then round to single-precision.
8859     // To avoid double-rounding effects during that operation, we have
8860     // to prepare the input operand.  Bits that might be truncated when
8861     // converting to double-precision are replaced by a bit that won't
8862     // be lost at this stage, but is below the single-precision rounding
8863     // position.
8864     //
8865     // However, if -enable-unsafe-fp-math is in effect, accept double
8866     // rounding to avoid the extra overhead.
8867     if (Op.getValueType() == MVT::f32 &&
8868         !Subtarget.hasFPCVT() &&
8869         !DAG.getTarget().Options.UnsafeFPMath) {
8870 
8871       // Twiddle input to make sure the low 11 bits are zero.  (If this
8872       // is the case, we are guaranteed the value will fit into the 53 bit
8873       // mantissa of an IEEE double-precision value without rounding.)
8874       // If any of those low 11 bits were not zero originally, make sure
8875       // bit 12 (value 2048) is set instead, so that the final rounding
8876       // to single-precision gets the correct result.
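      // In effect this folds the discarded low bits into a sticky bit at
      // value 2048 (a round-to-odd style adjustment): the i64 -> f64 step is
      // then exact, and the final rounding to single precision still sees
      // whether any of the cleared bits were nonzero.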
8877       SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8878                                   SINT, DAG.getConstant(2047, dl, MVT::i64));
8879       Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8880                           Round, DAG.getConstant(2047, dl, MVT::i64));
8881       Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8882       Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8883                           Round, DAG.getConstant(-2048, dl, MVT::i64));
8884 
8885       // However, we cannot use that value unconditionally: if the magnitude
8886       // of the input value is small, the bit-twiddling we did above might
8887       // end up visibly changing the output.  Fortunately, in that case, we
8888       // don't need to twiddle bits since the original input will convert
8889       // exactly to double-precision floating-point already.  Therefore,
8890       // construct a conditional to use the original value if the top 11
8891       // bits are all sign-bit copies, and use the rounded value computed
8892       // above otherwise.
8893       SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8894                                  SINT, DAG.getConstant(53, dl, MVT::i32));
8895       Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8896                          Cond, DAG.getConstant(1, dl, MVT::i64));
8897       Cond = DAG.getSetCC(
8898           dl,
8899           getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8900           Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8901 
8902       SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8903     }
8904 
8905     ReuseLoadInfo RLI;
8906     SDValue Bits;
8907 
8908     MachineFunction &MF = DAG.getMachineFunction();
8909     if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8910       Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8911                          RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8912       if (RLI.ResChain)
8913         DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8914     } else if (Subtarget.hasLFIWAX() &&
8915                canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8916       MachineMemOperand *MMO =
8917         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8918                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8919       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8920       Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8921                                      DAG.getVTList(MVT::f64, MVT::Other),
8922                                      Ops, MVT::i32, MMO);
8923       if (RLI.ResChain)
8924         DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8925     } else if (Subtarget.hasFPCVT() &&
8926                canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8927       MachineMemOperand *MMO =
8928         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8929                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8930       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8931       Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8932                                      DAG.getVTList(MVT::f64, MVT::Other),
8933                                      Ops, MVT::i32, MMO);
8934       if (RLI.ResChain)
8935         DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8936     } else if (((Subtarget.hasLFIWAX() &&
8937                  SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8938                 (Subtarget.hasFPCVT() &&
8939                  SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8940                SINT.getOperand(0).getValueType() == MVT::i32) {
8941       MachineFrameInfo &MFI = MF.getFrameInfo();
8942       EVT PtrVT = getPointerTy(DAG.getDataLayout());
8943 
8944       int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8945       SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8946 
8947       SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8948                                    MachinePointerInfo::getFixedStack(
8949                                        DAG.getMachineFunction(), FrameIdx));
8950       Chain = Store;
8951 
8952       assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8953              "Expected an i32 store");
8954 
8955       RLI.Ptr = FIdx;
8956       RLI.Chain = Chain;
8957       RLI.MPI =
8958           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8959       RLI.Alignment = Align(4);
8960 
8961       MachineMemOperand *MMO =
8962         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8963                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8964       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8965       Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
8966                                      PPCISD::LFIWZX : PPCISD::LFIWAX,
8967                                      dl, DAG.getVTList(MVT::f64, MVT::Other),
8968                                      Ops, MVT::i32, MMO);
8969       Chain = Bits.getValue(1);
8970     } else
8971       Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8972 
8973     SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8974     if (IsStrict)
8975       Chain = FP.getValue(1);
8976 
8977     if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8978       if (IsStrict)
8979         FP = DAG.getNode(
8980             ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
8981             {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)},
8982             Flags);
8983       else
8984         FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8985                          DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
8986     }
8987     return FP;
8988   }
8989 
8990   assert(Src.getValueType() == MVT::i32 &&
8991          "Unhandled INT_TO_FP type in custom expander!");
8992   // Since we only generate this in 64-bit mode, we can take advantage of
8993   // 64-bit registers.  In particular, sign extend the input value into the
8994   // 64-bit register with extsw, store the WHOLE 64-bit value onto the stack,
8995   // then lfd it and fcfid it.
8996   MachineFunction &MF = DAG.getMachineFunction();
8997   MachineFrameInfo &MFI = MF.getFrameInfo();
8998   EVT PtrVT = getPointerTy(MF.getDataLayout());
8999 
9000   SDValue Ld;
9001   if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
9002     ReuseLoadInfo RLI;
9003     bool ReusingLoad;
9004     if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
9005       int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
9006       SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9007 
9008       SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
9009                                    MachinePointerInfo::getFixedStack(
9010                                        DAG.getMachineFunction(), FrameIdx));
9011       Chain = Store;
9012 
9013       assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
9014              "Expected an i32 store");
9015 
9016       RLI.Ptr = FIdx;
9017       RLI.Chain = Chain;
9018       RLI.MPI =
9019           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
9020       RLI.Alignment = Align(4);
9021     }
9022 
9023     MachineMemOperand *MMO =
9024       MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
9025                               RLI.Alignment, RLI.AAInfo, RLI.Ranges);
9026     SDValue Ops[] = { RLI.Chain, RLI.Ptr };
9027     Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
9028                                  DAG.getVTList(MVT::f64, MVT::Other), Ops,
9029                                  MVT::i32, MMO);
9030     Chain = Ld.getValue(1);
9031     if (ReusingLoad && RLI.ResChain) {
9032       DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Ld.getValue(1));
9033     }
9034   } else {
9035     assert(Subtarget.isPPC64() &&
9036            "i32->FP without LFIWAX supported only on PPC64");
9037 
9038     int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
9039     SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9040 
9041     SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
9042 
9043     // STD the extended value into the stack slot.
9044     SDValue Store = DAG.getStore(
9045         Chain, dl, Ext64, FIdx,
9046         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9047     Chain = Store;
9048 
9049     // Load the value as a double.
9050     Ld = DAG.getLoad(
9051         MVT::f64, dl, Chain, FIdx,
9052         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9053     Chain = Ld.getValue(1);
9054   }
9055 
9056   // FCFID it and return it.
9057   SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
9058   if (IsStrict)
9059     Chain = FP.getValue(1);
9060   if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
9061     if (IsStrict)
9062       FP = DAG.getNode(
9063           ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
9064           {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)}, Flags);
9065     else
9066       FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
9067                        DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
9068   }
9069   return FP;
9070 }
9071 
9072 SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op,
9073                                              SelectionDAG &DAG) const {
9074   SDLoc Dl(Op);
9075   MachineFunction &MF = DAG.getMachineFunction();
9076   EVT PtrVT = getPointerTy(MF.getDataLayout());
9077   SDValue Chain = Op.getOperand(0);
9078 
9079   // If the requested mode is constant, just use the simpler mtfsb/mffscrni.
9080   if (auto *CVal = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
9081     uint64_t Mode = CVal->getZExtValue();
9082     assert(Mode < 4 && "Unsupported rounding mode!");
9083     unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1);
9084     if (Subtarget.isISA3_0())
9085       return SDValue(
9086           DAG.getMachineNode(
9087               PPC::MFFSCRNI, Dl, {MVT::f64, MVT::Other},
9088               {DAG.getConstant(InternalRnd, Dl, MVT::i32, true), Chain}),
9089           1);
9090     SDNode *SetHi = DAG.getMachineNode(
9091         (InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
9092         {DAG.getConstant(30, Dl, MVT::i32, true), Chain});
9093     SDNode *SetLo = DAG.getMachineNode(
9094         (InternalRnd & 1) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
9095         {DAG.getConstant(31, Dl, MVT::i32, true), SDValue(SetHi, 0)});
9096     return SDValue(SetLo, 0);
9097   }
9098 
9099   // Use x ^ (~(x >> 1) & 1) to transform LLVM rounding mode to Power format.
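  // Applied to each LLVM mode: 0 (toward zero) -> 0b01, 1 (to nearest) ->
  // 0b00, 2 (toward +inf) -> 0b10, 3 (toward -inf) -> 0b11, matching the
  // FPSCR RN encoding.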
9100   SDValue One = DAG.getConstant(1, Dl, MVT::i32);
9101   SDValue SrcFlag = DAG.getNode(ISD::AND, Dl, MVT::i32, Op.getOperand(1),
9102                                 DAG.getConstant(3, Dl, MVT::i32));
9103   SDValue DstFlag = DAG.getNode(
9104       ISD::XOR, Dl, MVT::i32, SrcFlag,
9105       DAG.getNode(ISD::AND, Dl, MVT::i32,
9106                   DAG.getNOT(Dl,
9107                              DAG.getNode(ISD::SRL, Dl, MVT::i32, SrcFlag, One),
9108                              MVT::i32),
9109                   One));
9110   // For Power9, there's the faster mffscrn, and we don't need to read the FPSCR.
9111   SDValue MFFS;
9112   if (!Subtarget.isISA3_0()) {
9113     MFFS = DAG.getNode(PPCISD::MFFS, Dl, {MVT::f64, MVT::Other}, Chain);
9114     Chain = MFFS.getValue(1);
9115   }
9116   SDValue NewFPSCR;
9117   if (Subtarget.isPPC64()) {
9118     if (Subtarget.isISA3_0()) {
9119       NewFPSCR = DAG.getAnyExtOrTrunc(DstFlag, Dl, MVT::i64);
9120     } else {
9121       // Set the last two bits (rounding mode) of bitcasted FPSCR.
9122       SDNode *InsertRN = DAG.getMachineNode(
9123           PPC::RLDIMI, Dl, MVT::i64,
9124           {DAG.getNode(ISD::BITCAST, Dl, MVT::i64, MFFS),
9125            DAG.getNode(ISD::ZERO_EXTEND, Dl, MVT::i64, DstFlag),
9126            DAG.getTargetConstant(0, Dl, MVT::i32),
9127            DAG.getTargetConstant(62, Dl, MVT::i32)});
9128       NewFPSCR = SDValue(InsertRN, 0);
9129     }
9130     NewFPSCR = DAG.getNode(ISD::BITCAST, Dl, MVT::f64, NewFPSCR);
9131   } else {
9132     // In 32-bit mode, store f64, load and update the lower half.
9133     int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9134     SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9135     SDValue Addr = Subtarget.isLittleEndian()
9136                        ? StackSlot
9137                        : DAG.getNode(ISD::ADD, Dl, PtrVT, StackSlot,
9138                                      DAG.getConstant(4, Dl, PtrVT));
9139     if (Subtarget.isISA3_0()) {
9140       Chain = DAG.getStore(Chain, Dl, DstFlag, Addr, MachinePointerInfo());
9141     } else {
9142       Chain = DAG.getStore(Chain, Dl, MFFS, StackSlot, MachinePointerInfo());
9143       SDValue Tmp =
9144           DAG.getLoad(MVT::i32, Dl, Chain, Addr, MachinePointerInfo());
9145       Chain = Tmp.getValue(1);
9146       Tmp = SDValue(DAG.getMachineNode(
9147                         PPC::RLWIMI, Dl, MVT::i32,
9148                         {Tmp, DstFlag, DAG.getTargetConstant(0, Dl, MVT::i32),
9149                          DAG.getTargetConstant(30, Dl, MVT::i32),
9150                          DAG.getTargetConstant(31, Dl, MVT::i32)}),
9151                     0);
9152       Chain = DAG.getStore(Chain, Dl, Tmp, Addr, MachinePointerInfo());
9153     }
9154     NewFPSCR =
9155         DAG.getLoad(MVT::f64, Dl, Chain, StackSlot, MachinePointerInfo());
9156     Chain = NewFPSCR.getValue(1);
9157   }
9158   if (Subtarget.isISA3_0())
9159     return SDValue(DAG.getMachineNode(PPC::MFFSCRN, Dl, {MVT::f64, MVT::Other},
9160                                       {NewFPSCR, Chain}),
9161                    1);
9162   SDValue Zero = DAG.getConstant(0, Dl, MVT::i32, true);
9163   SDNode *MTFSF = DAG.getMachineNode(
9164       PPC::MTFSF, Dl, MVT::Other,
9165       {DAG.getConstant(255, Dl, MVT::i32, true), NewFPSCR, Zero, Zero, Chain});
9166   return SDValue(MTFSF, 0);
9167 }
9168 
9169 SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
9170                                              SelectionDAG &DAG) const {
9171   SDLoc dl(Op);
9172   /*
9173    The rounding mode is in bits 30:31 of FPSCR, and has the following
9174    settings:
9175      00 Round to nearest
9176      01 Round to 0
9177      10 Round to +inf
9178      11 Round to -inf
9179 
9180   GET_ROUNDING, on the other hand, expects the following:
9181     -1 Undefined
9182      0 Round to 0
9183      1 Round to nearest
9184      2 Round to +inf
9185      3 Round to -inf
9186 
9187   To perform the conversion, we do:
9188     ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
9189   */
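  // For example, with FPSCR RN = 0b10 (round to +inf): (2 ^ ((~2 & 3) >> 1))
  // = 2 ^ 0 = 2, which is GET_ROUNDING's encoding for "Round to +inf".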
9190 
9191   MachineFunction &MF = DAG.getMachineFunction();
9192   EVT VT = Op.getValueType();
9193   EVT PtrVT = getPointerTy(MF.getDataLayout());
9194 
9195   // Save FP Control Word to register
9196   SDValue Chain = Op.getOperand(0);
9197   SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
9198   Chain = MFFS.getValue(1);
9199 
9200   SDValue CWD;
9201   if (isTypeLegal(MVT::i64)) {
9202     CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
9203                       DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
9204   } else {
9205     // Save FP register to stack slot
9206     int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9207     SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9208     Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
9209 
9210     // Load FP Control Word from low 32 bits of stack slot.
9211     assert(hasBigEndianPartOrdering(MVT::i64, MF.getDataLayout()) &&
9212            "Stack slot adjustment is valid only on big endian subtargets!");
9213     SDValue Four = DAG.getConstant(4, dl, PtrVT);
9214     SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
9215     CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
9216     Chain = CWD.getValue(1);
9217   }
9218 
9219   // Transform as necessary
9220   SDValue CWD1 =
9221     DAG.getNode(ISD::AND, dl, MVT::i32,
9222                 CWD, DAG.getConstant(3, dl, MVT::i32));
9223   SDValue CWD2 =
9224     DAG.getNode(ISD::SRL, dl, MVT::i32,
9225                 DAG.getNode(ISD::AND, dl, MVT::i32,
9226                             DAG.getNode(ISD::XOR, dl, MVT::i32,
9227                                         CWD, DAG.getConstant(3, dl, MVT::i32)),
9228                             DAG.getConstant(3, dl, MVT::i32)),
9229                 DAG.getConstant(1, dl, MVT::i32));
9230 
9231   SDValue RetVal =
9232     DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
9233 
9234   RetVal =
9235       DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
9236                   dl, VT, RetVal);
9237 
9238   return DAG.getMergeValues({RetVal, Chain}, dl);
9239 }
9240 
9241 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9242   EVT VT = Op.getValueType();
9243   unsigned BitWidth = VT.getSizeInBits();
9244   SDLoc dl(Op);
9245   assert(Op.getNumOperands() == 3 &&
9246          VT == Op.getOperand(1).getValueType() &&
9247          "Unexpected SHL!");
9248 
9249   // Expand into a bunch of logical ops.  Note that these ops
9250   // depend on the PPC behavior for oversized shift amounts.
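  // Relying on PPC shifts yielding zero for amounts in [BitWidth, 2*BitWidth),
  // the Lo << (Amt - BitWidth) term only contributes when Amt >= BitWidth, in
  // which case the other two OutHi terms and OutLo are zero; for Amt <
  // BitWidth, OutHi is the usual (Hi << Amt) | (Lo >> (BitWidth - Amt)).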
9251   SDValue Lo = Op.getOperand(0);
9252   SDValue Hi = Op.getOperand(1);
9253   SDValue Amt = Op.getOperand(2);
9254   EVT AmtVT = Amt.getValueType();
9255 
9256   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9257                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9258   SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
9259   SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
9260   SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
9261   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9262                              DAG.getConstant(-BitWidth, dl, AmtVT));
9263   SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
9264   SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9265   SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
9266   SDValue OutOps[] = { OutLo, OutHi };
9267   return DAG.getMergeValues(OutOps, dl);
9268 }
9269 
9270 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9271   EVT VT = Op.getValueType();
9272   SDLoc dl(Op);
9273   unsigned BitWidth = VT.getSizeInBits();
9274   assert(Op.getNumOperands() == 3 &&
9275          VT == Op.getOperand(1).getValueType() &&
9276          "Unexpected SRL!");
9277 
9278   // Expand into a bunch of logical ops.  Note that these ops
9279   // depend on the PPC behavior for oversized shift amounts.
9280   SDValue Lo = Op.getOperand(0);
9281   SDValue Hi = Op.getOperand(1);
9282   SDValue Amt = Op.getOperand(2);
9283   EVT AmtVT = Amt.getValueType();
9284 
9285   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9286                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9287   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9288   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9289   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9290   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9291                              DAG.getConstant(-BitWidth, dl, AmtVT));
9292   SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
9293   SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9294   SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
9295   SDValue OutOps[] = { OutLo, OutHi };
9296   return DAG.getMergeValues(OutOps, dl);
9297 }
9298 
9299 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9300   SDLoc dl(Op);
9301   EVT VT = Op.getValueType();
9302   unsigned BitWidth = VT.getSizeInBits();
9303   assert(Op.getNumOperands() == 3 &&
9304          VT == Op.getOperand(1).getValueType() &&
9305          "Unexpected SRA!");
9306 
9307   // Expand into a bunch of logical ops, followed by a select_cc.
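  // The select_cc below chooses Hi >>a (Amt - BitWidth) for OutLo once Amt
  // exceeds BitWidth (Tmp5 > 0); otherwise it uses the usual
  // (Lo >> Amt) | (Hi << (BitWidth - Amt)). OutHi is Hi >>a Amt either way,
  // which degenerates to a sign splat for oversized amounts.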
9308   SDValue Lo = Op.getOperand(0);
9309   SDValue Hi = Op.getOperand(1);
9310   SDValue Amt = Op.getOperand(2);
9311   EVT AmtVT = Amt.getValueType();
9312 
9313   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9314                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9315   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9316   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9317   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9318   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9319                              DAG.getConstant(-BitWidth, dl, AmtVT));
9320   SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
9321   SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
9322   SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
9323                                   Tmp4, Tmp6, ISD::SETLE);
9324   SDValue OutOps[] = { OutLo, OutHi };
9325   return DAG.getMergeValues(OutOps, dl);
9326 }
9327 
9328 SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9329                                             SelectionDAG &DAG) const {
9330   SDLoc dl(Op);
9331   EVT VT = Op.getValueType();
9332   unsigned BitWidth = VT.getSizeInBits();
9333 
9334   bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9335   SDValue X = Op.getOperand(0);
9336   SDValue Y = Op.getOperand(1);
9337   SDValue Z = Op.getOperand(2);
9338   EVT AmtVT = Z.getValueType();
9339 
9340   // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9341   // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9342   // This is simpler than TargetLowering::expandFunnelShift because we can rely
9343   // on PowerPC shift by BW being well defined.
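  // For example, when Z % BW is 0, SubZ becomes BW and the PPC shift by BW
  // yields zero, so fshl returns X and fshr returns Y, as required.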
9344   Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
9345                   DAG.getConstant(BitWidth - 1, dl, AmtVT));
9346   SDValue SubZ =
9347       DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
9348   X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
9349   Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
9350   return DAG.getNode(ISD::OR, dl, VT, X, Y);
9351 }
9352 
9353 //===----------------------------------------------------------------------===//
9354 // Vector related lowering.
9355 //
9356 
9357 /// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9358 /// element size of SplatSize. Cast the result to VT.
9359 static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9360                                       SelectionDAG &DAG, const SDLoc &dl) {
9361   static const MVT VTys[] = { // canonical VT to use for each size.
9362     MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9363   };
9364 
9365   EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9366 
9367   // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
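  // E.g. a v8i16 splat of 0xFFFF becomes a v16i8 splat of 0xFF (vspltisb -1).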
9368   if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9369     SplatSize = 1;
9370     Val = 0xFF;
9371   }
9372 
9373   EVT CanonicalVT = VTys[SplatSize-1];
9374 
9375   // Build a canonical splat for this value.
9376   // Explicitly truncate APInt here, as this API is used with a mix of
9377   // signed and unsigned values.
9378   return DAG.getBitcast(
9379       ReqVT,
9380       DAG.getConstant(APInt(64, Val).trunc(SplatSize * 8), dl, CanonicalVT));
9381 }
9382 
9383 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9384 /// specified intrinsic ID.
9385 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9386                                 const SDLoc &dl, EVT DestVT = MVT::Other) {
9387   if (DestVT == MVT::Other) DestVT = Op.getValueType();
9388   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9389                      DAG.getConstant(IID, dl, MVT::i32), Op);
9390 }
9391 
9392 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9393 /// specified intrinsic ID.
9394 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9395                                 SelectionDAG &DAG, const SDLoc &dl,
9396                                 EVT DestVT = MVT::Other) {
9397   if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9398   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9399                      DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9400 }
9401 
9402 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9403 /// specified intrinsic ID.
9404 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9405                                 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9406                                 EVT DestVT = MVT::Other) {
9407   if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9408   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9409                      DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9410 }
9411 
9412 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9413 /// amount.  The result has the specified value type.
9414 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9415                            SelectionDAG &DAG, const SDLoc &dl) {
9416   // Force LHS/RHS to be the right type.
9417   LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9418   RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9419 
9420   int Ops[16];
9421   for (unsigned i = 0; i != 16; ++i)
9422     Ops[i] = i + Amt;
9423   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9424   return DAG.getNode(ISD::BITCAST, dl, VT, T);
9425 }
9426 
9427 /// Do we have an efficient pattern in a .td file for this node?
9428 ///
9429 /// \param V - pointer to the BuildVectorSDNode being matched
9430 /// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9431 ///
9432 /// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9433 /// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9434 /// the opposite is true (expansion is beneficial) are:
9435 /// - The node builds a vector out of integers that are not 32 or 64-bits
9436 /// - The node builds a vector out of constants
9437 /// - The node is a "load-and-splat"
9438 /// In all other cases, we will choose to keep the BUILD_VECTOR.
9439 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9440                                             bool HasDirectMove,
9441                                             bool HasP8Vector) {
9442   EVT VecVT = V->getValueType(0);
9443   bool RightType = VecVT == MVT::v2f64 ||
9444     (HasP8Vector && VecVT == MVT::v4f32) ||
9445     (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9446   if (!RightType)
9447     return false;
9448 
9449   bool IsSplat = true;
9450   bool IsLoad = false;
9451   SDValue Op0 = V->getOperand(0);
9452 
9453   // This function is called in a block that confirms the node is not a constant
9454   // splat. So a constant BUILD_VECTOR here means the vector is built out of
9455   // different constants.
9456   if (V->isConstant())
9457     return false;
9458   for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9459     if (V->getOperand(i).isUndef())
9460       return false;
9461     // We want to expand nodes that represent load-and-splat even if the
9462     // loaded value is a floating point truncation or conversion to int.
9463     if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9464         (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9465          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9466         (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9467          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9468         (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9469          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9470       IsLoad = true;
9471     // If the operands are different or the input is not a load and has more
9472     // uses than just this BV node, then it isn't a splat.
9473     if (V->getOperand(i) != Op0 ||
9474         (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9475       IsSplat = false;
9476   }
9477   return !(IsSplat && IsLoad);
9478 }
9479 
9480 // Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9481 SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9482 
9483   SDLoc dl(Op);
9484   SDValue Op0 = Op->getOperand(0);
9485 
9486   if (!Subtarget.isPPC64() || (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9487       (Op.getValueType() != MVT::f128))
9488     return SDValue();
9489 
9490   SDValue Lo = Op0.getOperand(0);
9491   SDValue Hi = Op0.getOperand(1);
9492   if ((Lo.getValueType() != MVT::i64) || (Hi.getValueType() != MVT::i64))
9493     return SDValue();
9494 
9495   if (!Subtarget.isLittleEndian())
9496     std::swap(Lo, Hi);
9497 
9498   return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Lo, Hi);
9499 }
9500 
9501 static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9502   const SDValue *InputLoad = &Op;
9503   while (InputLoad->getOpcode() == ISD::BITCAST)
9504     InputLoad = &InputLoad->getOperand(0);
9505   if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9506       InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9507     IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9508     InputLoad = &InputLoad->getOperand(0);
9509   }
9510   if (InputLoad->getOpcode() != ISD::LOAD)
9511     return nullptr;
9512   LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9513   return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9514 }
9515 
9516 // Convert the argument APFloat to a single precision APFloat if there is no
9517 // loss in information during the conversion to single precision APFloat and the
9518 // resulting number is not a denormal number. Return true if successful.
9519 bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9520   APFloat APFloatToConvert = ArgAPFloat;
9521   bool LosesInfo = true;
9522   APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9523                            &LosesInfo);
9524   bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9525   if (Success)
9526     ArgAPFloat = APFloatToConvert;
9527   return Success;
9528 }
9529 
9530 // Bitcast the argument APInt to a double and convert it to a single precision
9531 // APFloat, bitcast the APFloat to an APInt and assign it to the original
9532 // argument if there is no loss in information during the conversion from
9533 // double to single precision APFloat and the resulting number is not a denormal
9534 // number. Return true if successful.
9535 bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9536   double DpValue = ArgAPInt.bitsToDouble();
9537   APFloat APFloatDp(DpValue);
9538   bool Success = convertToNonDenormSingle(APFloatDp);
9539   if (Success)
9540     ArgAPInt = APFloatDp.bitcastToAPInt();
9541   return Success;
9542 }
9543 
9544 // Nondestructive check for convertToNonDenormSingle.
9545 bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
9546   // Only convert if it loses info, since XXSPLTIDP should
9547   // handle the other case.
9548   APFloat APFloatToConvert = ArgAPFloat;
9549   bool LosesInfo = true;
9550   APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9551                            &LosesInfo);
9552 
9553   return (!LosesInfo && !APFloatToConvert.isDenormal());
9554 }
9555 
9556 static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9557                              unsigned &Opcode) {
9558   LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9559   if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9560     return false;
9561 
9562   EVT Ty = Op->getValueType(0);
9563   // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9564   // as we cannot handle extending loads for these types.
9565   if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9566       ISD::isNON_EXTLoad(InputNode))
9567     return true;
9568 
9569   EVT MemVT = InputNode->getMemoryVT();
9570   // For v8i16 and v16i8 types, extending loads can be handled as long as the
9571   // memory VT is the same vector element VT type.
9572   // The loads feeding into the v8i16 and v16i8 types will be extending because
9573   // scalar i8/i16 are not legal types.
9574   if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9575       (MemVT == Ty.getVectorElementType()))
9576     return true;
9577 
9578   if (Ty == MVT::v2i64) {
9579     // Check the extend type, when the input type is i32, and the output vector
9580     // type is v2i64.
9581     if (MemVT == MVT::i32) {
9582       if (ISD::isZEXTLoad(InputNode))
9583         Opcode = PPCISD::ZEXT_LD_SPLAT;
9584       if (ISD::isSEXTLoad(InputNode))
9585         Opcode = PPCISD::SEXT_LD_SPLAT;
9586     }
9587     return true;
9588   }
9589   return false;
9590 }
9591 
9592 // If this is a case we can't handle, return null and let the default
9593 // expansion code take care of it.  If we CAN select this case, and if it
9594 // selects to a single instruction, return Op.  Otherwise, if we can codegen
9595 // this case more efficiently than a constant pool load, lower it to the
9596 // sequence of ops that should be used.
9597 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9598                                              SelectionDAG &DAG) const {
9599   SDLoc dl(Op);
9600   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9601   assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9602 
9603   // Check if this is a splat of a constant value.
9604   APInt APSplatBits, APSplatUndef;
9605   unsigned SplatBitSize;
9606   bool HasAnyUndefs;
9607   bool BVNIsConstantSplat =
9608       BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9609                            HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9610 
9611   // If it is a splat of a double, check if we can shrink it to a 32 bit
9612   // non-denormal float which when converted back to double gives us the same
9613   // double. This is to exploit the XXSPLTIDP instruction.
9614   // If we lose precision, we use XXSPLTI32DX.
9615   if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9616       Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
9617     // Check the type first to short-circuit so we don't modify APSplatBits if
9618     // this block isn't executed.
9619     if ((Op->getValueType(0) == MVT::v2f64) &&
9620         convertToNonDenormSingle(APSplatBits)) {
9621       SDValue SplatNode = DAG.getNode(
9622           PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9623           DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9624       return DAG.getBitcast(Op.getValueType(), SplatNode);
9625     } else {
9626       // We may lose precision, so we have to use XXSPLTI32DX.
9627 
9628       uint32_t Hi = Hi_32(APSplatBits.getZExtValue());
9629       uint32_t Lo = Lo_32(APSplatBits.getZExtValue());
9630       SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9631 
9632       if (!Hi || !Lo)
9633         // If either half is 0, then we should generate XXLXOR to set to 0.
9634         SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9635 
9636       if (Hi)
9637         SplatNode = DAG.getNode(
9638             PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9639             DAG.getTargetConstant(0, dl, MVT::i32),
9640             DAG.getTargetConstant(Hi, dl, MVT::i32));
9641 
9642       if (Lo)
9643         SplatNode =
9644             DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9645                         DAG.getTargetConstant(1, dl, MVT::i32),
9646                         DAG.getTargetConstant(Lo, dl, MVT::i32));
9647 
9648       return DAG.getBitcast(Op.getValueType(), SplatNode);
9649     }
9650   }
9651 
9652   if (!BVNIsConstantSplat || SplatBitSize > 32) {
9653     unsigned NewOpcode = PPCISD::LD_SPLAT;
9654 
9655     // Handle load-and-splat patterns as we have instructions that will do this
9656     // in one go.
9657     if (DAG.isSplatValue(Op, true) &&
9658         isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9659       const SDValue *InputLoad = &Op.getOperand(0);
9660       LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9661 
9662       // If the input load is an extending load, it will be an i32 -> i64
9663       // extending load and isValidSplatLoad() will update NewOpcode.
9664       unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9665       unsigned ElementSize =
9666           MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9667 
9668       assert(((ElementSize == 2 * MemorySize)
9669                   ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9670                      NewOpcode == PPCISD::SEXT_LD_SPLAT)
9671                   : (NewOpcode == PPCISD::LD_SPLAT)) &&
9672              "Unmatched element size and opcode!\n");
9673 
9674       // Checking for a single use of this load, we have to check for vector
9675       // width (128 bits) / ElementSize uses (since each operand of the
9676       // BUILD_VECTOR is a separate use of the value).
9677       unsigned NumUsesOfInputLD = 128 / ElementSize;
9678       for (SDValue BVInOp : Op->ops())
9679         if (BVInOp.isUndef())
9680           NumUsesOfInputLD--;
9681 
9682       // Exclude some cases where LD_SPLAT is worse than scalar_to_vector:
9683       // The cases below should also apply to "lfiwzx/lfiwax + LE target + index
9684       // 1", "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
9685       // 15", but isValidSplatLoad() currently only returns true when the
9686       // data at index 0 is not nullptr, so we will not get into trouble for
9687       // these cases.
9688       //
9689       // case 1 - lfiwzx/lfiwax
9690       // 1.1: load result is i32 and is sign/zero extend to i64;
9691       // 1.2: build a v2i64 vector type with above loaded value;
9692       // 1.3: the vector has only one value at index 0, others are all undef;
9693       // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9694       if (NumUsesOfInputLD == 1 &&
9695           (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9696            !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9697            Subtarget.hasLFIWAX()))
9698         return SDValue();
9699 
9700       // case 2 - lxvr[hb]x
9701       // 2.1: load result is at most i16;
9702       // 2.2: build a vector with above loaded value;
9703       // 2.3: the vector has only one value at index 0, others are all undef;
9704       // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9705       if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9706           Subtarget.isISA3_1() && ElementSize <= 16)
9707         return SDValue();
9708 
9709       assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9710       if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9711           Subtarget.hasVSX()) {
9712         SDValue Ops[] = {
9713           LD->getChain(),    // Chain
9714           LD->getBasePtr(),  // Ptr
9715           DAG.getValueType(Op.getValueType()) // VT
9716         };
9717         SDValue LdSplt = DAG.getMemIntrinsicNode(
9718             NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9719             LD->getMemoryVT(), LD->getMemOperand());
9720         // Replace all uses of the output chain of the original load with the
9721         // output chain of the new load.
9722         DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9723                                       LdSplt.getValue(1));
9724         return LdSplt;
9725       }
9726     }
9727 
9728     // In 64-bit mode, BUILD_VECTOR nodes that are not constant splats of up to
9729     // 32 bits can be lowered to VSX instructions under certain conditions.
9730     // Without VSX, there is no pattern more efficient than expanding the node.
9731     if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9732         haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9733                                         Subtarget.hasP8Vector()))
9734       return Op;
9735     return SDValue();
9736   }
9737 
9738   uint64_t SplatBits = APSplatBits.getZExtValue();
9739   uint64_t SplatUndef = APSplatUndef.getZExtValue();
9740   unsigned SplatSize = SplatBitSize / 8;
9741 
9742   // First, handle single instruction cases.
9743 
9744   // All zeros?
9745   if (SplatBits == 0) {
9746     // Canonicalize all zero vectors to be v4i32.
9747     if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9748       SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9749       Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9750     }
9751     return Op;
9752   }
9753 
9754   // We have XXSPLTIW for constant splats four bytes wide.
  // Since the vector length is a multiple of 4 bytes, 2-byte splats can be
  // replaced with 4-byte splats: we replicate SplatBits to form a 4-byte splat
  // element. For example, a 2-byte splat of 0xABAB can be turned into a 4-byte
  // splat of 0xABABABAB.
9759   if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 2)
9760     return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9761                                   Op.getValueType(), DAG, dl);
9762 
9763   if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 4)
9764     return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9765                                   dl);
9766 
9767   // We have XXSPLTIB for constant splats one byte wide.
9768   if (Subtarget.hasP9Vector() && SplatSize == 1)
9769     return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9770                                   dl);
9771 
9772   // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
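  // For example, a v16i8 splat of 0xF2 sign-extends to -14 and can be emitted
  // as a single vspltisb -14.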
9773   int32_t SextVal = SignExtend32(SplatBits, SplatBitSize);
9774   if (SextVal >= -16 && SextVal <= 15)
9775     return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
9776                                   dl);
9777 
9778   // Two instruction sequences.
9779 
9780   // If this value is in the range [-32,30] and is even, use:
9781   //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9782   // If this value is in the range [17,31] and is odd, use:
9783   //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9784   // If this value is in the range [-31,-17] and is odd, use:
9785   //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9786   // Note the last two are three-instruction sequences.
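  // For example, a v4i32 splat of 24 is materialized as vspltisw 12 followed
  // by an add of the result to itself, per the first form above.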
9787   if (SextVal >= -32 && SextVal <= 31) {
9788     // To avoid having these optimizations undone by constant folding,
9789     // we convert to a pseudo that will be expanded later into one of
9790     // the above forms.
9791     SDValue Elt = DAG.getSignedConstant(SextVal, dl, MVT::i32);
9792     EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9793               (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9794     SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9795     SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9796     if (VT == Op.getValueType())
9797       return RetVal;
9798     else
9799       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9800   }
9801 
9802   // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
9803   // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
9804   // for fneg/fabs.
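  // vspltisw -1 gives all ones in each word; vslw then shifts each word left
  // by 31 (the low 5 bits of -1), leaving 0x8000_0000; xoring with the
  // all-ones vector yields 0x7FFF_FFFF.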
9805   if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
    // Make an all-ones vector with vspltisw -1:
9807     SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9808 
9809     // Make the VSLW intrinsic, computing 0x8000_0000.
9810     SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9811                                    OnesV, DAG, dl);
9812 
9813     // xor by OnesV to invert it.
9814     Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9815     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9816   }
9817 
9818   // Check to see if this is a wide variety of vsplti*, binop self cases.
9819   static const signed char SplatCsts[] = {
9820     -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9821     -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9822   };
9823 
9824   for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
    // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
    // cases which are ambiguous (e.g. formation of 0x8000_0000).
9827     int i = SplatCsts[idx];
9828 
9829     // Figure out what shift amount will be used by altivec if shifted by i in
9830     // this splat size.
9831     unsigned TypeShiftAmt = i & (SplatBitSize-1);
9832 
9833     // vsplti + shl self.
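    // For example, a v8i16 splat of 0xF800 is vspltish -8 (0xFFF8 in each
    // element) followed by vslh, which shifts each element left by 8 (the low
    // 4 bits of -8).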
9834     if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9835       SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9836       static const unsigned IIDs[] = { // Intrinsic to use for each size.
9837         Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9838         Intrinsic::ppc_altivec_vslw
9839       };
9840       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9841       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9842     }
9843 
9844     // vsplti + srl self.
9845     if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9846       SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9847       static const unsigned IIDs[] = { // Intrinsic to use for each size.
9848         Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9849         Intrinsic::ppc_altivec_vsrw
9850       };
9851       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9852       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9853     }
9854 
9855     // vsplti + rol self.
9856     if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9857                          ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9858       SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9859       static const unsigned IIDs[] = { // Intrinsic to use for each size.
9860         Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9861         Intrinsic::ppc_altivec_vrlw
9862       };
9863       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9864       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9865     }
9866 
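    // The vsldoi cases below handle splats that are a smaller splat rotated by
    // a whole number of bytes; e.g. a v8i16 splat of 0x0200 is vspltish 2
    // (0x0002 per element) rotated by one byte.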
9867     // t = vsplti c, result = vsldoi t, t, 1
9868     if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9869       SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9870       unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9871       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9872     }
9873     // t = vsplti c, result = vsldoi t, t, 2
9874     if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9875       SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9876       unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9877       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9878     }
9879     // t = vsplti c, result = vsldoi t, t, 3
9880     if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9881       SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9882       unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9883       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9884     }
9885   }
9886 
9887   return SDValue();
9888 }
9889 
9890 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9891 /// the specified operations to build the shuffle.
9892 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9893                                       SDValue RHS, SelectionDAG &DAG,
9894                                       const SDLoc &dl) {
9895   unsigned OpNum = (PFEntry >> 26) & 0x0F;
9896   unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9897   unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
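  // PFEntry packs the cost in the top two bits, the opcode (from the enum
  // below) in bits 29-26, the LHS table index in bits 25-13 and the RHS table
  // index in bits 12-0.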
9898 
9899   enum {
9900     OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9901     OP_VMRGHW,
9902     OP_VMRGLW,
9903     OP_VSPLTISW0,
9904     OP_VSPLTISW1,
9905     OP_VSPLTISW2,
9906     OP_VSPLTISW3,
9907     OP_VSLDOI4,
9908     OP_VSLDOI8,
9909     OP_VSLDOI12
9910   };
9911 
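  // The 13-bit IDs encode four source elements as base-9 digits (0-7 selects
  // an input element, 8 means undef): <0,1,2,3> is the unmodified LHS and
  // <4,5,6,7> the unmodified RHS.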
9912   if (OpNum == OP_COPY) {
9913     if (LHSID == (1*9+2)*9+3) return LHS;
9914     assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9915     return RHS;
9916   }
9917 
9918   SDValue OpLHS, OpRHS;
9919   OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9920   OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9921 
9922   int ShufIdxs[16];
9923   switch (OpNum) {
9924   default: llvm_unreachable("Unknown i32 permute!");
9925   case OP_VMRGHW:
9926     ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
9927     ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9928     ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
9929     ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9930     break;
9931   case OP_VMRGLW:
9932     ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9933     ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9934     ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9935     ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9936     break;
9937   case OP_VSPLTISW0:
9938     for (unsigned i = 0; i != 16; ++i)
9939       ShufIdxs[i] = (i&3)+0;
9940     break;
9941   case OP_VSPLTISW1:
9942     for (unsigned i = 0; i != 16; ++i)
9943       ShufIdxs[i] = (i&3)+4;
9944     break;
9945   case OP_VSPLTISW2:
9946     for (unsigned i = 0; i != 16; ++i)
9947       ShufIdxs[i] = (i&3)+8;
9948     break;
9949   case OP_VSPLTISW3:
9950     for (unsigned i = 0; i != 16; ++i)
9951       ShufIdxs[i] = (i&3)+12;
9952     break;
9953   case OP_VSLDOI4:
9954     return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
9955   case OP_VSLDOI8:
9956     return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
9957   case OP_VSLDOI12:
9958     return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
9959   }
9960   EVT VT = OpLHS.getValueType();
9961   OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
9962   OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
9963   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
9964   return DAG.getNode(ISD::BITCAST, dl, VT, T);
9965 }
9966 
9967 /// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
9968 /// by the VINSERTB instruction introduced in ISA 3.0, else just return default
9969 /// SDValue.
9970 SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
9971                                            SelectionDAG &DAG) const {
9972   const unsigned BytesInVector = 16;
9973   bool IsLE = Subtarget.isLittleEndian();
9974   SDLoc dl(N);
9975   SDValue V1 = N->getOperand(0);
9976   SDValue V2 = N->getOperand(1);
9977   unsigned ShiftElts = 0, InsertAtByte = 0;
9978   bool Swap = false;
9979 
9980   // Shifts required to get the byte we want at element 7.
9981   unsigned LittleEndianShifts[] = {8, 7,  6,  5,  4,  3,  2,  1,
9982                                    0, 15, 14, 13, 12, 11, 10, 9};
9983   unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
9984                                 1, 2,  3,  4,  5,  6,  7,  8};
9985 
9986   ArrayRef<int> Mask = N->getMask();
9987   int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
9988 
9989   // For each mask element, find out if we're just inserting something
9990   // from V2 into V1 or vice versa.
9991   // Possible permutations inserting an element from V2 into V1:
9992   //   X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9993   //   0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9994   //   ...
9995   //   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
9996   // Inserting from V1 into V2 will be similar, except mask range will be
9997   // [16,31].
9998 
9999   bool FoundCandidate = false;
10000   // If both vector operands for the shuffle are the same vector, the mask
10001   // will contain only elements from the first one and the second one will be
10002   // undef.
10003   unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
10004   // Go through the mask of half-words to find an element that's being moved
10005   // from one vector to the other.
10006   for (unsigned i = 0; i < BytesInVector; ++i) {
10007     unsigned CurrentElement = Mask[i];
10008     // If 2nd operand is undefined, we should only look for element 7 in the
10009     // Mask.
10010     if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
10011       continue;
10012 
10013     bool OtherElementsInOrder = true;
10014     // Examine the other elements in the Mask to see if they're in original
10015     // order.
10016     for (unsigned j = 0; j < BytesInVector; ++j) {
10017       if (j == i)
10018         continue;
      // If CurrentElement is from V1 [0,15], we expect the rest of the Mask to
      // be from V2 [16,31] and vice versa, unless the 2nd operand is undefined,
      // in which case we assume we're picking from the 1st operand.
10022       int MaskOffset =
10023           (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
10024       if (Mask[j] != OriginalOrder[j] + MaskOffset) {
10025         OtherElementsInOrder = false;
10026         break;
10027       }
10028     }
10029     // If other elements are in original order, we record the number of shifts
10030     // we need to get the element we want into element 7. Also record which byte
10031     // in the vector we should insert into.
10032     if (OtherElementsInOrder) {
10033       // If 2nd operand is undefined, we assume no shifts and no swapping.
10034       if (V2.isUndef()) {
10035         ShiftElts = 0;
10036         Swap = false;
10037       } else {
        // Only need the last 4 bits for shifts because operands will be
        // swapped if CurrentElement is >= 2^4.
10039         ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
10040                          : BigEndianShifts[CurrentElement & 0xF];
10041         Swap = CurrentElement < BytesInVector;
10042       }
10043       InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
10044       FoundCandidate = true;
10045       break;
10046     }
10047   }
10048 
10049   if (!FoundCandidate)
10050     return SDValue();
10051 
10052   // Candidate found, construct the proper SDAG sequence with VINSERTB,
10053   // optionally with VECSHL if shift is required.
10054   if (Swap)
10055     std::swap(V1, V2);
10056   if (V2.isUndef())
10057     V2 = V1;
10058   if (ShiftElts) {
10059     SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10060                               DAG.getConstant(ShiftElts, dl, MVT::i32));
10061     return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
10062                        DAG.getConstant(InsertAtByte, dl, MVT::i32));
10063   }
10064   return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
10065                      DAG.getConstant(InsertAtByte, dl, MVT::i32));
10066 }
10067 
10068 /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
10069 /// by the VINSERTH instruction introduced in ISA 3.0, else just return default
10070 /// SDValue.
10071 SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
10072                                            SelectionDAG &DAG) const {
10073   const unsigned NumHalfWords = 8;
10074   const unsigned BytesInVector = NumHalfWords * 2;
10075   // Check that the shuffle is on half-words.
10076   if (!isNByteElemShuffleMask(N, 2, 1))
10077     return SDValue();
10078 
10079   bool IsLE = Subtarget.isLittleEndian();
10080   SDLoc dl(N);
10081   SDValue V1 = N->getOperand(0);
10082   SDValue V2 = N->getOperand(1);
10083   unsigned ShiftElts = 0, InsertAtByte = 0;
10084   bool Swap = false;
10085 
10086   // Shifts required to get the half-word we want at element 3.
10087   unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
10088   unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
10089 
10090   uint32_t Mask = 0;
10091   uint32_t OriginalOrderLow = 0x1234567;
10092   uint32_t OriginalOrderHigh = 0x89ABCDEF;
10093   // Now we look at mask elements 0,2,4,6,8,10,12,14.  Pack the mask into a
10094   // 32-bit space, only need 4-bit nibbles per element.
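  // For example, an identity shuffle (mask elements 0..15) packs to
  // 0x01234567, matching OriginalOrderLow.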
10095   for (unsigned i = 0; i < NumHalfWords; ++i) {
10096     unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10097     Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
10098   }
10099 
10100   // For each mask element, find out if we're just inserting something
10101   // from V2 into V1 or vice versa.  Possible permutations inserting an element
10102   // from V2 into V1:
10103   //   X, 1, 2, 3, 4, 5, 6, 7
10104   //   0, X, 2, 3, 4, 5, 6, 7
10105   //   0, 1, X, 3, 4, 5, 6, 7
10106   //   0, 1, 2, X, 4, 5, 6, 7
10107   //   0, 1, 2, 3, X, 5, 6, 7
10108   //   0, 1, 2, 3, 4, X, 6, 7
10109   //   0, 1, 2, 3, 4, 5, X, 7
10110   //   0, 1, 2, 3, 4, 5, 6, X
10111   // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
10112 
10113   bool FoundCandidate = false;
10114   // Go through the mask of half-words to find an element that's being moved
10115   // from one vector to the other.
10116   for (unsigned i = 0; i < NumHalfWords; ++i) {
10117     unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10118     uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
10119     uint32_t MaskOtherElts = ~(0xF << MaskShift);
10120     uint32_t TargetOrder = 0x0;
10121 
10122     // If both vector operands for the shuffle are the same vector, the mask
10123     // will contain only elements from the first one and the second one will be
10124     // undef.
10125     if (V2.isUndef()) {
10126       ShiftElts = 0;
10127       unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
10128       TargetOrder = OriginalOrderLow;
10129       Swap = false;
      // Skip if this is not the correct element or the mask of the other
      // elements doesn't match our expected order.
10132       if (MaskOneElt == VINSERTHSrcElem &&
10133           (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10134         InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10135         FoundCandidate = true;
10136         break;
10137       }
10138     } else { // If both operands are defined.
10139       // Target order is [8,15] if the current mask is between [0,7].
10140       TargetOrder =
10141           (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
      // Skip if the mask of the other elements doesn't match our expected order.
10143       if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10144         // We only need the last 3 bits for the number of shifts.
10145         ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
10146                          : BigEndianShifts[MaskOneElt & 0x7];
10147         InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10148         Swap = MaskOneElt < NumHalfWords;
10149         FoundCandidate = true;
10150         break;
10151       }
10152     }
10153   }
10154 
10155   if (!FoundCandidate)
10156     return SDValue();
10157 
10158   // Candidate found, construct the proper SDAG sequence with VINSERTH,
10159   // optionally with VECSHL if shift is required.
10160   if (Swap)
10161     std::swap(V1, V2);
10162   if (V2.isUndef())
10163     V2 = V1;
10164   SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10165   if (ShiftElts) {
10166     // Double ShiftElts because we're left shifting on v16i8 type.
10167     SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10168                               DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
10169     SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
10170     SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10171                               DAG.getConstant(InsertAtByte, dl, MVT::i32));
10172     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10173   }
10174   SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
10175   SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10176                             DAG.getConstant(InsertAtByte, dl, MVT::i32));
10177   return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10178 }
10179 
10180 /// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
10181 /// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
10182 /// return the default SDValue.
10183 SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
10184                                               SelectionDAG &DAG) const {
10185   // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
10186   // to v16i8. Peek through the bitcasts to get the actual operands.
10187   SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
10188   SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
10189 
10190   auto ShuffleMask = SVN->getMask();
10191   SDValue VecShuffle(SVN, 0);
10192   SDLoc DL(SVN);
10193 
10194   // Check that we have a four byte shuffle.
10195   if (!isNByteElemShuffleMask(SVN, 4, 1))
10196     return SDValue();
10197 
10198   // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
10199   if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
10200     std::swap(LHS, RHS);
10201     VecShuffle = peekThroughBitcasts(DAG.getCommutedVectorShuffle(*SVN));
10202     ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
10203     if (!CommutedSV)
10204       return SDValue();
10205     ShuffleMask = CommutedSV->getMask();
10206   }
10207 
10208   // Ensure that the RHS is a vector of constants.
10209   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
10210   if (!BVN)
10211     return SDValue();
10212 
10213   // Check if RHS is a splat of 4-bytes (or smaller).
10214   APInt APSplatValue, APSplatUndef;
10215   unsigned SplatBitSize;
10216   bool HasAnyUndefs;
10217   if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
10218                             HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
10219       SplatBitSize > 32)
10220     return SDValue();
10221 
10222   // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
10223   // The instruction splats a constant C into two words of the source vector
10224   // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
  // Thus we check that the shuffle mask is the equivalent of
10226   // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
10227   // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
10228   // within each word are consecutive, so we only need to check the first byte.
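  // For example, the mask <0,1,2,3, 16,17,18,19, 8,9,10,11, 20,21,22,23> keeps
  // words 0 and 2 of the LHS and takes words 1 and 3 from the constant RHS,
  // matching the first form.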
10229   SDValue Index;
10230   bool IsLE = Subtarget.isLittleEndian();
10231   if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
10232       (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
10233        ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
10234     Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
10235   else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
10236            (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
10237             ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
10238     Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
10239   else
10240     return SDValue();
10241 
10242   // If the splat is narrower than 32-bits, we need to get the 32-bit value
10243   // for XXSPLTI32DX.
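  // For example, an 8-bit splat of 0xAB is widened to 0xABAB and then to
  // 0xABABABAB.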
10244   unsigned SplatVal = APSplatValue.getZExtValue();
10245   for (; SplatBitSize < 32; SplatBitSize <<= 1)
10246     SplatVal |= (SplatVal << SplatBitSize);
10247 
10248   SDValue SplatNode = DAG.getNode(
10249       PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
10250       Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
10251   return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
10252 }
10253 
10254 /// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
10255 /// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
10256 /// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
10257 /// i.e (or (shl x, C1), (srl x, 128-C1)).
10258 SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
10259   assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
10260   assert(Op.getValueType() == MVT::v1i128 &&
10261          "Only set v1i128 as custom, other type shouldn't reach here!");
10262   SDLoc dl(Op);
10263   SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
10264   SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
10265   unsigned SHLAmt = N1.getConstantOperandVal(0);
10266   if (SHLAmt % 8 == 0) {
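    // A rotate by a multiple of 8 bits is a byte rotation of the v16i8 vector;
    // e.g. SHLAmt == 16 produces the shuffle mask <2,3,...,15,0,1>.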
10267     std::array<int, 16> Mask;
10268     std::iota(Mask.begin(), Mask.end(), 0);
10269     std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
10270     if (SDValue Shuffle =
10271             DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
10272                                  DAG.getUNDEF(MVT::v16i8), Mask))
10273       return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
10274   }
10275   SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
10276   SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
10277                               DAG.getConstant(SHLAmt, dl, MVT::i32));
10278   SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
10279                               DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
10280   SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
10281   return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
10282 }
10283 
10284 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
10285 /// is a shuffle we can handle in a single instruction, return it.  Otherwise,
10286 /// return the code it can be lowered into.  Worst case, it can always be
10287 /// lowered into a vperm.
10288 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10289                                                SelectionDAG &DAG) const {
10290   SDLoc dl(Op);
10291   SDValue V1 = Op.getOperand(0);
10292   SDValue V2 = Op.getOperand(1);
10293   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10294 
10295   // Any nodes that were combined in the target-independent combiner prior
10296   // to vector legalization will not be sent to the target combine. Try to
10297   // combine it here.
10298   if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
10299     if (!isa<ShuffleVectorSDNode>(NewShuffle))
10300       return NewShuffle;
10301     Op = NewShuffle;
10302     SVOp = cast<ShuffleVectorSDNode>(Op);
10303     V1 = Op.getOperand(0);
10304     V2 = Op.getOperand(1);
10305   }
10306   EVT VT = Op.getValueType();
10307   bool isLittleEndian = Subtarget.isLittleEndian();
10308 
10309   unsigned ShiftElts, InsertAtByte;
10310   bool Swap = false;
10311 
10312   // If this is a load-and-splat, we can do that with a single instruction
10313   // in some cases. However if the load has multiple uses, we don't want to
10314   // combine it because that will just produce multiple loads.
10315   bool IsPermutedLoad = false;
10316   const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
10317   if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
10318       (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
10319       InputLoad->hasOneUse()) {
10320     bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
10321     int SplatIdx =
10322       PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
10323 
10324     // The splat index for permuted loads will be in the left half of the vector
10325     // which is strictly wider than the loaded value by 8 bytes. So we need to
10326     // adjust the splat index to point to the correct address in memory.
10327     if (IsPermutedLoad) {
10328       assert((isLittleEndian || IsFourByte) &&
10329              "Unexpected size for permuted load on big endian target");
10330       SplatIdx += IsFourByte ? 2 : 1;
10331       assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
10332              "Splat of a value outside of the loaded memory");
10333     }
10334 
10335     LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
10336     // For 4-byte load-and-splat, we need Power9.
10337     if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
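      // Compute the byte offset of the splatted element within the loaded
      // vector; e.g. on little endian with 4-byte elements, splat index 1
      // reads from offset (3 - 1) * 4 == 8.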
10338       uint64_t Offset = 0;
10339       if (IsFourByte)
10340         Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
10341       else
10342         Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
10343 
10344       // If the width of the load is the same as the width of the splat,
10345       // loading with an offset would load the wrong memory.
10346       if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
10347         Offset = 0;
10348 
10349       SDValue BasePtr = LD->getBasePtr();
10350       if (Offset != 0)
10351         BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
10352                               BasePtr, DAG.getIntPtrConstant(Offset, dl));
10353       SDValue Ops[] = {
10354         LD->getChain(),    // Chain
10355         BasePtr,           // BasePtr
10356         DAG.getValueType(Op.getValueType()) // VT
10357       };
10358       SDVTList VTL =
10359         DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
10360       SDValue LdSplt =
10361         DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
10362                                 Ops, LD->getMemoryVT(), LD->getMemOperand());
10363       DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
10364       if (LdSplt.getValueType() != SVOp->getValueType(0))
10365         LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
10366       return LdSplt;
10367     }
10368   }
10369 
10370   // All v2i64 and v2f64 shuffles are legal
10371   if (VT == MVT::v2i64 || VT == MVT::v2f64)
10372     return Op;
10373 
10374   if (Subtarget.hasP9Vector() &&
10375       PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
10376                            isLittleEndian)) {
10377     if (V2.isUndef())
10378       V2 = V1;
10379     else if (Swap)
10380       std::swap(V1, V2);
10381     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10382     SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
10383     if (ShiftElts) {
10384       SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
10385                                 DAG.getConstant(ShiftElts, dl, MVT::i32));
10386       SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
10387                                 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10388       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10389     }
10390     SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
10391                               DAG.getConstant(InsertAtByte, dl, MVT::i32));
10392     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10393   }
10394 
10395   if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
10396     SDValue SplatInsertNode;
10397     if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
10398       return SplatInsertNode;
10399   }
10400 
10401   if (Subtarget.hasP9Altivec()) {
10402     SDValue NewISDNode;
10403     if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
10404       return NewISDNode;
10405 
10406     if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
10407       return NewISDNode;
10408   }
10409 
10410   if (Subtarget.hasVSX() &&
10411       PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10412     if (Swap)
10413       std::swap(V1, V2);
10414     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10415     SDValue Conv2 =
10416         DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
10417 
10418     SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
10419                               DAG.getConstant(ShiftElts, dl, MVT::i32));
10420     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
10421   }
10422 
10423   if (Subtarget.hasVSX() &&
10424     PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10425     if (Swap)
10426       std::swap(V1, V2);
10427     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10428     SDValue Conv2 =
10429         DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10430 
10431     SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10432                               DAG.getConstant(ShiftElts, dl, MVT::i32));
10433     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10434   }
10435 
10436   if (Subtarget.hasP9Vector()) {
10437      if (PPC::isXXBRHShuffleMask(SVOp)) {
10438       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10439       SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10440       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10441     } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10442       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10443       SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10444       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10445     } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10446       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10447       SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10448       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10449     } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10450       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10451       SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10452       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10453     }
10454   }
10455 
10456   if (Subtarget.hasVSX()) {
10457     if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10458       int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10459 
10460       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10461       SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10462                                   DAG.getConstant(SplatIdx, dl, MVT::i32));
10463       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10464     }
10465 
10466     // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10467     if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10468       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10469       SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10470       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10471     }
10472   }
10473 
10474   // Cases that are handled by instructions that take permute immediates
10475   // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10476   // selected by the instruction selector.
10477   if (V2.isUndef()) {
10478     if (PPC::isSplatShuffleMask(SVOp, 1) ||
10479         PPC::isSplatShuffleMask(SVOp, 2) ||
10480         PPC::isSplatShuffleMask(SVOp, 4) ||
10481         PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10482         PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10483         PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10484         PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10485         PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10486         PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10487         PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10488         PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10489         PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10490         (Subtarget.hasP8Altivec() && (
10491          PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10492          PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10493          PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10494       return Op;
10495     }
10496   }
10497 
10498   // Altivec has a variety of "shuffle immediates" that take two vector inputs
10499   // and produce a fixed permutation.  If any of these match, do not lower to
10500   // VPERM.
10501   unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10502   if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10503       PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10504       PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10505       PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10506       PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10507       PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10508       PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10509       PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10510       PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10511       (Subtarget.hasP8Altivec() && (
10512        PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10513        PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10514        PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10515     return Op;
10516 
10517   // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
10518   // perfect shuffle table to emit an optimal matching sequence.
10519   ArrayRef<int> PermMask = SVOp->getMask();
10520 
10521   if (!DisablePerfectShuffle && !isLittleEndian) {
10522     unsigned PFIndexes[4];
10523     bool isFourElementShuffle = true;
10524     for (unsigned i = 0; i != 4 && isFourElementShuffle;
10525          ++i) {                           // Element number
10526       unsigned EltNo = 8;                 // Start out undef.
10527       for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10528         if (PermMask[i * 4 + j] < 0)
10529           continue; // Undef, ignore it.
10530 
10531         unsigned ByteSource = PermMask[i * 4 + j];
10532         if ((ByteSource & 3) != j) {
10533           isFourElementShuffle = false;
10534           break;
10535         }
10536 
10537         if (EltNo == 8) {
10538           EltNo = ByteSource / 4;
10539         } else if (EltNo != ByteSource / 4) {
10540           isFourElementShuffle = false;
10541           break;
10542         }
10543       }
10544       PFIndexes[i] = EltNo;
10545     }
10546 
10547     // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10548     // perfect shuffle vector to determine if it is cost effective to do this as
10549     // discrete instructions, or whether we should use a vperm.
10550     // For now, we skip this for little endian until such time as we have a
10551     // little-endian perfect shuffle table.
10552     if (isFourElementShuffle) {
10553       // Compute the index in the perfect shuffle table.
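      // Each index is 0-7 for a source element or 8 for undef, so the four
      // indices form a base-9 number into the 6561-entry table.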
10554       unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10555                               PFIndexes[2] * 9 + PFIndexes[3];
10556 
10557       unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10558       unsigned Cost = (PFEntry >> 30);
10559 
10560       // Determining when to avoid vperm is tricky.  Many things affect the cost
10561       // of vperm, particularly how many times the perm mask needs to be
10562       // computed. For example, if the perm mask can be hoisted out of a loop or
10563       // is already used (perhaps because there are multiple permutes with the
10564       // same shuffle mask?) the vperm has a cost of 1.  OTOH, hoisting the
10565       // permute mask out of the loop requires an extra register.
10566       //
10567       // As a compromise, we only emit discrete instructions if the shuffle can
10568       // be generated in 3 or fewer operations.  When we have loop information
10569       // available, if this block is within a loop, we should avoid using vperm
10570       // for 3-operation perms and use a constant pool load instead.
10571       if (Cost < 3)
10572         return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10573     }
10574   }
10575 
10576   // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10577   // vector that will get spilled to the constant pool.
10578   if (V2.isUndef()) V2 = V1;
10579 
10580   return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
10581 }
10582 
10583 SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
10584                                       ArrayRef<int> PermMask, EVT VT,
10585                                       SDValue V1, SDValue V2) const {
10586   unsigned Opcode = PPCISD::VPERM;
10587   EVT ValType = V1.getValueType();
10588   SDLoc dl(Op);
10589   bool NeedSwap = false;
10590   bool isLittleEndian = Subtarget.isLittleEndian();
10591   bool isPPC64 = Subtarget.isPPC64();
10592 
10593   if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
10594       (V1->hasOneUse() || V2->hasOneUse())) {
    LLVM_DEBUG(dbgs() << "At least one of the two input vectors is dead - "
                         "using XXPERM instead\n");
10597     Opcode = PPCISD::XXPERM;
10598 
    // The second input to XXPERM is also an output, so if it has multiple uses
    // a copy would be needed. To avoid that copy, prefer the single-use operand
    // as the second input.
10602     if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
10603         (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
10604       std::swap(V1, V2);
10605       NeedSwap = !NeedSwap;
10606     }
10607   }
10608 
10609   // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10610   // that it is in input element units, not in bytes.  Convert now.
10611 
10612   // For little endian, the order of the input vectors is reversed, and
10613   // the permutation mask is complemented with respect to 31.  This is
10614   // necessary to produce proper semantics with the big-endian-based vperm
10615   // instruction.
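  // For example, shuffle byte 0 (SrcElt 0, j 0) becomes permute-control entry
  // 31 on little endian and 0 on big endian.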
10616   EVT EltVT = V1.getValueType().getVectorElementType();
10617   unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
10618 
10619   bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10620   bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10621 
10622   /*
  Vectors will be appended like so: [ V1 | V2 ]
10624   XXSWAPD on V1:
10625   [   A   |   B   |   C   |   D   ] -> [   C   |   D   |   A   |   B   ]
10626      0-3     4-7     8-11   12-15         0-3     4-7     8-11   12-15
10627   i.e.  index of A, B += 8, and index of C, D -= 8.
10628   XXSWAPD on V2:
10629   [   E   |   F   |   G   |   H   ] -> [   G   |   H   |   E   |   F   ]
10630     16-19   20-23   24-27   28-31        16-19   20-23   24-27   28-31
10631   i.e.  index of E, F += 8, index of G, H -= 8
10632   Swap V1 and V2:
10633   [   V1   |   V2  ] -> [   V2   |   V1   ]
10634      0-15     16-31        0-15     16-31
10635   i.e.  index of V1 += 16, index of V2 -= 16
10636   */
10637 
10638   SmallVector<SDValue, 16> ResultMask;
10639   for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10640     unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10641 
10642     if (V1HasXXSWAPD) {
10643       if (SrcElt < 8)
10644         SrcElt += 8;
10645       else if (SrcElt < 16)
10646         SrcElt -= 8;
10647     }
10648     if (V2HasXXSWAPD) {
10649       if (SrcElt > 23)
10650         SrcElt -= 8;
10651       else if (SrcElt > 15)
10652         SrcElt += 8;
10653     }
10654     if (NeedSwap) {
10655       if (SrcElt < 16)
10656         SrcElt += 16;
10657       else
10658         SrcElt -= 16;
10659     }
10660     for (unsigned j = 0; j != BytesPerElement; ++j)
10661       if (isLittleEndian)
10662         ResultMask.push_back(
10663             DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
10664       else
10665         ResultMask.push_back(
10666             DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
10667   }
10668 
10669   if (V1HasXXSWAPD) {
10670     dl = SDLoc(V1->getOperand(0));
10671     V1 = V1->getOperand(0)->getOperand(1);
10672   }
10673   if (V2HasXXSWAPD) {
10674     dl = SDLoc(V2->getOperand(0));
10675     V2 = V2->getOperand(0)->getOperand(1);
10676   }
10677 
10678   if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
10679     if (ValType != MVT::v2f64)
10680       V1 = DAG.getBitcast(MVT::v2f64, V1);
10681     if (V2.getValueType() != MVT::v2f64)
10682       V2 = DAG.getBitcast(MVT::v2f64, V2);
10683   }
10684 
10685   ShufflesHandledWithVPERM++;
10686   SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10687   LLVM_DEBUG({
10688     ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10689     if (Opcode == PPCISD::XXPERM) {
10690       dbgs() << "Emitting a XXPERM for the following shuffle:\n";
10691     } else {
10692       dbgs() << "Emitting a VPERM for the following shuffle:\n";
10693     }
10694     SVOp->dump();
10695     dbgs() << "With the following permute control vector:\n";
10696     VPermMask.dump();
10697   });
10698 
10699   if (Opcode == PPCISD::XXPERM)
10700     VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);
10701 
  // For little endian we only need to swap the operand order here; the
  // permute control vector above was already computed accordingly.
10704   if (isLittleEndian)
10705     std::swap(V1, V2);
10706 
10707   SDValue VPERMNode =
10708       DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);
10709 
10710   VPERMNode = DAG.getBitcast(ValType, VPERMNode);
10711   return VPERMNode;
10712 }
10713 
10714 /// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10715 /// vector comparison.  If it is, return true and fill in Opc/isDot with
10716 /// information about the intrinsic.
10717 static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10718                                  bool &isDot, const PPCSubtarget &Subtarget) {
10719   unsigned IntrinsicID = Intrin.getConstantOperandVal(0);
10720   CompareOpc = -1;
10721   isDot = false;
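  // CompareOpc holds the extended-opcode value of the corresponding vcmp*
  // instruction; the *_p predicate forms also set the record bit, so the
  // result is reflected in CR6 (isDot).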
10722   switch (IntrinsicID) {
10723   default:
10724     return false;
10725   // Comparison predicates.
10726   case Intrinsic::ppc_altivec_vcmpbfp_p:
10727     CompareOpc = 966;
10728     isDot = true;
10729     break;
10730   case Intrinsic::ppc_altivec_vcmpeqfp_p:
10731     CompareOpc = 198;
10732     isDot = true;
10733     break;
10734   case Intrinsic::ppc_altivec_vcmpequb_p:
10735     CompareOpc = 6;
10736     isDot = true;
10737     break;
10738   case Intrinsic::ppc_altivec_vcmpequh_p:
10739     CompareOpc = 70;
10740     isDot = true;
10741     break;
10742   case Intrinsic::ppc_altivec_vcmpequw_p:
10743     CompareOpc = 134;
10744     isDot = true;
10745     break;
10746   case Intrinsic::ppc_altivec_vcmpequd_p:
10747     if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10748       CompareOpc = 199;
10749       isDot = true;
10750     } else
10751       return false;
10752     break;
10753   case Intrinsic::ppc_altivec_vcmpneb_p:
10754   case Intrinsic::ppc_altivec_vcmpneh_p:
10755   case Intrinsic::ppc_altivec_vcmpnew_p:
10756   case Intrinsic::ppc_altivec_vcmpnezb_p:
10757   case Intrinsic::ppc_altivec_vcmpnezh_p:
10758   case Intrinsic::ppc_altivec_vcmpnezw_p:
10759     if (Subtarget.hasP9Altivec()) {
10760       switch (IntrinsicID) {
10761       default:
10762         llvm_unreachable("Unknown comparison intrinsic.");
10763       case Intrinsic::ppc_altivec_vcmpneb_p:
10764         CompareOpc = 7;
10765         break;
10766       case Intrinsic::ppc_altivec_vcmpneh_p:
10767         CompareOpc = 71;
10768         break;
10769       case Intrinsic::ppc_altivec_vcmpnew_p:
10770         CompareOpc = 135;
10771         break;
10772       case Intrinsic::ppc_altivec_vcmpnezb_p:
10773         CompareOpc = 263;
10774         break;
10775       case Intrinsic::ppc_altivec_vcmpnezh_p:
10776         CompareOpc = 327;
10777         break;
10778       case Intrinsic::ppc_altivec_vcmpnezw_p:
10779         CompareOpc = 391;
10780         break;
10781       }
10782       isDot = true;
10783     } else
10784       return false;
10785     break;
10786   case Intrinsic::ppc_altivec_vcmpgefp_p:
10787     CompareOpc = 454;
10788     isDot = true;
10789     break;
10790   case Intrinsic::ppc_altivec_vcmpgtfp_p:
10791     CompareOpc = 710;
10792     isDot = true;
10793     break;
10794   case Intrinsic::ppc_altivec_vcmpgtsb_p:
10795     CompareOpc = 774;
10796     isDot = true;
10797     break;
10798   case Intrinsic::ppc_altivec_vcmpgtsh_p:
10799     CompareOpc = 838;
10800     isDot = true;
10801     break;
10802   case Intrinsic::ppc_altivec_vcmpgtsw_p:
10803     CompareOpc = 902;
10804     isDot = true;
10805     break;
10806   case Intrinsic::ppc_altivec_vcmpgtsd_p:
10807     if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10808       CompareOpc = 967;
10809       isDot = true;
10810     } else
10811       return false;
10812     break;
10813   case Intrinsic::ppc_altivec_vcmpgtub_p:
10814     CompareOpc = 518;
10815     isDot = true;
10816     break;
10817   case Intrinsic::ppc_altivec_vcmpgtuh_p:
10818     CompareOpc = 582;
10819     isDot = true;
10820     break;
10821   case Intrinsic::ppc_altivec_vcmpgtuw_p:
10822     CompareOpc = 646;
10823     isDot = true;
10824     break;
10825   case Intrinsic::ppc_altivec_vcmpgtud_p:
10826     if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10827       CompareOpc = 711;
10828       isDot = true;
10829     } else
10830       return false;
10831     break;
10832 
10833   case Intrinsic::ppc_altivec_vcmpequq:
10834   case Intrinsic::ppc_altivec_vcmpgtsq:
10835   case Intrinsic::ppc_altivec_vcmpgtuq:
10836     if (!Subtarget.isISA3_1())
10837       return false;
10838     switch (IntrinsicID) {
10839     default:
10840       llvm_unreachable("Unknown comparison intrinsic.");
10841     case Intrinsic::ppc_altivec_vcmpequq:
10842       CompareOpc = 455;
10843       break;
10844     case Intrinsic::ppc_altivec_vcmpgtsq:
10845       CompareOpc = 903;
10846       break;
10847     case Intrinsic::ppc_altivec_vcmpgtuq:
10848       CompareOpc = 647;
10849       break;
10850     }
10851     break;
10852 
  // VSX predicate comparisons use the same infrastructure.
10854   case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10855   case Intrinsic::ppc_vsx_xvcmpgedp_p:
10856   case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10857   case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10858   case Intrinsic::ppc_vsx_xvcmpgesp_p:
10859   case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10860     if (Subtarget.hasVSX()) {
10861       switch (IntrinsicID) {
10862       case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10863         CompareOpc = 99;
10864         break;
10865       case Intrinsic::ppc_vsx_xvcmpgedp_p:
10866         CompareOpc = 115;
10867         break;
10868       case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10869         CompareOpc = 107;
10870         break;
10871       case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10872         CompareOpc = 67;
10873         break;
10874       case Intrinsic::ppc_vsx_xvcmpgesp_p:
10875         CompareOpc = 83;
10876         break;
10877       case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10878         CompareOpc = 75;
10879         break;
10880       }
10881       isDot = true;
10882     } else
10883       return false;
10884     break;
10885 
10886   // Normal Comparisons.
10887   case Intrinsic::ppc_altivec_vcmpbfp:
10888     CompareOpc = 966;
10889     break;
10890   case Intrinsic::ppc_altivec_vcmpeqfp:
10891     CompareOpc = 198;
10892     break;
10893   case Intrinsic::ppc_altivec_vcmpequb:
10894     CompareOpc = 6;
10895     break;
10896   case Intrinsic::ppc_altivec_vcmpequh:
10897     CompareOpc = 70;
10898     break;
10899   case Intrinsic::ppc_altivec_vcmpequw:
10900     CompareOpc = 134;
10901     break;
10902   case Intrinsic::ppc_altivec_vcmpequd:
10903     if (Subtarget.hasP8Altivec())
10904       CompareOpc = 199;
10905     else
10906       return false;
10907     break;
10908   case Intrinsic::ppc_altivec_vcmpneb:
10909   case Intrinsic::ppc_altivec_vcmpneh:
10910   case Intrinsic::ppc_altivec_vcmpnew:
10911   case Intrinsic::ppc_altivec_vcmpnezb:
10912   case Intrinsic::ppc_altivec_vcmpnezh:
10913   case Intrinsic::ppc_altivec_vcmpnezw:
10914     if (Subtarget.hasP9Altivec())
10915       switch (IntrinsicID) {
10916       default:
10917         llvm_unreachable("Unknown comparison intrinsic.");
10918       case Intrinsic::ppc_altivec_vcmpneb:
10919         CompareOpc = 7;
10920         break;
10921       case Intrinsic::ppc_altivec_vcmpneh:
10922         CompareOpc = 71;
10923         break;
10924       case Intrinsic::ppc_altivec_vcmpnew:
10925         CompareOpc = 135;
10926         break;
10927       case Intrinsic::ppc_altivec_vcmpnezb:
10928         CompareOpc = 263;
10929         break;
10930       case Intrinsic::ppc_altivec_vcmpnezh:
10931         CompareOpc = 327;
10932         break;
10933       case Intrinsic::ppc_altivec_vcmpnezw:
10934         CompareOpc = 391;
10935         break;
10936       }
10937     else
10938       return false;
10939     break;
10940   case Intrinsic::ppc_altivec_vcmpgefp:
10941     CompareOpc = 454;
10942     break;
10943   case Intrinsic::ppc_altivec_vcmpgtfp:
10944     CompareOpc = 710;
10945     break;
10946   case Intrinsic::ppc_altivec_vcmpgtsb:
10947     CompareOpc = 774;
10948     break;
10949   case Intrinsic::ppc_altivec_vcmpgtsh:
10950     CompareOpc = 838;
10951     break;
10952   case Intrinsic::ppc_altivec_vcmpgtsw:
10953     CompareOpc = 902;
10954     break;
10955   case Intrinsic::ppc_altivec_vcmpgtsd:
10956     if (Subtarget.hasP8Altivec())
10957       CompareOpc = 967;
10958     else
10959       return false;
10960     break;
10961   case Intrinsic::ppc_altivec_vcmpgtub:
10962     CompareOpc = 518;
10963     break;
10964   case Intrinsic::ppc_altivec_vcmpgtuh:
10965     CompareOpc = 582;
10966     break;
10967   case Intrinsic::ppc_altivec_vcmpgtuw:
10968     CompareOpc = 646;
10969     break;
10970   case Intrinsic::ppc_altivec_vcmpgtud:
10971     if (Subtarget.hasP8Altivec())
10972       CompareOpc = 711;
10973     else
10974       return false;
10975     break;
10976   case Intrinsic::ppc_altivec_vcmpequq_p:
10977   case Intrinsic::ppc_altivec_vcmpgtsq_p:
10978   case Intrinsic::ppc_altivec_vcmpgtuq_p:
10979     if (!Subtarget.isISA3_1())
10980       return false;
10981     switch (IntrinsicID) {
10982     default:
10983       llvm_unreachable("Unknown comparison intrinsic.");
10984     case Intrinsic::ppc_altivec_vcmpequq_p:
10985       CompareOpc = 455;
10986       break;
10987     case Intrinsic::ppc_altivec_vcmpgtsq_p:
10988       CompareOpc = 903;
10989       break;
10990     case Intrinsic::ppc_altivec_vcmpgtuq_p:
10991       CompareOpc = 647;
10992       break;
10993     }
10994     isDot = true;
10995     break;
10996   }
10997   return true;
10998 }
10999 
11000 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
11001 /// lower, do it, otherwise return null.
11002 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
11003                                                    SelectionDAG &DAG) const {
11004   unsigned IntrinsicID = Op.getConstantOperandVal(0);
11005 
11006   SDLoc dl(Op);
11007 
11008   switch (IntrinsicID) {
11009   case Intrinsic::thread_pointer:
11010     // Reads the thread pointer register, used for __builtin_thread_pointer.
11011     if (Subtarget.isPPC64())
11012       return DAG.getRegister(PPC::X13, MVT::i64);
11013     return DAG.getRegister(PPC::R2, MVT::i32);
11014 
11015   case Intrinsic::ppc_rldimi: {
11016     assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
11017     SDValue Src = Op.getOperand(1);
11018     APInt Mask = Op.getConstantOperandAPInt(4);
11019     if (Mask.isZero())
11020       return Op.getOperand(2);
11021     if (Mask.isAllOnes())
11022       return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3));
11023     uint64_t SH = Op.getConstantOperandVal(3);
11024     unsigned MB = 0, ME = 0;
11025     if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
11026       report_fatal_error("invalid rldimi mask!");
11027     // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
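    // Pre-rotate Src so that the RLDIMI below (which rotates by 63 - ME)
    // produces the same bits as rotl(Src, SH): Src' = rotl(Src, SH + ME - 63),
    // taken modulo 64.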
11028     if (ME < 63 - SH) {
11029       Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11030                         DAG.getConstant(ME + SH + 1, dl, MVT::i32));
11031     } else if (ME > 63 - SH) {
11032       Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11033                         DAG.getConstant(ME + SH - 63, dl, MVT::i32));
11034     }
11035     return SDValue(
11036         DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
11037                            {Op.getOperand(2), Src,
11038                             DAG.getTargetConstant(63 - ME, dl, MVT::i32),
11039                             DAG.getTargetConstant(MB, dl, MVT::i32)}),
11040         0);
11041   }
11042 
11043   case Intrinsic::ppc_rlwimi: {
11044     APInt Mask = Op.getConstantOperandAPInt(4);
11045     if (Mask.isZero())
11046       return Op.getOperand(2);
11047     if (Mask.isAllOnes())
11048       return DAG.getNode(ISD::ROTL, dl, MVT::i32, Op.getOperand(1),
11049                          Op.getOperand(3));
11050     unsigned MB = 0, ME = 0;
11051     if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
11052       report_fatal_error("invalid rlwimi mask!");
11053     return SDValue(DAG.getMachineNode(
11054                        PPC::RLWIMI, dl, MVT::i32,
11055                        {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
11056                         DAG.getTargetConstant(MB, dl, MVT::i32),
11057                         DAG.getTargetConstant(ME, dl, MVT::i32)}),
11058                    0);
11059   }
11060 
11061   case Intrinsic::ppc_rlwnm: {
11062     if (Op.getConstantOperandVal(3) == 0)
11063       return DAG.getConstant(0, dl, MVT::i32);
11064     unsigned MB = 0, ME = 0;
11065     if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
11066       report_fatal_error("invalid rlwnm mask!");
11067     return SDValue(
11068         DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32,
11069                            {Op.getOperand(1), Op.getOperand(2),
11070                             DAG.getTargetConstant(MB, dl, MVT::i32),
11071                             DAG.getTargetConstant(ME, dl, MVT::i32)}),
11072         0);
11073   }
11074 
11075   case Intrinsic::ppc_mma_disassemble_acc: {
11076     if (Subtarget.isISAFuture()) {
11077       EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11078       SDValue WideVec =
11079           SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes,
11080                                      Op.getOperand(1)),
11081                   0);
11082       SmallVector<SDValue, 4> RetOps;
11083       SDValue Value = SDValue(WideVec.getNode(), 0);
11084       SDValue Value2 = SDValue(WideVec.getNode(), 1);
11085 
11086       SDValue Extract;
11087       Extract = DAG.getNode(
11088           PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11089           Subtarget.isLittleEndian() ? Value2 : Value,
11090           DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11091                           dl, getPointerTy(DAG.getDataLayout())));
11092       RetOps.push_back(Extract);
11093       Extract = DAG.getNode(
11094           PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11095           Subtarget.isLittleEndian() ? Value2 : Value,
11096           DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11097                           dl, getPointerTy(DAG.getDataLayout())));
11098       RetOps.push_back(Extract);
11099       Extract = DAG.getNode(
11100           PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11101           Subtarget.isLittleEndian() ? Value : Value2,
11102           DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11103                           dl, getPointerTy(DAG.getDataLayout())));
11104       RetOps.push_back(Extract);
11105       Extract = DAG.getNode(
11106           PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11107           Subtarget.isLittleEndian() ? Value : Value2,
11108           DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11109                           dl, getPointerTy(DAG.getDataLayout())));
11110       RetOps.push_back(Extract);
11111       return DAG.getMergeValues(RetOps, dl);
11112     }
11113     [[fallthrough]];
11114   }
11115   case Intrinsic::ppc_vsx_disassemble_pair: {
11116     int NumVecs = 2;
11117     SDValue WideVec = Op.getOperand(1);
11118     if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
11119       NumVecs = 4;
11120       WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
11121     }
11122     SmallVector<SDValue, 4> RetOps;
11123     for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
11124       SDValue Extract = DAG.getNode(
11125           PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
11126           DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
11127                                                      : VecNo,
11128                           dl, getPointerTy(DAG.getDataLayout())));
11129       RetOps.push_back(Extract);
11130     }
11131     return DAG.getMergeValues(RetOps, dl);
11132   }
11133 
11134   case Intrinsic::ppc_mma_xxmfacc:
11135   case Intrinsic::ppc_mma_xxmtacc: {
11136     // Allow pre-isa-future subtargets to lower as normal.
11137     if (!Subtarget.isISAFuture())
11138       return SDValue();
11139     // The xxmtacc and xxmfacc intrinsics take one argument of type v512i1.
11140     // On ISA-future CPUs the corresponding wacc instruction,
11141     // dmxx[inst|extf]dmr512, is always generated for type v512i1, which
11142     // removes the need to produce the xxm[t|f]acc.
11143     SDValue WideVec = Op.getOperand(1);
11144     DAG.ReplaceAllUsesWith(Op, WideVec);
11145     return SDValue();
11146   }
11147 
11148   case Intrinsic::ppc_unpack_longdouble: {
11149     auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11150     assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11151            "Argument of long double unpack must be 0 or 1!");
11152     return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
11153                        DAG.getConstant(!!(Idx->getSExtValue()), dl,
11154                                        Idx->getValueType(0)));
11155   }
11156 
11157   case Intrinsic::ppc_compare_exp_lt:
11158   case Intrinsic::ppc_compare_exp_gt:
11159   case Intrinsic::ppc_compare_exp_eq:
11160   case Intrinsic::ppc_compare_exp_uo: {
11161     unsigned Pred;
11162     switch (IntrinsicID) {
11163     case Intrinsic::ppc_compare_exp_lt:
11164       Pred = PPC::PRED_LT;
11165       break;
11166     case Intrinsic::ppc_compare_exp_gt:
11167       Pred = PPC::PRED_GT;
11168       break;
11169     case Intrinsic::ppc_compare_exp_eq:
11170       Pred = PPC::PRED_EQ;
11171       break;
11172     case Intrinsic::ppc_compare_exp_uo:
11173       Pred = PPC::PRED_UN;
11174       break;
11175     }
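          // Compare the exponents with xscmpexpdp, then select 1 or 0 from the
          // resulting CR field according to the requested predicate.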
11176     return SDValue(
11177         DAG.getMachineNode(
11178             PPC::SELECT_CC_I4, dl, MVT::i32,
11179             {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
11180                                         Op.getOperand(1), Op.getOperand(2)),
11181                      0),
11182              DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11183              DAG.getTargetConstant(Pred, dl, MVT::i32)}),
11184         0);
11185   }
11186   case Intrinsic::ppc_test_data_class: {
11187     EVT OpVT = Op.getOperand(1).getValueType();
11188     unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
11189                                          : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
11190                                                              : PPC::XSTSTDCSP);
11191     return SDValue(
11192         DAG.getMachineNode(
11193             PPC::SELECT_CC_I4, dl, MVT::i32,
11194             {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
11195                                         Op.getOperand(1)),
11196                      0),
11197              DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11198              DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
11199         0);
11200   }
11201   case Intrinsic::ppc_fnmsub: {
11202     EVT VT = Op.getOperand(1).getValueType();
11203     if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
11204       return DAG.getNode(
11205           ISD::FNEG, dl, VT,
11206           DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
11207                       DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
11208     return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
11209                        Op.getOperand(2), Op.getOperand(3));
11210   }
11211   case Intrinsic::ppc_convert_f128_to_ppcf128:
11212   case Intrinsic::ppc_convert_ppcf128_to_f128: {
11213     RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
11214                             ? RTLIB::CONVERT_PPCF128_F128
11215                             : RTLIB::CONVERT_F128_PPCF128;
11216     MakeLibCallOptions CallOptions;
11217     std::pair<SDValue, SDValue> Result =
11218         makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
11219                     dl, SDValue());
11220     return Result.first;
11221   }
11222   case Intrinsic::ppc_maxfe:
11223   case Intrinsic::ppc_maxfl:
11224   case Intrinsic::ppc_maxfs:
11225   case Intrinsic::ppc_minfe:
11226   case Intrinsic::ppc_minfl:
11227   case Intrinsic::ppc_minfs: {
11228     EVT VT = Op.getValueType();
11229     assert(
11230         all_of(Op->ops().drop_front(4),
11231                [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
11232         "ppc_[max|min]f[e|l|s] must have uniform type arguments");
11233     (void)VT;
11234     ISD::CondCode CC = ISD::SETGT;
11235     if (IntrinsicID == Intrinsic::ppc_minfe ||
11236         IntrinsicID == Intrinsic::ppc_minfl ||
11237         IntrinsicID == Intrinsic::ppc_minfs)
11238       CC = ISD::SETLT;
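          // Fold every floating-point argument into Res with a chain of
          // select_cc nodes, keeping the running maximum (or minimum) as we go.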
11239     unsigned I = Op.getNumOperands() - 2, Cnt = I;
11240     SDValue Res = Op.getOperand(I);
11241     for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
11242       Res =
11243           DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
11244     }
11245     return Res;
11246   }
11247   }
11248 
11249   // If this is a lowered altivec predicate compare, CompareOpc is set to the
11250   // opcode number of the comparison.
11251   int CompareOpc;
11252   bool isDot;
11253   if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
11254     return SDValue();    // Don't custom lower most intrinsics.
11255 
11256   // If this is a non-dot comparison, make the VCMP node and we are done.
11257   if (!isDot) {
11258     SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
11259                               Op.getOperand(1), Op.getOperand(2),
11260                               DAG.getConstant(CompareOpc, dl, MVT::i32));
11261     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
11262   }
11263 
11264   // Create the PPCISD altivec 'dot' comparison node.
11265   SDValue Ops[] = {
11266     Op.getOperand(2),  // LHS
11267     Op.getOperand(3),  // RHS
11268     DAG.getConstant(CompareOpc, dl, MVT::i32)
11269   };
11270   EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
11271   SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
11272 
11273   // Unpack the result based on how the target uses it.
11274   unsigned BitNo; // Bit # of CR6.
11275   bool InvertBit; // Invert result?
11276   unsigned Bitx;
11277   unsigned SetOp;
11278   switch (Op.getConstantOperandVal(1)) {
11279   default: // Can't happen, don't crash on invalid number though.
11280   case 0:  // Return the value of the EQ bit of CR6.
11281     BitNo = 0;
11282     InvertBit = false;
11283     Bitx = PPC::sub_eq;
11284     SetOp = PPCISD::SETBC;
11285     break;
11286   case 1: // Return the inverted value of the EQ bit of CR6.
11287     BitNo = 0;
11288     InvertBit = true;
11289     Bitx = PPC::sub_eq;
11290     SetOp = PPCISD::SETBCR;
11291     break;
11292   case 2: // Return the value of the LT bit of CR6.
11293     BitNo = 2;
11294     InvertBit = false;
11295     Bitx = PPC::sub_lt;
11296     SetOp = PPCISD::SETBC;
11297     break;
11298   case 3: // Return the inverted value of the LT bit of CR6.
11299     BitNo = 2;
11300     InvertBit = true;
11301     Bitx = PPC::sub_lt;
11302     SetOp = PPCISD::SETBCR;
11303     break;
11304   }
11305 
11306   SDValue GlueOp = CompNode.getValue(1);
11307   if (Subtarget.isISA3_1()) {
11308     SDValue SubRegIdx = DAG.getTargetConstant(Bitx, dl, MVT::i32);
11309     SDValue CR6Reg = DAG.getRegister(PPC::CR6, MVT::i32);
11310     SDValue CRBit =
11311         SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
11312                                    CR6Reg, SubRegIdx, GlueOp),
11313                 0);
11314     return DAG.getNode(SetOp, dl, MVT::i32, CRBit);
11315   }
11316 
11317   // Now that we have the comparison, emit a copy from the CR to a GPR.
11318   // This is flagged to the above dot comparison.
11319   SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
11320                               DAG.getRegister(PPC::CR6, MVT::i32), GlueOp);
11321 
11322   // Shift the bit into the low position.
11323   Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
11324                       DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
11325   // Isolate the bit.
11326   Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
11327                       DAG.getConstant(1, dl, MVT::i32));
11328 
11329   // If we are supposed to, toggle the bit.
11330   if (InvertBit)
11331     Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
11332                         DAG.getConstant(1, dl, MVT::i32));
11333   return Flags;
11334 }
11335 
11336 SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11337                                                SelectionDAG &DAG) const {
11338   // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
11339   // the beginning of the argument list.
11340   int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
11341   SDLoc DL(Op);
11342   switch (Op.getConstantOperandVal(ArgStart)) {
11343   case Intrinsic::ppc_cfence: {
11344     assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
11345     SDValue Val = Op.getOperand(ArgStart + 1);
11346     EVT Ty = Val.getValueType();
11347     if (Ty == MVT::i128) {
11348       // FIXME: Testing one of two paired registers is sufficient to guarantee
11349       // ordering?
11350       Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
11351     }
11352     unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
11353     return SDValue(
11354         DAG.getMachineNode(
11355             Opcode, DL, MVT::Other,
11356             DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getScalarIntVT(), Val),
11357             Op.getOperand(0)),
11358         0);
11359   }
11360   default:
11361     break;
11362   }
11363   return SDValue();
11364 }
11365 
11366 // Lower scalar BSWAP64 to xxbrd.
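      // The scalar is splatted into both doublewords of a v2i64 (mtvsrdd),
      // byte-swapped as a vector (xxbrd), and the relevant doubleword is then
      // moved back to a GPR (mfvsrd).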
11367 SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11368   SDLoc dl(Op);
11369   if (!Subtarget.isPPC64())
11370     return Op;
11371   // MTVSRDD
11372   Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
11373                    Op.getOperand(0));
11374   // XXBRD
11375   Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
11376   // MFVSRD
11377   int VectorIndex = 0;
11378   if (Subtarget.isLittleEndian())
11379     VectorIndex = 1;
11380   Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
11381                    DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
11382   return Op;
11383 }
11384 
11385 // ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
11386 // compared to a value that is atomically loaded (atomic loads zero-extend).
11387 SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
11388                                                 SelectionDAG &DAG) const {
11389   assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
11390          "Expecting an atomic compare-and-swap here.");
11391   SDLoc dl(Op);
11392   auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
11393   EVT MemVT = AtomicNode->getMemoryVT();
11394   if (MemVT.getSizeInBits() >= 32)
11395     return Op;
11396 
11397   SDValue CmpOp = Op.getOperand(2);
11398   // If this is already correctly zero-extended, leave it alone.
11399   auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
11400   if (DAG.MaskedValueIsZero(CmpOp, HighBits))
11401     return Op;
11402 
11403   // Clear the high bits of the compare operand.
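        // For example, the mask is 0xFF for an i8 cmpxchg and 0xFFFF for i16.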
11404   unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
11405   SDValue NewCmpOp =
11406     DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
11407                 DAG.getConstant(MaskVal, dl, MVT::i32));
11408 
11409   // Replace the existing compare operand with the properly zero-extended one.
11410   SmallVector<SDValue, 4> Ops;
11411   for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
11412     Ops.push_back(AtomicNode->getOperand(i));
11413   Ops[2] = NewCmpOp;
11414   MachineMemOperand *MMO = AtomicNode->getMemOperand();
11415   SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
11416   auto NodeTy =
11417     (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
11418   return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
11419 }
11420 
11421 SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
11422                                                   SelectionDAG &DAG) const {
11423   AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
11424   EVT MemVT = N->getMemoryVT();
11425   assert(MemVT.getSimpleVT() == MVT::i128 &&
11426          "Expect quadword atomic operations");
11427   SDLoc dl(N);
11428   unsigned Opc = N->getOpcode();
11429   switch (Opc) {
11430   case ISD::ATOMIC_LOAD: {
11431     // Lower quadword atomic load to int_ppc_atomic_load_i128 which will be
11432     // lowered to ppc instructions by pattern matching instruction selector.
11433     SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
11434     SmallVector<SDValue, 4> Ops{
11435         N->getOperand(0),
11436         DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
11437     for (int I = 1, E = N->getNumOperands(); I < E; ++I)
11438       Ops.push_back(N->getOperand(I));
11439     SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
11440                                                 Ops, MemVT, N->getMemOperand());
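          // Reassemble the i128 result from the two 64-bit halves returned by
          // the intrinsic: Val = Lo | (Hi << 64).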
11441     SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
11442     SDValue ValHi =
11443         DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
11444     ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
11445                         DAG.getConstant(64, dl, MVT::i32));
11446     SDValue Val =
11447         DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
11448     return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
11449                        {Val, LoadedVal.getValue(2)});
11450   }
11451   case ISD::ATOMIC_STORE: {
11452     // Lower a quadword atomic store to int_ppc_atomic_store_i128, which is then
11453     // lowered to PPC instructions by the pattern-matching instruction selector.
11454     SDVTList Tys = DAG.getVTList(MVT::Other);
11455     SmallVector<SDValue, 4> Ops{
11456         N->getOperand(0),
11457         DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
11458     SDValue Val = N->getOperand(1);
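          // Split the i128 value into 64-bit halves for the intrinsic:
          // Lo = trunc(Val), Hi = trunc(Val >> 64).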
11459     SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
11460     SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
11461                                 DAG.getConstant(64, dl, MVT::i32));
11462     ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
11463     Ops.push_back(ValLo);
11464     Ops.push_back(ValHi);
11465     Ops.push_back(N->getOperand(2));
11466     return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
11467                                    N->getMemOperand());
11468   }
11469   default:
11470     llvm_unreachable("Unexpected atomic opcode");
11471   }
11472 }
11473 
11474 static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl,
11475                                 SelectionDAG &DAG,
11476                                 const PPCSubtarget &Subtarget) {
11477   assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11478 
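        // These bit positions mirror the DCMX operand of the xststdc[sp|dp|qp]
        // instructions.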
11479   enum DataClassMask {
11480     DC_NAN = 1 << 6,
11481     DC_NEG_INF = 1 << 4,
11482     DC_POS_INF = 1 << 5,
11483     DC_NEG_ZERO = 1 << 2,
11484     DC_POS_ZERO = 1 << 3,
11485     DC_NEG_SUBNORM = 1,
11486     DC_POS_SUBNORM = 1 << 1,
11487   };
11488 
11489   EVT VT = Op.getValueType();
11490 
11491   unsigned TestOp = VT == MVT::f128  ? PPC::XSTSTDCQP
11492                     : VT == MVT::f64 ? PPC::XSTSTDCDP
11493                                      : PPC::XSTSTDCSP;
11494 
11495   if (Mask == fcAllFlags)
11496     return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
11497   if (Mask == 0)
11498     return DAG.getBoolConstant(false, Dl, MVT::i1, VT);
11499 
11500   // Where it is cheaper or necessary, test the complemented mask and invert.
11501   if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
11502     SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
11503     return DAG.getNOT(Dl, Rev, MVT::i1);
11504   }
11505 
11506   // PowerPC cannot test for 'normal' directly. Test all the other classes
11507   // instead and check that the value is none of them, with the expected sign.
11508   if (Mask & fcNormal) {
11509     SDValue Rev(DAG.getMachineNode(
11510                     TestOp, Dl, MVT::i32,
11511                     DAG.getTargetConstant(DC_NAN | DC_NEG_INF | DC_POS_INF |
11512                                               DC_NEG_ZERO | DC_POS_ZERO |
11513                                               DC_NEG_SUBNORM | DC_POS_SUBNORM,
11514                                           Dl, MVT::i32),
11515                     Op),
11516                 0);
11517     // The sign is stored in CR bit 0 (LT); the result is in CR bit 2 (EQ).
11518     SDValue Sign(
11519         DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11520                            DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)),
11521         0);
11522     SDValue Normal(DAG.getNOT(
11523         Dl,
11524         SDValue(DAG.getMachineNode(
11525                     TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11526                     DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11527                 0),
11528         MVT::i1));
11529     if (Mask & fcPosNormal)
11530       Sign = DAG.getNOT(Dl, Sign, MVT::i1);
11531     SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal);
11532     if (Mask == fcPosNormal || Mask == fcNegNormal)
11533       return Result;
11534 
11535     return DAG.getNode(
11536         ISD::OR, Dl, MVT::i1,
11537         getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result);
11538   }
11539 
11540   // The instruction doesn't differentiate between signaling and quiet NaNs. Test
11541   // the rest first, and test if it 'is NaN and is signaling/quiet'.
11542   if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
11543     bool IsQuiet = Mask & fcQNan;
11544     SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);
11545 
11546     // Quietness is determined by the first bit of the fraction field.
11547     uint64_t QuietMask = 0;
11548     SDValue HighWord;
11549     if (VT == MVT::f128) {
11550       HighWord = DAG.getNode(
11551           ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
11552           DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
11553       QuietMask = 0x8000;
11554     } else if (VT == MVT::f64) {
11555       if (Subtarget.isPPC64()) {
11556         HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
11557                                DAG.getBitcast(MVT::i64, Op),
11558                                DAG.getConstant(1, Dl, MVT::i32));
11559       } else {
11560         SDValue Vec = DAG.getBitcast(
11561             MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
11562         HighWord = DAG.getNode(
11563             ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec,
11564             DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
11565       }
11566       QuietMask = 0x80000;
11567     } else if (VT == MVT::f32) {
11568       HighWord = DAG.getBitcast(MVT::i32, Op);
11569       QuietMask = 0x400000;
11570     }
11571     SDValue NanRes = DAG.getSetCC(
11572         Dl, MVT::i1,
11573         DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord,
11574                     DAG.getConstant(QuietMask, Dl, MVT::i32)),
11575         DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ);
11576     NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes);
11577     if (Mask == fcQNan || Mask == fcSNan)
11578       return NanRes;
11579 
11580     return DAG.getNode(ISD::OR, Dl, MVT::i1,
11581                        getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget),
11582                        NanRes);
11583   }
11584 
11585   unsigned NativeMask = 0;
11586   if ((Mask & fcNan) == fcNan)
11587     NativeMask |= DC_NAN;
11588   if (Mask & fcNegInf)
11589     NativeMask |= DC_NEG_INF;
11590   if (Mask & fcPosInf)
11591     NativeMask |= DC_POS_INF;
11592   if (Mask & fcNegZero)
11593     NativeMask |= DC_NEG_ZERO;
11594   if (Mask & fcPosZero)
11595     NativeMask |= DC_POS_ZERO;
11596   if (Mask & fcNegSubnormal)
11597     NativeMask |= DC_NEG_SUBNORM;
11598   if (Mask & fcPosSubnormal)
11599     NativeMask |= DC_POS_SUBNORM;
11600   return SDValue(
11601       DAG.getMachineNode(
11602           TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1,
11603           SDValue(DAG.getMachineNode(
11604                       TestOp, Dl, MVT::i32,
11605                       DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op),
11606                   0),
11607           DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11608       0);
11609 }
11610 
11611 SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11612                                            SelectionDAG &DAG) const {
11613   assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11614   SDValue LHS = Op.getOperand(0);
11615   uint64_t RHSC = Op.getConstantOperandVal(1);
11616   SDLoc Dl(Op);
11617   FPClassTest Category = static_cast<FPClassTest>(RHSC);
11618   if (LHS.getValueType() == MVT::ppcf128) {
11619     // The higher part determines the value class.
11620     LHS = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::f64, LHS,
11621                       DAG.getConstant(1, Dl, MVT::i32));
11622   }
11623 
11624   return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
11625 }
11626 
11627 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
11628                                                  SelectionDAG &DAG) const {
11629   SDLoc dl(Op);
11630 
11631   MachineFunction &MF = DAG.getMachineFunction();
11632   SDValue Op0 = Op.getOperand(0);
11633   EVT ValVT = Op0.getValueType();
11634   unsigned EltSize = Op.getValueType().getScalarSizeInBits();
11635   if (isa<ConstantSDNode>(Op0) && EltSize <= 32) {
11636     int64_t IntVal = Op.getConstantOperandVal(0);
11637     if (IntVal >= -16 && IntVal <= 15)
11638       return getCanonicalConstSplat(IntVal, EltSize / 8, Op.getValueType(), DAG,
11639                                     dl);
11640   }
11641 
11642   ReuseLoadInfo RLI;
11643   if (Subtarget.hasLFIWAX() && Subtarget.hasVSX() &&
11644       Op.getValueType() == MVT::v4i32 && Op0.getOpcode() == ISD::LOAD &&
11645       Op0.getValueType() == MVT::i32 && Op0.hasOneUse() &&
11646       canReuseLoadAddress(Op0, MVT::i32, RLI, DAG, ISD::NON_EXTLOAD)) {
11647 
11648     MachineMemOperand *MMO =
11649         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
11650                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
11651     SDValue Ops[] = {RLI.Chain, RLI.Ptr, DAG.getValueType(Op.getValueType())};
11652     SDValue Bits = DAG.getMemIntrinsicNode(
11653         PPCISD::LD_SPLAT, dl, DAG.getVTList(MVT::v4i32, MVT::Other), Ops,
11654         MVT::i32, MMO);
11655     if (RLI.ResChain)
11656       DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
11657     return Bits.getValue(0);
11658   }
11659 
11660   // Create a stack slot that is 16-byte aligned.
11661   MachineFrameInfo &MFI = MF.getFrameInfo();
11662   int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
11663   EVT PtrVT = getPointerTy(DAG.getDataLayout());
11664   SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
11665 
11666   SDValue Val = Op0;
11667   // P10 hardware store forwarding requires that a single store contains all
11668   // the data for the load. P10 is able to merge a pair of adjacent stores. Try
11669   // to avoid load hit store on P10 when running binaries compiled for older
11670   // processors by generating two mergeable scalar stores to forward with the
11671   // vector load.
11672   if (!DisableP10StoreForward && Subtarget.isPPC64() &&
11673       !Subtarget.isLittleEndian() && ValVT.isInteger() &&
11674       ValVT.getSizeInBits() <= 64) {
11675     Val = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, Val);
11676     EVT ShiftAmountTy = getShiftAmountTy(MVT::i64, DAG.getDataLayout());
11677     SDValue ShiftBy = DAG.getConstant(
11678         64 - Op.getValueType().getScalarSizeInBits(), dl, ShiftAmountTy);
11679     Val = DAG.getNode(ISD::SHL, dl, MVT::i64, Val, ShiftBy);
11680     SDValue Plus8 =
11681         DAG.getNode(ISD::ADD, dl, PtrVT, FIdx, DAG.getConstant(8, dl, PtrVT));
11682     SDValue Store2 =
11683         DAG.getStore(DAG.getEntryNode(), dl, Val, Plus8, MachinePointerInfo());
11684     SDValue Store = DAG.getStore(Store2, dl, Val, FIdx, MachinePointerInfo());
11685     return DAG.getLoad(Op.getValueType(), dl, Store, FIdx,
11686                        MachinePointerInfo());
11687   }
11688 
11689   // Store the input value into Value#0 of the stack slot.
11690   SDValue Store =
11691       DAG.getStore(DAG.getEntryNode(), dl, Val, FIdx, MachinePointerInfo());
11692   // Load it out.
11693   return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
11694 }
11695 
11696 SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
11697                                                   SelectionDAG &DAG) const {
11698   assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
11699          "Should only be called for ISD::INSERT_VECTOR_ELT");
11700 
11701   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11702 
11703   EVT VT = Op.getValueType();
11704   SDLoc dl(Op);
11705   SDValue V1 = Op.getOperand(0);
11706   SDValue V2 = Op.getOperand(1);
11707 
11708   if (VT == MVT::v2f64 && C)
11709     return Op;
11710 
11711   if (Subtarget.hasP9Vector()) {
11712     // An f32 load feeding into a v4f32 insert_vector_elt is handled in this way
11713     // because on P10, it allows this specific insert_vector_elt load pattern to
11714     // utilize the refactored load and store infrastructure in order to exploit
11715     // prefixed loads.
11716     // On targets with inexpensive direct moves (Power9 and up), a
11717     // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
11718     // load since a single precision load will involve conversion to double
11719     // precision on the load followed by another conversion to single precision.
11720     if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
11721         (isa<LoadSDNode>(V2))) {
11722       SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
11723       SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
11724       SDValue InsVecElt =
11725           DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
11726                       BitcastLoad, Op.getOperand(2));
11727       return DAG.getBitcast(MVT::v4f32, InsVecElt);
11728     }
11729   }
11730 
11731   if (Subtarget.isISA3_1()) {
11732     if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
11733       return SDValue();
11734     // On P10, we have legal lowering for constant and variable indices for
11735     // all vectors.
11736     if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
11737         VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
11738       return Op;
11739   }
11740 
11741   // Before P10, we have legal lowering for constant indices but not for
11742   // variable ones.
11743   if (!C)
11744     return SDValue();
11745 
11746   // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
11747   if (VT == MVT::v8i16 || VT == MVT::v16i8) {
11748     SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
11749     unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
11750     unsigned InsertAtElement = C->getZExtValue();
11751     unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
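          // The VECINSERT byte offset effectively counts from the most-significant
          // (big-endian) end of the vector, so mirror the offset for little-endian.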
11752     if (Subtarget.isLittleEndian()) {
11753       InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
11754     }
11755     return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
11756                        DAG.getConstant(InsertAtByte, dl, MVT::i32));
11757   }
11758   return Op;
11759 }
11760 
11761 SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
11762                                            SelectionDAG &DAG) const {
11763   SDLoc dl(Op);
11764   LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
11765   SDValue LoadChain = LN->getChain();
11766   SDValue BasePtr = LN->getBasePtr();
11767   EVT VT = Op.getValueType();
11768 
11769   if (VT != MVT::v256i1 && VT != MVT::v512i1)
11770     return Op;
11771 
11772   // Type v256i1 is used for pairs and v512i1 is used for accumulators.
11773   // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value
11774   // into 2 or 4 VSX registers.
11775   assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
11776          "Type unsupported without MMA");
11777   assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
11778          "Type unsupported without paired vector support");
11779   Align Alignment = LN->getAlign();
11780   SmallVector<SDValue, 4> Loads;
11781   SmallVector<SDValue, 4> LoadChains;
11782   unsigned NumVecs = VT.getSizeInBits() / 128;
11783   for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11784     SDValue Load =
11785         DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
11786                     LN->getPointerInfo().getWithOffset(Idx * 16),
11787                     commonAlignment(Alignment, Idx * 16),
11788                     LN->getMemOperand()->getFlags(), LN->getAAInfo());
11789     BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11790                           DAG.getConstant(16, dl, BasePtr.getValueType()));
11791     Loads.push_back(Load);
11792     LoadChains.push_back(Load.getValue(1));
11793   }
11794   if (Subtarget.isLittleEndian()) {
11795     std::reverse(Loads.begin(), Loads.end());
11796     std::reverse(LoadChains.begin(), LoadChains.end());
11797   }
11798   SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
11799   SDValue Value =
11800       DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
11801                   dl, VT, Loads);
11802   SDValue RetOps[] = {Value, TF};
11803   return DAG.getMergeValues(RetOps, dl);
11804 }
11805 
11806 SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
11807                                             SelectionDAG &DAG) const {
11808   SDLoc dl(Op);
11809   StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
11810   SDValue StoreChain = SN->getChain();
11811   SDValue BasePtr = SN->getBasePtr();
11812   SDValue Value = SN->getValue();
11813   SDValue Value2 = SN->getValue();
11814   EVT StoreVT = Value.getValueType();
11815 
11816   if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
11817     return Op;
11818 
11819   // Type v256i1 is used for pairs and v512i1 is used for accumulators.
11820   // Here we create 2 or 4 v16i8 stores to store the pair's or accumulator's
11821   // underlying registers individually.
11822   assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
11823          "Type unsupported without MMA");
11824   assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
11825          "Type unsupported without paired vector support");
11826   Align Alignment = SN->getAlign();
11827   SmallVector<SDValue, 4> Stores;
11828   unsigned NumVecs = 2;
11829   if (StoreVT == MVT::v512i1) {
11830     if (Subtarget.isISAFuture()) {
11831       EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11832       MachineSDNode *ExtNode = DAG.getMachineNode(
11833           PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1));
11834 
11835       Value = SDValue(ExtNode, 0);
11836       Value2 = SDValue(ExtNode, 1);
11837     } else
11838       Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
11839     NumVecs = 4;
11840   }
11841   for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11842     unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
11843     SDValue Elt;
11844     if (Subtarget.isISAFuture()) {
11845       VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
11846       Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11847                         Idx > 1 ? Value2 : Value,
11848                         DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11849     } else
11850       Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
11851                         DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11852 
11853     SDValue Store =
11854         DAG.getStore(StoreChain, dl, Elt, BasePtr,
11855                      SN->getPointerInfo().getWithOffset(Idx * 16),
11856                      commonAlignment(Alignment, Idx * 16),
11857                      SN->getMemOperand()->getFlags(), SN->getAAInfo());
11858     BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11859                           DAG.getConstant(16, dl, BasePtr.getValueType()));
11860     Stores.push_back(Store);
11861   }
11862   SDValue TF = DAG.getTokenFactor(dl, Stores);
11863   return TF;
11864 }
11865 
11866 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
11867   SDLoc dl(Op);
11868   if (Op.getValueType() == MVT::v4i32) {
11869     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11870 
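          // Split each 32-bit element into 16-bit halves:
          //   a * b = al*bl + ((al*bh + ah*bl) << 16)  (mod 2^32).
          // vmulouh produces the al*bl terms; vmsumuhm of LHS with the
          // half-swapped RHS produces al*bh + ah*bl, which is then shifted up
          // by 16 and added.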
11871     SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
11872     // +16 as shift amt.
11873     SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
11874     SDValue RHSSwap =   // = vrlw RHS, 16
11875       BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
11876 
11877     // Shrinkify inputs to v8i16.
11878     LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
11879     RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
11880     RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
11881 
11882     // Low parts multiplied together, generating 32-bit results (we ignore the
11883     // top parts).
11884     SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
11885                                         LHS, RHS, DAG, dl, MVT::v4i32);
11886 
11887     SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
11888                                       LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
11889     // Shift the high parts up 16 bits.
11890     HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
11891                               Neg16, DAG, dl);
11892     return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
11893   } else if (Op.getValueType() == MVT::v16i8) {
11894     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11895     bool isLittleEndian = Subtarget.isLittleEndian();
11896 
11897     // Multiply the even 8-bit parts, producing 16-bit sums.
11898     SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
11899                                            LHS, RHS, DAG, dl, MVT::v8i16);
11900     EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
11901 
11902     // Multiply the odd 8-bit parts, producing 16-bit sums.
11903     SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
11904                                           LHS, RHS, DAG, dl, MVT::v8i16);
11905     OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
11906 
11907     // Merge the results together.  Because vmuleub and vmuloub are
11908     // instructions with a big-endian bias, we must reverse the
11909     // element numbering and reverse the meaning of "odd" and "even"
11910     // when generating little endian code.
11911     int Ops[16];
11912     for (unsigned i = 0; i != 8; ++i) {
11913       if (isLittleEndian) {
11914         Ops[i*2  ] = 2*i;
11915         Ops[i*2+1] = 2*i+16;
11916       } else {
11917         Ops[i*2  ] = 2*i+1;
11918         Ops[i*2+1] = 2*i+1+16;
11919       }
11920     }
11921     if (isLittleEndian)
11922       return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
11923     else
11924       return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
11925   } else {
11926     llvm_unreachable("Unknown mul to lower!");
11927   }
11928 }
11929 
11930 SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
11931   bool IsStrict = Op->isStrictFPOpcode();
11932   if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
11933       !Subtarget.hasP9Vector())
11934     return SDValue();
11935 
11936   return Op;
11937 }
11938 
11939 // Custom lowering for fpext v2f32 to v2f64
11940 SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
11941 
11942   assert(Op.getOpcode() == ISD::FP_EXTEND &&
11943          "Should only be called for ISD::FP_EXTEND");
11944 
11945   // FIXME: handle extends from half precision float vectors on P9.
11946   // We only want to custom lower an extend from v2f32 to v2f64.
11947   if (Op.getValueType() != MVT::v2f64 ||
11948       Op.getOperand(0).getValueType() != MVT::v2f32)
11949     return SDValue();
11950 
11951   SDLoc dl(Op);
11952   SDValue Op0 = Op.getOperand(0);
11953 
11954   switch (Op0.getOpcode()) {
11955   default:
11956     return SDValue();
11957   case ISD::EXTRACT_SUBVECTOR: {
11958     assert(Op0.getNumOperands() == 2 &&
11959            isa<ConstantSDNode>(Op0->getOperand(1)) &&
11960            "Node should have 2 operands with second one being a constant!");
11961 
11962     if (Op0.getOperand(0).getValueType() != MVT::v4f32)
11963       return SDValue();
11964 
11965     // Custom lowering is only done for the high or low doubleword.
11966     int Idx = Op0.getConstantOperandVal(1);
11967     if (Idx % 2 != 0)
11968       return SDValue();
11969 
11970     // Since input is v4f32, at this point Idx is either 0 or 2.
11971     // Shift to get the doubleword position we want.
11972     int DWord = Idx >> 1;
11973 
11974     // High and low word positions are different on little endian.
11975     if (Subtarget.isLittleEndian())
11976       DWord ^= 0x1;
11977 
11978     return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
11979                        Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
11980   }
11981   case ISD::FADD:
11982   case ISD::FMUL:
11983   case ISD::FSUB: {
11984     SDValue NewLoad[2];
11985     for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
11986       // Ensure both inputs are loads.
11987       SDValue LdOp = Op0.getOperand(i);
11988       if (LdOp.getOpcode() != ISD::LOAD)
11989         return SDValue();
11990       // Generate new load node.
11991       LoadSDNode *LD = cast<LoadSDNode>(LdOp);
11992       SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11993       NewLoad[i] = DAG.getMemIntrinsicNode(
11994           PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11995           LD->getMemoryVT(), LD->getMemOperand());
11996     }
11997     SDValue NewOp =
11998         DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
11999                     NewLoad[1], Op0.getNode()->getFlags());
12000     return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
12001                        DAG.getConstant(0, dl, MVT::i32));
12002   }
12003   case ISD::LOAD: {
12004     LoadSDNode *LD = cast<LoadSDNode>(Op0);
12005     SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12006     SDValue NewLd = DAG.getMemIntrinsicNode(
12007         PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
12008         LD->getMemoryVT(), LD->getMemOperand());
12009     return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
12010                        DAG.getConstant(0, dl, MVT::i32));
12011   }
12012   }
12013   llvm_unreachable("ERROR: Should return for all cases within switch.");
12014 }
12015 
12016 SDValue PPCTargetLowering::LowerUaddo(SDValue Op, SelectionDAG &DAG) const {
12017   // Default to target independent lowering if there is a logical user of the
12018   // carry-bit.
12019   for (SDNode *U : Op->users()) {
12020     if (U->getOpcode() == ISD::SELECT)
12021       return SDValue();
12022     if (ISD::isBitwiseLogicOp(U->getOpcode())) {
12023       for (unsigned i = 0, ie = U->getNumOperands(); i != ie; ++i) {
12024         if (U->getOperand(i).getOpcode() != ISD::UADDO &&
12025             U->getOperand(i).getOpcode() != ISD::MERGE_VALUES)
12026           return SDValue();
12027       }
12028     }
12029   }
12030   SDValue LHS = Op.getOperand(0);
12031   SDValue RHS = Op.getOperand(1);
12032   SDLoc dl(Op);
12033 
12034   // Default to target independent lowering for special cases handled there.
12035   if (isOneConstant(RHS) || isAllOnesConstant(RHS))
12036     return SDValue();
12037 
12038   EVT VT = Op.getNode()->getValueType(0);
12039 
12040   SDValue ADDC;
12041   SDValue Overflow;
12042   SDVTList VTs = Op.getNode()->getVTList();
12043 
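        // Compute the sum with ADDC, which produces the carry as glue, then
        // materialize the carry bit itself by adding 0 + 0 + carry with ADDE.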
12044   ADDC = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), LHS, RHS);
12045   Overflow = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(VT, MVT::Glue),
12046                          DAG.getConstant(0, dl, VT), DAG.getConstant(0, dl, VT),
12047                          ADDC.getValue(1));
12048   SDValue OverflowTrunc =
12049       DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12050   SDValue Res =
12051       DAG.getNode(ISD::MERGE_VALUES, dl, VTs, ADDC.getValue(0), OverflowTrunc);
12052   return Res;
12053 }
12054 
12055 SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
12056 
12057   SDLoc dl(Op);
12058   SDValue LHS = Op.getOperand(0);
12059   SDValue RHS = Op.getOperand(1);
12060   EVT VT = Op.getNode()->getValueType(0);
12061 
12062   SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
12063 
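        // Signed subtraction overflows iff the operands have different signs and
        // the result's sign differs from LHS; (LHS ^ RHS) & (LHS ^ Sub) has its
        // sign bit set exactly in that case, so shift it down to bit 0.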
12064   SDValue Xor1 = DAG.getNode(ISD::XOR, dl, VT, RHS, LHS);
12065   SDValue Xor2 = DAG.getNode(ISD::XOR, dl, VT, Sub, LHS);
12066 
12067   SDValue And = DAG.getNode(ISD::AND, dl, VT, Xor1, Xor2);
12068 
12069   SDValue Overflow =
12070       DAG.getNode(ISD::SRL, dl, VT, And,
12071                   DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
12072 
12073   SDValue OverflowTrunc =
12074       DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12075 
12076   return DAG.getMergeValues({Sub, OverflowTrunc}, dl);
12077 }
12078 
12079 /// LowerOperation - Provide custom lowering hooks for some operations.
12080 ///
12081 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
12082   switch (Op.getOpcode()) {
12083   default: llvm_unreachable("Wasn't expecting to be able to lower this!");
12084   case ISD::UADDO:              return LowerUaddo(Op, DAG);
12085   case ISD::FPOW:               return lowerPow(Op, DAG);
12086   case ISD::FSIN:               return lowerSin(Op, DAG);
12087   case ISD::FCOS:               return lowerCos(Op, DAG);
12088   case ISD::FLOG:               return lowerLog(Op, DAG);
12089   case ISD::FLOG10:             return lowerLog10(Op, DAG);
12090   case ISD::FEXP:               return lowerExp(Op, DAG);
12091   case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
12092   case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
12093   case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
12094   case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
12095   case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
12096   case ISD::STRICT_FSETCC:
12097   case ISD::STRICT_FSETCCS:
12098   case ISD::SETCC:              return LowerSETCC(Op, DAG);
12099   case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
12100   case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
12101   case ISD::SSUBO:
12102     return LowerSSUBO(Op, DAG);
12103 
12104   case ISD::INLINEASM:
12105   case ISD::INLINEASM_BR:       return LowerINLINEASM(Op, DAG);
12106   // Variable argument lowering.
12107   case ISD::VASTART:            return LowerVASTART(Op, DAG);
12108   case ISD::VAARG:              return LowerVAARG(Op, DAG);
12109   case ISD::VACOPY:             return LowerVACOPY(Op, DAG);
12110 
12111   case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG);
12112   case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
12113   case ISD::GET_DYNAMIC_AREA_OFFSET:
12114     return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
12115 
12116   // Exception handling lowering.
12117   case ISD::EH_DWARF_CFA:       return LowerEH_DWARF_CFA(Op, DAG);
12118   case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
12119   case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);
12120 
12121   case ISD::LOAD:               return LowerLOAD(Op, DAG);
12122   case ISD::STORE:              return LowerSTORE(Op, DAG);
12123   case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
12124   case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
12125   case ISD::STRICT_FP_TO_UINT:
12126   case ISD::STRICT_FP_TO_SINT:
12127   case ISD::FP_TO_UINT:
12128   case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
12129   case ISD::STRICT_UINT_TO_FP:
12130   case ISD::STRICT_SINT_TO_FP:
12131   case ISD::UINT_TO_FP:
12132   case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
12133   case ISD::GET_ROUNDING:       return LowerGET_ROUNDING(Op, DAG);
12134   case ISD::SET_ROUNDING:
12135     return LowerSET_ROUNDING(Op, DAG);
12136 
12137   // Lower 64-bit shifts.
12138   case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
12139   case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
12140   case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);
12141 
12142   case ISD::FSHL:               return LowerFunnelShift(Op, DAG);
12143   case ISD::FSHR:               return LowerFunnelShift(Op, DAG);
12144 
12145   // Vector-related lowering.
12146   case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
12147   case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
12148   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
12149   case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
12150   case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
12151   case ISD::MUL:                return LowerMUL(Op, DAG);
12152   case ISD::FP_EXTEND:          return LowerFP_EXTEND(Op, DAG);
12153   case ISD::STRICT_FP_ROUND:
12154   case ISD::FP_ROUND:
12155     return LowerFP_ROUND(Op, DAG);
12156   case ISD::ROTL:               return LowerROTL(Op, DAG);
12157 
12158   // For counter-based loop handling.
12159   case ISD::INTRINSIC_W_CHAIN:  return SDValue();
12160 
12161   case ISD::BITCAST:            return LowerBITCAST(Op, DAG);
12162 
12163   // Frame & Return address.
12164   case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
12165   case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
12166 
12167   case ISD::INTRINSIC_VOID:
12168     return LowerINTRINSIC_VOID(Op, DAG);
12169   case ISD::BSWAP:
12170     return LowerBSWAP(Op, DAG);
12171   case ISD::ATOMIC_CMP_SWAP:
12172     return LowerATOMIC_CMP_SWAP(Op, DAG);
12173   case ISD::ATOMIC_STORE:
12174     return LowerATOMIC_LOAD_STORE(Op, DAG);
12175   case ISD::IS_FPCLASS:
12176     return LowerIS_FPCLASS(Op, DAG);
12177   }
12178 }
12179 
12180 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
12181                                            SmallVectorImpl<SDValue>&Results,
12182                                            SelectionDAG &DAG) const {
12183   SDLoc dl(N);
12184   switch (N->getOpcode()) {
12185   default:
12186     llvm_unreachable("Do not know how to custom type legalize this operation!");
12187   case ISD::ATOMIC_LOAD: {
12188     SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
12189     Results.push_back(Res);
12190     Results.push_back(Res.getValue(1));
12191     break;
12192   }
12193   case ISD::READCYCLECOUNTER: {
12194     SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
12195     SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
12196 
12197     Results.push_back(
12198         DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
12199     Results.push_back(RTB.getValue(2));
12200     break;
12201   }
12202   case ISD::INTRINSIC_W_CHAIN: {
12203     if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement)
12204       break;
12205 
12206     assert(N->getValueType(0) == MVT::i1 &&
12207            "Unexpected result type for CTR decrement intrinsic");
12208     EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
12209                                  N->getValueType(0));
12210     SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
12211     SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
12212                                  N->getOperand(1));
12213 
12214     Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
12215     Results.push_back(NewInt.getValue(1));
12216     break;
12217   }
12218   case ISD::INTRINSIC_WO_CHAIN: {
12219     switch (N->getConstantOperandVal(0)) {
12220     case Intrinsic::ppc_pack_longdouble:
12221       Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
12222                                     N->getOperand(2), N->getOperand(1)));
12223       break;
12224     case Intrinsic::ppc_maxfe:
12225     case Intrinsic::ppc_minfe:
12226     case Intrinsic::ppc_fnmsub:
12227     case Intrinsic::ppc_convert_f128_to_ppcf128:
12228       Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
12229       break;
12230     }
12231     break;
12232   }
12233   case ISD::VAARG: {
12234     if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
12235       return;
12236 
12237     EVT VT = N->getValueType(0);
12238 
12239     if (VT == MVT::i64) {
12240       SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
12241 
12242       Results.push_back(NewNode);
12243       Results.push_back(NewNode.getValue(1));
12244     }
12245     return;
12246   }
12247   case ISD::STRICT_FP_TO_SINT:
12248   case ISD::STRICT_FP_TO_UINT:
12249   case ISD::FP_TO_SINT:
12250   case ISD::FP_TO_UINT: {
12251     // LowerFP_TO_INT() can only handle f32 and f64.
12252     if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
12253         MVT::ppcf128)
12254       return;
12255     SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
12256     Results.push_back(LoweredValue);
12257     if (N->isStrictFPOpcode())
12258       Results.push_back(LoweredValue.getValue(1));
12259     return;
12260   }
12261   case ISD::TRUNCATE: {
12262     if (!N->getValueType(0).isVector())
12263       return;
12264     SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
12265     if (Lowered)
12266       Results.push_back(Lowered);
12267     return;
12268   }
12269   case ISD::SCALAR_TO_VECTOR: {
12270     SDValue Lowered = LowerSCALAR_TO_VECTOR(SDValue(N, 0), DAG);
12271     if (Lowered)
12272       Results.push_back(Lowered);
12273     return;
12274   }
12275   case ISD::FSHL:
12276   case ISD::FSHR:
12277     // Don't handle funnel shifts here.
12278     return;
12279   case ISD::BITCAST:
12280     // Don't handle bitcast here.
12281     return;
12282   case ISD::FP_EXTEND:
12283     SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
12284     if (Lowered)
12285       Results.push_back(Lowered);
12286     return;
12287   }
12288 }
12289 
12290 //===----------------------------------------------------------------------===//
12291 //  Other Lowering Code
12292 //===----------------------------------------------------------------------===//
12293 
12294 static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
12295   return Builder.CreateIntrinsic(Id, {}, {});
12296 }
12297 
12298 // The mappings for emitLeading/TrailingFence are taken from
12299 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
12300 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
12301                                                  Instruction *Inst,
12302                                                  AtomicOrdering Ord) const {
12303   if (Ord == AtomicOrdering::SequentiallyConsistent)
12304     return callIntrinsic(Builder, Intrinsic::ppc_sync);
12305   if (isReleaseOrStronger(Ord))
12306     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12307   return nullptr;
12308 }
12309 
12310 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
12311                                                   Instruction *Inst,
12312                                                   AtomicOrdering Ord) const {
12313   if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
12314     // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
12315     // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
12316     // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
12317     if (isa<LoadInst>(Inst))
12318       return Builder.CreateIntrinsic(Intrinsic::ppc_cfence, {Inst->getType()},
12319                                      {Inst});
12320     // FIXME: Can use isync for rmw operation.
12321     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12322   }
12323   return nullptr;
12324 }
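
// As a sketch of what the mapping above is intended to produce (per the cited
// cpp0xmappings page; not emitted verbatim here): a sequentially consistent
// load lowers roughly to "sync; l<sz>; cmp; bc; isync" (the trailing cfence),
// an acquire load to "l<sz>; cmp; bc; isync", and a release store to
// "lwsync; st<sz>".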
12325 
12326 MachineBasicBlock *
12327 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
12328                                     unsigned AtomicSize,
12329                                     unsigned BinOpcode,
12330                                     unsigned CmpOpcode,
12331                                     unsigned CmpPred) const {
12332   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
12333   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12334 
12335   auto LoadMnemonic = PPC::LDARX;
12336   auto StoreMnemonic = PPC::STDCX;
12337   switch (AtomicSize) {
12338   default:
12339     llvm_unreachable("Unexpected size of atomic entity");
12340   case 1:
12341     LoadMnemonic = PPC::LBARX;
12342     StoreMnemonic = PPC::STBCX;
12343     assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
12344     break;
12345   case 2:
12346     LoadMnemonic = PPC::LHARX;
12347     StoreMnemonic = PPC::STHCX;
12348     assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
12349     break;
12350   case 4:
12351     LoadMnemonic = PPC::LWARX;
12352     StoreMnemonic = PPC::STWCX;
12353     break;
12354   case 8:
12355     LoadMnemonic = PPC::LDARX;
12356     StoreMnemonic = PPC::STDCX;
12357     break;
12358   }
12359 
12360   const BasicBlock *LLVM_BB = BB->getBasicBlock();
12361   MachineFunction *F = BB->getParent();
12362   MachineFunction::iterator It = ++BB->getIterator();
12363 
12364   Register dest = MI.getOperand(0).getReg();
12365   Register ptrA = MI.getOperand(1).getReg();
12366   Register ptrB = MI.getOperand(2).getReg();
12367   Register incr = MI.getOperand(3).getReg();
12368   DebugLoc dl = MI.getDebugLoc();
12369 
12370   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
12371   MachineBasicBlock *loop2MBB =
12372     CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
12373   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12374   F->insert(It, loopMBB);
12375   if (CmpOpcode)
12376     F->insert(It, loop2MBB);
12377   F->insert(It, exitMBB);
12378   exitMBB->splice(exitMBB->begin(), BB,
12379                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
12380   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12381 
12382   MachineRegisterInfo &RegInfo = F->getRegInfo();
12383   Register TmpReg = (!BinOpcode) ? incr :
12384     RegInfo.createVirtualRegister(AtomicSize == 8 ? &PPC::G8RCRegClass
12385                                                   : &PPC::GPRCRegClass);
12386 
12387   //  thisMBB:
12388   //   ...
12389   //   fallthrough --> loopMBB
12390   BB->addSuccessor(loopMBB);
12391 
12392   //  loopMBB:
12393   //   l[wd]arx dest, ptr
12394   //   add r0, dest, incr
12395   //   st[wd]cx. r0, ptr
12396   //   bne- loopMBB
12397   //   fallthrough --> exitMBB
12398 
12399   // For max/min...
12400   //  loopMBB:
12401   //   l[wd]arx dest, ptr
12402   //   cmpl?[wd] dest, incr
12403   //   bgt exitMBB
12404   //  loop2MBB:
12405   //   st[wd]cx. dest, ptr
12406   //   bne- loopMBB
12407   //   fallthrough --> exitMBB
12408 
12409   BB = loopMBB;
12410   BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
12411     .addReg(ptrA).addReg(ptrB);
12412   if (BinOpcode)
12413     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
12414   if (CmpOpcode) {
12415     Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12416     // Signed comparisons of byte or halfword values must be sign-extended.
12417     if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
12418       Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12419       BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
12420               ExtReg).addReg(dest);
12421       BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
12422     } else
12423       BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);
12424 
12425     BuildMI(BB, dl, TII->get(PPC::BCC))
12426         .addImm(CmpPred)
12427         .addReg(CrReg)
12428         .addMBB(exitMBB);
12429     BB->addSuccessor(loop2MBB);
12430     BB->addSuccessor(exitMBB);
12431     BB = loop2MBB;
12432   }
12433   BuildMI(BB, dl, TII->get(StoreMnemonic))
12434     .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
12435   BuildMI(BB, dl, TII->get(PPC::BCC))
12436     .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
12437   BB->addSuccessor(loopMBB);
12438   BB->addSuccessor(exitMBB);
12439 
12440   //  exitMBB:
12441   //   ...
12442   BB = exitMBB;
12443   return BB;
12444 }
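
// For example, an "atomicrmw add i64" is selected to the ATOMIC_LOAD_ADD_I64
// pseudo and reaches this function from EmitInstrWithCustomInserter() below as
// EmitAtomicBinary(MI, BB, 8, PPC::ADD8), expanding to the ldarx/add/stdcx.
// loop sketched in the comments above.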
12445 
12446 static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
12447   switch(MI.getOpcode()) {
12448   default:
12449     return false;
12450   case PPC::COPY:
12451     return TII->isSignExtended(MI.getOperand(1).getReg(),
12452                                &MI.getMF()->getRegInfo());
12453   case PPC::LHA:
12454   case PPC::LHA8:
12455   case PPC::LHAU:
12456   case PPC::LHAU8:
12457   case PPC::LHAUX:
12458   case PPC::LHAUX8:
12459   case PPC::LHAX:
12460   case PPC::LHAX8:
12461   case PPC::LWA:
12462   case PPC::LWAUX:
12463   case PPC::LWAX:
12464   case PPC::LWAX_32:
12465   case PPC::LWA_32:
12466   case PPC::PLHA:
12467   case PPC::PLHA8:
12468   case PPC::PLHA8pc:
12469   case PPC::PLHApc:
12470   case PPC::PLWA:
12471   case PPC::PLWA8:
12472   case PPC::PLWA8pc:
12473   case PPC::PLWApc:
12474   case PPC::EXTSB:
12475   case PPC::EXTSB8:
12476   case PPC::EXTSB8_32_64:
12477   case PPC::EXTSB8_rec:
12478   case PPC::EXTSB_rec:
12479   case PPC::EXTSH:
12480   case PPC::EXTSH8:
12481   case PPC::EXTSH8_32_64:
12482   case PPC::EXTSH8_rec:
12483   case PPC::EXTSH_rec:
12484   case PPC::EXTSW:
12485   case PPC::EXTSWSLI:
12486   case PPC::EXTSWSLI_32_64:
12487   case PPC::EXTSWSLI_32_64_rec:
12488   case PPC::EXTSWSLI_rec:
12489   case PPC::EXTSW_32:
12490   case PPC::EXTSW_32_64:
12491   case PPC::EXTSW_32_64_rec:
12492   case PPC::EXTSW_rec:
12493   case PPC::SRAW:
12494   case PPC::SRAWI:
12495   case PPC::SRAWI_rec:
12496   case PPC::SRAW_rec:
12497     return true;
12498   }
12499   return false;
12500 }
12501 
12502 MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
12503     MachineInstr &MI, MachineBasicBlock *BB,
12504     bool is8bit, // operation
12505     unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
12506   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
12507   const PPCInstrInfo *TII = Subtarget.getInstrInfo();
12508 
12509   // If this is a signed comparison and the value being compared is not known
12510   // to be sign extended, sign extend it here.
12511   DebugLoc dl = MI.getDebugLoc();
12512   MachineFunction *F = BB->getParent();
12513   MachineRegisterInfo &RegInfo = F->getRegInfo();
12514   Register incr = MI.getOperand(3).getReg();
12515   bool IsSignExtended =
12516       incr.isVirtual() && isSignExtended(*RegInfo.getVRegDef(incr), TII);
12517 
12518   if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
12519     Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12520     BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
12521         .addReg(MI.getOperand(3).getReg());
12522     MI.getOperand(3).setReg(ValueReg);
12523     incr = ValueReg;
12524   }
12525   // If we support partword atomic mnemonics, just use them.
12526   if (Subtarget.hasPartwordAtomics())
12527     return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
12528                             CmpPred);
12529 
12530   // In 64-bit mode we have to use 64-bit registers for addresses, even though
12531   // lwarx/stwcx. only operate on 32-bit data.  With word-sized atomics we can
12532   // use the address registers without caring whether they're 32- or 64-bit,
12533   // but here we're doing actual arithmetic on the addresses.
12534   bool is64bit = Subtarget.isPPC64();
12535   bool isLittleEndian = Subtarget.isLittleEndian();
12536   unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
12537 
12538   const BasicBlock *LLVM_BB = BB->getBasicBlock();
12539   MachineFunction::iterator It = ++BB->getIterator();
12540 
12541   Register dest = MI.getOperand(0).getReg();
12542   Register ptrA = MI.getOperand(1).getReg();
12543   Register ptrB = MI.getOperand(2).getReg();
12544 
12545   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
12546   MachineBasicBlock *loop2MBB =
12547       CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
12548   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12549   F->insert(It, loopMBB);
12550   if (CmpOpcode)
12551     F->insert(It, loop2MBB);
12552   F->insert(It, exitMBB);
12553   exitMBB->splice(exitMBB->begin(), BB,
12554                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
12555   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12556 
12557   const TargetRegisterClass *RC =
12558       is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12559   const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12560 
12561   Register PtrReg = RegInfo.createVirtualRegister(RC);
12562   Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
12563   Register ShiftReg =
12564       isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
12565   Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
12566   Register MaskReg = RegInfo.createVirtualRegister(GPRC);
12567   Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
12568   Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
12569   Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
12570   Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
12571   Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
12572   Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
12573   Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
12574   Register Ptr1Reg;
12575   Register TmpReg =
12576       (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
12577 
12578   //  thisMBB:
12579   //   ...
12580   //   fallthrough --> loopMBB
12581   BB->addSuccessor(loopMBB);
12582 
12583   // The 4-byte load must be aligned, while a char or short may be
12584   // anywhere in the word.  Hence all this nasty bookkeeping code.
12585   //   add ptr1, ptrA, ptrB [copy if ptrA==0]
12586   //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
12587   //   xori shift, shift1, 24 [16]
12588   //   rlwinm ptr, ptr1, 0, 0, 29
12589   //   slw incr2, incr, shift
12590   //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
12591   //   slw mask, mask2, shift
12592   //  loopMBB:
12593   //   lwarx tmpDest, ptr
12594   //   add tmp, tmpDest, incr2
12595   //   andc tmp2, tmpDest, mask
12596   //   and tmp3, tmp, mask
12597   //   or tmp4, tmp3, tmp2
12598   //   stwcx. tmp4, ptr
12599   //   bne- loopMBB
12600   //   fallthrough --> exitMBB
12601   //   srw SrwDest, tmpDest, shift
12602   //   rlwinm SrwDest, SrwDest, 0, 24 [16], 31
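  // As a worked example for the byte case: a byte at offset 1 within its
  // aligned word gives shift1 = (ptr1 & 3) * 8 = 8, so the little-endian lane
  // shift is 8, while the big-endian shift is 8 ^ 24 = 16 (the byte sits 16
  // bits above the LSB of the containing word). This is only a sketch of the
  // arithmetic above, not additional emitted code.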
12603   if (ptrA != ZeroReg) {
12604     Ptr1Reg = RegInfo.createVirtualRegister(RC);
12605     BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
12606         .addReg(ptrA)
12607         .addReg(ptrB);
12608   } else {
12609     Ptr1Reg = ptrB;
12610   }
12611   // We need to use a 32-bit subregister here to avoid a register class
12612   // mismatch in 64-bit mode.
12613   BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
12614       .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
12615       .addImm(3)
12616       .addImm(27)
12617       .addImm(is8bit ? 28 : 27);
12618   if (!isLittleEndian)
12619     BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
12620         .addReg(Shift1Reg)
12621         .addImm(is8bit ? 24 : 16);
12622   if (is64bit)
12623     BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
12624         .addReg(Ptr1Reg)
12625         .addImm(0)
12626         .addImm(61);
12627   else
12628     BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
12629         .addReg(Ptr1Reg)
12630         .addImm(0)
12631         .addImm(0)
12632         .addImm(29);
12633   BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
12634   if (is8bit)
12635     BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
12636   else {
12637     BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
12638     BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
12639         .addReg(Mask3Reg)
12640         .addImm(65535);
12641   }
12642   BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
12643       .addReg(Mask2Reg)
12644       .addReg(ShiftReg);
12645 
12646   BB = loopMBB;
12647   BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
12648       .addReg(ZeroReg)
12649       .addReg(PtrReg);
12650   if (BinOpcode)
12651     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
12652         .addReg(Incr2Reg)
12653         .addReg(TmpDestReg);
12654   BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
12655       .addReg(TmpDestReg)
12656       .addReg(MaskReg);
12657   BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
12658   if (CmpOpcode) {
12659     // For unsigned comparisons, we can directly compare the shifted values.
12660     // For signed comparisons we shift and sign extend.
12661     Register SReg = RegInfo.createVirtualRegister(GPRC);
12662     Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12663     BuildMI(BB, dl, TII->get(PPC::AND), SReg)
12664         .addReg(TmpDestReg)
12665         .addReg(MaskReg);
12666     unsigned ValueReg = SReg;
12667     unsigned CmpReg = Incr2Reg;
12668     if (CmpOpcode == PPC::CMPW) {
12669       ValueReg = RegInfo.createVirtualRegister(GPRC);
12670       BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
12671           .addReg(SReg)
12672           .addReg(ShiftReg);
12673       Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
12674       BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
12675           .addReg(ValueReg);
12676       ValueReg = ValueSReg;
12677       CmpReg = incr;
12678     }
12679     BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg);
12680     BuildMI(BB, dl, TII->get(PPC::BCC))
12681         .addImm(CmpPred)
12682         .addReg(CrReg)
12683         .addMBB(exitMBB);
12684     BB->addSuccessor(loop2MBB);
12685     BB->addSuccessor(exitMBB);
12686     BB = loop2MBB;
12687   }
12688   BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
12689   BuildMI(BB, dl, TII->get(PPC::STWCX))
12690       .addReg(Tmp4Reg)
12691       .addReg(ZeroReg)
12692       .addReg(PtrReg);
12693   BuildMI(BB, dl, TII->get(PPC::BCC))
12694       .addImm(PPC::PRED_NE)
12695       .addReg(PPC::CR0)
12696       .addMBB(loopMBB);
12697   BB->addSuccessor(loopMBB);
12698   BB->addSuccessor(exitMBB);
12699 
12700   //  exitMBB:
12701   //   ...
12702   BB = exitMBB;
12703   // Since the shift amount is not a constant, we need to clear
12704   // the upper bits with a separate RLWINM.
12705   BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
12706       .addReg(SrwDestReg)
12707       .addImm(0)
12708       .addImm(is8bit ? 24 : 16)
12709       .addImm(31);
12710   BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
12711       .addReg(TmpDestReg)
12712       .addReg(ShiftReg);
12713   return BB;
12714 }
12715 
12716 llvm::MachineBasicBlock *
12717 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
12718                                     MachineBasicBlock *MBB) const {
12719   DebugLoc DL = MI.getDebugLoc();
12720   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12721   const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
12722 
12723   MachineFunction *MF = MBB->getParent();
12724   MachineRegisterInfo &MRI = MF->getRegInfo();
12725 
12726   const BasicBlock *BB = MBB->getBasicBlock();
12727   MachineFunction::iterator I = ++MBB->getIterator();
12728 
12729   Register DstReg = MI.getOperand(0).getReg();
12730   const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
12731   assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
12732   Register mainDstReg = MRI.createVirtualRegister(RC);
12733   Register restoreDstReg = MRI.createVirtualRegister(RC);
12734 
12735   MVT PVT = getPointerTy(MF->getDataLayout());
12736   assert((PVT == MVT::i64 || PVT == MVT::i32) &&
12737          "Invalid Pointer Size!");
12738   // For v = setjmp(buf), we generate
12739   //
12740   // thisMBB:
12741   //  SjLjSetup mainMBB
12742   //  bl mainMBB
12743   //  v_restore = 1
12744   //  b sinkMBB
12745   //
12746   // mainMBB:
12747   //  buf[LabelOffset] = LR
12748   //  v_main = 0
12749   //
12750   // sinkMBB:
12751   //  v = phi(main, restore)
12752   //
12753 
12754   MachineBasicBlock *thisMBB = MBB;
12755   MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
12756   MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
12757   MF->insert(I, mainMBB);
12758   MF->insert(I, sinkMBB);
12759 
12760   MachineInstrBuilder MIB;
12761 
12762   // Transfer the remainder of BB and its successor edges to sinkMBB.
12763   sinkMBB->splice(sinkMBB->begin(), MBB,
12764                   std::next(MachineBasicBlock::iterator(MI)), MBB->end());
12765   sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
12766 
12767   // Note that the structure of the jmp_buf used here is not compatible
12768   // with that used by libc, and is not designed to be. Specifically, it
12769   // stores only those 'reserved' registers that LLVM does not otherwise
12770   // understand how to spill. Also, by convention, by the time this
12771   // intrinsic is called, Clang has already stored the frame address in the
12772   // first slot of the buffer and stack address in the third. Following the
12773   // X86 target code, we'll store the jump address in the second slot. We also
12774   // need to save the TOC pointer (R2) to handle jumps between shared
12775   // libraries, and that will be stored in the fourth slot. The thread
12776   // identifier (R13) is not affected.
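  //
  // The resulting buffer layout (pointer-sized slots) is therefore:
  //   slot 0: frame address  (stored by Clang)
  //   slot 1: jump address   (LabelOffset, stored below)
  //   slot 2: stack address  (stored by Clang)
  //   slot 3: TOC pointer R2 (TOCOffset)
  //   slot 4: base pointer   (BPOffset)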
12777 
12778   // thisMBB:
12779   const int64_t LabelOffset = 1 * PVT.getStoreSize();
12780   const int64_t TOCOffset   = 3 * PVT.getStoreSize();
12781   const int64_t BPOffset    = 4 * PVT.getStoreSize();
12782 
12783   // Prepare the IP in a register.
12784   const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
12785   Register LabelReg = MRI.createVirtualRegister(PtrRC);
12786   Register BufReg = MI.getOperand(1).getReg();
12787 
12788   if (Subtarget.is64BitELFABI()) {
12789     setUsesTOCBasePtr(*MBB->getParent());
12790     MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
12791               .addReg(PPC::X2)
12792               .addImm(TOCOffset)
12793               .addReg(BufReg)
12794               .cloneMemRefs(MI);
12795   }
12796 
12797   // Naked functions never have a base pointer, and so we use r1. For all
12798   // other functions, this decision must be delayed until PEI.
12799   unsigned BaseReg;
12800   if (MF->getFunction().hasFnAttribute(Attribute::Naked))
12801     BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
12802   else
12803     BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
12804 
12805   MIB = BuildMI(*thisMBB, MI, DL,
12806                 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
12807             .addReg(BaseReg)
12808             .addImm(BPOffset)
12809             .addReg(BufReg)
12810             .cloneMemRefs(MI);
12811 
12812   // Setup
12813   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
12814   MIB.addRegMask(TRI->getNoPreservedMask());
12815 
12816   BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
12817 
12818   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
12819           .addMBB(mainMBB);
12820   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
12821 
12822   thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
12823   thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
12824 
12825   // mainMBB:
12826   //  mainDstReg = 0
12827   MIB =
12828       BuildMI(mainMBB, DL,
12829               TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
12830 
12831   // Store IP
12832   if (Subtarget.isPPC64()) {
12833     MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
12834             .addReg(LabelReg)
12835             .addImm(LabelOffset)
12836             .addReg(BufReg);
12837   } else {
12838     MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
12839             .addReg(LabelReg)
12840             .addImm(LabelOffset)
12841             .addReg(BufReg);
12842   }
12843   MIB.cloneMemRefs(MI);
12844 
12845   BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
12846   mainMBB->addSuccessor(sinkMBB);
12847 
12848   // sinkMBB:
12849   BuildMI(*sinkMBB, sinkMBB->begin(), DL,
12850           TII->get(PPC::PHI), DstReg)
12851     .addReg(mainDstReg).addMBB(mainMBB)
12852     .addReg(restoreDstReg).addMBB(thisMBB);
12853 
12854   MI.eraseFromParent();
12855   return sinkMBB;
12856 }
12857 
12858 MachineBasicBlock *
12859 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
12860                                      MachineBasicBlock *MBB) const {
12861   DebugLoc DL = MI.getDebugLoc();
12862   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12863 
12864   MachineFunction *MF = MBB->getParent();
12865   MachineRegisterInfo &MRI = MF->getRegInfo();
12866 
12867   MVT PVT = getPointerTy(MF->getDataLayout());
12868   assert((PVT == MVT::i64 || PVT == MVT::i32) &&
12869          "Invalid Pointer Size!");
12870 
12871   const TargetRegisterClass *RC =
12872     (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12873   Register Tmp = MRI.createVirtualRegister(RC);
12874   // Since FP is only updated here but NOT referenced, it's treated as GPR.
12875   unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
12876   unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
12877   unsigned BP =
12878       (PVT == MVT::i64)
12879           ? PPC::X30
12880           : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
12881                                                               : PPC::R30);
12882 
12883   MachineInstrBuilder MIB;
12884 
12885   const int64_t LabelOffset = 1 * PVT.getStoreSize();
12886   const int64_t SPOffset    = 2 * PVT.getStoreSize();
12887   const int64_t TOCOffset   = 3 * PVT.getStoreSize();
12888   const int64_t BPOffset    = 4 * PVT.getStoreSize();
12889 
12890   Register BufReg = MI.getOperand(0).getReg();
12891 
12892   // Reload FP (the jumped-to function may not have had a
12893   // frame pointer, and if so, then its r31 will be restored
12894   // as necessary).
12895   if (PVT == MVT::i64) {
12896     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
12897             .addImm(0)
12898             .addReg(BufReg);
12899   } else {
12900     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
12901             .addImm(0)
12902             .addReg(BufReg);
12903   }
12904   MIB.cloneMemRefs(MI);
12905 
12906   // Reload IP
12907   if (PVT == MVT::i64) {
12908     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
12909             .addImm(LabelOffset)
12910             .addReg(BufReg);
12911   } else {
12912     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
12913             .addImm(LabelOffset)
12914             .addReg(BufReg);
12915   }
12916   MIB.cloneMemRefs(MI);
12917 
12918   // Reload SP
12919   if (PVT == MVT::i64) {
12920     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
12921             .addImm(SPOffset)
12922             .addReg(BufReg);
12923   } else {
12924     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
12925             .addImm(SPOffset)
12926             .addReg(BufReg);
12927   }
12928   MIB.cloneMemRefs(MI);
12929 
12930   // Reload BP
12931   if (PVT == MVT::i64) {
12932     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
12933             .addImm(BPOffset)
12934             .addReg(BufReg);
12935   } else {
12936     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
12937             .addImm(BPOffset)
12938             .addReg(BufReg);
12939   }
12940   MIB.cloneMemRefs(MI);
12941 
12942   // Reload TOC
12943   if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
12944     setUsesTOCBasePtr(*MBB->getParent());
12945     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
12946               .addImm(TOCOffset)
12947               .addReg(BufReg)
12948               .cloneMemRefs(MI);
12949   }
12950 
12951   // Jump
12952   BuildMI(*MBB, MI, DL,
12953           TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
12954   BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
12955 
12956   MI.eraseFromParent();
12957   return MBB;
12958 }
12959 
12960 bool PPCTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
12961   // If the function specifically requests inline stack probes, emit them.
12962   if (MF.getFunction().hasFnAttribute("probe-stack"))
12963     return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
12964            "inline-asm";
12965   return false;
12966 }
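
// For example (a sketch of the IR side): a function carrying the attribute
//   attributes #0 = { "probe-stack"="inline-asm" }
// gets inline stack probes (see, e.g., emitProbedAlloca below for the dynamic
// alloca case); any other value, or no attribute at all, makes this hook
// return false.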
12967 
12968 unsigned PPCTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
12969   const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
12970   unsigned StackAlign = TFI->getStackAlignment();
12971   assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
12972          "Unexpected stack alignment");
12973   // The default stack probe size is 4096 if the function has no
12974   // stack-probe-size attribute.
12975   const Function &Fn = MF.getFunction();
12976   unsigned StackProbeSize =
12977       Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
12978   // Round down to the stack alignment.
12979   StackProbeSize &= ~(StackAlign - 1);
12980   return StackProbeSize ? StackProbeSize : StackAlign;
12981 }
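
// Worked example: with "stack-probe-size"=1000 and a 16-byte stack alignment,
// 1000 & ~15 == 992, so the probe step becomes 992 bytes; a requested size
// smaller than the alignment rounds down to 0, and the function falls back to
// returning StackAlign itself.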
12982 
12983 // Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
12984 // into three phases. In the first phase, it uses the PREPARE_PROBED_ALLOCA
12985 // pseudo instruction to get the future values of the actual FramePointer and
12986 // FinalStackPtr. In the second phase, it generates a loop that probes blocks.
12987 // Finally, it uses the DYNAREAOFFSET pseudo instruction to get the future
12988 // MaxCallFrameSize so that it can calculate the correct data area pointer.
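//
// Roughly, for PPC64 with a ProbeSize that fits in 16 bits, the emitted code
// follows a sketch like the one below (register names are placeholders):
//   PREPARE_PROBED_ALLOCA_64 fp, negsize, ...  # actual FP and NegSize
//   add   finalsp, r1, negsize                 # final stack pointer
//   li    scratch, -ProbeSize
//   divd  t1, negsize, scratch                 # probe the leading residual
//   mulld t2, t1, scratch
//   subf  residual, t2, negsize
//   stdux fp, r1, residual
// TestMBB:
//   cmpd  r1, finalsp
//   beq   TailMBB
// BlockMBB:
//   stdux fp, r1, scratch                      # touch one block
//   b     TestMBB
// TailMBB:
//   DYNAREAOFFSET8 off, ...
//   add   dst, r1, off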
12989 MachineBasicBlock *
12990 PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
12991                                     MachineBasicBlock *MBB) const {
12992   const bool isPPC64 = Subtarget.isPPC64();
12993   MachineFunction *MF = MBB->getParent();
12994   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12995   DebugLoc DL = MI.getDebugLoc();
12996   const unsigned ProbeSize = getStackProbeSize(*MF);
12997   const BasicBlock *ProbedBB = MBB->getBasicBlock();
12998   MachineRegisterInfo &MRI = MF->getRegInfo();
12999   // The CFG of the stack-probing code looks as follows:
13000   //         +-----+
13001   //         | MBB |
13002   //         +--+--+
13003   //            |
13004   //       +----v----+
13005   //  +--->+ TestMBB +---+
13006   //  |    +----+----+   |
13007   //  |         |        |
13008   //  |   +-----v----+   |
13009   //  +---+ BlockMBB |   |
13010   //      +----------+   |
13011   //                     |
13012   //       +---------+   |
13013   //       | TailMBB +<--+
13014   //       +---------+
13015   // In MBB, calculate previous frame pointer and final stack pointer.
13016   // In TestMBB, test whether sp has reached the final stack pointer; if so,
13017   // jump to TailMBB. In BlockMBB, probe and update sp, then jump back to
13018   // TestMBB. TailMBB is spliced in via \p MI.
13019   MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
13020   MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
13021   MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
13022 
13023   MachineFunction::iterator MBBIter = ++MBB->getIterator();
13024   MF->insert(MBBIter, TestMBB);
13025   MF->insert(MBBIter, BlockMBB);
13026   MF->insert(MBBIter, TailMBB);
13027 
13028   const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
13029   const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13030 
13031   Register DstReg = MI.getOperand(0).getReg();
13032   Register NegSizeReg = MI.getOperand(1).getReg();
13033   Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
13034   Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13035   Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13036   Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13037 
13038   // Since the value of NegSizeReg might be realigned during prolog/epilog
13039   // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
13040   // actual FramePointer and NegSize.
13041   unsigned ProbeOpc;
13042   if (!MRI.hasOneNonDBGUse(NegSizeReg))
13043     ProbeOpc =
13044         isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
13045   else
13046     // By using PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG, ActualNegSizeReg and
13047     // NegSizeReg will be allocated to the same physical register, avoiding a
13048     // redundant copy when NegSizeReg has only one use, namely the current MI,
13049     // which will then be replaced by the PREPARE_PROBED_ALLOCA pseudo.
13050     ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
13051                        : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
13052   BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
13053       .addDef(ActualNegSizeReg)
13054       .addReg(NegSizeReg)
13055       .add(MI.getOperand(2))
13056       .add(MI.getOperand(3));
13057 
13058   // Calculate final stack pointer, which equals to SP + ActualNegSize.
13059   BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
13060           FinalStackPtr)
13061       .addReg(SPReg)
13062       .addReg(ActualNegSizeReg);
13063 
13064   // Materialize a scratch register for update.
13065   int64_t NegProbeSize = -(int64_t)ProbeSize;
13066   assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
13067   Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13068   if (!isInt<16>(NegProbeSize)) {
13069     Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13070     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
13071         .addImm(NegProbeSize >> 16);
13072     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
13073             ScratchReg)
13074         .addReg(TempReg)
13075         .addImm(NegProbeSize & 0xFFFF);
13076   } else
13077     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
13078         .addImm(NegProbeSize);
13079 
13080   {
13081     // Probing leading residual part.
13082     Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13083     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
13084         .addReg(ActualNegSizeReg)
13085         .addReg(ScratchReg);
13086     Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13087     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
13088         .addReg(Div)
13089         .addReg(ScratchReg);
13090     Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13091     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
13092         .addReg(Mul)
13093         .addReg(ActualNegSizeReg);
13094     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13095         .addReg(FramePointer)
13096         .addReg(SPReg)
13097         .addReg(NegMod);
13098   }
13099 
13100   {
13101     // Remaining part should be multiple of ProbeSize.
13102     Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
13103     BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
13104         .addReg(SPReg)
13105         .addReg(FinalStackPtr);
13106     BuildMI(TestMBB, DL, TII->get(PPC::BCC))
13107         .addImm(PPC::PRED_EQ)
13108         .addReg(CmpResult)
13109         .addMBB(TailMBB);
13110     TestMBB->addSuccessor(BlockMBB);
13111     TestMBB->addSuccessor(TailMBB);
13112   }
13113 
13114   {
13115     // Touch the block.
13116     // |P...|P...|P...
13117     BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13118         .addReg(FramePointer)
13119         .addReg(SPReg)
13120         .addReg(ScratchReg);
13121     BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
13122     BlockMBB->addSuccessor(TestMBB);
13123   }
13124 
13125   // Calculation of MaxCallFrameSize is deferred to prologepilog, use
13126   // DYNAREAOFFSET pseudo instruction to get the future result.
13127   Register MaxCallFrameSizeReg =
13128       MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13129   BuildMI(TailMBB, DL,
13130           TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
13131           MaxCallFrameSizeReg)
13132       .add(MI.getOperand(2))
13133       .add(MI.getOperand(3));
13134   BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
13135       .addReg(SPReg)
13136       .addReg(MaxCallFrameSizeReg);
13137 
13138   // Splice instructions after MI to TailMBB.
13139   TailMBB->splice(TailMBB->end(), MBB,
13140                   std::next(MachineBasicBlock::iterator(MI)), MBB->end());
13141   TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
13142   MBB->addSuccessor(TestMBB);
13143 
13144   // Delete the pseudo instruction.
13145   MI.eraseFromParent();
13146 
13147   ++NumDynamicAllocaProbed;
13148   return TailMBB;
13149 }
13150 
13151 static bool IsSelectCC(MachineInstr &MI) {
13152   switch (MI.getOpcode()) {
13153   case PPC::SELECT_CC_I4:
13154   case PPC::SELECT_CC_I8:
13155   case PPC::SELECT_CC_F4:
13156   case PPC::SELECT_CC_F8:
13157   case PPC::SELECT_CC_F16:
13158   case PPC::SELECT_CC_VRRC:
13159   case PPC::SELECT_CC_VSFRC:
13160   case PPC::SELECT_CC_VSSRC:
13161   case PPC::SELECT_CC_VSRC:
13162   case PPC::SELECT_CC_SPE4:
13163   case PPC::SELECT_CC_SPE:
13164     return true;
13165   default:
13166     return false;
13167   }
13168 }
13169 
13170 static bool IsSelect(MachineInstr &MI) {
13171   switch (MI.getOpcode()) {
13172   case PPC::SELECT_I4:
13173   case PPC::SELECT_I8:
13174   case PPC::SELECT_F4:
13175   case PPC::SELECT_F8:
13176   case PPC::SELECT_F16:
13177   case PPC::SELECT_SPE:
13178   case PPC::SELECT_SPE4:
13179   case PPC::SELECT_VRRC:
13180   case PPC::SELECT_VSFRC:
13181   case PPC::SELECT_VSSRC:
13182   case PPC::SELECT_VSRC:
13183     return true;
13184   default:
13185     return false;
13186   }
13187 }
13188 
13189 MachineBasicBlock *
13190 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
13191                                                MachineBasicBlock *BB) const {
13192   if (MI.getOpcode() == TargetOpcode::STACKMAP ||
13193       MI.getOpcode() == TargetOpcode::PATCHPOINT) {
13194     if (Subtarget.is64BitELFABI() &&
13195         MI.getOpcode() == TargetOpcode::PATCHPOINT &&
13196         !Subtarget.isUsingPCRelativeCalls()) {
13197       // Call lowering should have added an r2 operand to indicate a dependence
13198       // on the TOC base pointer value. It can't however, because there is no
13199       // way to mark the dependence as implicit there, and so the stackmap code
13200       // will confuse it with a regular operand. Instead, add the dependence
13201       // here.
13202       MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
13203     }
13204 
13205     return emitPatchPoint(MI, BB);
13206   }
13207 
13208   if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
13209       MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
13210     return emitEHSjLjSetJmp(MI, BB);
13211   } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
13212              MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
13213     return emitEHSjLjLongJmp(MI, BB);
13214   }
13215 
13216   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13217 
13218   // To "insert" these instructions we actually have to insert their
13219   // control-flow patterns.
13220   const BasicBlock *LLVM_BB = BB->getBasicBlock();
13221   MachineFunction::iterator It = ++BB->getIterator();
13222 
13223   MachineFunction *F = BB->getParent();
13224   MachineRegisterInfo &MRI = F->getRegInfo();
13225 
13226   if (Subtarget.hasISEL() &&
13227       (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13228        MI.getOpcode() == PPC::SELECT_CC_I8 ||
13229        MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
13230     SmallVector<MachineOperand, 2> Cond;
13231     if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13232         MI.getOpcode() == PPC::SELECT_CC_I8)
13233       Cond.push_back(MI.getOperand(4));
13234     else
13235       Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
13236     Cond.push_back(MI.getOperand(1));
13237 
13238     DebugLoc dl = MI.getDebugLoc();
13239     TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
13240                       MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
13241   } else if (IsSelectCC(MI) || IsSelect(MI)) {
13242     // The incoming instruction knows the destination vreg to set, the
13243     // condition code register to branch on, the true/false values to
13244     // select between, and a branch opcode to use.
13245 
13246     //  thisMBB:
13247     //  ...
13248     //   TrueVal = ...
13249     //   cmpTY ccX, r1, r2
13250     //   bCC sinkMBB
13251     //   fallthrough --> copy0MBB
13252     MachineBasicBlock *thisMBB = BB;
13253     MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
13254     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13255     DebugLoc dl = MI.getDebugLoc();
13256     F->insert(It, copy0MBB);
13257     F->insert(It, sinkMBB);
13258 
13259     // Set the call frame size on entry to the new basic blocks.
13260     // See https://reviews.llvm.org/D156113.
13261     unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
13262     copy0MBB->setCallFrameSize(CallFrameSize);
13263     sinkMBB->setCallFrameSize(CallFrameSize);
13264 
13265     // Transfer the remainder of BB and its successor edges to sinkMBB.
13266     sinkMBB->splice(sinkMBB->begin(), BB,
13267                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
13268     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
13269 
13270     // Next, add the true and fallthrough blocks as its successors.
13271     BB->addSuccessor(copy0MBB);
13272     BB->addSuccessor(sinkMBB);
13273 
13274     if (IsSelect(MI)) {
13275       BuildMI(BB, dl, TII->get(PPC::BC))
13276           .addReg(MI.getOperand(1).getReg())
13277           .addMBB(sinkMBB);
13278     } else {
13279       unsigned SelectPred = MI.getOperand(4).getImm();
13280       BuildMI(BB, dl, TII->get(PPC::BCC))
13281           .addImm(SelectPred)
13282           .addReg(MI.getOperand(1).getReg())
13283           .addMBB(sinkMBB);
13284     }
13285 
13286     //  copy0MBB:
13287     //   %FalseValue = ...
13288     //   # fallthrough to sinkMBB
13289     BB = copy0MBB;
13290 
13291     // Update machine-CFG edges
13292     BB->addSuccessor(sinkMBB);
13293 
13294     //  sinkMBB:
13295     //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
13296     //  ...
13297     BB = sinkMBB;
13298     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
13299         .addReg(MI.getOperand(3).getReg())
13300         .addMBB(copy0MBB)
13301         .addReg(MI.getOperand(2).getReg())
13302         .addMBB(thisMBB);
13303   } else if (MI.getOpcode() == PPC::ReadTB) {
13304     // To read the 64-bit time-base register on a 32-bit target, we read the
13305     // two halves. Should the counter have wrapped while it was being read, we
13306     // need to try again.
13307     // ...
13308     // readLoop:
13309     // mfspr Rx,TBU # load from TBU
13310     // mfspr Ry,TB  # load from TB
13311     // mfspr Rz,TBU # load from TBU
13312     // cmpw crX,Rx,Rz # check if 'old'='new'
13313     // bne readLoop   # branch if they're not equal
13314     // ...
13315 
13316     MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
13317     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13318     DebugLoc dl = MI.getDebugLoc();
13319     F->insert(It, readMBB);
13320     F->insert(It, sinkMBB);
13321 
13322     // Transfer the remainder of BB and its successor edges to sinkMBB.
13323     sinkMBB->splice(sinkMBB->begin(), BB,
13324                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
13325     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
13326 
13327     BB->addSuccessor(readMBB);
13328     BB = readMBB;
13329 
13330     MachineRegisterInfo &RegInfo = F->getRegInfo();
13331     Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13332     Register LoReg = MI.getOperand(0).getReg();
13333     Register HiReg = MI.getOperand(1).getReg();
13334 
13335     BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
13336     BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
13337     BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
13338 
13339     Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13340 
13341     BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
13342         .addReg(HiReg)
13343         .addReg(ReadAgainReg);
13344     BuildMI(BB, dl, TII->get(PPC::BCC))
13345         .addImm(PPC::PRED_NE)
13346         .addReg(CmpReg)
13347         .addMBB(readMBB);
13348 
13349     BB->addSuccessor(readMBB);
13350     BB->addSuccessor(sinkMBB);
13351   } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
13352     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
13353   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
13354     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
13355   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
13356     BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
13357   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
13358     BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
13359 
13360   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
13361     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
13362   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
13363     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
13364   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
13365     BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
13366   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
13367     BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
13368 
13369   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
13370     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
13371   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
13372     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
13373   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
13374     BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
13375   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
13376     BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
13377 
13378   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
13379     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
13380   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
13381     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
13382   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
13383     BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
13384   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
13385     BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
13386 
13387   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
13388     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
13389   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
13390     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
13391   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
13392     BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
13393   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
13394     BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
13395 
13396   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
13397     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
13398   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
13399     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
13400   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
13401     BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
13402   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
13403     BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
13404 
13405   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
13406     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LT);
13407   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
13408     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LT);
13409   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
13410     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LT);
13411   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
13412     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LT);
13413 
13414   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
13415     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GT);
13416   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
13417     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GT);
13418   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
13419     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GT);
13420   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
13421     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GT);
13422 
13423   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
13424     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LT);
13425   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
13426     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LT);
13427   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
13428     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LT);
13429   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
13430     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LT);
13431 
13432   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
13433     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GT);
13434   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
13435     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GT);
13436   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
13437     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GT);
13438   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
13439     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GT);
13440 
13441   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
13442     BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
13443   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
13444     BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
13445   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
13446     BB = EmitAtomicBinary(MI, BB, 4, 0);
13447   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
13448     BB = EmitAtomicBinary(MI, BB, 8, 0);
13449   else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
13450            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
13451            (Subtarget.hasPartwordAtomics() &&
13452             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
13453            (Subtarget.hasPartwordAtomics() &&
13454             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
13455     bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
13456 
13457     auto LoadMnemonic = PPC::LDARX;
13458     auto StoreMnemonic = PPC::STDCX;
13459     switch (MI.getOpcode()) {
13460     default:
13461       llvm_unreachable("Compare and swap of unknown size");
13462     case PPC::ATOMIC_CMP_SWAP_I8:
13463       LoadMnemonic = PPC::LBARX;
13464       StoreMnemonic = PPC::STBCX;
13465       assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
13466       break;
13467     case PPC::ATOMIC_CMP_SWAP_I16:
13468       LoadMnemonic = PPC::LHARX;
13469       StoreMnemonic = PPC::STHCX;
13470       assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
13471       break;
13472     case PPC::ATOMIC_CMP_SWAP_I32:
13473       LoadMnemonic = PPC::LWARX;
13474       StoreMnemonic = PPC::STWCX;
13475       break;
13476     case PPC::ATOMIC_CMP_SWAP_I64:
13477       LoadMnemonic = PPC::LDARX;
13478       StoreMnemonic = PPC::STDCX;
13479       break;
13480     }
13481     MachineRegisterInfo &RegInfo = F->getRegInfo();
13482     Register dest = MI.getOperand(0).getReg();
13483     Register ptrA = MI.getOperand(1).getReg();
13484     Register ptrB = MI.getOperand(2).getReg();
13485     Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13486     Register oldval = MI.getOperand(3).getReg();
13487     Register newval = MI.getOperand(4).getReg();
13488     DebugLoc dl = MI.getDebugLoc();
13489 
13490     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
13491     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
13492     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13493     F->insert(It, loop1MBB);
13494     F->insert(It, loop2MBB);
13495     F->insert(It, exitMBB);
13496     exitMBB->splice(exitMBB->begin(), BB,
13497                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
13498     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13499 
13500     //  thisMBB:
13501     //   ...
13502     //   fallthrough --> loopMBB
13503     BB->addSuccessor(loop1MBB);
13504 
13505     // loop1MBB:
13506     //   l[bhwd]arx dest, ptr
13507     //   cmp[wd] dest, oldval
13508     //   bne- exitBB
13509     // loop2MBB:
13510     //   st[bhwd]cx. newval, ptr
13511     //   bne- loopMBB
13512     //   b exitBB
13513     // exitBB:
13514     BB = loop1MBB;
13515     BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
13516     BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
13517         .addReg(dest)
13518         .addReg(oldval);
13519     BuildMI(BB, dl, TII->get(PPC::BCC))
13520         .addImm(PPC::PRED_NE)
13521         .addReg(CrReg)
13522         .addMBB(exitMBB);
13523     BB->addSuccessor(loop2MBB);
13524     BB->addSuccessor(exitMBB);
13525 
13526     BB = loop2MBB;
13527     BuildMI(BB, dl, TII->get(StoreMnemonic))
13528         .addReg(newval)
13529         .addReg(ptrA)
13530         .addReg(ptrB);
13531     BuildMI(BB, dl, TII->get(PPC::BCC))
13532         .addImm(PPC::PRED_NE)
13533         .addReg(PPC::CR0)
13534         .addMBB(loop1MBB);
13535     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
13536     BB->addSuccessor(loop1MBB);
13537     BB->addSuccessor(exitMBB);
13538 
13539     //  exitMBB:
13540     //   ...
13541     BB = exitMBB;
13542   } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
13543              MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
13544     // We must use 64-bit registers for addresses when targeting 64-bit,
13545     // since we're actually doing arithmetic on them.  Other registers
13546     // can be 32-bit.
13547     bool is64bit = Subtarget.isPPC64();
13548     bool isLittleEndian = Subtarget.isLittleEndian();
13549     bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
13550 
13551     Register dest = MI.getOperand(0).getReg();
13552     Register ptrA = MI.getOperand(1).getReg();
13553     Register ptrB = MI.getOperand(2).getReg();
13554     Register oldval = MI.getOperand(3).getReg();
13555     Register newval = MI.getOperand(4).getReg();
13556     DebugLoc dl = MI.getDebugLoc();
13557 
13558     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
13559     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
13560     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13561     F->insert(It, loop1MBB);
13562     F->insert(It, loop2MBB);
13563     F->insert(It, exitMBB);
13564     exitMBB->splice(exitMBB->begin(), BB,
13565                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
13566     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13567 
13568     MachineRegisterInfo &RegInfo = F->getRegInfo();
13569     const TargetRegisterClass *RC =
13570         is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13571     const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13572 
13573     Register PtrReg = RegInfo.createVirtualRegister(RC);
13574     Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
13575     Register ShiftReg =
13576         isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
13577     Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
13578     Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
13579     Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
13580     Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
13581     Register MaskReg = RegInfo.createVirtualRegister(GPRC);
13582     Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
13583     Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
13584     Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
13585     Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
13586     Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
13587     Register Ptr1Reg;
13588     Register TmpReg = RegInfo.createVirtualRegister(GPRC);
13589     Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
13590     Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13591     //  thisMBB:
13592     //   ...
13593     //   fallthrough --> loopMBB
13594     BB->addSuccessor(loop1MBB);
13595 
13596     // The 4-byte load must be aligned, while a char or short may be
13597     // anywhere in the word.  Hence all this nasty bookkeeping code.
13598     //   add ptr1, ptrA, ptrB [copy if ptrA==0]
13599     //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13600     //   xori shift, shift1, 24 [16]
13601     //   rlwinm ptr, ptr1, 0, 0, 29
13602     //   slw newval2, newval, shift
13603     //   slw oldval2, oldval, shift
13604     //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13605     //   slw mask, mask2, shift
13606     //   and newval3, newval2, mask
13607     //   and oldval3, oldval2, mask
13608     // loop1MBB:
13609     //   lwarx tmpDest, ptr
13610     //   and tmp, tmpDest, mask
13611     //   cmpw tmp, oldval3
13612     //   bne- exitBB
13613     // loop2MBB:
13614     //   andc tmp2, tmpDest, mask
13615     //   or tmp4, tmp2, newval3
13616     //   stwcx. tmp4, ptr
13617     //   bne- loop1MBB
13618     //   b exitBB
13619     // exitBB:
13620     //   srw dest, tmpDest, shift
13621     if (ptrA != ZeroReg) {
13622       Ptr1Reg = RegInfo.createVirtualRegister(RC);
13623       BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
13624           .addReg(ptrA)
13625           .addReg(ptrB);
13626     } else {
13627       Ptr1Reg = ptrB;
13628     }
13629 
13630     // We need to use a 32-bit subregister here to avoid a register class
13631     // mismatch in 64-bit mode.
13632     BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
13633         .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
13634         .addImm(3)
13635         .addImm(27)
13636         .addImm(is8bit ? 28 : 27);
13637     if (!isLittleEndian)
13638       BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
13639           .addReg(Shift1Reg)
13640           .addImm(is8bit ? 24 : 16);
13641     if (is64bit)
13642       BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
13643           .addReg(Ptr1Reg)
13644           .addImm(0)
13645           .addImm(61);
13646     else
13647       BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
13648           .addReg(Ptr1Reg)
13649           .addImm(0)
13650           .addImm(0)
13651           .addImm(29);
13652     BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
13653         .addReg(newval)
13654         .addReg(ShiftReg);
13655     BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
13656         .addReg(oldval)
13657         .addReg(ShiftReg);
13658     if (is8bit)
13659       BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
13660     else {
13661       BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
13662       BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
13663           .addReg(Mask3Reg)
13664           .addImm(65535);
13665     }
13666     BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
13667         .addReg(Mask2Reg)
13668         .addReg(ShiftReg);
13669     BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
13670         .addReg(NewVal2Reg)
13671         .addReg(MaskReg);
13672     BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
13673         .addReg(OldVal2Reg)
13674         .addReg(MaskReg);
13675 
13676     BB = loop1MBB;
13677     BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
13678         .addReg(ZeroReg)
13679         .addReg(PtrReg);
13680     BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
13681         .addReg(TmpDestReg)
13682         .addReg(MaskReg);
13683     BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg)
13684         .addReg(TmpReg)
13685         .addReg(OldVal3Reg);
13686     BuildMI(BB, dl, TII->get(PPC::BCC))
13687         .addImm(PPC::PRED_NE)
13688         .addReg(CrReg)
13689         .addMBB(exitMBB);
13690     BB->addSuccessor(loop2MBB);
13691     BB->addSuccessor(exitMBB);
13692 
13693     BB = loop2MBB;
13694     BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
13695         .addReg(TmpDestReg)
13696         .addReg(MaskReg);
13697     BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
13698         .addReg(Tmp2Reg)
13699         .addReg(NewVal3Reg);
13700     BuildMI(BB, dl, TII->get(PPC::STWCX))
13701         .addReg(Tmp4Reg)
13702         .addReg(ZeroReg)
13703         .addReg(PtrReg);
13704     BuildMI(BB, dl, TII->get(PPC::BCC))
13705         .addImm(PPC::PRED_NE)
13706         .addReg(PPC::CR0)
13707         .addMBB(loop1MBB);
13708     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
13709     BB->addSuccessor(loop1MBB);
13710     BB->addSuccessor(exitMBB);
13711 
13712     //  exitMBB:
13713     //   ...
13714     BB = exitMBB;
13715     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
13716         .addReg(TmpReg)
13717         .addReg(ShiftReg);
13718   } else if (MI.getOpcode() == PPC::FADDrtz) {
13719     // This pseudo performs an FADD with rounding mode temporarily forced
13720     // to round-to-zero.  We emit this via custom inserter since the FPSCR
13721     // is not modeled at the SelectionDAG level.
13722     Register Dest = MI.getOperand(0).getReg();
13723     Register Src1 = MI.getOperand(1).getReg();
13724     Register Src2 = MI.getOperand(2).getReg();
13725     DebugLoc dl = MI.getDebugLoc();
13726 
13727     MachineRegisterInfo &RegInfo = F->getRegInfo();
13728     Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13729 
13730     // Save FPSCR value.
13731     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
13732 
13733     // Set rounding mode to round-to-zero.
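    // FPSCR bits 30:31 (62:63 in the 64-bit numbering) form the RN field;
    // mtfsb1 31 together with mtfsb0 30 selects RN=0b01, round toward zero.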
13734     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
13735         .addImm(31)
13736         .addReg(PPC::RM, RegState::ImplicitDefine);
13737 
13738     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
13739         .addImm(30)
13740         .addReg(PPC::RM, RegState::ImplicitDefine);
13741 
13742     // Perform addition.
13743     auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
13744                    .addReg(Src1)
13745                    .addReg(Src2);
13746     if (MI.getFlag(MachineInstr::NoFPExcept))
13747       MIB.setMIFlag(MachineInstr::NoFPExcept);
13748 
13749     // Restore FPSCR value.
13750     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
13751   } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
13752              MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
13753              MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
13754              MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
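    // These pseudos AND the source operand with 1 using a record-form ANDI and
    // then copy the resulting EQ or GT bit of CR0 into the i1 destination.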
13755     unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
13756                        MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
13757                           ? PPC::ANDI8_rec
13758                           : PPC::ANDI_rec;
13759     bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
13760                  MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
13761 
13762     MachineRegisterInfo &RegInfo = F->getRegInfo();
13763     Register Dest = RegInfo.createVirtualRegister(
13764         Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
13765 
13766     DebugLoc Dl = MI.getDebugLoc();
13767     BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
13768         .addReg(MI.getOperand(1).getReg())
13769         .addImm(1);
13770     BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13771             MI.getOperand(0).getReg())
13772         .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
13773   } else if (MI.getOpcode() == PPC::TCHECK_RET) {
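    // Expand TCHECK_RET: issue TCHECK into a fresh CR field and copy that
    // field into the result register.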
13774     DebugLoc Dl = MI.getDebugLoc();
13775     MachineRegisterInfo &RegInfo = F->getRegInfo();
13776     Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13777     BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
13778     BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13779             MI.getOperand(0).getReg())
13780         .addReg(CRReg);
13781   } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
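    // Expand TBEGIN_RET: issue TBEGIN with the given immediate and copy the
    // EQ bit of CR0 into the result register.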
13782     DebugLoc Dl = MI.getDebugLoc();
13783     unsigned Imm = MI.getOperand(1).getImm();
13784     BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
13785     BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13786             MI.getOperand(0).getReg())
13787         .addReg(PPC::CR0EQ);
13788   } else if (MI.getOpcode() == PPC::SETRNDi) {
13789     DebugLoc dl = MI.getDebugLoc();
13790     Register OldFPSCRReg = MI.getOperand(0).getReg();
13791 
13792     // Save FPSCR value.
13793     if (MRI.use_empty(OldFPSCRReg))
13794       BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13795     else
13796       BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13797 
13798     // The floating-point rounding mode is in bits 62:63 of FPSCR, and has
13799     // the following settings:
13800     //   00 Round to nearest
13801     //   01 Round to 0
13802     //   10 Round to +inf
13803     //   11 Round to -inf
13804 
13805     // When the operand is an immediate, use its two least significant bits
13806     // to set bits 62:63 of FPSCR.
13807     unsigned Mode = MI.getOperand(1).getImm();
13808     BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
13809         .addImm(31)
13810         .addReg(PPC::RM, RegState::ImplicitDefine);
13811 
13812     BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
13813         .addImm(30)
13814         .addReg(PPC::RM, RegState::ImplicitDefine);
13815   } else if (MI.getOpcode() == PPC::SETRND) {
13816     DebugLoc dl = MI.getDebugLoc();
13817 
13818     // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
13819     // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
13820     // If the target doesn't have DirectMove, we use the stack to do the
13821     // conversion, because the target lacks instructions like mtvsrd or mfvsrd
13822     // that could do this conversion directly.
13823     auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
13824       if (Subtarget.hasDirectMove()) {
13825         BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
13826           .addReg(SrcReg);
13827       } else {
13828         // Use the stack to do the register copy.
13829         unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
13830         MachineRegisterInfo &RegInfo = F->getRegInfo();
13831         const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
13832         if (RC == &PPC::F8RCRegClass) {
13833           // Copy register from F8RCRegClass to G8RCRegClass.
13834           assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
13835                  "Unsupported RegClass.");
13836 
13837           StoreOp = PPC::STFD;
13838           LoadOp = PPC::LD;
13839         } else {
13840           // Copy register from G8RCRegClass to F8RCRegClass.
13841           assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
13842                  (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
13843                  "Unsupported RegClass.");
13844         }
13845 
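        // Create an 8-byte, 8-byte-aligned stack slot to bounce the value
        // between the GPR and FPR register files.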
13846         MachineFrameInfo &MFI = F->getFrameInfo();
13847         int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
13848 
13849         MachineMemOperand *MMOStore = F->getMachineMemOperand(
13850             MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13851             MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
13852             MFI.getObjectAlign(FrameIdx));
13853 
13854         // Store the SrcReg into the stack.
13855         BuildMI(*BB, MI, dl, TII->get(StoreOp))
13856           .addReg(SrcReg)
13857           .addImm(0)
13858           .addFrameIndex(FrameIdx)
13859           .addMemOperand(MMOStore);
13860 
13861         MachineMemOperand *MMOLoad = F->getMachineMemOperand(
13862             MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13863             MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
13864             MFI.getObjectAlign(FrameIdx));
13865 
13866         // Load from the stack slot where SrcReg was stored into DestReg; this
13867         // completes the register-class conversion from SrcReg's class to
13868         // DestReg's class.
13869         BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
13870           .addImm(0)
13871           .addFrameIndex(FrameIdx)
13872           .addMemOperand(MMOLoad);
13873       }
13874     };
13875 
13876     Register OldFPSCRReg = MI.getOperand(0).getReg();
13877 
13878     // Save FPSCR value.
13879     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13880 
13881     // When the operand is a gprc register, use its two least significant bits
13882     // and the mtfsf instruction to set bits 62:63 of FPSCR.
13883     //
13884     // copy OldFPSCRTmpReg, OldFPSCRReg
13885     // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
13886     // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
13887     // copy NewFPSCRReg, NewFPSCRTmpReg
13888     // mtfsf 255, NewFPSCRReg
13889     MachineOperand SrcOp = MI.getOperand(1);
13890     MachineRegisterInfo &RegInfo = F->getRegInfo();
13891     Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13892 
13893     copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
13894 
13895     Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13896     Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13897 
13898     // The first operand of INSERT_SUBREG should be a register that has
13899     // subregisters. Since we only care about its register class, we use an
13900     // IMPLICIT_DEF register.
13901     BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
13902     BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
13903       .addReg(ImDefReg)
13904       .add(SrcOp)
13905       .addImm(1);
13906 
13907     Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13908     BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
13909       .addReg(OldFPSCRTmpReg)
13910       .addReg(ExtSrcReg)
13911       .addImm(0)
13912       .addImm(62);
13913 
13914     Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13915     copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
13916 
13917     // The mask 255 means that bits 32:63 of NewFPSCRReg are written to bits
13918     // 32:63 of FPSCR.
13919     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
13920       .addImm(255)
13921       .addReg(NewFPSCRReg)
13922       .addImm(0)
13923       .addImm(0);
13924   } else if (MI.getOpcode() == PPC::SETFLM) {
13925     DebugLoc Dl = MI.getDebugLoc();
13926 
13927     // The result of setflm is the previous FPSCR content, so save it first.
13928     Register OldFPSCRReg = MI.getOperand(0).getReg();
13929     if (MRI.use_empty(OldFPSCRReg))
13930       BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13931     else
13932       BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
13933 
13934     // Put bits 32:63 of NewFPSCRReg into FPSCR.
13935     Register NewFPSCRReg = MI.getOperand(1).getReg();
13936     BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
13937         .addImm(255)
13938         .addReg(NewFPSCRReg)
13939         .addImm(0)
13940         .addImm(0);
13941   } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
13942              MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
13943     return emitProbedAlloca(MI, BB);
13944   } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
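    // Split the quadword source into its Lo and Hi 64-bit halves by copying
    // the corresponding subregisters.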
13945     DebugLoc DL = MI.getDebugLoc();
13946     Register Src = MI.getOperand(2).getReg();
13947     Register Lo = MI.getOperand(0).getReg();
13948     Register Hi = MI.getOperand(1).getReg();
13949     BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13950         .addDef(Lo)
13951         .addUse(Src, 0, PPC::sub_gp8_x1);
13952     BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13953         .addDef(Hi)
13954         .addUse(Src, 0, PPC::sub_gp8_x0);
13955   } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
13956              MI.getOpcode() == PPC::STQX_PSEUDO) {
13957     DebugLoc DL = MI.getDebugLoc();
13958     // Ptr is used as the ptr_rc_no_r0 part of LQ/STQ's memory operand and
13959     // holds the result of adding RA and RB, so it has to be
13960     // g8rc_and_g8rc_nox0.
13961     Register Ptr =
13962         F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
13963     Register Val = MI.getOperand(0).getReg();
13964     Register RA = MI.getOperand(1).getReg();
13965     Register RB = MI.getOperand(2).getReg();
13966     BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
13967     BuildMI(*BB, MI, DL,
13968             MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
13969                                               : TII->get(PPC::STQ))
13970         .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
13971         .addImm(0)
13972         .addReg(Ptr);
13973   } else {
13974     llvm_unreachable("Unexpected instr type to insert");
13975   }
13976 
13977   MI.eraseFromParent(); // The pseudo instruction is gone now.
13978   return BB;
13979 }
13980 
13981 //===----------------------------------------------------------------------===//
13982 // Target Optimization Hooks
13983 //===----------------------------------------------------------------------===//
13984 
13985 static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
13986   // For the estimates, convergence is quadratic, so we essentially double the
13987   // number of digits correct after every iteration. For both FRE and FRSQRTE,
13988   // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
13989   // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
13990   int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
13991   if (VT.getScalarType() == MVT::f64)
13992     RefinementSteps++;
13993   return RefinementSteps;
13994 }
13995 
13996 SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
13997                                             const DenormalMode &Mode) const {
13998   // We only have VSX Vector Test for software Square Root.
13999   EVT VT = Op.getValueType();
14000   if (!isTypeLegal(MVT::i1) ||
14001       (VT != MVT::f64 &&
14002        ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
14003     return TargetLowering::getSqrtInputTest(Op, DAG, Mode);
14004 
14005   SDLoc DL(Op);
14006   // The output register of FTSQRT is a CR field.
14007   SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
14008   // ftsqrt BF,FRB
14009   // Let e_b be the unbiased exponent of the double-precision
14010   // floating-point operand in register FRB.
14011   // fe_flag is set to 1 if either of the following conditions occurs.
14012   //   - The double-precision floating-point operand in register FRB is a zero,
14013   //     a NaN, an infinity, or a negative value.
14014   //   - e_b is less than or equal to -970.
14015   // Otherwise fe_flag is set to 0.
14016   // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
14017   // not eligible for iteration (zero/negative/infinity/NaN, or the unbiased
14018   // exponent is less than or equal to -970).
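  // Extract the EQ bit of the CR field produced by FTSQRT; it is set when the
  // input is not eligible for the estimate-and-refine sequence.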
14019   SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
14020   return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
14021                                     FTSQRT, SRIdxVal),
14022                  0);
14023 }
14024 
14025 SDValue
14026 PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
14027                                                SelectionDAG &DAG) const {
14028   // We only have VSX Vector Square Root.
14029   EVT VT = Op.getValueType();
14030   if (VT != MVT::f64 &&
14031       ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
14032     return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
14033 
14034   return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
14035 }
14036 
14037 SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
14038                                            int Enabled, int &RefinementSteps,
14039                                            bool &UseOneConstNR,
14040                                            bool Reciprocal) const {
14041   EVT VT = Operand.getValueType();
14042   if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
14043       (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
14044       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14045       (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14046     if (RefinementSteps == ReciprocalEstimate::Unspecified)
14047       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14048 
14049     // The Newton-Raphson computation with a single constant does not provide
14050     // enough accuracy on some CPUs.
14051     UseOneConstNR = !Subtarget.needsTwoConstNR();
14052     return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
14053   }
14054   return SDValue();
14055 }
14056 
14057 SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
14058                                             int Enabled,
14059                                             int &RefinementSteps) const {
14060   EVT VT = Operand.getValueType();
14061   if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
14062       (VT == MVT::f64 && Subtarget.hasFRE()) ||
14063       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14064       (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14065     if (RefinementSteps == ReciprocalEstimate::Unspecified)
14066       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14067     return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
14068   }
14069   return SDValue();
14070 }
14071 
14072 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
14073   // Note: This functionality is used only when unsafe-fp-math is enabled, and
14074   // on cores with reciprocal estimates (which are used when unsafe-fp-math is
14075   // enabled for division), this functionality is redundant with the default
14076   // combiner logic (once the division -> reciprocal/multiply transformation
14077   // has taken place). As a result, this matters more for older cores than for
14078   // newer ones.
14079 
14080   // Combine multiple FDIVs with the same divisor into multiple FMULs by the
14081   // reciprocal if there are two or more FDIVs (for embedded cores with only
14082   // one FP pipeline) or three or more FDIVs (for generic OOO cores).
14083   switch (Subtarget.getCPUDirective()) {
14084   default:
14085     return 3;
14086   case PPC::DIR_440:
14087   case PPC::DIR_A2:
14088   case PPC::DIR_E500:
14089   case PPC::DIR_E500mc:
14090   case PPC::DIR_E5500:
14091     return 2;
14092   }
14093 }
14094 
14095 // isConsecutiveLSLoc needs to work even if all adds have not yet been
14096 // collapsed, and so we need to look through chains of them.
14097 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
14098                                      int64_t& Offset, SelectionDAG &DAG) {
14099   if (DAG.isBaseWithConstantOffset(Loc)) {
14100     Base = Loc.getOperand(0);
14101     Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
14102 
14103     // The base might itself be a base plus an offset, and if so, accumulate
14104     // that as well.
14105     getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
14106   }
14107 }
14108 
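// Return true if the memory location Loc (of width Bytes, matching Base's
// access size) lies exactly Dist * Bytes past the location accessed by Base.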
14109 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
14110                             unsigned Bytes, int Dist,
14111                             SelectionDAG &DAG) {
14112   if (VT.getSizeInBits() / 8 != Bytes)
14113     return false;
14114 
14115   SDValue BaseLoc = Base->getBasePtr();
14116   if (Loc.getOpcode() == ISD::FrameIndex) {
14117     if (BaseLoc.getOpcode() != ISD::FrameIndex)
14118       return false;
14119     const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
14120     int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
14121     int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
14122     int FS  = MFI.getObjectSize(FI);
14123     int BFS = MFI.getObjectSize(BFI);
14124     if (FS != BFS || FS != (int)Bytes) return false;
14125     return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
14126   }
14127 
14128   SDValue Base1 = Loc, Base2 = BaseLoc;
14129   int64_t Offset1 = 0, Offset2 = 0;
14130   getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
14131   getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
14132   if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
14133     return true;
14134 
14135   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14136   const GlobalValue *GV1 = nullptr;
14137   const GlobalValue *GV2 = nullptr;
14138   Offset1 = 0;
14139   Offset2 = 0;
14140   bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
14141   bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
14142   if (isGA1 && isGA2 && GV1 == GV2)
14143     return Offset1 == (Offset2 + Dist*Bytes);
14144   return false;
14145 }
14146 
14147 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
14148 // not enforce equality of the chain operands.
14149 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
14150                             unsigned Bytes, int Dist,
14151                             SelectionDAG &DAG) {
14152   if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
14153     EVT VT = LS->getMemoryVT();
14154     SDValue Loc = LS->getBasePtr();
14155     return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
14156   }
14157 
14158   if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
14159     EVT VT;
14160     switch (N->getConstantOperandVal(1)) {
14161     default: return false;
14162     case Intrinsic::ppc_altivec_lvx:
14163     case Intrinsic::ppc_altivec_lvxl:
14164     case Intrinsic::ppc_vsx_lxvw4x:
14165     case Intrinsic::ppc_vsx_lxvw4x_be:
14166       VT = MVT::v4i32;
14167       break;
14168     case Intrinsic::ppc_vsx_lxvd2x:
14169     case Intrinsic::ppc_vsx_lxvd2x_be:
14170       VT = MVT::v2f64;
14171       break;
14172     case Intrinsic::ppc_altivec_lvebx:
14173       VT = MVT::i8;
14174       break;
14175     case Intrinsic::ppc_altivec_lvehx:
14176       VT = MVT::i16;
14177       break;
14178     case Intrinsic::ppc_altivec_lvewx:
14179       VT = MVT::i32;
14180       break;
14181     }
14182 
14183     return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
14184   }
14185 
14186   if (N->getOpcode() == ISD::INTRINSIC_VOID) {
14187     EVT VT;
14188     switch (N->getConstantOperandVal(1)) {
14189     default: return false;
14190     case Intrinsic::ppc_altivec_stvx:
14191     case Intrinsic::ppc_altivec_stvxl:
14192     case Intrinsic::ppc_vsx_stxvw4x:
14193       VT = MVT::v4i32;
14194       break;
14195     case Intrinsic::ppc_vsx_stxvd2x:
14196       VT = MVT::v2f64;
14197       break;
14198     case Intrinsic::ppc_vsx_stxvw4x_be:
14199       VT = MVT::v4i32;
14200       break;
14201     case Intrinsic::ppc_vsx_stxvd2x_be:
14202       VT = MVT::v2f64;
14203       break;
14204     case Intrinsic::ppc_altivec_stvebx:
14205       VT = MVT::i8;
14206       break;
14207     case Intrinsic::ppc_altivec_stvehx:
14208       VT = MVT::i16;
14209       break;
14210     case Intrinsic::ppc_altivec_stvewx:
14211       VT = MVT::i32;
14212       break;
14213     }
14214 
14215     return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
14216   }
14217 
14218   return false;
14219 }
14220 
14221 // Return true if there is a nearby consecutive load to the one provided
14222 // (regardless of alignment). We search up and down the chain, looking through
14223 // token factors and other loads (but nothing else). As a result, a true result
14224 // indicates that it is safe to create a new consecutive load adjacent to the
14225 // load provided.
14226 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
14227   SDValue Chain = LD->getChain();
14228   EVT VT = LD->getMemoryVT();
14229 
14230   SmallSet<SDNode *, 16> LoadRoots;
14231   SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
14232   SmallSet<SDNode *, 16> Visited;
14233 
14234   // First, search up the chain, branching to follow all token-factor operands.
14235   // If we find a consecutive load, then we're done, otherwise, record all
14236   // nodes just above the top-level loads and token factors.
14237   while (!Queue.empty()) {
14238     SDNode *ChainNext = Queue.pop_back_val();
14239     if (!Visited.insert(ChainNext).second)
14240       continue;
14241 
14242     if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
14243       if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
14244         return true;
14245 
14246       if (!Visited.count(ChainLD->getChain().getNode()))
14247         Queue.push_back(ChainLD->getChain().getNode());
14248     } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
14249       for (const SDUse &O : ChainNext->ops())
14250         if (!Visited.count(O.getNode()))
14251           Queue.push_back(O.getNode());
14252     } else
14253       LoadRoots.insert(ChainNext);
14254   }
14255 
14256   // Second, search down the chain, starting from the top-level nodes recorded
14257   // in the first phase. These top-level nodes are the nodes just above all
14258 // loads and token factors. Starting with their uses, recursively look through
14259   // all loads (just the chain uses) and token factors to find a consecutive
14260   // load.
14261   Visited.clear();
14262   Queue.clear();
14263 
14264   for (SDNode *I : LoadRoots) {
14265     Queue.push_back(I);
14266 
14267     while (!Queue.empty()) {
14268       SDNode *LoadRoot = Queue.pop_back_val();
14269       if (!Visited.insert(LoadRoot).second)
14270         continue;
14271 
14272       if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
14273         if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
14274           return true;
14275 
14276       for (SDNode *U : LoadRoot->users())
14277         if (((isa<MemSDNode>(U) &&
14278               cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
14279              U->getOpcode() == ISD::TokenFactor) &&
14280             !Visited.count(U))
14281           Queue.push_back(U);
14282     }
14283   }
14284 
14285   return false;
14286 }
14287 
14288 /// This function is called when we have proved that a SETCC node can be replaced
14289 /// by subtraction (and other supporting instructions) so that the result of
14290 /// comparison is kept in a GPR instead of CR. This function is purely for
14291 /// codegen purposes and has some flags to guide the codegen process.
14292 static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
14293                                      bool Swap, SDLoc &DL, SelectionDAG &DAG) {
14294   assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14295 
14296   // Zero extend the operands to the largest legal integer. The original
14297   // operands must be of a strictly smaller size.
14298   auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
14299                          DAG.getConstant(Size, DL, MVT::i32));
14300   auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
14301                          DAG.getConstant(Size, DL, MVT::i32));
14302 
14303   // Swap if needed, depending on the condition code.
14304   if (Swap)
14305     std::swap(Op0, Op1);
14306 
14307   // Subtract extended integers.
14308   auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
14309 
14310   // Move the sign bit to the least significant position and zero out the rest.
14311   // Now the least significant bit carries the result of original comparison.
14312   auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
14313                              DAG.getConstant(Size - 1, DL, MVT::i32));
14314   auto Final = Shifted;
14315 
14316   // Complement the result if needed, based on the condition code.
14317   if (Complement)
14318     Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
14319                         DAG.getConstant(1, DL, MVT::i64));
14320 
14321   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
14322 }
14323 
14324 SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
14325                                                   DAGCombinerInfo &DCI) const {
14326   assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14327 
14328   SelectionDAG &DAG = DCI.DAG;
14329   SDLoc DL(N);
14330 
14331   // The size of the integers being compared has a critical role in the
14332   // following analysis, so we prefer to do this when all types are legal.
14333   if (!DCI.isAfterLegalizeDAG())
14334     return SDValue();
14335 
14336   // If all users of the SETCC extend its value to a legal integer type,
14337   // then we replace the SETCC with a subtraction.
14338   for (const SDNode *U : N->users())
14339     if (U->getOpcode() != ISD::ZERO_EXTEND)
14340       return SDValue();
14341 
14342   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
14343   auto OpSize = N->getOperand(0).getValueSizeInBits();
14344 
14345   unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
14346 
14347   if (OpSize < Size) {
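    // The sign bit of (zext(LHS) - zext(RHS)) computed in the wider type gives
    // the unsigned LHS < RHS result; the other predicates are obtained by
    // swapping the operands and/or complementing that bit.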
14348     switch (CC) {
14349     default: break;
14350     case ISD::SETULT:
14351       return generateEquivalentSub(N, Size, false, false, DL, DAG);
14352     case ISD::SETULE:
14353       return generateEquivalentSub(N, Size, true, true, DL, DAG);
14354     case ISD::SETUGT:
14355       return generateEquivalentSub(N, Size, false, true, DL, DAG);
14356     case ISD::SETUGE:
14357       return generateEquivalentSub(N, Size, true, false, DL, DAG);
14358     }
14359   }
14360 
14361   return SDValue();
14362 }
14363 
14364 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
14365                                                   DAGCombinerInfo &DCI) const {
14366   SelectionDAG &DAG = DCI.DAG;
14367   SDLoc dl(N);
14368 
14369   assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
14370   // If we're tracking CR bits, we need to be careful that we don't have:
14371   //   trunc(binary-ops(zext(x), zext(y)))
14372   // or
14373   //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
14374   // such that we're unnecessarily moving things into GPRs when it would be
14375   // better to keep them in CR bits.
14376 
14377   // Note that trunc here can be an actual i1 trunc, or can be the effective
14378   // truncation that comes from a setcc or select_cc.
14379   if (N->getOpcode() == ISD::TRUNCATE &&
14380       N->getValueType(0) != MVT::i1)
14381     return SDValue();
14382 
14383   if (N->getOperand(0).getValueType() != MVT::i32 &&
14384       N->getOperand(0).getValueType() != MVT::i64)
14385     return SDValue();
14386 
14387   if (N->getOpcode() == ISD::SETCC ||
14388       N->getOpcode() == ISD::SELECT_CC) {
14389     // If we're looking at a comparison, then we need to make sure that the
14390     // high bits (all except for the first) don't affect the result.
14391     ISD::CondCode CC =
14392       cast<CondCodeSDNode>(N->getOperand(
14393         N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
14394     unsigned OpBits = N->getOperand(0).getValueSizeInBits();
14395 
14396     if (ISD::isSignedIntSetCC(CC)) {
14397       if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
14398           DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
14399         return SDValue();
14400     } else if (ISD::isUnsignedIntSetCC(CC)) {
14401       if (!DAG.MaskedValueIsZero(N->getOperand(0),
14402                                  APInt::getHighBitsSet(OpBits, OpBits-1)) ||
14403           !DAG.MaskedValueIsZero(N->getOperand(1),
14404                                  APInt::getHighBitsSet(OpBits, OpBits-1)))
14405         return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
14406                                              : SDValue());
14407     } else {
14408       // This is neither a signed nor an unsigned comparison, just make sure
14409       // that the high bits are equal.
14410       KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
14411       KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
14412 
14413       // We don't really care about what is known about the first bit (if
14414       // anything), so pretend that it is known zero for both to ensure they can
14415       // be compared as constants.
14416       Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
14417       Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
14418 
14419       if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
14420           Op1Known.getConstant() != Op2Known.getConstant())
14421         return SDValue();
14422     }
14423   }
14424 
14425   // We now know that the higher-order bits are irrelevant; we just need to
14426   // make sure that all of the intermediate operations are bit operations and
14427   // that all inputs are extensions.
14428   if (N->getOperand(0).getOpcode() != ISD::AND &&
14429       N->getOperand(0).getOpcode() != ISD::OR  &&
14430       N->getOperand(0).getOpcode() != ISD::XOR &&
14431       N->getOperand(0).getOpcode() != ISD::SELECT &&
14432       N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
14433       N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
14434       N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
14435       N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
14436       N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
14437     return SDValue();
14438 
14439   if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
14440       N->getOperand(1).getOpcode() != ISD::AND &&
14441       N->getOperand(1).getOpcode() != ISD::OR  &&
14442       N->getOperand(1).getOpcode() != ISD::XOR &&
14443       N->getOperand(1).getOpcode() != ISD::SELECT &&
14444       N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
14445       N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
14446       N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
14447       N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
14448       N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
14449     return SDValue();
14450 
14451   SmallVector<SDValue, 4> Inputs;
14452   SmallVector<SDValue, 8> BinOps, PromOps;
14453   SmallPtrSet<SDNode *, 16> Visited;
14454 
14455   for (unsigned i = 0; i < 2; ++i) {
14456     if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14457           N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14458           N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
14459           N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
14460         isa<ConstantSDNode>(N->getOperand(i)))
14461       Inputs.push_back(N->getOperand(i));
14462     else
14463       BinOps.push_back(N->getOperand(i));
14464 
14465     if (N->getOpcode() == ISD::TRUNCATE)
14466       break;
14467   }
14468 
14469   // Visit all inputs, collect all binary operations (and, or, xor and
14470   // select) that are all fed by extensions.
14471   while (!BinOps.empty()) {
14472     SDValue BinOp = BinOps.pop_back_val();
14473 
14474     if (!Visited.insert(BinOp.getNode()).second)
14475       continue;
14476 
14477     PromOps.push_back(BinOp);
14478 
14479     for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
14480       // The condition of the select is not promoted.
14481       if (BinOp.getOpcode() == ISD::SELECT && i == 0)
14482         continue;
14483       if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
14484         continue;
14485 
14486       if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14487             BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14488             BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
14489            BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
14490           isa<ConstantSDNode>(BinOp.getOperand(i))) {
14491         Inputs.push_back(BinOp.getOperand(i));
14492       } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
14493                  BinOp.getOperand(i).getOpcode() == ISD::OR  ||
14494                  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
14495                  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
14496                  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
14497                  BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
14498                  BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14499                  BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14500                  BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
14501         BinOps.push_back(BinOp.getOperand(i));
14502       } else {
14503         // We have an input that is not an extension or another binary
14504         // operation; we'll abort this transformation.
14505         return SDValue();
14506       }
14507     }
14508   }
14509 
14510   // Make sure that this is a self-contained cluster of operations (which
14511   // is not quite the same thing as saying that everything has only one
14512   // use).
14513   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14514     if (isa<ConstantSDNode>(Inputs[i]))
14515       continue;
14516 
14517     for (const SDNode *User : Inputs[i].getNode()->users()) {
14518       if (User != N && !Visited.count(User))
14519         return SDValue();
14520 
14521       // Make sure that we're not going to promote the non-output-value
14522       // operand(s) of SELECT or SELECT_CC.
14523       // FIXME: Although we could sometimes handle this, and it does occur in
14524       // practice that one of the condition inputs to the select is also one of
14525       // the outputs, we currently can't deal with this.
14526       if (User->getOpcode() == ISD::SELECT) {
14527         if (User->getOperand(0) == Inputs[i])
14528           return SDValue();
14529       } else if (User->getOpcode() == ISD::SELECT_CC) {
14530         if (User->getOperand(0) == Inputs[i] ||
14531             User->getOperand(1) == Inputs[i])
14532           return SDValue();
14533       }
14534     }
14535   }
14536 
14537   for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
14538     for (const SDNode *User : PromOps[i].getNode()->users()) {
14539       if (User != N && !Visited.count(User))
14540         return SDValue();
14541 
14542       // Make sure that we're not going to promote the non-output-value
14543       // operand(s) of SELECT or SELECT_CC.
14544       // FIXME: Although we could sometimes handle this, and it does occur in
14545       // practice that one of the condition inputs to the select is also one of
14546       // the outputs, we currently can't deal with this.
14547       if (User->getOpcode() == ISD::SELECT) {
14548         if (User->getOperand(0) == PromOps[i])
14549           return SDValue();
14550       } else if (User->getOpcode() == ISD::SELECT_CC) {
14551         if (User->getOperand(0) == PromOps[i] ||
14552             User->getOperand(1) == PromOps[i])
14553           return SDValue();
14554       }
14555     }
14556   }
14557 
14558   // Replace all inputs with the extension operand.
14559   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14560     // Constants may have users outside the cluster of to-be-promoted nodes,
14561     // and so we need to replace those as we do the promotions.
14562     if (isa<ConstantSDNode>(Inputs[i]))
14563       continue;
14564     else
14565       DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
14566   }
14567 
14568   std::list<HandleSDNode> PromOpHandles;
14569   for (auto &PromOp : PromOps)
14570     PromOpHandles.emplace_back(PromOp);
14571 
14572   // Replace all operations (these are all the same, but have a different
14573   // (i1) return type). DAG.getNode will validate that the types of
14574   // a binary operator match, so go through the list in reverse so that
14575   // we've likely promoted both operands first. Any intermediate truncations or
14576   // extensions disappear.
14577   while (!PromOpHandles.empty()) {
14578     SDValue PromOp = PromOpHandles.back().getValue();
14579     PromOpHandles.pop_back();
14580 
14581     if (PromOp.getOpcode() == ISD::TRUNCATE ||
14582         PromOp.getOpcode() == ISD::SIGN_EXTEND ||
14583         PromOp.getOpcode() == ISD::ZERO_EXTEND ||
14584         PromOp.getOpcode() == ISD::ANY_EXTEND) {
14585       if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
14586           PromOp.getOperand(0).getValueType() != MVT::i1) {
14587         // The operand is not yet ready (see comment below).
14588         PromOpHandles.emplace_front(PromOp);
14589         continue;
14590       }
14591 
14592       SDValue RepValue = PromOp.getOperand(0);
14593       if (isa<ConstantSDNode>(RepValue))
14594         RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
14595 
14596       DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
14597       continue;
14598     }
14599 
14600     unsigned C;
14601     switch (PromOp.getOpcode()) {
14602     default:             C = 0; break;
14603     case ISD::SELECT:    C = 1; break;
14604     case ISD::SELECT_CC: C = 2; break;
14605     }
14606 
14607     if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
14608          PromOp.getOperand(C).getValueType() != MVT::i1) ||
14609         (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
14610          PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
14611       // The to-be-promoted operands of this node have not yet been
14612       // promoted (this should be rare because we're going through the
14613       // list backward, but if one of the operands has several users in
14614       // this cluster of to-be-promoted nodes, it is possible).
14615       PromOpHandles.emplace_front(PromOp);
14616       continue;
14617     }
14618 
14619     SmallVector<SDValue, 3> Ops(PromOp.getNode()->ops());
14620 
14621     // If there are any constant inputs, make sure they're replaced now.
14622     for (unsigned i = 0; i < 2; ++i)
14623       if (isa<ConstantSDNode>(Ops[C+i]))
14624         Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
14625 
14626     DAG.ReplaceAllUsesOfValueWith(PromOp,
14627       DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
14628   }
14629 
14630   // Now we're left with the initial truncation itself.
14631   if (N->getOpcode() == ISD::TRUNCATE)
14632     return N->getOperand(0);
14633 
14634   // Otherwise, this is a comparison. The operands to be compared have just
14635   // changed type (to i1), but everything else is the same.
14636   return SDValue(N, 0);
14637 }
14638 
14639 SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
14640                                                   DAGCombinerInfo &DCI) const {
14641   SelectionDAG &DAG = DCI.DAG;
14642   SDLoc dl(N);
14643 
14644   // If we're tracking CR bits, we need to be careful that we don't have:
14645   //   zext(binary-ops(trunc(x), trunc(y)))
14646   // or
14647   //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
14648   // such that we're unnecessarily moving things into CR bits that can more
14649   // efficiently stay in GPRs. Note that if we're not certain that the high
14650   // bits are set as required by the final extension, we still may need to do
14651   // some masking to get the proper behavior.
14652 
14653   // This same functionality is important on PPC64 when dealing with
14654   // 32-to-64-bit extensions; these occur often when 32-bit values are used as
14655   // the return values of functions. Because it is so similar, it is handled
14656   // here as well.
14657 
14658   if (N->getValueType(0) != MVT::i32 &&
14659       N->getValueType(0) != MVT::i64)
14660     return SDValue();
14661 
14662   if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
14663         (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
14664     return SDValue();
14665 
14666   if (N->getOperand(0).getOpcode() != ISD::AND &&
14667       N->getOperand(0).getOpcode() != ISD::OR  &&
14668       N->getOperand(0).getOpcode() != ISD::XOR &&
14669       N->getOperand(0).getOpcode() != ISD::SELECT &&
14670       N->getOperand(0).getOpcode() != ISD::SELECT_CC)
14671     return SDValue();
14672 
14673   SmallVector<SDValue, 4> Inputs;
14674   SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
14675   SmallPtrSet<SDNode *, 16> Visited;
14676 
14677   // Visit all inputs, collect all binary operations (and, or, xor and
14678   // select) that are all fed by truncations.
14679   while (!BinOps.empty()) {
14680     SDValue BinOp = BinOps.pop_back_val();
14681 
14682     if (!Visited.insert(BinOp.getNode()).second)
14683       continue;
14684 
14685     PromOps.push_back(BinOp);
14686 
14687     for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
14688       // The condition of the select is not promoted.
14689       if (BinOp.getOpcode() == ISD::SELECT && i == 0)
14690         continue;
14691       if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
14692         continue;
14693 
14694       if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
14695           isa<ConstantSDNode>(BinOp.getOperand(i))) {
14696         Inputs.push_back(BinOp.getOperand(i));
14697       } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
14698                  BinOp.getOperand(i).getOpcode() == ISD::OR  ||
14699                  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
14700                  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
14701                  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
14702         BinOps.push_back(BinOp.getOperand(i));
14703       } else {
14704         // We have an input that is not a truncation or another binary
14705         // operation; we'll abort this transformation.
14706         return SDValue();
14707       }
14708     }
14709   }
14710 
14711   // The operands of a select that must be truncated when the select is
14712   // promoted because the operand is actually part of the to-be-promoted set.
14713   DenseMap<SDNode *, EVT> SelectTruncOp[2];
14714 
14715   // Make sure that this is a self-contained cluster of operations (which
14716   // is not quite the same thing as saying that everything has only one
14717   // use).
14718   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14719     if (isa<ConstantSDNode>(Inputs[i]))
14720       continue;
14721 
14722     for (SDNode *User : Inputs[i].getNode()->users()) {
14723       if (User != N && !Visited.count(User))
14724         return SDValue();
14725 
14726       // If we're going to promote the non-output-value operand(s) of SELECT or
14727       // SELECT_CC, record them for truncation.
14728       if (User->getOpcode() == ISD::SELECT) {
14729         if (User->getOperand(0) == Inputs[i])
14730           SelectTruncOp[0].insert(std::make_pair(User,
14731                                     User->getOperand(0).getValueType()));
14732       } else if (User->getOpcode() == ISD::SELECT_CC) {
14733         if (User->getOperand(0) == Inputs[i])
14734           SelectTruncOp[0].insert(std::make_pair(User,
14735                                     User->getOperand(0).getValueType()));
14736         if (User->getOperand(1) == Inputs[i])
14737           SelectTruncOp[1].insert(std::make_pair(User,
14738                                     User->getOperand(1).getValueType()));
14739       }
14740     }
14741   }
14742 
14743   for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
14744     for (SDNode *User : PromOps[i].getNode()->users()) {
14745       if (User != N && !Visited.count(User))
14746         return SDValue();
14747 
14748       // If we're going to promote the non-output-value operand(s) of SELECT or
14749       // SELECT_CC, record them for truncation.
14750       if (User->getOpcode() == ISD::SELECT) {
14751         if (User->getOperand(0) == PromOps[i])
14752           SelectTruncOp[0].insert(std::make_pair(User,
14753                                     User->getOperand(0).getValueType()));
14754       } else if (User->getOpcode() == ISD::SELECT_CC) {
14755         if (User->getOperand(0) == PromOps[i])
14756           SelectTruncOp[0].insert(std::make_pair(User,
14757                                     User->getOperand(0).getValueType()));
14758         if (User->getOperand(1) == PromOps[i])
14759           SelectTruncOp[1].insert(std::make_pair(User,
14760                                     User->getOperand(1).getValueType()));
14761       }
14762     }
14763   }
14764 
14765   unsigned PromBits = N->getOperand(0).getValueSizeInBits();
14766   bool ReallyNeedsExt = false;
14767   if (N->getOpcode() != ISD::ANY_EXTEND) {
14768     // If not all of the inputs are already sign/zero extended, then we'll
14769     // still need to do that at the end.
14770     for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14771       if (isa<ConstantSDNode>(Inputs[i]))
14772         continue;
14773 
14774       unsigned OpBits =
14775         Inputs[i].getOperand(0).getValueSizeInBits();
14776       assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
14777 
14778       if ((N->getOpcode() == ISD::ZERO_EXTEND &&
14779            !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
14780                                   APInt::getHighBitsSet(OpBits,
14781                                                         OpBits-PromBits))) ||
14782           (N->getOpcode() == ISD::SIGN_EXTEND &&
14783            DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
14784              (OpBits-(PromBits-1)))) {
14785         ReallyNeedsExt = true;
14786         break;
14787       }
14788     }
14789   }
14790 
14791   // Replace all inputs, either with the truncation operand, or a
14792   // truncation or extension to the final output type.
14793   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14794     // Constant inputs need to be replaced with the to-be-promoted nodes that
14795     // use them because they might have users outside of the cluster of
14796     // promoted nodes.
14797     if (isa<ConstantSDNode>(Inputs[i]))
14798       continue;
14799 
14800     SDValue InSrc = Inputs[i].getOperand(0);
14801     if (Inputs[i].getValueType() == N->getValueType(0))
14802       DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
14803     else if (N->getOpcode() == ISD::SIGN_EXTEND)
14804       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14805         DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
14806     else if (N->getOpcode() == ISD::ZERO_EXTEND)
14807       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14808         DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
14809     else
14810       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14811         DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
14812   }
14813 
14814   std::list<HandleSDNode> PromOpHandles;
14815   for (auto &PromOp : PromOps)
14816     PromOpHandles.emplace_back(PromOp);
14817 
14818   // Replace all operations (these are all the same, but have a different
14819   // (promoted) return type). DAG.getNode will validate that the types of
14820   // a binary operator match, so go through the list in reverse so that
14821   // we've likely promoted both operands first.
14822   while (!PromOpHandles.empty()) {
14823     SDValue PromOp = PromOpHandles.back().getValue();
14824     PromOpHandles.pop_back();
14825 
14826     unsigned C;
14827     switch (PromOp.getOpcode()) {
14828     default:             C = 0; break;
14829     case ISD::SELECT:    C = 1; break;
14830     case ISD::SELECT_CC: C = 2; break;
14831     }
14832 
14833     if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
14834          PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
14835         (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
14836          PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
14837       // The to-be-promoted operands of this node have not yet been
14838       // promoted (this should be rare because we're going through the
14839       // list backward, but if one of the operands has several users in
14840       // this cluster of to-be-promoted nodes, it is possible).
14841       PromOpHandles.emplace_front(PromOp);
14842       continue;
14843     }
14844 
14845     // For SELECT and SELECT_CC nodes, we do a similar check for any
14846     // to-be-promoted comparison inputs.
14847     if (PromOp.getOpcode() == ISD::SELECT ||
14848         PromOp.getOpcode() == ISD::SELECT_CC) {
14849       if ((SelectTruncOp[0].count(PromOp.getNode()) &&
14850            PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
14851           (SelectTruncOp[1].count(PromOp.getNode()) &&
14852            PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
14853         PromOpHandles.emplace_front(PromOp);
14854         continue;
14855       }
14856     }
14857 
14858     SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
14859                                 PromOp.getNode()->op_end());
14860 
14861     // If this node has constant inputs, then they'll need to be promoted here.
14862     for (unsigned i = 0; i < 2; ++i) {
14863       if (!isa<ConstantSDNode>(Ops[C+i]))
14864         continue;
14865       if (Ops[C+i].getValueType() == N->getValueType(0))
14866         continue;
14867 
14868       if (N->getOpcode() == ISD::SIGN_EXTEND)
14869         Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14870       else if (N->getOpcode() == ISD::ZERO_EXTEND)
14871         Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14872       else
14873         Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14874     }
14875 
14876     // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
14877     // truncate them again to the original value type.
14878     if (PromOp.getOpcode() == ISD::SELECT ||
14879         PromOp.getOpcode() == ISD::SELECT_CC) {
14880       auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
14881       if (SI0 != SelectTruncOp[0].end())
14882         Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
14883       auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
14884       if (SI1 != SelectTruncOp[1].end())
14885         Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
14886     }
14887 
14888     DAG.ReplaceAllUsesOfValueWith(PromOp,
14889       DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
14890   }
14891 
14892   // Now we're left with the initial extension itself.
14893   if (!ReallyNeedsExt)
14894     return N->getOperand(0);
14895 
14896   // To zero extend, just mask off everything except for the first bit (in the
14897   // i1 case).
14898   if (N->getOpcode() == ISD::ZERO_EXTEND)
14899     return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
14900                        DAG.getConstant(APInt::getLowBitsSet(
14901                                          N->getValueSizeInBits(0), PromBits),
14902                                        dl, N->getValueType(0)));
14903 
14904   assert(N->getOpcode() == ISD::SIGN_EXTEND &&
14905          "Invalid extension type");
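        // To sign extend, shift the promoted low bits up to the top of the
        // value and arithmetic-shift them back down, replicating the sign bit.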
14906   EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
14907   SDValue ShiftCst =
14908       DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
14909   return DAG.getNode(
14910       ISD::SRA, dl, N->getValueType(0),
14911       DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
14912       ShiftCst);
14913 }
14914 
14915 SDValue PPCTargetLowering::combineSetCC(SDNode *N,
14916                                         DAGCombinerInfo &DCI) const {
14917   assert(N->getOpcode() == ISD::SETCC &&
14918          "Should be called with a SETCC node");
14919 
14920   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
14921   if (CC == ISD::SETNE || CC == ISD::SETEQ) {
14922     SDValue LHS = N->getOperand(0);
14923     SDValue RHS = N->getOperand(1);
14924 
14925     // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
14926     if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
14927         LHS.hasOneUse())
14928       std::swap(LHS, RHS);
14929 
14930     // x == 0-y --> x+y == 0
14931     // x != 0-y --> x+y != 0
14932     if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
14933         RHS.hasOneUse()) {
14934       SDLoc DL(N);
14935       SelectionDAG &DAG = DCI.DAG;
14936       EVT VT = N->getValueType(0);
14937       EVT OpVT = LHS.getValueType();
14938       SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
14939       return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
14940     }
14941   }
14942 
14943   return DAGCombineTruncBoolExt(N, DCI);
14944 }
14945 
14946 // Is this an extending load from an f32 to an f64?
14947 static bool isFPExtLoad(SDValue Op) {
14948   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
14949     return LD->getExtensionType() == ISD::EXTLOAD &&
14950       Op.getValueType() == MVT::f64;
14951   return false;
14952 }
14953 
14954 /// Reduces the number of fp-to-int conversions when building a vector.
14955 ///
14956 /// If this vector is built out of floating to integer conversions,
14957 /// transform it to a vector built out of floating point values followed by a
14958 /// single floating to integer conversion of the vector.
14959 /// Namely  (build_vector (fptosi $A), (fptosi $B), ...)
14960 /// becomes (fptosi (build_vector ($A, $B, ...)))
14961 SDValue PPCTargetLowering::
14962 combineElementTruncationToVectorTruncation(SDNode *N,
14963                                            DAGCombinerInfo &DCI) const {
14964   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14965          "Should be called with a BUILD_VECTOR node");
14966 
14967   SelectionDAG &DAG = DCI.DAG;
14968   SDLoc dl(N);
14969 
14970   SDValue FirstInput = N->getOperand(0);
14971   assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
14972          "The input operand must be an fp-to-int conversion.");
14973 
14974   // This combine happens after legalization so the fp_to_[su]i nodes are
14975   // already converted to PPCISD nodes.
14976   unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
14977   if (FirstConversion == PPCISD::FCTIDZ ||
14978       FirstConversion == PPCISD::FCTIDUZ ||
14979       FirstConversion == PPCISD::FCTIWZ ||
14980       FirstConversion == PPCISD::FCTIWUZ) {
14981     bool IsSplat = true;
14982     bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
14983       FirstConversion == PPCISD::FCTIWUZ;
14984     EVT SrcVT = FirstInput.getOperand(0).getValueType();
14985     SmallVector<SDValue, 4> Ops;
14986     EVT TargetVT = N->getValueType(0);
14987     for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14988       SDValue NextOp = N->getOperand(i);
14989       if (NextOp.getOpcode() != PPCISD::MFVSR)
14990         return SDValue();
14991       unsigned NextConversion = NextOp.getOperand(0).getOpcode();
14992       if (NextConversion != FirstConversion)
14993         return SDValue();
14994       // If we are converting to 32-bit integers, we need to add an FP_ROUND.
14995       // This is not valid if the input was originally double precision. It is
14996       // also not profitable to do unless this is an extending load, in which
14997       // case doing this combine will allow us to combine consecutive loads.
14998       if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
14999         return SDValue();
15000       if (N->getOperand(i) != FirstInput)
15001         IsSplat = false;
15002     }
15003 
15004     // If this is a splat, we leave it as-is since there will be only a single
15005     // fp-to-int conversion followed by a splat of the integer. This is better
15006     // for 32-bit and smaller ints and neutral for 64-bit ints.
15007     if (IsSplat)
15008       return SDValue();
15009 
15010     // Now that we know we have the right type of node, get its operands
15011     for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
15012       SDValue In = N->getOperand(i).getOperand(0);
15013       if (Is32Bit) {
15014         // For 32-bit values, we need to add an FP_ROUND node (if we made it
15015         // here, we know that all inputs are extending loads so this is safe).
15016         if (In.isUndef())
15017           Ops.push_back(DAG.getUNDEF(SrcVT));
15018         else {
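                // The FP_ROUND flag of 1 asserts that the truncation to f32 is
                // exact; this holds because the f64 input came from an
                // extending load of an f32 (checked by isFPExtLoad above).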
15019           SDValue Trunc =
15020               DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
15021                           DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
15022           Ops.push_back(Trunc);
15023         }
15024       } else
15025         Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
15026     }
15027 
15028     unsigned Opcode;
15029     if (FirstConversion == PPCISD::FCTIDZ ||
15030         FirstConversion == PPCISD::FCTIWZ)
15031       Opcode = ISD::FP_TO_SINT;
15032     else
15033       Opcode = ISD::FP_TO_UINT;
15034 
15035     EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
15036     SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
15037     return DAG.getNode(Opcode, dl, TargetVT, BV);
15038   }
15039   return SDValue();
15040 }
15041 
15042 /// Reduce the number of loads when building a vector.
15043 ///
15044 /// Building a vector out of multiple loads can be converted to a load
15045 /// of the vector type if the loads are consecutive. If the loads are
15046 /// consecutive but in descending order, a shuffle is added at the end
15047 /// to reorder the vector.
15048 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
15049   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15050          "Should be called with a BUILD_VECTOR node");
15051 
15052   SDLoc dl(N);
15053 
15054   // Return early for non-byte-sized types, as they can't be consecutive.
15055   if (!N->getValueType(0).getVectorElementType().isByteSized())
15056     return SDValue();
15057 
15058   bool InputsAreConsecutiveLoads = true;
15059   bool InputsAreReverseConsecutive = true;
15060   unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
15061   SDValue FirstInput = N->getOperand(0);
15062   bool IsRoundOfExtLoad = false;
15063   LoadSDNode *FirstLoad = nullptr;
15064 
15065   if (FirstInput.getOpcode() == ISD::FP_ROUND &&
15066       FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
15067     FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
15068     IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
15069   }
15070   // Not a build vector of (possibly fp_rounded) loads.
15071   if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
15072       N->getNumOperands() == 1)
15073     return SDValue();
15074 
15075   if (!IsRoundOfExtLoad)
15076     FirstLoad = cast<LoadSDNode>(FirstInput);
15077 
15078   SmallVector<LoadSDNode *, 4> InputLoads;
15079   InputLoads.push_back(FirstLoad);
15080   for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
15081     // If any inputs are fp_round(extload), they all must be.
15082     if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
15083       return SDValue();
15084 
15085     SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
15086       N->getOperand(i);
15087     if (NextInput.getOpcode() != ISD::LOAD)
15088       return SDValue();
15089 
15090     SDValue PreviousInput =
15091       IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
15092     LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
15093     LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
15094 
15095     // If any inputs are fp_round(extload), they all must be.
15096     if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
15097       return SDValue();
15098 
15099     // We only care about regular loads. The PPC-specific load intrinsics
15100     // will not lead to a merge opportunity.
15101     if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
15102       InputsAreConsecutiveLoads = false;
15103     if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
15104       InputsAreReverseConsecutive = false;
15105 
15106     // Exit early if the loads are neither consecutive nor reverse consecutive.
15107     if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
15108       return SDValue();
15109     InputLoads.push_back(LD2);
15110   }
15111 
15112   assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
15113          "The loads cannot be both consecutive and reverse consecutive.");
15114 
15115   SDValue WideLoad;
15116   SDValue ReturnSDVal;
15117   if (InputsAreConsecutiveLoads) {
15118     assert(FirstLoad && "Input needs to be a LoadSDNode.");
15119     WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
15120                            FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
15121                            FirstLoad->getAlign());
15122     ReturnSDVal = WideLoad;
15123   } else if (InputsAreReverseConsecutive) {
15124     LoadSDNode *LastLoad = InputLoads.back();
15125     assert(LastLoad && "Input needs to be a LoadSDNode.");
15126     WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
15127                            LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
15128                            LastLoad->getAlign());
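          // The wide load starts at the last input, which has the lowest
          // address when the element loads are reverse consecutive, so shuffle
          // the result back into BUILD_VECTOR order (e.g. <3,2,1,0>).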
15129     SmallVector<int, 16> Ops;
15130     for (int i = N->getNumOperands() - 1; i >= 0; i--)
15131       Ops.push_back(i);
15132 
15133     ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
15134                                        DAG.getUNDEF(N->getValueType(0)), Ops);
15135   } else
15136     return SDValue();
15137 
15138   for (auto *LD : InputLoads)
15139     DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
15140   return ReturnSDVal;
15141 }
15142 
15143 // This function adds the vector_shuffle needed to get the elements of
15144 // the vector extract into the correct positions, as specified by the
15145 // CorrectElems encoding.
15146 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
15147                                       SDValue Input, uint64_t Elems,
15148                                       uint64_t CorrectElems) {
15149   SDLoc dl(N);
15150 
15151   unsigned NumElems = Input.getValueType().getVectorNumElements();
15152   SmallVector<int, 16> ShuffleMask(NumElems, -1);
15153 
15154   // Knowing the element indices being extracted from the original
15155   // vector and the order in which they're being inserted, just put
15156   // them at element indices required for the instruction.
15157   for (unsigned i = 0; i < N->getNumOperands(); i++) {
15158     if (DAG.getDataLayout().isLittleEndian())
15159       ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
15160     else
15161       ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
15162     CorrectElems = CorrectElems >> 8;
15163     Elems = Elems >> 8;
15164   }
15165 
15166   SDValue Shuffle =
15167       DAG.getVectorShuffle(Input.getValueType(), dl, Input,
15168                            DAG.getUNDEF(Input.getValueType()), ShuffleMask);
15169 
15170   EVT VT = N->getValueType(0);
15171   SDValue Conv = DAG.getBitcast(VT, Shuffle);
15172 
15173   EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
15174                                Input.getValueType().getVectorElementType(),
15175                                VT.getVectorNumElements());
15176   return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
15177                      DAG.getValueType(ExtVT));
15178 }
15179 
15180 // Look for build vector patterns where input operands come from sign
15181 // extended vector_extract elements of specific indices. If the correct indices
15182 // aren't used, add a vector shuffle to fix up the indices and create
15183 // SIGN_EXTEND_INREG node which selects the vector sign extend instructions
15184 // during instruction selection.
15185 static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
15186   // This array encodes the indices that the vector sign extend instructions
15187   // extract from when extending from one type to another for both BE and LE.
15188   // The right nibble of each byte corresponds to the LE indices,
15189   // and the left nibble of each byte corresponds to the BE indices.
15190   // For example: 0x3074B8FC  byte->word
15191   // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
15192   // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
15193   // For example: 0x000070F8  byte->double word
15194   // For LE: the allowed indices are: 0x0,0x8
15195   // For BE: the allowed indices are: 0x7,0xF
15196   uint64_t TargetElems[] = {
15197       0x3074B8FC, // b->w
15198       0x000070F8, // b->d
15199       0x10325476, // h->w
15200       0x00003074, // h->d
15201       0x00001032, // w->d
15202   };
15203 
15204   uint64_t Elems = 0;
15205   int Index;
15206   SDValue Input;
15207 
15208   auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
15209     if (!Op)
15210       return false;
15211     if (Op.getOpcode() != ISD::SIGN_EXTEND &&
15212         Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
15213       return false;
15214 
15215     // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
15216     // of the right width.
15217     SDValue Extract = Op.getOperand(0);
15218     if (Extract.getOpcode() == ISD::ANY_EXTEND)
15219       Extract = Extract.getOperand(0);
15220     if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
15221       return false;
15222 
15223     ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
15224     if (!ExtOp)
15225       return false;
15226 
15227     Index = ExtOp->getZExtValue();
15228     if (Input && Input != Extract.getOperand(0))
15229       return false;
15230 
15231     if (!Input)
15232       Input = Extract.getOperand(0);
15233 
15234     Elems = Elems << 8;
15235     Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
15236     Elems |= Index;
15237 
15238     return true;
15239   };
15240 
15241   // If the build vector operands aren't sign-extended vector extracts
15242   // of the same input vector, then return.
15243   for (unsigned i = 0; i < N->getNumOperands(); i++) {
15244     if (!isSExtOfVecExtract(N->getOperand(i))) {
15245       return SDValue();
15246     }
15247   }
15248 
15249   // If the vector extract indices are not correct, add the appropriate
15250   // vector_shuffle.
15251   int TgtElemArrayIdx;
15252   int InputSize = Input.getValueType().getScalarSizeInBits();
15253   int OutputSize = N->getValueType(0).getScalarSizeInBits();
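        // The sum of the input and output element widths uniquely identifies
        // the extension: 8+32=40 (b->w), 8+64=72 (b->d), 16+32=48 (h->w),
        // 16+64=80 (h->d), 32+64=96 (w->d).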
15254   if (InputSize + OutputSize == 40)
15255     TgtElemArrayIdx = 0;
15256   else if (InputSize + OutputSize == 72)
15257     TgtElemArrayIdx = 1;
15258   else if (InputSize + OutputSize == 48)
15259     TgtElemArrayIdx = 2;
15260   else if (InputSize + OutputSize == 80)
15261     TgtElemArrayIdx = 3;
15262   else if (InputSize + OutputSize == 96)
15263     TgtElemArrayIdx = 4;
15264   else
15265     return SDValue();
15266 
15267   uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
15268   CorrectElems = DAG.getDataLayout().isLittleEndian()
15269                      ? CorrectElems & 0x0F0F0F0F0F0F0F0F
15270                      : CorrectElems & 0xF0F0F0F0F0F0F0F0;
15271   if (Elems != CorrectElems) {
15272     return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
15273   }
15274 
15275   // Regular lowering will catch cases where a shuffle is not needed.
15276   return SDValue();
15277 }
15278 
15279 // Look for the pattern of a load from a narrow width to i128, feeding
15280 // into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
15281 // (LXVRZX). This node represents a zero extending load that will be matched
15282 // to the Load VSX Vector Rightmost instructions.
15283 static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
15284   SDLoc DL(N);
15285 
15286   // This combine is only eligible for a BUILD_VECTOR of v1i128.
15287   if (N->getValueType(0) != MVT::v1i128)
15288     return SDValue();
15289 
15290   SDValue Operand = N->getOperand(0);
15291   // Proceed with the transformation if the operand to the BUILD_VECTOR
15292   // is a load instruction.
15293   if (Operand.getOpcode() != ISD::LOAD)
15294     return SDValue();
15295 
15296   auto *LD = cast<LoadSDNode>(Operand);
15297   EVT MemoryType = LD->getMemoryVT();
15298 
15299   // This transformation is only valid if we are loading either a byte,
15300   // halfword, word, or doubleword.
15301   bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
15302                      MemoryType == MVT::i32 || MemoryType == MVT::i64;
15303 
15304   // Ensure that the load from the narrow width is being zero extended to i128.
15305   if (!ValidLDType ||
15306       (LD->getExtensionType() != ISD::ZEXTLOAD &&
15307        LD->getExtensionType() != ISD::EXTLOAD))
15308     return SDValue();
15309 
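        // The extra operand carries the width of the memory access in bits so
        // that instruction selection can pick the matching Load VSX Vector
        // Rightmost form (byte, halfword, word, or doubleword).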
15310   SDValue LoadOps[] = {
15311       LD->getChain(), LD->getBasePtr(),
15312       DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
15313 
15314   return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
15315                                  DAG.getVTList(MVT::v1i128, MVT::Other),
15316                                  LoadOps, MemoryType, LD->getMemOperand());
15317 }
15318 
15319 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
15320                                                  DAGCombinerInfo &DCI) const {
15321   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15322          "Should be called with a BUILD_VECTOR node");
15323 
15324   SelectionDAG &DAG = DCI.DAG;
15325   SDLoc dl(N);
15326 
15327   if (!Subtarget.hasVSX())
15328     return SDValue();
15329 
15330   // The target independent DAG combiner will leave a build_vector of
15331   // float-to-int conversions intact. We can generate MUCH better code for
15332   // a float-to-int conversion of a vector of floats.
15333   SDValue FirstInput = N->getOperand(0);
15334   if (FirstInput.getOpcode() == PPCISD::MFVSR) {
15335     SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
15336     if (Reduced)
15337       return Reduced;
15338   }
15339 
15340   // If we're building a vector out of consecutive loads, just load that
15341   // vector type.
15342   SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
15343   if (Reduced)
15344     return Reduced;
15345 
15346   // If we're building a vector out of extended elements from another vector
15347   // we have P9 vector integer extend instructions. The code assumes legal
15348   // input types (i.e. it can't handle things like v4i16) so do not run before
15349   // legalization.
15350   if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
15351     Reduced = combineBVOfVecSExt(N, DAG);
15352     if (Reduced)
15353       return Reduced;
15354   }
15355 
15356   // On Power10, the Load VSX Vector Rightmost instructions can be utilized
15357   // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
15358   // is a load from <valid narrow width> to i128.
15359   if (Subtarget.isISA3_1()) {
15360     SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
15361     if (BVOfZLoad)
15362       return BVOfZLoad;
15363   }
15364 
15365   if (N->getValueType(0) != MVT::v2f64)
15366     return SDValue();
15367 
15368   // Looking for:
15369   // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
15370   if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
15371       FirstInput.getOpcode() != ISD::UINT_TO_FP)
15372     return SDValue();
15373   if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
15374       N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
15375     return SDValue();
15376   if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
15377     return SDValue();
15378 
15379   SDValue Ext1 = FirstInput.getOperand(0);
15380   SDValue Ext2 = N->getOperand(1).getOperand(0);
15381   if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15382       Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
15383     return SDValue();
15384 
15385   ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
15386   ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
15387   if (!Ext1Op || !Ext2Op)
15388     return SDValue();
15389   if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
15390       Ext1.getOperand(0) != Ext2.getOperand(0))
15391     return SDValue();
15392 
15393   int FirstElem = Ext1Op->getZExtValue();
15394   int SecondElem = Ext2Op->getZExtValue();
15395   int SubvecIdx;
15396   if (FirstElem == 0 && SecondElem == 1)
15397     SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
15398   else if (FirstElem == 2 && SecondElem == 3)
15399     SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
15400   else
15401     return SDValue();
15402 
15403   SDValue SrcVec = Ext1.getOperand(0);
15404   auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
15405     PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
15406   return DAG.getNode(NodeType, dl, MVT::v2f64,
15407                      SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
15408 }
15409 
15410 SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
15411                                               DAGCombinerInfo &DCI) const {
15412   assert((N->getOpcode() == ISD::SINT_TO_FP ||
15413           N->getOpcode() == ISD::UINT_TO_FP) &&
15414          "Need an int -> FP conversion node here");
15415 
15416   if (useSoftFloat() || !Subtarget.has64BitSupport())
15417     return SDValue();
15418 
15419   SelectionDAG &DAG = DCI.DAG;
15420   SDLoc dl(N);
15421   SDValue Op(N, 0);
15422 
15423   // Don't handle ppc_fp128 here or conversions that are out-of-range capable
15424   // from the hardware.
15425   if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
15426     return SDValue();
15427   if (!Op.getOperand(0).getValueType().isSimple())
15428     return SDValue();
15429   if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
15430       Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
15431     return SDValue();
15432 
15433   SDValue FirstOperand(Op.getOperand(0));
15434   bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
15435     (FirstOperand.getValueType() == MVT::i8 ||
15436      FirstOperand.getValueType() == MVT::i16);
15437   if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
15438     bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
15439     bool DstDouble = Op.getValueType() == MVT::f64;
15440     unsigned ConvOp = Signed ?
15441       (DstDouble ? PPCISD::FCFID  : PPCISD::FCFIDS) :
15442       (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
15443     SDValue WidthConst =
15444       DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
15445                             dl, false);
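          // LXSIZX is a zero-extending integer load into a VSR; WidthConst is
          // the number of bytes loaded (1 for i8, 2 for i16).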
15446     LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
15447     SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
15448     SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
15449                                          DAG.getVTList(MVT::f64, MVT::Other),
15450                                          Ops, MVT::i8, LDN->getMemOperand());
15451     DAG.makeEquivalentMemoryOrdering(LDN, Ld);
15452 
15453     // For signed conversion, we need to sign-extend the value in the VSR
15454     if (Signed) {
15455       SDValue ExtOps[] = { Ld, WidthConst };
15456       SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
15457       return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
15458     } else
15459       return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
15460   }
15461 
15462 
15463   // For i32 intermediate values, unfortunately, the conversion functions
15464   // leave the upper 32 bits of the value undefined. Within the set of
15465   // scalar instructions, we have no method for zero- or sign-extending the
15466   // value. Thus, we cannot handle i32 intermediate values here.
15467   if (Op.getOperand(0).getValueType() == MVT::i32)
15468     return SDValue();
15469 
15470   assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
15471          "UINT_TO_FP is supported only with FPCVT");
15472 
15473   // If we have FCFIDS, then use it when converting to single-precision.
15474   // Otherwise, convert to double-precision and then round.
15475   unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
15476                        ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
15477                                                             : PPCISD::FCFIDS)
15478                        : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
15479                                                             : PPCISD::FCFID);
15480   MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
15481                   ? MVT::f32
15482                   : MVT::f64;
15483 
15484   // If we're converting from a float, to an int, and back to a float again,
15485   // then we don't need the store/load pair at all.
15486   if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
15487        Subtarget.hasFPCVT()) ||
15488       (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
15489     SDValue Src = Op.getOperand(0).getOperand(0);
15490     if (Src.getValueType() == MVT::f32) {
15491       Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
15492       DCI.AddToWorklist(Src.getNode());
15493     } else if (Src.getValueType() != MVT::f64) {
15494       // Make sure that we don't pick up a ppc_fp128 source value.
15495       return SDValue();
15496     }
15497 
15498     unsigned FCTOp =
15499       Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
15500                                                         PPCISD::FCTIDUZ;
15501 
15502     SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
15503     SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
15504 
15505     if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
15506       FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
15507                        DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
15508       DCI.AddToWorklist(FP.getNode());
15509     }
15510 
15511     return FP;
15512   }
15513 
15514   return SDValue();
15515 }
15516 
15517 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
15518 // builtins) into loads with swaps.
15519 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
15520                                               DAGCombinerInfo &DCI) const {
15521   // Delay VSX load for LE combine until after LegalizeOps to prioritize other
15522   // load combines.
15523   if (DCI.isBeforeLegalizeOps())
15524     return SDValue();
15525 
15526   SelectionDAG &DAG = DCI.DAG;
15527   SDLoc dl(N);
15528   SDValue Chain;
15529   SDValue Base;
15530   MachineMemOperand *MMO;
15531 
15532   switch (N->getOpcode()) {
15533   default:
15534     llvm_unreachable("Unexpected opcode for little endian VSX load");
15535   case ISD::LOAD: {
15536     LoadSDNode *LD = cast<LoadSDNode>(N);
15537     Chain = LD->getChain();
15538     Base = LD->getBasePtr();
15539     MMO = LD->getMemOperand();
15540     // If the MMO suggests this isn't a load of a full vector, leave
15541     // things alone.  For a built-in, we have to make the change for
15542     // correctness, so if there is a size problem, that will be a bug.
15543     if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
15544       return SDValue();
15545     break;
15546   }
15547   case ISD::INTRINSIC_W_CHAIN: {
15548     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
15549     Chain = Intrin->getChain();
15550     // Similarly to the store case below, Intrin->getBasePtr() doesn't get
15551     // us what we want. Get operand 2 instead.
15552     Base = Intrin->getOperand(2);
15553     MMO = Intrin->getMemOperand();
15554     break;
15555   }
15556   }
15557 
15558   MVT VecTy = N->getValueType(0).getSimpleVT();
15559 
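        // Load as LXVD2X (which loads the doublewords in big endian element
        // order) and follow it with an XXSWAPD to restore the little endian
        // element order expected by the rest of the DAG.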
15560   SDValue LoadOps[] = { Chain, Base };
15561   SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
15562                                          DAG.getVTList(MVT::v2f64, MVT::Other),
15563                                          LoadOps, MVT::v2f64, MMO);
15564 
15565   DCI.AddToWorklist(Load.getNode());
15566   Chain = Load.getValue(1);
15567   SDValue Swap = DAG.getNode(
15568       PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
15569   DCI.AddToWorklist(Swap.getNode());
15570 
15571   // Add a bitcast if the resulting load type doesn't match v2f64.
15572   if (VecTy != MVT::v2f64) {
15573     SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
15574     DCI.AddToWorklist(N.getNode());
15575     // Package {bitcast value, swap's chain} to match Load's shape.
15576     return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
15577                        N, Swap.getValue(1));
15578   }
15579 
15580   return Swap;
15581 }
15582 
15583 // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
15584 // builtins) into stores with swaps.
15585 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
15586                                                DAGCombinerInfo &DCI) const {
15587   // Delay VSX store for LE combine until after LegalizeOps to prioritize other
15588   // store combines.
15589   if (DCI.isBeforeLegalizeOps())
15590     return SDValue();
15591 
15592   SelectionDAG &DAG = DCI.DAG;
15593   SDLoc dl(N);
15594   SDValue Chain;
15595   SDValue Base;
15596   unsigned SrcOpnd;
15597   MachineMemOperand *MMO;
15598 
15599   switch (N->getOpcode()) {
15600   default:
15601     llvm_unreachable("Unexpected opcode for little endian VSX store");
15602   case ISD::STORE: {
15603     StoreSDNode *ST = cast<StoreSDNode>(N);
15604     Chain = ST->getChain();
15605     Base = ST->getBasePtr();
15606     MMO = ST->getMemOperand();
15607     SrcOpnd = 1;
15608     // If the MMO suggests this isn't a store of a full vector, leave
15609     // things alone.  For a built-in, we have to make the change for
15610     // correctness, so if there is a size problem, that will be a bug.
15611     if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
15612       return SDValue();
15613     break;
15614   }
15615   case ISD::INTRINSIC_VOID: {
15616     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
15617     Chain = Intrin->getChain();
15618     // Intrin->getBasePtr() oddly does not get what we want.
15619     Base = Intrin->getOperand(3);
15620     MMO = Intrin->getMemOperand();
15621     SrcOpnd = 2;
15622     break;
15623   }
15624   }
15625 
15626   SDValue Src = N->getOperand(SrcOpnd);
15627   MVT VecTy = Src.getValueType().getSimpleVT();
15628 
15629   // All stores are done as v2f64, with a bitcast added when needed.
15630   if (VecTy != MVT::v2f64) {
15631     Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
15632     DCI.AddToWorklist(Src.getNode());
15633   }
15634 
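        // Mirror the load case: swap the doublewords first so that STXVD2X,
        // which stores them in big endian element order, writes the correct
        // little endian memory image.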
15635   SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
15636                              DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
15637   DCI.AddToWorklist(Swap.getNode());
15638   Chain = Swap.getValue(1);
15639   SDValue StoreOps[] = { Chain, Swap, Base };
15640   SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
15641                                           DAG.getVTList(MVT::Other),
15642                                           StoreOps, VecTy, MMO);
15643   DCI.AddToWorklist(Store.getNode());
15644   return Store;
15645 }
15646 
15647 // Handle DAG combine for STORE (FP_TO_INT F).
15648 SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
15649                                                DAGCombinerInfo &DCI) const {
15650   SelectionDAG &DAG = DCI.DAG;
15651   SDLoc dl(N);
15652   unsigned Opcode = N->getOperand(1).getOpcode();
15653   (void)Opcode;
15654   bool Strict = N->getOperand(1)->isStrictFPOpcode();
15655 
15656   assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
15657           Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
15658          && "Not a FP_TO_INT Instruction!");
15659 
15660   SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
15661   EVT Op1VT = N->getOperand(1).getValueType();
15662   EVT ResVT = Val.getValueType();
15663 
15664   if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
15665     return SDValue();
15666 
15667   // Only perform the combine for conversions to i64/i32, or i16/i8 on Power9.
15668   bool ValidTypeForStoreFltAsInt =
15669         (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
15670          (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
15671 
15672   // TODO: Lower conversion from f128 on all VSX targets
15673   if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
15674     return SDValue();
15675 
15676   if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
15677       cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
15678     return SDValue();
15679 
15680   Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);
15681 
15682   // Set number of bytes being converted.
15683   unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
15684   SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
15685                    DAG.getIntPtrConstant(ByteSize, dl, false),
15686                    DAG.getValueType(Op1VT)};
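        // ST_VSR_SCAL_INT stores the converted integer directly from the VSR,
        // avoiding a round trip through a GPR; ByteSize and the original value
        // type let instruction selection pick an appropriately sized store.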
15687 
15688   Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
15689           DAG.getVTList(MVT::Other), Ops,
15690           cast<StoreSDNode>(N)->getMemoryVT(),
15691           cast<StoreSDNode>(N)->getMemOperand());
15692 
15693   return Val;
15694 }
15695 
15696 static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
15697   // Check that the source of the element keeps flipping
15698   // (i.e. Mask[i] < NumElts -> Mask[i+1] >= NumElts).
15699   bool PrevElemFromFirstVec = Mask[0] < NumElts;
15700   for (int i = 1, e = Mask.size(); i < e; i++) {
15701     if (PrevElemFromFirstVec && Mask[i] < NumElts)
15702       return false;
15703     if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
15704       return false;
15705     PrevElemFromFirstVec = !PrevElemFromFirstVec;
15706   }
15707   return true;
15708 }
15709 
15710 static bool isSplatBV(SDValue Op) {
15711   if (Op.getOpcode() != ISD::BUILD_VECTOR)
15712     return false;
15713   SDValue FirstOp;
15714 
15715   // Find first non-undef input.
15716   for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
15717     FirstOp = Op.getOperand(i);
15718     if (!FirstOp.isUndef())
15719       break;
15720   }
15721 
15722   // All inputs are undef or the same as the first non-undef input.
15723   for (int i = 1, e = Op.getNumOperands(); i < e; i++)
15724     if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
15725       return false;
15726   return true;
15727 }
15728 
15729 static SDValue isScalarToVec(SDValue Op) {
15730   if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
15731     return Op;
15732   if (Op.getOpcode() != ISD::BITCAST)
15733     return SDValue();
15734   Op = Op.getOperand(0);
15735   if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
15736     return Op;
15737   return SDValue();
15738 }
15739 
15740 // Fix up the shuffle mask to account for the fact that the result of
15741 // scalar_to_vector is not in lane zero. This just takes all values in
15742 // the ranges specified by the min/max indices and adds the number of
15743 // elements required to ensure each element comes from the respective
15744 // position in the valid lane.
15745 // On little endian, that's just the corresponding element in the other
15746 // half of the vector. On big endian, it is in the same half but right
15747 // justified rather than left justified in that half.
15748 static void fixupShuffleMaskForPermutedSToV(
15749     SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
15750     int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
15751     unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
15752   int LHSEltFixup =
15753       Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
15754   int RHSEltFixup =
15755       Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
15756   for (int I = 0, E = ShuffV.size(); I < E; ++I) {
15757     int Idx = ShuffV[I];
15758     if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
15759       ShuffV[I] += LHSEltFixup;
15760     else if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
15761       ShuffV[I] += RHSEltFixup;
15762   }
15763 }
15764 
15765 // Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
15766 // the original is:
15767 // (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
15768 // In such a case, just change the shuffle mask to extract the element
15769 // from the permuted index.
15770 static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
15771                                const PPCSubtarget &Subtarget) {
15772   SDLoc dl(OrigSToV);
15773   EVT VT = OrigSToV.getValueType();
15774   assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
15775          "Expecting a SCALAR_TO_VECTOR here");
15776   SDValue Input = OrigSToV.getOperand(0);
15777 
15778   if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
15779     ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
15780     SDValue OrigVector = Input.getOperand(0);
15781 
15782     // Can't handle non-const element indices or different vector types
15783     // for the input to the extract and the output of the scalar_to_vector.
15784     if (Idx && VT == OrigVector.getValueType()) {
15785       unsigned NumElts = VT.getVectorNumElements();
15786       assert(
15787           NumElts > 1 &&
15788           "Cannot produce a permuted scalar_to_vector for one element vector");
15789       SmallVector<int, 16> NewMask(NumElts, -1);
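            // Rather than emitting a permuted scalar_to_vector, shuffle the
            // original vector so that element Idx lands in the lane a permuted
            // scalar_to_vector would use: element NumElts / 2 on little endian
            // and NumElts / 2 - 1 on big endian.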
15790       unsigned ResultInElt = NumElts / 2;
15791       ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
15792       NewMask[ResultInElt] = Idx->getZExtValue();
15793       return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
15794     }
15795   }
15796   return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
15797                      OrigSToV.getOperand(0));
15798 }
15799 
15800 static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
15801                                  int HalfVec, int LHSLastElementDefined,
15802                                  int RHSLastElementDefined) {
15803   for (int Index : ShuffV) {
15804     if (Index < 0) // Skip explicitly undefined mask indices.
15805       continue;
15806     // Handle first input vector of the vector_shuffle.
15807     if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
15808         (Index > LHSLastElementDefined))
15809       return false;
15810     // Handle second input vector of the vector_shuffle.
15811     if ((RHSLastElementDefined >= 0) &&
15812         (Index > HalfVec + RHSLastElementDefined))
15813       return false;
15814   }
15815   return true;
15816 }
15817 
15818 static SDValue generateSToVPermutedForVecShuffle(
15819     int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
15820     int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
15821     SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
15822   EVT VecShuffOperandType = VecShuffOperand.getValueType();
15823   // Set up the values for the shuffle vector fixup.
15824   NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
15825   // The last element depends on whether the input comes from the LHS or RHS.
15826   //
15827   // For example:
15828   // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15829   //
15830   // For the LHS: The last element that comes from the LHS is actually 0, not 3
15831   // because elements 1 and higher of a scalar_to_vector are undefined.
15832   // For the RHS: The last element that comes from the RHS is actually 5, not 7
15833   // because elements 1 and higher of a scalar_to_vector are undefined.
15834   // It is also not 4 because the original scalar_to_vector is wider and
15835   // actually contains two i32 elements.
15836   LastElt = (uint64_t)ScalarSize > ShuffleEltWidth
15837                 ? ScalarSize / ShuffleEltWidth - 1 + FirstElt
15838                 : FirstElt;
15839   SDValue SToVPermuted = getSToVPermuted(SToVNode, DAG, Subtarget);
15840   if (SToVPermuted.getValueType() != VecShuffOperandType)
15841     SToVPermuted = DAG.getBitcast(VecShuffOperandType, SToVPermuted);
15842   return SToVPermuted;
15843 }
15844 
15845 // On little endian subtargets, combine shuffles such as:
15846 // vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
15847 // into:
15848 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
15849 // because the latter can be matched to a single instruction merge.
15850 // Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
15851 // to put the value into element zero. Adjust the shuffle mask so that the
15852 // vector can remain in permuted form (to prevent a swap prior to a shuffle).
15853 // On big endian targets, this is still useful for SCALAR_TO_VECTOR
15854 // nodes with elements smaller than doubleword because all the ways
15855 // of getting scalar data into a vector register put the value in the
15856 // rightmost element of the left half of the vector.
15857 SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
15858                                                 SelectionDAG &DAG) const {
15859   SDValue LHS = SVN->getOperand(0);
15860   SDValue RHS = SVN->getOperand(1);
15861   auto Mask = SVN->getMask();
15862   int NumElts = LHS.getValueType().getVectorNumElements();
15863   SDValue Res(SVN, 0);
15864   SDLoc dl(SVN);
15865   bool IsLittleEndian = Subtarget.isLittleEndian();
15866 
15867   // On big endian targets this is only useful for subtargets with direct moves.
15868   // On little endian targets it would be useful for all subtargets with VSX.
15869   // However adding special handling for LE subtargets without direct moves
15870   // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
15871   // which includes direct moves.
15872   if (!Subtarget.hasDirectMove())
15873     return Res;
15874 
15875   // If this is not a shuffle of a shuffle and the first element comes from
15876   // the second vector, canonicalize to the commuted form. This will make it
15877   // more likely to match one of the single instruction patterns.
15878   if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
15879       RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
15880     std::swap(LHS, RHS);
15881     Res = DAG.getCommutedVectorShuffle(*SVN);
15882     Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
15883   }
15884 
15885   // Adjust the shuffle mask if either input vector comes from a
15886   // SCALAR_TO_VECTOR and keep the respective input vector in permuted
15887   // form (to prevent the need for a swap).
15888   SmallVector<int, 16> ShuffV(Mask);
15889   SDValue SToVLHS = isScalarToVec(LHS);
15890   SDValue SToVRHS = isScalarToVec(RHS);
15891   if (SToVLHS || SToVRHS) {
15892     EVT VT = SVN->getValueType(0);
15893     uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
15894     int ShuffleNumElts = ShuffV.size();
15895     int HalfVec = ShuffleNumElts / 2;
15896     // The width of the "valid lane" (i.e. the lane that contains the value that
15897     // is vectorized) needs to be expressed in terms of the number of elements
15898     // of the shuffle. It is thereby the ratio of the values before and after
15899     // any bitcast, which will be set later on if the LHS or RHS are
15900     // SCALAR_TO_VECTOR nodes.
15901     unsigned LHSNumValidElts = HalfVec;
15902     unsigned RHSNumValidElts = HalfVec;
15903 
15904     // Initially assume that neither input is permuted. These will be adjusted
15905     // accordingly if either input is. Note that -1 means that all elements
15906     // are undefined.
15907     int LHSFirstElt = 0;
15908     int RHSFirstElt = ShuffleNumElts;
15909     int LHSLastElt = -1;
15910     int RHSLastElt = -1;
15911 
15912     // Get the permuted scalar to vector nodes for the source(s) that come from
15913     // ISD::SCALAR_TO_VECTOR.
15914     // On big endian systems, this only makes sense for element sizes smaller
15915     // than 64 bits since for 64-bit elements, all instructions already put
15916     // the value into element zero. Since scalar size of LHS and RHS may differ
15917     // after isScalarToVec, this should be checked using their own sizes.
15918     int LHSScalarSize = 0;
15919     int RHSScalarSize = 0;
15920     if (SToVLHS) {
15921       LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
15922       if (!IsLittleEndian && LHSScalarSize >= 64)
15923         return Res;
15924     }
15925     if (SToVRHS) {
15926       RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
15927       if (!IsLittleEndian && RHSScalarSize >= 64)
15928         return Res;
15929     }
15930     if (LHSScalarSize != 0)
15931       LHS = generateSToVPermutedForVecShuffle(
15932           LHSScalarSize, ShuffleEltWidth, LHSNumValidElts, LHSFirstElt,
15933           LHSLastElt, LHS, SToVLHS, DAG, Subtarget);
15934     if (RHSScalarSize != 0)
15935       RHS = generateSToVPermutedForVecShuffle(
15936           RHSScalarSize, ShuffleEltWidth, RHSNumValidElts, RHSFirstElt,
15937           RHSLastElt, RHS, SToVRHS, DAG, Subtarget);
15938 
15939     if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
15940       return Res;
15941 
15942     // Fix up the shuffle mask to reflect where the desired element actually is.
15943     // The minimum and maximum indices that correspond to element zero for both
15944     // the LHS and RHS are computed and will control which shuffle mask entries
15945     // are to be changed. For example, if the RHS is permuted, any shuffle mask
15946     // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
15947     fixupShuffleMaskForPermutedSToV(
15948         ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
15949         LHSNumValidElts, RHSNumValidElts, Subtarget);
15950     Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15951 
15952     // We may have simplified away the shuffle. We won't be able to do anything
15953     // further with it here.
15954     if (!isa<ShuffleVectorSDNode>(Res))
15955       return Res;
15956     Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
15957   }
15958 
15959   SDValue TheSplat = IsLittleEndian ? RHS : LHS;
15960   // The common case after we commuted the shuffle is that the RHS is a splat
15961   // and we have elements coming in from the splat at indices that are not
15962   // conducive to using a merge.
15963   // Example:
15964   // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
15965   if (!isSplatBV(TheSplat))
15966     return Res;
15967 
15968   // We are looking for a mask such that all even elements are from
15969   // one vector and all odd elements from the other.
15970   if (!isAlternatingShuffMask(Mask, NumElts))
15971     return Res;
15972 
15973   // Adjust the mask so we are pulling in the same index from the splat
15974   // as the index from the interesting vector in consecutive elements.
15975   if (IsLittleEndian) {
15976     // Example (even elements from first vector):
15977     // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
15978     if (Mask[0] < NumElts)
15979       for (int i = 1, e = Mask.size(); i < e; i += 2) {
15980         if (ShuffV[i] < 0)
15981           continue;
15982         // If element from non-splat is undef, pick first element from splat.
15983         ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
15984       }
15985     // Example (odd elements from first vector):
15986     // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
15987     else
15988       for (int i = 0, e = Mask.size(); i < e; i += 2) {
15989         if (ShuffV[i] < 0)
15990           continue;
15991         // If element from non-splat is undef, pick first element from splat.
15992         ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
15993       }
15994   } else {
15995     // Example (even elements from first vector):
15996     // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
15997     if (Mask[0] < NumElts)
15998       for (int i = 0, e = Mask.size(); i < e; i += 2) {
15999         if (ShuffV[i] < 0)
16000           continue;
16001         // If element from non-splat is undef, pick first element from splat.
16002         ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
16003       }
16004     // Example (odd elements from first vector):
16005     // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
16006     else
16007       for (int i = 1, e = Mask.size(); i < e; i += 2) {
16008         if (ShuffV[i] < 0)
16009           continue;
16010         // If element from non-splat is undef, pick first element from splat.
16011         ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
16012       }
16013   }
16014 
16015   // If the RHS has undefs, we need to remove them since we may have created
16016   // a shuffle that adds those instead of the splat value.
16017   SDValue SplatVal =
16018       cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
16019   TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
16020 
16021   if (IsLittleEndian)
16022     RHS = TheSplat;
16023   else
16024     LHS = TheSplat;
16025   return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
16026 }
16027 
16028 SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
16029                                                 LSBaseSDNode *LSBase,
16030                                                 DAGCombinerInfo &DCI) const {
16031   assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
16032         "Not a reverse memop pattern!");
16033 
16034   auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
16035     auto Mask = SVN->getMask();
16036     int i = 0;
16037     auto I = Mask.rbegin();
16038     auto E = Mask.rend();
16039 
16040     for (; I != E; ++I) {
16041       if (*I != i)
16042         return false;
16043       i++;
16044     }
16045     return true;
16046   };
16047 
16048   SelectionDAG &DAG = DCI.DAG;
16049   EVT VT = SVN->getValueType(0);
16050 
16051   if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
16052     return SDValue();
16053 
16054   // Before P9, we have PPCVSXSwapRemoval pass to hack the element order.
16055   // See comment in PPCVSXSwapRemoval.cpp.
16056   // It is conflict with PPCVSXSwapRemoval opt. So we don't do it.
16057   if (!Subtarget.hasP9Vector())
16058     return SDValue();
16059 
16060   if (!IsElementReverse(SVN))
16061     return SDValue();
16062 
16063   if (LSBase->getOpcode() == ISD::LOAD) {
16064     // If result 0 of the load has any user other than the shufflevector
16065     // instruction, it is not profitable to replace the shufflevector with
16066     // a reverse load.
16067     for (SDUse &Use : LSBase->uses())
16068       if (Use.getResNo() == 0 &&
16069           Use.getUser()->getOpcode() != ISD::VECTOR_SHUFFLE)
16070         return SDValue();
16071 
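          // LOAD_VEC_BE loads the vector in big endian element order, which on
          // a little endian target yields the elements reversed, so the
          // element-reverse shuffle folds into the load itself.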
16072     SDLoc dl(LSBase);
16073     SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
16074     return DAG.getMemIntrinsicNode(
16075         PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
16076         LSBase->getMemoryVT(), LSBase->getMemOperand());
16077   }
16078 
16079   if (LSBase->getOpcode() == ISD::STORE) {
16080     // If there are other uses of the shuffle, the swap cannot be avoided.
16081     // Forcing the use of an X-Form (since swapped stores only have
16082     // X-Forms) without removing the swap is unprofitable.
16083     if (!SVN->hasOneUse())
16084       return SDValue();
16085 
16086     SDLoc dl(LSBase);
16087     SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
16088                           LSBase->getBasePtr()};
16089     return DAG.getMemIntrinsicNode(
16090         PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
16091         LSBase->getMemoryVT(), LSBase->getMemOperand());
16092   }
16093 
16094   llvm_unreachable("Expected a load or store node here");
16095 }
16096 
16097 static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
16098   unsigned IntrinsicID = Intrin.getConstantOperandVal(1);
16099   if (IntrinsicID == Intrinsic::ppc_stdcx)
16100     StoreWidth = 8;
16101   else if (IntrinsicID == Intrinsic::ppc_stwcx)
16102     StoreWidth = 4;
16103   else if (IntrinsicID == Intrinsic::ppc_sthcx)
16104     StoreWidth = 2;
16105   else if (IntrinsicID == Intrinsic::ppc_stbcx)
16106     StoreWidth = 1;
16107   else
16108     return false;
16109   return true;
16110 }
16111 
16112 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
16113                                              DAGCombinerInfo &DCI) const {
16114   SelectionDAG &DAG = DCI.DAG;
16115   SDLoc dl(N);
16116   switch (N->getOpcode()) {
16117   default: break;
16118   case ISD::ADD:
16119     return combineADD(N, DCI);
16120   case ISD::AND: {
16121     // We don't want (and (zext (shift...)), C) if C fits in the width of the
16122     // original input as that will prevent us from selecting optimal rotates.
16123     // This only matters if the input to the extend is i32 widened to i64.
16124     SDValue Op1 = N->getOperand(0);
16125     SDValue Op2 = N->getOperand(1);
16126     if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
16127          Op1.getOpcode() != ISD::ANY_EXTEND) ||
16128         !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
16129         Op1.getOperand(0).getValueType() != MVT::i32)
16130       break;
16131     SDValue NarrowOp = Op1.getOperand(0);
16132     if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
16133         NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
16134       break;
16135 
16136     uint64_t Imm = Op2->getAsZExtVal();
16137     // Make sure that the constant is narrow enough to fit in the narrow type.
16138     if (!isUInt<32>(Imm))
16139       break;
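          // Perform the AND in i32 so that a 32-bit rotate-and-mask can still be
          // selected: (and (*ext (shift X)), C) -> (zext (and (shift X), C)).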
16140     SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
16141     SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
16142     return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
16143   }
16144   case ISD::SHL:
16145     return combineSHL(N, DCI);
16146   case ISD::SRA:
16147     return combineSRA(N, DCI);
16148   case ISD::SRL:
16149     return combineSRL(N, DCI);
16150   case ISD::MUL:
16151     return combineMUL(N, DCI);
16152   case ISD::FMA:
16153   case PPCISD::FNMSUB:
16154     return combineFMALike(N, DCI);
16155   case PPCISD::SHL:
16156     if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
16157       return N->getOperand(0);
16158     break;
16159   case PPCISD::SRL:
16160     if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
16161       return N->getOperand(0);
16162     break;
16163   case PPCISD::SRA:
16164     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
16165       if (C->isZero() ||  //  0 >>s V -> 0.
16166           C->isAllOnes()) // -1 >>s V -> -1.
16167         return N->getOperand(0);
16168     }
16169     break;
16170   case ISD::SIGN_EXTEND:
16171   case ISD::ZERO_EXTEND:
16172   case ISD::ANY_EXTEND:
16173     return DAGCombineExtBoolTrunc(N, DCI);
16174   case ISD::TRUNCATE:
16175     return combineTRUNCATE(N, DCI);
16176   case ISD::SETCC:
16177     if (SDValue CSCC = combineSetCC(N, DCI))
16178       return CSCC;
16179     [[fallthrough]];
16180   case ISD::SELECT_CC:
16181     return DAGCombineTruncBoolExt(N, DCI);
16182   case ISD::SINT_TO_FP:
16183   case ISD::UINT_TO_FP:
16184     return combineFPToIntToFP(N, DCI);
16185   case ISD::VECTOR_SHUFFLE:
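          // If the shuffle's only input is a normal load, try to fold the
          // element-reversing shuffle into a reverse memory operation instead
          // (see combineVReverseMemOP).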
16186     if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
16187       LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
16188       return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
16189     }
16190     return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
16191   case ISD::STORE: {
16192 
16193     EVT Op1VT = N->getOperand(1).getValueType();
16194     unsigned Opcode = N->getOperand(1).getOpcode();
16195 
16196     if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
16197         Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
16198       SDValue Val = combineStoreFPToInt(N, DCI);
16199       if (Val)
16200         return Val;
16201     }
16202 
16203     if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
16204       ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
16205       SDValue Val = combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
16206       if (Val)
16207         return Val;
16208     }
16209 
16210     // Turn STORE (BSWAP) -> sthbrx/stwbrx.
16211     if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
16212         N->getOperand(1).getNode()->hasOneUse() &&
16213         (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
16214          (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
16215 
16216       // STBRX can only handle simple types, and it makes no sense to store
16217       // fewer than two bytes in byte-reversed order.
16218       EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
16219       if (mVT.isExtended() || mVT.getSizeInBits() < 16)
16220         break;
16221 
16222       SDValue BSwapOp = N->getOperand(1).getOperand(0);
16223       // Do an any-extend to 32-bits if this is a half-word input.
16224       if (BSwapOp.getValueType() == MVT::i16)
16225         BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
16226 
16227       // If the type of the BSWAP operand is wider than the stored memory
16228       // width, it needs to be shifted right before the STBRX.
16229       if (Op1VT.bitsGT(mVT)) {
16230         int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
16231         BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
16232                               DAG.getConstant(Shift, dl, MVT::i32));
16233         // Need to truncate if this is a bswap of i64 stored as i32/i16.
16234         if (Op1VT == MVT::i64)
16235           BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
16236       }
16237 
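            // Operands for the byte-reversed store: chain, value to store, pointer,
            // and a VT node giving the memory width.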
16238       SDValue Ops[] = {
16239         N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
16240       };
16241       return
16242         DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
16243                                 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
16244                                 cast<StoreSDNode>(N)->getMemOperand());
16245     }
16246 
16247     // STORE Constant:i32<0>  ->  STORE<trunc to i32> Constant:i64<0>
16248     // This increases the chance of CSEing the constant materialization.
16249     if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
16250         isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
16251       // Need to sign-extend to 64 bits to handle negative values.
16252       EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
16253       uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
16254                                     MemVT.getSizeInBits());
16255       SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
16256 
16257       // DAG.getTruncStore() can't be used here because it doesn't accept
16258       // the general (base + offset) addressing mode.
16259       // So we use UpdateNodeOperands and setTruncatingStore instead.
16260       DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
16261                              N->getOperand(3));
16262       cast<StoreSDNode>(N)->setTruncatingStore(true);
16263       return SDValue(N, 0);
16264     }
16265 
16266     // For little endian, VSX stores require generating xxswapd/stxvd2x.
16267     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
16268     if (Op1VT.isSimple()) {
16269       MVT StoreVT = Op1VT.getSimpleVT();
16270       if (Subtarget.needsSwapsForVSXMemOps() &&
16271           (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
16272            StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
16273         return expandVSXStoreForLE(N, DCI);
16274     }
16275     break;
16276   }
16277   case ISD::LOAD: {
16278     LoadSDNode *LD = cast<LoadSDNode>(N);
16279     EVT VT = LD->getValueType(0);
16280 
16281     // For little endian, VSX loads require generating lxvd2x/xxswapd.
16282     // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
16283     if (VT.isSimple()) {
16284       MVT LoadVT = VT.getSimpleVT();
16285       if (Subtarget.needsSwapsForVSXMemOps() &&
16286           (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
16287            LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
16288         return expandVSXLoadForLE(N, DCI);
16289     }
16290 
16291     // We sometimes end up with a 64-bit integer load, from which we extract
16292     // two single-precision floating-point numbers. This happens with
16293     // std::complex<float>, and other similar structures, because of the way we
16294     // canonicalize structure copies. However, if we lack direct moves,
16295     // then the final bitcasts from the extracted integer values to the
16296     // floating-point numbers turn into store/load pairs. Even with direct moves,
16297     // just loading the two floating-point numbers is likely better.
16298     auto ReplaceTwoFloatLoad = [&]() {
16299       if (VT != MVT::i64)
16300         return false;
16301 
16302       if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
16303           LD->isVolatile())
16304         return false;
16305 
16306       //  We're looking for a sequence like this:
16307       //  t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
16308       //      t16: i64 = srl t13, Constant:i32<32>
16309       //    t17: i32 = truncate t16
16310       //  t18: f32 = bitcast t17
16311       //    t19: i32 = truncate t13
16312       //  t20: f32 = bitcast t19
16313 
16314       if (!LD->hasNUsesOfValue(2, 0))
16315         return false;
16316 
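            // Find the two users of the load's value result (result 0), skipping
            // over any users of the chain result.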
16317       auto UI = LD->user_begin();
16318       while (UI.getUse().getResNo() != 0) ++UI;
16319       SDNode *Trunc = *UI++;
16320       while (UI.getUse().getResNo() != 0) ++UI;
16321       SDNode *RightShift = *UI;
16322       if (Trunc->getOpcode() != ISD::TRUNCATE)
16323         std::swap(Trunc, RightShift);
16324 
16325       if (Trunc->getOpcode() != ISD::TRUNCATE ||
16326           Trunc->getValueType(0) != MVT::i32 ||
16327           !Trunc->hasOneUse())
16328         return false;
16329       if (RightShift->getOpcode() != ISD::SRL ||
16330           !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
16331           RightShift->getConstantOperandVal(1) != 32 ||
16332           !RightShift->hasOneUse())
16333         return false;
16334 
16335       SDNode *Trunc2 = *RightShift->user_begin();
16336       if (Trunc2->getOpcode() != ISD::TRUNCATE ||
16337           Trunc2->getValueType(0) != MVT::i32 ||
16338           !Trunc2->hasOneUse())
16339         return false;
16340 
16341       SDNode *Bitcast = *Trunc->user_begin();
16342       SDNode *Bitcast2 = *Trunc2->user_begin();
16343 
16344       if (Bitcast->getOpcode() != ISD::BITCAST ||
16345           Bitcast->getValueType(0) != MVT::f32)
16346         return false;
16347       if (Bitcast2->getOpcode() != ISD::BITCAST ||
16348           Bitcast2->getValueType(0) != MVT::f32)
16349         return false;
16350 
16351       if (Subtarget.isLittleEndian())
16352         std::swap(Bitcast, Bitcast2);
16353 
16354       // Bitcast has the second float (in memory-layout order) and Bitcast2
16355       // has the first one.
16356 
16357       SDValue BasePtr = LD->getBasePtr();
16358       if (LD->isIndexed()) {
16359         assert(LD->getAddressingMode() == ISD::PRE_INC &&
16360                "Non-pre-inc AM on PPC?");
16361         BasePtr =
16362           DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
16363                       LD->getOffset());
16364       }
16365 
16366       auto MMOFlags =
16367           LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
16368       SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
16369                                       LD->getPointerInfo(), LD->getAlign(),
16370                                       MMOFlags, LD->getAAInfo());
16371       SDValue AddPtr =
16372         DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
16373                     BasePtr, DAG.getIntPtrConstant(4, dl));
16374       SDValue FloatLoad2 = DAG.getLoad(
16375           MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
16376           LD->getPointerInfo().getWithOffset(4),
16377           commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
16378 
16379       if (LD->isIndexed()) {
16380         // Note that DAGCombine should re-form any pre-increment load(s) from
16381         // what is produced here if that makes sense.
16382         DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
16383       }
16384 
16385       DCI.CombineTo(Bitcast2, FloatLoad);
16386       DCI.CombineTo(Bitcast, FloatLoad2);
16387 
16388       DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
16389                                     SDValue(FloatLoad2.getNode(), 1));
16390       return true;
16391     };
16392 
16393     if (ReplaceTwoFloatLoad())
16394       return SDValue(N, 0);
16395 
16396     EVT MemVT = LD->getMemoryVT();
16397     Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
16398     Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
16399     if (LD->isUnindexed() && VT.isVector() &&
16400         ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
16401           // P8 and later hardware should just use LOAD.
16402           !Subtarget.hasP8Vector() &&
16403           (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
16404            VT == MVT::v4f32))) &&
16405         LD->getAlign() < ABIAlignment) {
16406       // This is a type-legal unaligned Altivec load.
16407       SDValue Chain = LD->getChain();
16408       SDValue Ptr = LD->getBasePtr();
16409       bool isLittleEndian = Subtarget.isLittleEndian();
16410 
16411       // This implements the loading of unaligned vectors as described in
16412       // the venerable Apple Velocity Engine overview. Specifically:
16413       // https://developer.apple.com/hardwaredrivers/ve/alignment.html
16414       // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
16415       //
16416       // The general idea is to expand a sequence of one or more unaligned
16417       // loads into an alignment-based permutation-control instruction (lvsl
16418       // or lvsr), a series of regular vector loads (which always truncate
16419       // their input address to an aligned address), and a series of
16420       // permutations.  The results of these permutations are the requested
16421       // loaded values.  The trick is that the last "extra" load is not taken
16422       // from the address you might suspect (sizeof(vector) bytes after the
16423       // last requested load), but rather sizeof(vector) - 1 bytes after the
16424       // last requested vector. The point of this is to avoid a page fault if
16425       // the base address happened to be aligned. This works because if the
16426       // base address is aligned, then adding less than a full vector length
16427       // will cause the last vector in the sequence to be (re)loaded.
16428       // Otherwise, the next vector will be fetched as you might suspect was
16429       // necessary.
16430 
16431       // We might be able to reuse the permutation generation from
16432       // a different base address offset from this one by an aligned amount.
16433       // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
16434       // optimization later.
16435       Intrinsic::ID Intr, IntrLD, IntrPerm;
16436       MVT PermCntlTy, PermTy, LDTy;
16437       Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
16438                             : Intrinsic::ppc_altivec_lvsl;
16439       IntrLD = Intrinsic::ppc_altivec_lvx;
16440       IntrPerm = Intrinsic::ppc_altivec_vperm;
16441       PermCntlTy = MVT::v16i8;
16442       PermTy = MVT::v4i32;
16443       LDTy = MVT::v4i32;
16444 
16445       SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
16446 
16447       // Create the new MMO for the new base load. It is like the original MMO,
16448       // but represents an area in memory almost twice the vector size centered
16449       // on the original address. If the address is unaligned, we might start
16450       // reading up to (sizeof(vector)-1) bytes below the address of the
16451       // original unaligned load.
16452       MachineFunction &MF = DAG.getMachineFunction();
16453       MachineMemOperand *BaseMMO =
16454         MF.getMachineMemOperand(LD->getMemOperand(),
16455                                 -(int64_t)MemVT.getStoreSize()+1,
16456                                 2*MemVT.getStoreSize()-1);
16457 
16458       // Create the new base load.
16459       SDValue LDXIntID =
16460           DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
16461       SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
16462       SDValue BaseLoad =
16463         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
16464                                 DAG.getVTList(PermTy, MVT::Other),
16465                                 BaseLoadOps, LDTy, BaseMMO);
16466 
16467       // Note that the value of IncOffset (which is provided to the next
16468       // load's pointer info offset value, and thus used to calculate the
16469       // alignment), and the value of IncValue (which is actually used to
16470       // increment the pointer value) are different! This is because we
16471       // require the next load to appear to be aligned, even though it
16472       // is actually offset from the base pointer by a lesser amount.
16473       int IncOffset = VT.getSizeInBits() / 8;
16474       int IncValue = IncOffset;
16475 
16476       // Walk (both up and down) the chain looking for another load at the real
16477       // (aligned) offset (the alignment of the other load does not matter in
16478       // this case). If found, then do not use the offset reduction trick, as
16479       // that will prevent the loads from being later combined (as they would
16480       // otherwise be duplicates).
16481       if (!findConsecutiveLoad(LD, DAG))
16482         --IncValue;
16483 
16484       SDValue Increment =
16485           DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
16486       Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
16487 
16488       MachineMemOperand *ExtraMMO =
16489         MF.getMachineMemOperand(LD->getMemOperand(),
16490                                 1, 2*MemVT.getStoreSize()-1);
16491       SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
16492       SDValue ExtraLoad =
16493         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
16494                                 DAG.getVTList(PermTy, MVT::Other),
16495                                 ExtraLoadOps, LDTy, ExtraMMO);
16496 
16497       SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
16498         BaseLoad.getValue(1), ExtraLoad.getValue(1));
16499 
16500       // Because vperm has a big-endian bias, we must reverse the order
16501       // of the input vectors and complement the permute control vector
16502       // when generating little endian code.  We have already handled the
16503       // latter by using lvsr instead of lvsl, so just reverse BaseLoad
16504       // and ExtraLoad here.
16505       SDValue Perm;
16506       if (isLittleEndian)
16507         Perm = BuildIntrinsicOp(IntrPerm,
16508                                 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
16509       else
16510         Perm = BuildIntrinsicOp(IntrPerm,
16511                                 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
16512 
16513       if (VT != PermTy)
16514         Perm = Subtarget.hasAltivec()
16515                    ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
16516                    : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
16517                                  DAG.getTargetConstant(1, dl, MVT::i64));
16518                                // second argument is 1 because this rounding
16519                                // is always exact.
16520 
16521       // The output of the permutation is our loaded result, the TokenFactor is
16522       // our new chain.
16523       DCI.CombineTo(N, Perm, TF);
16524       return SDValue(N, 0);
16525     }
16526     }
16527     break;
16528     case ISD::INTRINSIC_WO_CHAIN: {
16529       bool isLittleEndian = Subtarget.isLittleEndian();
16530       unsigned IID = N->getConstantOperandVal(0);
16531       Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
16532                                            : Intrinsic::ppc_altivec_lvsl);
16533       if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
16534         SDValue Add = N->getOperand(1);
16535 
16536         int Bits = 4 /* 16 byte alignment */;
16537 
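              // If the low 4 bits of the addend are known to be zero, the addend
              // is a multiple of 16 and does not change the lvsl/lvsr result, so
              // an existing lvsl/lvsr of the base pointer alone can be reused.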
16538         if (DAG.MaskedValueIsZero(Add->getOperand(1),
16539                                   APInt::getAllOnes(Bits /* alignment */)
16540                                       .zext(Add.getScalarValueSizeInBits()))) {
16541           SDNode *BasePtr = Add->getOperand(0).getNode();
16542           for (SDNode *U : BasePtr->users()) {
16543             if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16544                 U->getConstantOperandVal(0) == IID) {
16545               // We've found another LVSL/LVSR, and this address is an aligned
16546               // multiple of that one. The results will be the same, so use the
16547               // one we've just found instead.
16548 
16549               return SDValue(U, 0);
16550             }
16551           }
16552         }
16553 
16554         if (isa<ConstantSDNode>(Add->getOperand(1))) {
16555           SDNode *BasePtr = Add->getOperand(0).getNode();
16556           for (SDNode *U : BasePtr->users()) {
16557             if (U->getOpcode() == ISD::ADD &&
16558                 isa<ConstantSDNode>(U->getOperand(1)) &&
16559                 (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) %
16560                         (1ULL << Bits) ==
16561                     0) {
16562               SDNode *OtherAdd = U;
16563               for (SDNode *V : OtherAdd->users()) {
16564                 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16565                     V->getConstantOperandVal(0) == IID) {
16566                   return SDValue(V, 0);
16567                 }
16568               }
16569             }
16570           }
16571         }
16572       }
16573 
16574       // Combine vmaxsw/h/b(a, the negation of a) to abs(a).
16575       // This exposes the vabsduw/h/b opportunity for downstream combines.
16576       if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
16577           (IID == Intrinsic::ppc_altivec_vmaxsw ||
16578            IID == Intrinsic::ppc_altivec_vmaxsh ||
16579            IID == Intrinsic::ppc_altivec_vmaxsb)) {
16580         SDValue V1 = N->getOperand(1);
16581         SDValue V2 = N->getOperand(2);
16582         if ((V1.getSimpleValueType() == MVT::v4i32 ||
16583              V1.getSimpleValueType() == MVT::v8i16 ||
16584              V1.getSimpleValueType() == MVT::v16i8) &&
16585             V1.getSimpleValueType() == V2.getSimpleValueType()) {
16586           // (0-a, a)
16587           if (V1.getOpcode() == ISD::SUB &&
16588               ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
16589               V1.getOperand(1) == V2) {
16590             return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
16591           }
16592           // (a, 0-a)
16593           if (V2.getOpcode() == ISD::SUB &&
16594               ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
16595               V2.getOperand(1) == V1) {
16596             return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
16597           }
16598           // (x-y, y-x)
16599           if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
16600               V1.getOperand(0) == V2.getOperand(1) &&
16601               V1.getOperand(1) == V2.getOperand(0)) {
16602             return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
16603           }
16604         }
16605       }
16606     }
16607 
16608     break;
16609   case ISD::INTRINSIC_W_CHAIN:
16610       switch (N->getConstantOperandVal(1)) {
16611       default:
16612         break;
16613       case Intrinsic::ppc_altivec_vsum4sbs:
16614       case Intrinsic::ppc_altivec_vsum4shs:
16615       case Intrinsic::ppc_altivec_vsum4ubs: {
16616         // These sum-across intrinsics only have a chain due to the side effect
16617         // that they may set the SAT bit. If we know the SAT bit will not be set
16618         // for some inputs, we can replace any uses of their chain with the
16619         // input chain.
16620         if (BuildVectorSDNode *BVN =
16621                 dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
16622           APInt APSplatBits, APSplatUndef;
16623           unsigned SplatBitSize;
16624           bool HasAnyUndefs;
16625           bool BVNIsConstantSplat = BVN->isConstantSplat(
16626               APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
16627               !Subtarget.isLittleEndian());
16628           // If the constant splat vector is 0, the SAT bit will not be set.
16629           if (BVNIsConstantSplat && APSplatBits == 0)
16630             DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
16631         }
16632         return SDValue();
16633       }
16634     case Intrinsic::ppc_vsx_lxvw4x:
16635     case Intrinsic::ppc_vsx_lxvd2x:
16636       // For little endian, VSX loads require generating lxvd2x/xxswapd.
16637       // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
16638       if (Subtarget.needsSwapsForVSXMemOps())
16639         return expandVSXLoadForLE(N, DCI);
16640       break;
16641     }
16642     break;
16643   case ISD::INTRINSIC_VOID:
16644     // For little endian, VSX stores require generating xxswapd/stxvd2x.
16645     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
16646     if (Subtarget.needsSwapsForVSXMemOps()) {
16647       switch (N->getConstantOperandVal(1)) {
16648       default:
16649         break;
16650       case Intrinsic::ppc_vsx_stxvw4x:
16651       case Intrinsic::ppc_vsx_stxvd2x:
16652         return expandVSXStoreForLE(N, DCI);
16653       }
16654     }
16655     break;
16656   case ISD::BSWAP: {
16657     // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
16658     // For subtargets without LDBRX, we can still do better than the default
16659     // expansion even for 64-bit BSWAP (LOAD).
16660     bool Is64BitBswapOn64BitTgt =
16661         Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
16662     bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
16663                                N->getOperand(0).hasOneUse();
16664     if (IsSingleUseNormalLd &&
16665         (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
16666          (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
16667       SDValue Load = N->getOperand(0);
16668       LoadSDNode *LD = cast<LoadSDNode>(Load);
16669       // Create the byte-swapping load.
16670       SDValue Ops[] = {
16671         LD->getChain(),    // Chain
16672         LD->getBasePtr(),  // Ptr
16673         DAG.getValueType(N->getValueType(0)) // VT
16674       };
16675       SDValue BSLoad =
16676         DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
16677                                 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
16678                                               MVT::i64 : MVT::i32, MVT::Other),
16679                                 Ops, LD->getMemoryVT(), LD->getMemOperand());
16680 
16681       // If this is an i16 load, insert the truncate.
16682       SDValue ResVal = BSLoad;
16683       if (N->getValueType(0) == MVT::i16)
16684         ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
16685 
16686       // First, combine the bswap away.  This makes the value produced by the
16687       // load dead.
16688       DCI.CombineTo(N, ResVal);
16689 
16690       // Next, combine the load away; we give it a bogus result value but a real
16691       // chain result.  The result value is dead because the bswap is dead.
16692       DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
16693 
16694       // Return N so it doesn't get rechecked!
16695       return SDValue(N, 0);
16696     }
16697     // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
16698     // before legalization so that the BUILD_PAIR is handled correctly.
16699     if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
16700         !IsSingleUseNormalLd)
16701       return SDValue();
16702     LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
16703 
16704     // Can't split volatile or atomic loads.
16705     if (!LD->isSimple())
16706       return SDValue();
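          // Split the 64-bit byte-swapped load into two 32-bit loads, byte-swap
          // each half, and reassemble them with BUILD_PAIR (with the halves
          // swapped for little endian).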
16707     SDValue BasePtr = LD->getBasePtr();
16708     SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
16709                              LD->getPointerInfo(), LD->getAlign());
16710     Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
16711     BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
16712                           DAG.getIntPtrConstant(4, dl));
16713     MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
16714         LD->getMemOperand(), 4, 4);
16715     SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
16716     Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
16717     SDValue Res;
16718     if (Subtarget.isLittleEndian())
16719       Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
16720     else
16721       Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
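          // Hi and Lo are BSWAPs of the two component loads; merge the chains of
          // those loads and use the merged chain in place of the original load's
          // chain result.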
16722     SDValue TF =
16723         DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
16724                     Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
16725     DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
16726     return Res;
16727   }
16728   case PPCISD::VCMP:
16729     // If a VCMP_rec node already exists with exactly the same operands as this
16730     // node, use its result instead of this node (VCMP_rec computes both a CR6
16731     // and a normal output).
16732     //
16733     if (!N->getOperand(0).hasOneUse() &&
16734         !N->getOperand(1).hasOneUse() &&
16735         !N->getOperand(2).hasOneUse()) {
16736 
16737       // Scan all of the users of the LHS, looking for VCMP_rec's that match.
16738       SDNode *VCMPrecNode = nullptr;
16739 
16740       SDNode *LHSN = N->getOperand(0).getNode();
16741       for (SDNode *User : LHSN->users())
16742         if (User->getOpcode() == PPCISD::VCMP_rec &&
16743             User->getOperand(1) == N->getOperand(1) &&
16744             User->getOperand(2) == N->getOperand(2) &&
16745             User->getOperand(0) == N->getOperand(0)) {
16746           VCMPrecNode = User;
16747           break;
16748         }
16749 
16750       // If there is no VCMP_rec node, or if its flag result has no uses,
16751       // don't transform this.
16752       if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
16753         break;
16754 
16755       // Look at the (necessarily single) use of the flag value.  If it has a
16756       // chain, this transformation is more complex.  Note that multiple things
16757       // could use the value result, which we should ignore.
16758       SDNode *FlagUser = nullptr;
16759       for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
16760            FlagUser == nullptr; ++UI) {
16761         assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
16762         SDNode *User = UI->getUser();
16763         for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
16764           if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
16765             FlagUser = User;
16766             break;
16767           }
16768         }
16769       }
16770 
16771       // If the user is a MFOCRF instruction, we know this is safe.
16772       // Otherwise we give up for right now.
16773       if (FlagUser->getOpcode() == PPCISD::MFOCRF)
16774         return SDValue(VCMPrecNode, 0);
16775     }
16776     break;
16777   case ISD::BR_CC: {
16778     // If this is a branch on an altivec predicate comparison, lower this so
16779     // that we don't have to do a MFOCRF: instead, branch directly on CR6.  This
16780     // lowering is done pre-legalize, because the legalizer lowers the predicate
16781     // compare down to code that is difficult to reassemble.
16782     // This code also handles branches that depend on the result of a store
16783     // conditional.
16784     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
16785     SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
16786 
16787     int CompareOpc;
16788     bool isDot;
16789 
16790     if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
16791       break;
16792 
16793     // Since we are doing this pre-legalize, the RHS can be a constant of
16794     // arbitrary bitwidth which may cause issues when trying to get the value
16795     // from the underlying APInt.
16796     auto RHSAPInt = RHS->getAsAPIntVal();
16797     if (!RHSAPInt.isIntN(64))
16798       break;
16799 
16800     unsigned Val = RHSAPInt.getZExtValue();
16801     auto isImpossibleCompare = [&]() {
16802       // If this is a comparison against something other than 0/1, then we know
16803       // that the condition is never/always true.
16804       if (Val != 0 && Val != 1) {
16805         if (CC == ISD::SETEQ)      // Cond never true, remove branch.
16806           return N->getOperand(0);
16807         // Always !=, turn it into an unconditional branch.
16808         return DAG.getNode(ISD::BR, dl, MVT::Other,
16809                            N->getOperand(0), N->getOperand(4));
16810       }
16811       return SDValue();
16812     };
16813     // Combine branches fed by store conditional instructions (st[bhwd]cx).
16814     unsigned StoreWidth = 0;
16815     if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
16816         isStoreConditional(LHS, StoreWidth)) {
16817       if (SDValue Impossible = isImpossibleCompare())
16818         return Impossible;
16819       PPC::Predicate CompOpc;
16820       // eq 0 => ne
16821       // ne 0 => eq
16822       // eq 1 => eq
16823       // ne 1 => ne
16824       if (Val == 0)
16825         CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
16826       else
16827         CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
16828 
16829       SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
16830                        DAG.getConstant(StoreWidth, dl, MVT::i32)};
16831       auto *MemNode = cast<MemSDNode>(LHS);
16832       SDValue ConstSt = DAG.getMemIntrinsicNode(
16833           PPCISD::STORE_COND, dl,
16834           DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
16835           MemNode->getMemoryVT(), MemNode->getMemOperand());
16836 
16837       SDValue InChain;
16838       // Unchain the branch from the original store conditional.
16839       if (N->getOperand(0) == LHS.getValue(1))
16840         InChain = LHS.getOperand(0);
16841       else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
16842         SmallVector<SDValue, 4> InChains;
16843         SDValue InTF = N->getOperand(0);
16844         for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
16845           if (InTF.getOperand(i) != LHS.getValue(1))
16846             InChains.push_back(InTF.getOperand(i));
16847         InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
16848       }
16849 
16850       return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
16851                          DAG.getConstant(CompOpc, dl, MVT::i32),
16852                          DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
16853                          ConstSt.getValue(2));
16854     }
16855 
16856     if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16857         getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
16858       assert(isDot && "Can't compare against a vector result!");
16859 
16860       if (SDValue Impossible = isImpossibleCompare())
16861         return Impossible;
16862 
16863       bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
16864       // Create the PPCISD altivec 'dot' comparison node.
16865       SDValue Ops[] = {
16866         LHS.getOperand(2),  // LHS of compare
16867         LHS.getOperand(3),  // RHS of compare
16868         DAG.getConstant(CompareOpc, dl, MVT::i32)
16869       };
16870       EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
16871       SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
16872 
16873       // Unpack the result based on how the target uses it.
16874       PPC::Predicate CompOpc;
16875       switch (LHS.getConstantOperandVal(1)) {
16876       default:  // Can't happen, don't crash on invalid number though.
16877       case 0:   // Branch on the value of the EQ bit of CR6.
16878         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
16879         break;
16880       case 1:   // Branch on the inverted value of the EQ bit of CR6.
16881         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
16882         break;
16883       case 2:   // Branch on the value of the LT bit of CR6.
16884         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
16885         break;
16886       case 3:   // Branch on the inverted value of the LT bit of CR6.
16887         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
16888         break;
16889       }
16890 
16891       return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
16892                          DAG.getConstant(CompOpc, dl, MVT::i32),
16893                          DAG.getRegister(PPC::CR6, MVT::i32),
16894                          N->getOperand(4), CompNode.getValue(1));
16895     }
16896     break;
16897   }
16898   case ISD::BUILD_VECTOR:
16899     return DAGCombineBuildVector(N, DCI);
16900   }
16901 
16902   return SDValue();
16903 }
16904 
16905 SDValue
16906 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
16907                                  SelectionDAG &DAG,
16908                                  SmallVectorImpl<SDNode *> &Created) const {
16909   // fold (sdiv X, pow2)
16910   EVT VT = N->getValueType(0);
16911   if (VT == MVT::i64 && !Subtarget.isPPC64())
16912     return SDValue();
16913   if ((VT != MVT::i32 && VT != MVT::i64) ||
16914       !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
16915     return SDValue();
16916 
16917   SDLoc DL(N);
16918   SDValue N0 = N->getOperand(0);
16919 
16920   bool IsNegPow2 = Divisor.isNegatedPowerOf2();
16921   unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
16922   SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
16923 
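        // PPCISD::SRA_ADDZE expands to an arithmetic right shift that records the
        // carry (srawi/sradi) followed by addze, which rounds the shifted result
        // toward zero as signed division requires.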
16924   SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
16925   Created.push_back(Op.getNode());
16926 
16927   if (IsNegPow2) {
16928     Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
16929     Created.push_back(Op.getNode());
16930   }
16931 
16932   return Op;
16933 }
16934 
16935 //===----------------------------------------------------------------------===//
16936 // Inline Assembly Support
16937 //===----------------------------------------------------------------------===//
16938 
16939 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
16940                                                       KnownBits &Known,
16941                                                       const APInt &DemandedElts,
16942                                                       const SelectionDAG &DAG,
16943                                                       unsigned Depth) const {
16944   Known.resetAll();
16945   switch (Op.getOpcode()) {
16946   default: break;
16947   case PPCISD::LBRX: {
16948     // lhbrx is known to have the top bits cleared out.
16949     if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
16950       Known.Zero = 0xFFFF0000;
16951     break;
16952   }
16953   case ISD::INTRINSIC_WO_CHAIN: {
16954     switch (Op.getConstantOperandVal(0)) {
16955     default: break;
16956     case Intrinsic::ppc_altivec_vcmpbfp_p:
16957     case Intrinsic::ppc_altivec_vcmpeqfp_p:
16958     case Intrinsic::ppc_altivec_vcmpequb_p:
16959     case Intrinsic::ppc_altivec_vcmpequh_p:
16960     case Intrinsic::ppc_altivec_vcmpequw_p:
16961     case Intrinsic::ppc_altivec_vcmpequd_p:
16962     case Intrinsic::ppc_altivec_vcmpequq_p:
16963     case Intrinsic::ppc_altivec_vcmpgefp_p:
16964     case Intrinsic::ppc_altivec_vcmpgtfp_p:
16965     case Intrinsic::ppc_altivec_vcmpgtsb_p:
16966     case Intrinsic::ppc_altivec_vcmpgtsh_p:
16967     case Intrinsic::ppc_altivec_vcmpgtsw_p:
16968     case Intrinsic::ppc_altivec_vcmpgtsd_p:
16969     case Intrinsic::ppc_altivec_vcmpgtsq_p:
16970     case Intrinsic::ppc_altivec_vcmpgtub_p:
16971     case Intrinsic::ppc_altivec_vcmpgtuh_p:
16972     case Intrinsic::ppc_altivec_vcmpgtuw_p:
16973     case Intrinsic::ppc_altivec_vcmpgtud_p:
16974     case Intrinsic::ppc_altivec_vcmpgtuq_p:
16975       Known.Zero = ~1U;  // All bits but the low one are known to be zero.
16976       break;
16977     }
16978     break;
16979   }
16980   case ISD::INTRINSIC_W_CHAIN: {
16981     switch (Op.getConstantOperandVal(1)) {
16982     default:
16983       break;
16984     case Intrinsic::ppc_load2r:
16985       // Top bits are cleared for load2r (which is the same as lhbrx).
16986       Known.Zero = 0xFFFF0000;
16987       break;
16988     }
16989     break;
16990   }
16991   }
16992 }
16993 
16994 Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
16995   switch (Subtarget.getCPUDirective()) {
16996   default: break;
16997   case PPC::DIR_970:
16998   case PPC::DIR_PWR4:
16999   case PPC::DIR_PWR5:
17000   case PPC::DIR_PWR5X:
17001   case PPC::DIR_PWR6:
17002   case PPC::DIR_PWR6X:
17003   case PPC::DIR_PWR7:
17004   case PPC::DIR_PWR8:
17005   case PPC::DIR_PWR9:
17006   case PPC::DIR_PWR10:
17007   case PPC::DIR_PWR11:
17008   case PPC::DIR_PWR_FUTURE: {
17009     if (!ML)
17010       break;
17011 
17012     if (!DisableInnermostLoopAlign32) {
17013       // If the nested loop is an innermost loop, prefer a 32-byte alignment,
17014       // so that we can decrease cache misses and branch-prediction misses.
17015       // Actual alignment of the loop will depend on the hotness check and other
17016       // logic in alignBlocks.
17017       if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
17018         return Align(32);
17019     }
17020 
17021     const PPCInstrInfo *TII = Subtarget.getInstrInfo();
17022 
17023     // For small loops (between 5 and 8 instructions), align to a 32-byte
17024     // boundary so that the entire loop fits in one instruction-cache line.
17025     uint64_t LoopSize = 0;
17026     for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
17027       for (const MachineInstr &J : **I) {
17028         LoopSize += TII->getInstSizeInBytes(J);
17029         if (LoopSize > 32)
17030           break;
17031       }
17032 
17033     if (LoopSize > 16 && LoopSize <= 32)
17034       return Align(32);
17035 
17036     break;
17037   }
17038   }
17039 
17040   return TargetLowering::getPrefLoopAlignment(ML);
17041 }
17042 
17043 /// getConstraintType - Given a constraint, return the type of
17044 /// constraint it is for this target.
17045 PPCTargetLowering::ConstraintType
17046 PPCTargetLowering::getConstraintType(StringRef Constraint) const {
17047   if (Constraint.size() == 1) {
17048     switch (Constraint[0]) {
17049     default: break;
17050     case 'b':
17051     case 'r':
17052     case 'f':
17053     case 'd':
17054     case 'v':
17055     case 'y':
17056       return C_RegisterClass;
17057     case 'Z':
17058       // FIXME: While Z does indicate a memory constraint, it specifically
17059       // indicates an r+r address (used in conjunction with the 'y' modifier
17060       // in the replacement string). Currently, we're forcing the base
17061       // register to be r0 in the asm printer (which is interpreted as zero)
17062       // and forming the complete address in the second register. This is
17063       // suboptimal.
17064       return C_Memory;
17065     }
17066   } else if (Constraint == "wc") { // individual CR bits.
17067     return C_RegisterClass;
17068   } else if (Constraint == "wa" || Constraint == "wd" ||
17069              Constraint == "wf" || Constraint == "ws" ||
17070              Constraint == "wi" || Constraint == "ww") {
17071     return C_RegisterClass; // VSX registers.
17072   }
17073   return TargetLowering::getConstraintType(Constraint);
17074 }
17075 
17076 /// Examine constraint type and operand type and determine a weight value.
17077 /// This object must already have been set up with the operand type
17078 /// and the current alternative constraint selected.
17079 TargetLowering::ConstraintWeight
17080 PPCTargetLowering::getSingleConstraintMatchWeight(
17081     AsmOperandInfo &info, const char *constraint) const {
17082   ConstraintWeight weight = CW_Invalid;
17083   Value *CallOperandVal = info.CallOperandVal;
17084   // If we don't have a value, we can't do a match,
17085   // but allow it at the lowest weight.
17086   if (!CallOperandVal)
17087     return CW_Default;
17088   Type *type = CallOperandVal->getType();
17089 
17090   // Look at the constraint type.
17091   if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
17092     return CW_Register; // an individual CR bit.
17093   else if ((StringRef(constraint) == "wa" ||
17094             StringRef(constraint) == "wd" ||
17095             StringRef(constraint) == "wf") &&
17096            type->isVectorTy())
17097     return CW_Register;
17098   else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
17099     return CW_Register; // just holds 64-bit integer data.
17100   else if (StringRef(constraint) == "ws" && type->isDoubleTy())
17101     return CW_Register;
17102   else if (StringRef(constraint) == "ww" && type->isFloatTy())
17103     return CW_Register;
17104 
17105   switch (*constraint) {
17106   default:
17107     weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
17108     break;
17109   case 'b':
17110     if (type->isIntegerTy())
17111       weight = CW_Register;
17112     break;
17113   case 'f':
17114     if (type->isFloatTy())
17115       weight = CW_Register;
17116     break;
17117   case 'd':
17118     if (type->isDoubleTy())
17119       weight = CW_Register;
17120     break;
17121   case 'v':
17122     if (type->isVectorTy())
17123       weight = CW_Register;
17124     break;
17125   case 'y':
17126     weight = CW_Register;
17127     break;
17128   case 'Z':
17129     weight = CW_Memory;
17130     break;
17131   }
17132   return weight;
17133 }
17134 
17135 std::pair<unsigned, const TargetRegisterClass *>
17136 PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
17137                                                 StringRef Constraint,
17138                                                 MVT VT) const {
17139   if (Constraint.size() == 1) {
17140     // GCC RS6000 Constraint Letters
17141     switch (Constraint[0]) {
17142     case 'b':   // R1-R31
17143       if (VT == MVT::i64 && Subtarget.isPPC64())
17144         return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
17145       return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
17146     case 'r':   // R0-R31
17147       if (VT == MVT::i64 && Subtarget.isPPC64())
17148         return std::make_pair(0U, &PPC::G8RCRegClass);
17149       return std::make_pair(0U, &PPC::GPRCRegClass);
17150     // 'd' and 'f' constraints are both defined to be "the floating point
17151     // registers", where one is for 32-bit and the other for 64-bit. We don't
17152     // registers", where one is for 32-bit and the other for 64-bit. We don't
17153     // distinguish them here, so just give them all the same register classes.
17154     case 'f':
17155       if (Subtarget.hasSPE()) {
17156         if (VT == MVT::f32 || VT == MVT::i32)
17157           return std::make_pair(0U, &PPC::GPRCRegClass);
17158         if (VT == MVT::f64 || VT == MVT::i64)
17159           return std::make_pair(0U, &PPC::SPERCRegClass);
17160       } else {
17161         if (VT == MVT::f32 || VT == MVT::i32)
17162           return std::make_pair(0U, &PPC::F4RCRegClass);
17163         if (VT == MVT::f64 || VT == MVT::i64)
17164           return std::make_pair(0U, &PPC::F8RCRegClass);
17165       }
17166       break;
17167     case 'v':
17168       if (Subtarget.hasAltivec() && VT.isVector())
17169         return std::make_pair(0U, &PPC::VRRCRegClass);
17170       else if (Subtarget.hasVSX())
17171         // Scalars in Altivec registers only make sense with VSX.
17172         return std::make_pair(0U, &PPC::VFRCRegClass);
17173       break;
17174     case 'y':   // crrc
17175       return std::make_pair(0U, &PPC::CRRCRegClass);
17176     }
17177   } else if (Constraint == "wc" && Subtarget.useCRBits()) {
17178     // An individual CR bit.
17179     return std::make_pair(0U, &PPC::CRBITRCRegClass);
17180   } else if ((Constraint == "wa" || Constraint == "wd" ||
17181              Constraint == "wf" || Constraint == "wi") &&
17182              Subtarget.hasVSX()) {
17183     // A VSX register for either a scalar (FP) or vector. There is no
17184     // support for single precision scalars on subtargets prior to Power8.
17185     if (VT.isVector())
17186       return std::make_pair(0U, &PPC::VSRCRegClass);
17187     if (VT == MVT::f32 && Subtarget.hasP8Vector())
17188       return std::make_pair(0U, &PPC::VSSRCRegClass);
17189     return std::make_pair(0U, &PPC::VSFRCRegClass);
17190   } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
17191     if (VT == MVT::f32 && Subtarget.hasP8Vector())
17192       return std::make_pair(0U, &PPC::VSSRCRegClass);
17193     else
17194       return std::make_pair(0U, &PPC::VSFRCRegClass);
17195   } else if (Constraint == "lr") {
17196     if (VT == MVT::i64)
17197       return std::make_pair(0U, &PPC::LR8RCRegClass);
17198     else
17199       return std::make_pair(0U, &PPC::LRRCRegClass);
17200   }
17201 
17202   // Handle special cases of physical registers that are not properly handled
17203   // by the base class.
17204   if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
17205     // If we name a VSX register, we can't defer to the base class because it
17206     // will not recognize the correct register (their names will be VSL{0-31}
17207     // and V{0-31} so they won't match). So we match them here.
17208     if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
17209       int VSNum = atoi(Constraint.data() + 3);
17210       assert(VSNum >= 0 && VSNum <= 63 &&
17211              "Attempted to access a vsr out of range");
17212       if (VSNum < 32)
17213         return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
17214       return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
17215     }
17216 
17217     // For float registers, we can't defer to the base class as it will match
17218     // the SPILLTOVSRRC class.
17219     if (Constraint.size() > 3 && Constraint[1] == 'f') {
17220       int RegNum = atoi(Constraint.data() + 2);
17221       if (RegNum > 31 || RegNum < 0)
17222         report_fatal_error("Invalid floating point register number");
17223       if (VT == MVT::f32 || VT == MVT::i32)
17224         return Subtarget.hasSPE()
17225                    ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
17226                    : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
17227       if (VT == MVT::f64 || VT == MVT::i64)
17228         return Subtarget.hasSPE()
17229                    ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
17230                    : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
17231     }
17232   }
17233 
17234   std::pair<unsigned, const TargetRegisterClass *> R =
17235       TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
17236 
17237   // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
17238   // (which we call X[0-9]+). If a 64-bit value has been requested, and a
17239   // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
17240   // register.
17241   // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
17242   // the AsmName field from *RegisterInfo.td, then this would not be necessary.
17243   if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
17244       PPC::GPRCRegClass.contains(R.first))
17245     return std::make_pair(TRI->getMatchingSuperReg(R.first,
17246                             PPC::sub_32, &PPC::G8RCRegClass),
17247                           &PPC::G8RCRegClass);
17248 
17249   // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
17250   if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
17251     R.first = PPC::CR0;
17252     R.second = &PPC::CRRCRegClass;
17253   }
17254   // FIXME: This warning should ideally be emitted in the front end.
17255   const auto &TM = getTargetMachine();
17256   if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
17257     if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
17258          (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
17259         (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
17260       errs() << "warning: vector registers 20 to 31 are reserved in the "
17261                 "default AIX AltiVec ABI and cannot be used\n";
17262   }
17263 
17264   return R;
17265 }
17266 
17267 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
17268 /// vector.  If it is invalid, don't add anything to Ops.
17269 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
17270                                                      StringRef Constraint,
17271                                                      std::vector<SDValue> &Ops,
17272                                                      SelectionDAG &DAG) const {
17273   SDValue Result;
17274 
17275   // Only support length 1 constraints.
17276   if (Constraint.size() > 1)
17277     return;
17278 
17279   char Letter = Constraint[0];
17280   switch (Letter) {
17281   default: break;
17282   case 'I':
17283   case 'J':
17284   case 'K':
17285   case 'L':
17286   case 'M':
17287   case 'N':
17288   case 'O':
17289   case 'P': {
17290     ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
17291     if (!CST) return; // Must be an immediate to match.
17292     SDLoc dl(Op);
17293     int64_t Value = CST->getSExtValue();
17294     EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
17295                          // numbers are printed as such.
17296     switch (Letter) {
17297     default: llvm_unreachable("Unknown constraint letter!");
17298     case 'I':  // "I" is a signed 16-bit constant.
17299       if (isInt<16>(Value))
17300         Result = DAG.getTargetConstant(Value, dl, TCVT);
17301       break;
17302     case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
17303       if (isShiftedUInt<16, 16>(Value))
17304         Result = DAG.getTargetConstant(Value, dl, TCVT);
17305       break;
17306     case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
17307       if (isShiftedInt<16, 16>(Value))
17308         Result = DAG.getTargetConstant(Value, dl, TCVT);
17309       break;
17310     case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
17311       if (isUInt<16>(Value))
17312         Result = DAG.getTargetConstant(Value, dl, TCVT);
17313       break;
17314     case 'M':  // "M" is a constant that is greater than 31.
17315       if (Value > 31)
17316         Result = DAG.getTargetConstant(Value, dl, TCVT);
17317       break;
17318     case 'N':  // "N" is a positive constant that is an exact power of two.
17319       if (Value > 0 && isPowerOf2_64(Value))
17320         Result = DAG.getTargetConstant(Value, dl, TCVT);
17321       break;
17322     case 'O':  // "O" is the constant zero.
17323       if (Value == 0)
17324         Result = DAG.getTargetConstant(Value, dl, TCVT);
17325       break;
17326     case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
17327       if (isInt<16>(-Value))
17328         Result = DAG.getTargetConstant(Value, dl, TCVT);
17329       break;
17330     }
17331     break;
17332   }
17333   }
17334 
17335   if (Result.getNode()) {
17336     Ops.push_back(Result);
17337     return;
17338   }
17339 
17340   // Handle standard constraint letters.
17341   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
17342 }
17343 
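      // For the PPC trap intrinsics (ppc_tw, ppc_tdw, ppc_trap, ppc_trapd), forward
      // any !annotation metadata attached to the call as an extra operand on the
      // intrinsic node.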
17344 void PPCTargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
17345                                               SmallVectorImpl<SDValue> &Ops,
17346                                               SelectionDAG &DAG) const {
17347   if (I.getNumOperands() <= 1)
17348     return;
17349   if (!isa<ConstantSDNode>(Ops[1].getNode()))
17350     return;
17351   auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
17352   if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
17353       IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
17354     return;
17355 
17356   if (MDNode *MDN = I.getMetadata(LLVMContext::MD_annotation))
17357     Ops.push_back(DAG.getMDNode(MDN));
17358 }
17359 
17360 // isLegalAddressingMode - Return true if the addressing mode represented
17361 // by AM is legal for this target, for a load/store of the specified type.
17362 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
17363                                               const AddrMode &AM, Type *Ty,
17364                                               unsigned AS,
17365                                               Instruction *I) const {
17366   // The vector r+i form is supported as the DQ form since Power9. We don't
17367   // check the DQ-form offset requirement (off % 16 == 0) here because, on
17368   // PowerPC, the immediate form is preferred and the offset can be adjusted
17369   // to fit it later in the PPCLoopInstrFormPrep pass. Also, LSR checks the
17370   // legality of an addressing mode using the min and max offsets of one
17371   // LSRUse, so we should be a little aggressive and cover its other offsets.
17372   if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
17373     return false;
17374 
17375   // PPC allows a sign-extended 16-bit immediate field.
17376   if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
17377     return false;
17378 
17379   // No global is ever allowed as a base.
17380   if (AM.BaseGV)
17381     return false;
17382 
17383   // PPC only supports r+r,
17384   switch (AM.Scale) {
17385   case 0:  // "r+i" or just "i", depending on HasBaseReg.
17386     break;
17387   case 1:
17388     if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
17389       return false;
17390     // Otherwise we have r+r or r+i.
17391     break;
17392   case 2:
17393     if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i is not allowed.
17394       return false;
17395     // Allow 2*r as r+r.
17396     break;
17397   default:
17398     // No other scales are supported.
17399     return false;
17400   }
17401 
17402   return true;
17403 }
17404 
17405 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
17406                                            SelectionDAG &DAG) const {
17407   MachineFunction &MF = DAG.getMachineFunction();
17408   MachineFrameInfo &MFI = MF.getFrameInfo();
17409   MFI.setReturnAddressIsTaken(true);
17410 
17411   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
17412     return SDValue();
17413 
17414   SDLoc dl(Op);
17415   unsigned Depth = Op.getConstantOperandVal(0);
17416 
17417   // Make sure the function does not optimize away the store of the RA to
17418   // the stack.
17419   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
17420   FuncInfo->setLRStoreRequired();
17421   auto PtrVT = getPointerTy(MF.getDataLayout());
17422 
17423   if (Depth > 0) {
17424     // The link register (return address) is saved in the caller's frame,
17425     // not the callee's stack frame. So we must get the caller's frame
17426     // address and load the return address at the LR offset from there.
17427     SDValue FrameAddr =
17428         DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
17429                     LowerFRAMEADDR(Op, DAG), MachinePointerInfo());
17430     SDValue Offset =
17431         DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
17432                         Subtarget.getScalarIntVT());
17433     return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
17434                        DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
17435                        MachinePointerInfo());
17436   }
17437 
17438   // Just load the return address off the stack.
17439   SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
17440   return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
17441                      MachinePointerInfo());
17442 }
17443 
17444 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
17445                                           SelectionDAG &DAG) const {
17446   SDLoc dl(Op);
17447   unsigned Depth = Op.getConstantOperandVal(0);
17448 
17449   MachineFunction &MF = DAG.getMachineFunction();
17450   MachineFrameInfo &MFI = MF.getFrameInfo();
17451   MFI.setFrameAddressIsTaken(true);
17452 
17453   EVT PtrVT = getPointerTy(MF.getDataLayout());
17454   bool isPPC64 = PtrVT == MVT::i64;
17455 
17456   // Naked functions never have a frame pointer, and so we use r1. For all
17457   // other functions, this decision must be deferred until PEI.
17458   unsigned FrameReg;
17459   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
17460     FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
17461   else
17462     FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
17463 
17464   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
17465                                          PtrVT);
17466   while (Depth--)
17467     FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
17468                             FrameAddr, MachinePointerInfo());
17469   return FrameAddr;
17470 }
17471 
17472 #define GET_REGISTER_MATCHER
17473 #include "PPCGenAsmMatcher.inc"
17474 
17475 Register PPCTargetLowering::getRegisterByName(const char *RegName, LLT VT,
17476                                               const MachineFunction &MF) const {
17477   bool IsPPC64 = Subtarget.isPPC64();
17478 
17479   bool Is64Bit = IsPPC64 && VT == LLT::scalar(64);
17480   if (!Is64Bit && VT != LLT::scalar(32))
17481     report_fatal_error("Invalid register global variable type");
17482 
17483   Register Reg = MatchRegisterName(RegName);
17484   if (!Reg)
17485     report_fatal_error(
17486         Twine("Invalid global name register \"" + StringRef(RegName) + "\"."));
17487 
17488   // FIXME: Unable to generate code for `-O2` but okay for `-O0`.
17489   // Need followup investigation as to why.
17490   if ((IsPPC64 && Reg == PPC::R2) || Reg == PPC::R0)
17491     report_fatal_error(Twine("Trying to reserve an invalid register \"" +
17492                              StringRef(RegName) + "\"."));
17493 
17494   // Convert the GPR to the corresponding GP8R register for 64-bit.
17495   if (Is64Bit && StringRef(RegName).starts_with_insensitive("r"))
17496     Reg = Reg.id() - PPC::R0 + PPC::X0;
17497 
17498   return Reg;
17499 }
17500 
17501 bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
17502   // The 32-bit SVR4 ABI accesses everything as got-indirect.
17503   if (Subtarget.is32BitELFABI())
17504     return true;
17505 
17506   // AIX accesses everything indirectly through the TOC, which is similar to
17507   // the GOT.
17508   if (Subtarget.isAIXABI())
17509     return true;
17510 
17511   CodeModel::Model CModel = getTargetMachine().getCodeModel();
17512   // With the small or large code model, module locals are accessed
17513   // indirectly by loading their address from the .toc/.got.
17514   if (CModel == CodeModel::Small || CModel == CodeModel::Large)
17515     return true;
17516 
17517   // JumpTable and BlockAddress are accessed as got-indirect.
17518   if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
17519     return true;
17520 
17521   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
17522     return Subtarget.isGVIndirectSymbol(G->getGlobal());
17523 
17524   return false;
17525 }
17526 
17527 bool
17528 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
17529   // The PowerPC target isn't yet aware of offsets.
17530   return false;
17531 }
17532 
17533 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
17534                                            const CallInst &I,
17535                                            MachineFunction &MF,
17536                                            unsigned Intrinsic) const {
17537   switch (Intrinsic) {
17538   case Intrinsic::ppc_atomicrmw_xchg_i128:
17539   case Intrinsic::ppc_atomicrmw_add_i128:
17540   case Intrinsic::ppc_atomicrmw_sub_i128:
17541   case Intrinsic::ppc_atomicrmw_nand_i128:
17542   case Intrinsic::ppc_atomicrmw_and_i128:
17543   case Intrinsic::ppc_atomicrmw_or_i128:
17544   case Intrinsic::ppc_atomicrmw_xor_i128:
17545   case Intrinsic::ppc_cmpxchg_i128:
17546     Info.opc = ISD::INTRINSIC_W_CHAIN;
17547     Info.memVT = MVT::i128;
17548     Info.ptrVal = I.getArgOperand(0);
17549     Info.offset = 0;
17550     Info.align = Align(16);
17551     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
17552                  MachineMemOperand::MOVolatile;
17553     return true;
17554   case Intrinsic::ppc_atomic_load_i128:
17555     Info.opc = ISD::INTRINSIC_W_CHAIN;
17556     Info.memVT = MVT::i128;
17557     Info.ptrVal = I.getArgOperand(0);
17558     Info.offset = 0;
17559     Info.align = Align(16);
17560     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
17561     return true;
17562   case Intrinsic::ppc_atomic_store_i128:
17563     Info.opc = ISD::INTRINSIC_VOID;
17564     Info.memVT = MVT::i128;
17565     Info.ptrVal = I.getArgOperand(2);
17566     Info.offset = 0;
17567     Info.align = Align(16);
17568     Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
17569     return true;
17570   case Intrinsic::ppc_altivec_lvx:
17571   case Intrinsic::ppc_altivec_lvxl:
17572   case Intrinsic::ppc_altivec_lvebx:
17573   case Intrinsic::ppc_altivec_lvehx:
17574   case Intrinsic::ppc_altivec_lvewx:
17575   case Intrinsic::ppc_vsx_lxvd2x:
17576   case Intrinsic::ppc_vsx_lxvw4x:
17577   case Intrinsic::ppc_vsx_lxvd2x_be:
17578   case Intrinsic::ppc_vsx_lxvw4x_be:
17579   case Intrinsic::ppc_vsx_lxvl:
17580   case Intrinsic::ppc_vsx_lxvll: {
17581     EVT VT;
17582     switch (Intrinsic) {
17583     case Intrinsic::ppc_altivec_lvebx:
17584       VT = MVT::i8;
17585       break;
17586     case Intrinsic::ppc_altivec_lvehx:
17587       VT = MVT::i16;
17588       break;
17589     case Intrinsic::ppc_altivec_lvewx:
17590       VT = MVT::i32;
17591       break;
17592     case Intrinsic::ppc_vsx_lxvd2x:
17593     case Intrinsic::ppc_vsx_lxvd2x_be:
17594       VT = MVT::v2f64;
17595       break;
17596     default:
17597       VT = MVT::v4i32;
17598       break;
17599     }
17600 
17601     Info.opc = ISD::INTRINSIC_W_CHAIN;
17602     Info.memVT = VT;
17603     Info.ptrVal = I.getArgOperand(0);
17604     Info.offset = -VT.getStoreSize()+1;
17605     Info.size = 2*VT.getStoreSize()-1;
17606     Info.align = Align(1);
17607     Info.flags = MachineMemOperand::MOLoad;
17608     return true;
17609   }
17610   case Intrinsic::ppc_altivec_stvx:
17611   case Intrinsic::ppc_altivec_stvxl:
17612   case Intrinsic::ppc_altivec_stvebx:
17613   case Intrinsic::ppc_altivec_stvehx:
17614   case Intrinsic::ppc_altivec_stvewx:
17615   case Intrinsic::ppc_vsx_stxvd2x:
17616   case Intrinsic::ppc_vsx_stxvw4x:
17617   case Intrinsic::ppc_vsx_stxvd2x_be:
17618   case Intrinsic::ppc_vsx_stxvw4x_be:
17619   case Intrinsic::ppc_vsx_stxvl:
17620   case Intrinsic::ppc_vsx_stxvll: {
17621     EVT VT;
17622     switch (Intrinsic) {
17623     case Intrinsic::ppc_altivec_stvebx:
17624       VT = MVT::i8;
17625       break;
17626     case Intrinsic::ppc_altivec_stvehx:
17627       VT = MVT::i16;
17628       break;
17629     case Intrinsic::ppc_altivec_stvewx:
17630       VT = MVT::i32;
17631       break;
17632     case Intrinsic::ppc_vsx_stxvd2x:
17633     case Intrinsic::ppc_vsx_stxvd2x_be:
17634       VT = MVT::v2f64;
17635       break;
17636     default:
17637       VT = MVT::v4i32;
17638       break;
17639     }
17640 
17641     Info.opc = ISD::INTRINSIC_VOID;
17642     Info.memVT = VT;
17643     Info.ptrVal = I.getArgOperand(1);
17644     Info.offset = -VT.getStoreSize()+1;
17645     Info.size = 2*VT.getStoreSize()-1;
17646     Info.align = Align(1);
17647     Info.flags = MachineMemOperand::MOStore;
17648     return true;
17649   }
17650   case Intrinsic::ppc_stdcx:
17651   case Intrinsic::ppc_stwcx:
17652   case Intrinsic::ppc_sthcx:
17653   case Intrinsic::ppc_stbcx: {
17654     EVT VT;
17655     auto Alignment = Align(8);
17656     switch (Intrinsic) {
17657     case Intrinsic::ppc_stdcx:
17658       VT = MVT::i64;
17659       break;
17660     case Intrinsic::ppc_stwcx:
17661       VT = MVT::i32;
17662       Alignment = Align(4);
17663       break;
17664     case Intrinsic::ppc_sthcx:
17665       VT = MVT::i16;
17666       Alignment = Align(2);
17667       break;
17668     case Intrinsic::ppc_stbcx:
17669       VT = MVT::i8;
17670       Alignment = Align(1);
17671       break;
17672     }
17673     Info.opc = ISD::INTRINSIC_W_CHAIN;
17674     Info.memVT = VT;
17675     Info.ptrVal = I.getArgOperand(0);
17676     Info.offset = 0;
17677     Info.align = Alignment;
17678     Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
17679     return true;
17680   }
17681   default:
17682     break;
17683   }
17684 
17685   return false;
17686 }
17687 
17688 /// It returns EVT::Other if the type should be determined using generic
17689 /// target-independent logic.
17690 EVT PPCTargetLowering::getOptimalMemOpType(
17691     const MemOp &Op, const AttributeList &FuncAttributes) const {
17692   if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
17693     // We should use Altivec/VSX loads and stores when available. For unaligned
17694     // addresses, unaligned VSX loads are only fast starting with the P8.
17695     if (Subtarget.hasAltivec() && Op.size() >= 16) {
17696       if (Op.isMemset() && Subtarget.hasVSX()) {
17697         uint64_t TailSize = Op.size() % 16;
17698         // For memset lowering, EXTRACT_VECTOR_ELT tries to return a constant
17699         // element if the vector element type matches the tail store. For tail
17700         // sizes of 3 or 4 the tail store is i32, so use v8i16 instead of v4i32.
17701         if (TailSize > 2 && TailSize <= 4) {
17702           return MVT::v8i16;
17703         }
17704         return MVT::v4i32;
17705       }
17706       if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
17707         return MVT::v4i32;
17708     }
17709   }
17710 
17711   if (Subtarget.isPPC64()) {
17712     return MVT::i64;
17713   }
17714 
17715   return MVT::i32;
17716 }
17717 
17718 /// Returns true if it is beneficial to convert a load of a constant
17719 /// to just the constant itself.
17720 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
17721                                                           Type *Ty) const {
17722   assert(Ty->isIntegerTy());
17723 
17724   unsigned BitSize = Ty->getPrimitiveSizeInBits();
17725   return !(BitSize == 0 || BitSize > 64);
17726 }
17727 
17728 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
17729   if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
17730     return false;
17731   unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
17732   unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
17733   return NumBits1 == 64 && NumBits2 == 32;
17734 }
17735 
17736 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
17737   if (!VT1.isInteger() || !VT2.isInteger())
17738     return false;
17739   unsigned NumBits1 = VT1.getSizeInBits();
17740   unsigned NumBits2 = VT2.getSizeInBits();
17741   return NumBits1 == 64 && NumBits2 == 32;
17742 }
17743 
17744 bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
17745   // Generally speaking, zexts are not free, but they are free when they can be
17746   // folded with other operations.
17747   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
17748     EVT MemVT = LD->getMemoryVT();
17749     if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
17750          (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
17751         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
17752          LD->getExtensionType() == ISD::ZEXTLOAD))
17753       return true;
17754   }
17755 
17756   // FIXME: Add other cases...
17757   //  - 32-bit shifts with a zext to i64
17758   //  - zext after ctlz, bswap, etc.
17759   //  - zext after and by a constant mask
17760 
17761   return TargetLowering::isZExtFree(Val, VT2);
17762 }
17763 
17764 bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
17765   assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
17766          "invalid fpext types");
17767   // Extending to float128 is not free.
17768   if (DestVT == MVT::f128)
17769     return false;
17770   return true;
17771 }
17772 
17773 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
17774   return isInt<16>(Imm) || isUInt<16>(Imm);
17775 }
17776 
17777 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
17778   return isInt<16>(Imm) || isUInt<16>(Imm);
17779 }
17780 
17781 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align,
17782                                                        MachineMemOperand::Flags,
17783                                                        unsigned *Fast) const {
17784   if (DisablePPCUnaligned)
17785     return false;
17786 
17787   // PowerPC supports unaligned memory access for simple non-vector types.
17788   // Although accessing unaligned addresses is not as efficient as accessing
17789   // aligned addresses, it is generally more efficient than manual expansion,
17790   // and generally only traps for software emulation when crossing page
17791   // boundaries.
17792 
17793   if (!VT.isSimple())
17794     return false;
17795 
17796   if (VT.isFloatingPoint() && !VT.isVector() &&
17797       !Subtarget.allowsUnalignedFPAccess())
17798     return false;
17799 
17800   if (VT.getSimpleVT().isVector()) {
17801     if (Subtarget.hasVSX()) {
17802       if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
17803           VT != MVT::v4f32 && VT != MVT::v4i32)
17804         return false;
17805     } else {
17806       return false;
17807     }
17808   }
17809 
17810   if (VT == MVT::ppcf128)
17811     return false;
17812 
17813   if (Fast)
17814     *Fast = 1;
17815 
17816   return true;
17817 }
17818 
17819 bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
17820                                                SDValue C) const {
17821   // Check integral scalar types.
17822   if (!VT.isScalarInteger())
17823     return false;
17824   if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
17825     if (!ConstNode->getAPIntValue().isSignedIntN(64))
17826       return false;
17827     // This transformation will generate >= 2 operations, but the following
17828     // cases will generate <= 2 instructions during ISEL, so exclude them:
17829     // 1. If the constant multiplier fits in 16 bits, it can be handled by
17830     //    one HW instruction, i.e. MULLI.
17831     // 2. If the multiplier fits in 16 bits after stripping trailing zeros,
17832     //    only one extra shift is needed compared to case 1, i.e. MULLI + RLDICR.
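          // For example, a multiplier of 65537 (2^16 + 1) does not fit in 16 bits
          // even after stripping trailing zeros, but 65537 - 1 is a power of two,
          // so the hook returns true and the multiply can be decomposed as
          // (x << 16) + x.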
17833     int64_t Imm = ConstNode->getSExtValue();
17834     unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
17835     Imm >>= Shift;
17836     if (isInt<16>(Imm))
17837       return false;
17838     uint64_t UImm = static_cast<uint64_t>(Imm);
17839     if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
17840         isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
17841       return true;
17842   }
17843   return false;
17844 }
17845 
17846 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
17847                                                    EVT VT) const {
17848   return isFMAFasterThanFMulAndFAdd(
17849       MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
17850 }
17851 
17852 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
17853                                                    Type *Ty) const {
17854   if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
17855     return false;
17856   switch (Ty->getScalarType()->getTypeID()) {
17857   case Type::FloatTyID:
17858   case Type::DoubleTyID:
17859     return true;
17860   case Type::FP128TyID:
17861     return Subtarget.hasP9Vector();
17862   default:
17863     return false;
17864   }
17865 }
17866 
17867 // FIXME: add more patterns which are not profitable to hoist.
17868 bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
17869   if (!I->hasOneUse())
17870     return true;
17871 
17872   Instruction *User = I->user_back();
17873   assert(User && "A single use instruction with no uses.");
17874 
17875   switch (I->getOpcode()) {
17876   case Instruction::FMul: {
17877     // Don't break FMA, PowerPC prefers FMA.
17878     if (User->getOpcode() != Instruction::FSub &&
17879         User->getOpcode() != Instruction::FAdd)
17880       return true;
17881 
17882     const TargetOptions &Options = getTargetMachine().Options;
17883     const Function *F = I->getFunction();
17884     const DataLayout &DL = F->getDataLayout();
17885     Type *Ty = User->getOperand(0)->getType();
17886 
17887     return !(
17888         isFMAFasterThanFMulAndFAdd(*F, Ty) &&
17889         isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
17890         (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
17891   }
17892   case Instruction::Load: {
17893     // Don't break the "store (load float*)" pattern; it will be combined
17894     // to "store (load int32)" in a later InstCombine pass. See the function
17895     // combineLoadToOperationType. On PowerPC, loading a floating-point value
17896     // takes more cycles than loading a 32-bit integer.
17897     LoadInst *LI = cast<LoadInst>(I);
17898     // For loads that combineLoadToOperationType does nothing with, such as
17899     // ordered loads, it should be profitable to hoist them.
17900     // A swifterror load can only be used with pointer-to-pointer types, so the
17901     // type check below gets rid of that case.
17902     if (!LI->isUnordered())
17903       return true;
17904 
17905     if (User->getOpcode() != Instruction::Store)
17906       return true;
17907 
17908     if (I->getType()->getTypeID() != Type::FloatTyID)
17909       return true;
17910 
17911     return false;
17912   }
17913   default:
17914     return true;
17915   }
17916   return true;
17917 }
17918 
17919 const MCPhysReg *
17920 PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
17921   // LR is a callee-save register, but we must treat it as clobbered by any call
17922   // site. Hence we include LR in the scratch registers, which are in turn added
17923   // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
17924   // to CTR, which is used by any indirect call.
17925   static const MCPhysReg ScratchRegs[] = {
17926     PPC::X12, PPC::LR8, PPC::CTR8, 0
17927   };
17928 
17929   return ScratchRegs;
17930 }
17931 
17932 Register PPCTargetLowering::getExceptionPointerRegister(
17933     const Constant *PersonalityFn) const {
17934   return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
17935 }
17936 
17937 Register PPCTargetLowering::getExceptionSelectorRegister(
17938     const Constant *PersonalityFn) const {
17939   return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
17940 }
17941 
17942 bool
17943 PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
17944                      EVT VT , unsigned DefinedValues) const {
17945   if (VT == MVT::v2i64)
17946     return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
17947 
17948   if (Subtarget.hasVSX())
17949     return true;
17950 
17951   return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
17952 }
17953 
17954 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
17955   if (DisableILPPref || Subtarget.enableMachineScheduler())
17956     return TargetLowering::getSchedulingPreference(N);
17957 
17958   return Sched::ILP;
17959 }
17960 
17961 // Create a fast isel object.
17962 FastISel *
17963 PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
17964                                   const TargetLibraryInfo *LibInfo) const {
17965   return PPC::createFastISel(FuncInfo, LibInfo);
17966 }
17967 
17968 // 'Inverted' means the FMA opcode after negating one multiplicand.
17969 // For example, (fma -a b c) = (fnmsub a b c)
17970 static unsigned invertFMAOpcode(unsigned Opc) {
17971   switch (Opc) {
17972   default:
17973     llvm_unreachable("Invalid FMA opcode for PowerPC!");
17974   case ISD::FMA:
17975     return PPCISD::FNMSUB;
17976   case PPCISD::FNMSUB:
17977     return ISD::FMA;
17978   }
17979 }
17980 
17981 SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
17982                                                 bool LegalOps, bool OptForSize,
17983                                                 NegatibleCost &Cost,
17984                                                 unsigned Depth) const {
17985   if (Depth > SelectionDAG::MaxRecursionDepth)
17986     return SDValue();
17987 
17988   unsigned Opc = Op.getOpcode();
17989   EVT VT = Op.getValueType();
17990   SDNodeFlags Flags = Op.getNode()->getFlags();
17991 
17992   switch (Opc) {
17993   case PPCISD::FNMSUB:
17994     if (!Op.hasOneUse() || !isTypeLegal(VT))
17995       break;
17996 
17997     const TargetOptions &Options = getTargetMachine().Options;
17998     SDValue N0 = Op.getOperand(0);
17999     SDValue N1 = Op.getOperand(1);
18000     SDValue N2 = Op.getOperand(2);
18001     SDLoc Loc(Op);
18002 
18003     NegatibleCost N2Cost = NegatibleCost::Expensive;
18004     SDValue NegN2 =
18005         getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
18006 
18007     if (!NegN2)
18008       return SDValue();
18009 
18010     // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
18011     // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
18012     // These transformations may change sign of zeroes. For example,
18013     // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
18014     if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
18015       // Try to choose the cheaper one to negate.
18016       NegatibleCost N0Cost = NegatibleCost::Expensive;
18017       SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
18018                                            N0Cost, Depth + 1);
18019 
18020       NegatibleCost N1Cost = NegatibleCost::Expensive;
18021       SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
18022                                            N1Cost, Depth + 1);
18023 
18024       if (NegN0 && N0Cost <= N1Cost) {
18025         Cost = std::min(N0Cost, N2Cost);
18026         return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
18027       } else if (NegN1) {
18028         Cost = std::min(N1Cost, N2Cost);
18029         return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
18030       }
18031     }
18032 
18033     // (fneg (fnmsub a b c)) => (fma a b (fneg c))
18034     if (isOperationLegal(ISD::FMA, VT)) {
18035       Cost = N2Cost;
18036       return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
18037     }
18038 
18039     break;
18040   }
18041 
18042   return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
18043                                               Cost, Depth);
18044 }
18045 
18046 // Override to enable LOAD_STACK_GUARD lowering on Linux.
18047 bool PPCTargetLowering::useLoadStackGuardNode(const Module &M) const {
18048   if (M.getStackProtectorGuard() == "tls" || Subtarget.isTargetLinux())
18049     return true;
18050   return TargetLowering::useLoadStackGuardNode(M);
18051 }
18052 
18053 // Override to disable global variable loading on Linux and insert AIX canary
18054 // word declaration.
18055 void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
18056   if (Subtarget.isAIXABI()) {
18057     M.getOrInsertGlobal(AIXSSPCanaryWordName,
18058                         PointerType::getUnqual(M.getContext()));
18059     return;
18060   }
18061   if (!Subtarget.isTargetLinux())
18062     return TargetLowering::insertSSPDeclarations(M);
18063 }
18064 
18065 Value *PPCTargetLowering::getSDagStackGuard(const Module &M) const {
18066   if (Subtarget.isAIXABI())
18067     return M.getGlobalVariable(AIXSSPCanaryWordName);
18068   return TargetLowering::getSDagStackGuard(M);
18069 }
18070 
18071 bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
18072                                      bool ForCodeSize) const {
18073   if (!VT.isSimple() || !Subtarget.hasVSX())
18074     return false;
18075 
18076   switch(VT.getSimpleVT().SimpleTy) {
18077   default:
18078     // For FP types that are currently not supported by PPC backend, return
18079     // false. Examples: f16, f80.
18080     return false;
18081   case MVT::f32:
18082   case MVT::f64: {
18083     if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
18084       // we can materialize all immediatess via XXSPLTI32DX and XXSPLTIDP.
18085       return true;
18086     }
18087     bool IsExact;
18088     APSInt IntResult(16, false);
18089     // The rounding mode doesn't really matter because we only care about floats
18090     // that can be converted to integers exactly.
18091     Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
18092     // For exact values in the range [-16, 15] we can materialize the float.
18093     if (IsExact && IntResult <= 15 && IntResult >= -16)
18094       return true;
18095     return Imm.isZero();
18096   }
18097   case MVT::ppcf128:
18098     return Imm.isPosZero();
18099   }
18100 }
18101 
18102 // For vector shift operation op, fold
18103 // (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
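      // For example, for v4i32: (shl x, (and y, 31)) -> (PPCISD::SHL x, y). This
      // relies on the PPC vector shift instructions interpreting the shift amount
      // modulo the element size, which makes the masking AND redundant.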
18104 static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
18105                                   SelectionDAG &DAG) {
18106   SDValue N0 = N->getOperand(0);
18107   SDValue N1 = N->getOperand(1);
18108   EVT VT = N0.getValueType();
18109   unsigned OpSizeInBits = VT.getScalarSizeInBits();
18110   unsigned Opcode = N->getOpcode();
18111   unsigned TargetOpcode;
18112 
18113   switch (Opcode) {
18114   default:
18115     llvm_unreachable("Unexpected shift operation");
18116   case ISD::SHL:
18117     TargetOpcode = PPCISD::SHL;
18118     break;
18119   case ISD::SRL:
18120     TargetOpcode = PPCISD::SRL;
18121     break;
18122   case ISD::SRA:
18123     TargetOpcode = PPCISD::SRA;
18124     break;
18125   }
18126 
18127   if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
18128       N1->getOpcode() == ISD::AND)
18129     if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
18130       if (Mask->getZExtValue() == OpSizeInBits - 1)
18131         return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
18132 
18133   return SDValue();
18134 }
18135 
18136 SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
18137   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
18138     return Value;
18139 
18140   SDValue N0 = N->getOperand(0);
18141   ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
18142   if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
18143       N0.getOpcode() != ISD::SIGN_EXTEND ||
18144       N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
18145       N->getValueType(0) != MVT::i64)
18146     return SDValue();
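        // The remaining pattern is (shl (sign_extend i32:x), C) producing an i64
        // result, which EXTSWSLI (ISA 3.0) can do in one instruction, e.g.
        // (shl (sext i32 %x to i64), 3) -> (EXTSWSLI %x, 3).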
18147 
18148   // We can't save an operation here if the value is already extended, and
18149   // the existing shift is easier to combine.
18150   SDValue ExtsSrc = N0.getOperand(0);
18151   if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
18152       ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
18153     return SDValue();
18154 
18155   SDLoc DL(N0);
18156   SDValue ShiftBy = SDValue(CN1, 0);
18157   // We want the shift amount to be i32 on the extswsli, but the shift amount
18158   // could come in as an i64.
18159   if (ShiftBy.getValueType() == MVT::i64)
18160     ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
18161 
18162   return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
18163                          ShiftBy);
18164 }
18165 
18166 SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
18167   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
18168     return Value;
18169 
18170   return SDValue();
18171 }
18172 
18173 SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
18174   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
18175     return Value;
18176 
18177   return SDValue();
18178 }
18179 
18180 // Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
18181 // Transform (add X, (zext(sete  Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
18182 // When C is zero, the expression (addi Z, -C) simplifies to Z
18183 // Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
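      // For example, with C = 5:
      //   (add X, (zext (setne Z, 5))) -> (addze X, (addic (addi Z, -5), -1))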
18184 static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
18185                                  const PPCSubtarget &Subtarget) {
18186   if (!Subtarget.isPPC64())
18187     return SDValue();
18188 
18189   SDValue LHS = N->getOperand(0);
18190   SDValue RHS = N->getOperand(1);
18191 
18192   auto isZextOfCompareWithConstant = [](SDValue Op) {
18193     if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
18194         Op.getValueType() != MVT::i64)
18195       return false;
18196 
18197     SDValue Cmp = Op.getOperand(0);
18198     if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
18199         Cmp.getOperand(0).getValueType() != MVT::i64)
18200       return false;
18201 
18202     if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
18203       int64_t NegConstant = 0 - Constant->getSExtValue();
18204       // Due to the limitations of the addi instruction,
18205       // -C is required to be [-32768, 32767].
18206       return isInt<16>(NegConstant);
18207     }
18208 
18209     return false;
18210   };
18211 
18212   bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
18213   bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
18214 
18215   // If there is a pattern, canonicalize a zext operand to the RHS.
18216   if (LHSHasPattern && !RHSHasPattern)
18217     std::swap(LHS, RHS);
18218   else if (!LHSHasPattern && !RHSHasPattern)
18219     return SDValue();
18220 
18221   SDLoc DL(N);
18222   SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
18223   SDValue Cmp = RHS.getOperand(0);
18224   SDValue Z = Cmp.getOperand(0);
18225   auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
18226   int64_t NegConstant = 0 - Constant->getSExtValue();
18227 
18228   switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
18229   default: break;
18230   case ISD::SETNE: {
18231     //                                 when C == 0
18232     //                             --> addze X, (addic Z, -1).carry
18233     //                            /
18234     // add X, (zext(setne Z, C))--
18235     //                            \    when -32768 <= -C <= 32767 && C != 0
18236     //                             --> addze X, (addic (addi Z, -C), -1).carry
18237     SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
18238                               DAG.getConstant(NegConstant, DL, MVT::i64));
18239     SDValue AddOrZ = NegConstant != 0 ? Add : Z;
18240     SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
18241                                AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
18242     return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
18243                        SDValue(Addc.getNode(), 1));
18244     }
18245   case ISD::SETEQ: {
18246     //                                 when C == 0
18247     //                             --> addze X, (subfic Z, 0).carry
18248     //                            /
18249     // add X, (zext(sete  Z, C))--
18250     //                            \    when -32768 <= -C <= 32767 && C != 0
18251     //                             --> addze X, (subfic (addi Z, -C), 0).carry
18252     SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
18253                               DAG.getConstant(NegConstant, DL, MVT::i64));
18254     SDValue AddOrZ = NegConstant != 0 ? Add : Z;
18255     SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
18256                                DAG.getConstant(0, DL, MVT::i64), AddOrZ);
18257     return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
18258                        SDValue(Subc.getNode(), 1));
18259     }
18260   }
18261 
18262   return SDValue();
18263 }
18264 
18265 // Transform
18266 // (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
18267 // (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
18268 // In this case both C1 and C2 must be known constants.
18269 // C1+C2 must fit into a 34-bit signed integer.
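      // For example, (add 8, (MAT_PCREL_ADDR foo+4)) becomes
      // (MAT_PCREL_ADDR foo+12), saving the separate add.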
18270 static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
18271                                           const PPCSubtarget &Subtarget) {
18272   if (!Subtarget.isUsingPCRelativeCalls())
18273     return SDValue();
18274 
18275   // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
18276   // If we find that node, try to cast the Global Address and the Constant.
18277   SDValue LHS = N->getOperand(0);
18278   SDValue RHS = N->getOperand(1);
18279 
18280   if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
18281     std::swap(LHS, RHS);
18282 
18283   if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
18284     return SDValue();
18285 
18286   // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
18287   GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
18288   ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
18289 
18290   // Check that both casts succeeded.
18291   if (!GSDN || !ConstNode)
18292     return SDValue();
18293 
18294   int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
18295   SDLoc DL(GSDN);
18296 
18297   // The signed int offset needs to fit in 34 bits.
18298   if (!isInt<34>(NewOffset))
18299     return SDValue();
18300 
18301   // The new global address is a copy of the old global address except
18302   // that it has the updated Offset.
18303   SDValue GA =
18304       DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
18305                                  NewOffset, GSDN->getTargetFlags());
18306   SDValue MatPCRel =
18307       DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
18308   return MatPCRel;
18309 }
18310 
18311 SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
18312   if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
18313     return Value;
18314 
18315   if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
18316     return Value;
18317 
18318   return SDValue();
18319 }
18320 
18321 // Detect TRUNCATE operations on bitcasts of float128 values.
18322 // What we are looking for here is the situation where we extract a subset
18323 // of bits from a 128-bit float.
18324 // This can be of two forms:
18325 // 1) BITCAST of f128 feeding TRUNCATE
18326 // 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
18327 // The reason this is required is because we do not have a legal i128 type
18328 // and so we want to prevent having to store the f128 and then reload part
18329 // of it.
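      // For example, on little-endian targets:
      //   (i64 (truncate (i128 (bitcast f128:x))))
      //     -> (i64 (extract_vector_elt (v2i64 (bitcast x)), 0))
      // and a preceding (srl ..., 64) selects element 1 instead.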
18330 SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
18331                                            DAGCombinerInfo &DCI) const {
18332   // If we are using CRBits then try that first.
18333   if (Subtarget.useCRBits()) {
18334     // Check if CRBits did anything and return that if it did.
18335     if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
18336       return CRTruncValue;
18337   }
18338 
18339   SDLoc dl(N);
18340   SDValue Op0 = N->getOperand(0);
18341 
18342   // Looking for a truncate of i128 to i64.
18343   if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
18344     return SDValue();
18345 
18346   int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
18347 
18348   // SRL feeding TRUNCATE.
18349   if (Op0.getOpcode() == ISD::SRL) {
18350     ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
18351     // The right shift has to be by 64 bits.
18352     if (!ConstNode || ConstNode->getZExtValue() != 64)
18353       return SDValue();
18354 
18355     // Switch the element number to extract.
18356     EltToExtract = EltToExtract ? 0 : 1;
18357     // Update Op0 past the SRL.
18358     Op0 = Op0.getOperand(0);
18359   }
18360 
18361   // BITCAST feeding a TRUNCATE possibly via SRL.
18362   if (Op0.getOpcode() == ISD::BITCAST &&
18363       Op0.getValueType() == MVT::i128 &&
18364       Op0.getOperand(0).getValueType() == MVT::f128) {
18365     SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
18366     return DCI.DAG.getNode(
18367         ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
18368         DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
18369   }
18370   return SDValue();
18371 }
18372 
18373 SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
18374   SelectionDAG &DAG = DCI.DAG;
18375 
18376   ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
18377   if (!ConstOpOrElement)
18378     return SDValue();
18379 
18380   // An imul is usually smaller than the alternative sequence for a legal type.
18381   if (DAG.getMachineFunction().getFunction().hasMinSize() &&
18382       isOperationLegal(ISD::MUL, N->getValueType(0)))
18383     return SDValue();
18384 
18385   auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
18386     switch (this->Subtarget.getCPUDirective()) {
18387     default:
18388       // TODO: enhance the condition for subtarget before pwr8
18389       return false;
18390     case PPC::DIR_PWR8:
18391       //  type        mul     add    shl
18392       // scalar        4       1      1
18393       // vector        7       2      2
18394       return true;
18395     case PPC::DIR_PWR9:
18396     case PPC::DIR_PWR10:
18397     case PPC::DIR_PWR11:
18398     case PPC::DIR_PWR_FUTURE:
18399       //  type        mul     add    shl
18400       // scalar        5       2      2
18401       // vector        7       2      2
18402 
18403       // The cycle counts of the related operations are shown in the table
18404       // above. Because mul costs 5 (scalar) / 7 (vector) while add/sub/shl
18405       // all cost 2 for both scalar and vector types, the 2-instruction
18406       // patterns (add/sub + shl, cost 4) are always profitable; but the
18407       // 3-instruction pattern (mul x, -(2^N + 1)) => -(add (shl x, N), x)
18408       // costs 6 (sub + add + shl), so we should only do it for vector types.
18409       return IsAddOne && IsNeg ? VT.isVector() : true;
18410     }
18411   };
18412 
18413   EVT VT = N->getValueType(0);
18414   SDLoc DL(N);
18415 
18416   const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
18417   bool IsNeg = MulAmt.isNegative();
18418   APInt MulAmtAbs = MulAmt.abs();
18419 
18420   if ((MulAmtAbs - 1).isPowerOf2()) {
18421     // (mul x, 2^N + 1) => (add (shl x, N), x)
18422     // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
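          // e.g. (mul x, 9)  => (add (shl x, 3), x)
          //      (mul x, -9) => (sub 0, (add (shl x, 3), x))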
18423 
18424     if (!IsProfitable(IsNeg, true, VT))
18425       return SDValue();
18426 
18427     SDValue Op0 = N->getOperand(0);
18428     SDValue Op1 =
18429         DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
18430                     DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
18431     SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
18432 
18433     if (!IsNeg)
18434       return Res;
18435 
18436     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
18437   } else if ((MulAmtAbs + 1).isPowerOf2()) {
18438     // (mul x, 2^N - 1) => (sub (shl x, N), x)
18439     // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
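          // e.g. (mul x, 7)  => (sub (shl x, 3), x)
          //      (mul x, -7) => (sub x, (shl x, 3))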
18440 
18441     if (!IsProfitable(IsNeg, false, VT))
18442       return SDValue();
18443 
18444     SDValue Op0 = N->getOperand(0);
18445     SDValue Op1 =
18446         DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
18447                     DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
18448 
18449     if (!IsNeg)
18450       return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
18451     else
18452       return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
18453 
18454   } else {
18455     return SDValue();
18456   }
18457 }
18458 
18459 // Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
18460 // in combiner since we need to check SD flags and other subtarget features.
18461 SDValue PPCTargetLowering::combineFMALike(SDNode *N,
18462                                           DAGCombinerInfo &DCI) const {
18463   SDValue N0 = N->getOperand(0);
18464   SDValue N1 = N->getOperand(1);
18465   SDValue N2 = N->getOperand(2);
18466   SDNodeFlags Flags = N->getFlags();
18467   EVT VT = N->getValueType(0);
18468   SelectionDAG &DAG = DCI.DAG;
18469   const TargetOptions &Options = getTargetMachine().Options;
18470   unsigned Opc = N->getOpcode();
18471   bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
18472   bool LegalOps = !DCI.isBeforeLegalizeOps();
18473   SDLoc Loc(N);
18474 
18475   if (!isOperationLegal(ISD::FMA, VT))
18476     return SDValue();
18477 
18478   // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
18479   // since (fnmsub a b c)=-0 while c-ab=+0.
18480   if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
18481     return SDValue();
18482 
18483   // (fma (fneg a) b c) => (fnmsub a b c)
18484   // (fnmsub (fneg a) b c) => (fma a b c)
18485   if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
18486     return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
18487 
18488   // (fma a (fneg b) c) => (fnmsub a b c)
18489   // (fnmsub a (fneg b) c) => (fma a b c)
18490   if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
18491     return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
18492 
18493   return SDValue();
18494 }
18495 
18496 bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
18497   // Only duplicate to increase tail-calls for the 64-bit SysV ABIs.
18498   if (!Subtarget.is64BitELFABI())
18499     return false;
18500 
18501   // If not a tail call then no need to proceed.
18502   if (!CI->isTailCall())
18503     return false;
18504 
18505   // If sibling calls have been disabled and tail-calls aren't guaranteed
18506   // there is no reason to duplicate.
18507   auto &TM = getTargetMachine();
18508   if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
18509     return false;
18510 
18511   // Can't tail call a function called indirectly, or if it has variadic args.
18512   const Function *Callee = CI->getCalledFunction();
18513   if (!Callee || Callee->isVarArg())
18514     return false;
18515 
18516   // Make sure the callee and caller calling conventions are eligible for tco.
18517   const Function *Caller = CI->getParent()->getParent();
18518   if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
18519                                            CI->getCallingConv()))
18520       return false;
18521 
18522   // If the function is local then we have a good chance at tail-calling it.
18523   return getTargetMachine().shouldAssumeDSOLocal(Callee);
18524 }
18525 
18526 bool PPCTargetLowering::
18527 isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
18528   const Value *Mask = AndI.getOperand(1);
18529   // If the mask is suitable for andi. or andis., we should sink the and.
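        // For example, a mask of 0x0000FFFF can be matched by andi. and a mask of
        // 0x00FF0000 by andis. (all set bits in the high halfword).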
18530   if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
18531     // Can't handle constants wider than 64-bits.
18532     if (CI->getBitWidth() > 64)
18533       return false;
18534     int64_t ConstVal = CI->getZExtValue();
18535     return isUInt<16>(ConstVal) ||
18536       (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
18537   }
18538 
18539   // For non-constant masks, we can always use the record-form and.
18540   return true;
18541 }
18542 
18543 /// getAddrModeForFlags - Based on the set of address flags, select the most
18544 /// optimal instruction format to match by.
18545 PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
18546   // This is not a node we should be handling here.
18547   if (Flags == PPC::MOF_None)
18548     return PPC::AM_None;
18549   // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
18550   for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
18551     if ((Flags & FlagSet) == FlagSet)
18552       return PPC::AM_DForm;
18553   for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
18554     if ((Flags & FlagSet) == FlagSet)
18555       return PPC::AM_DSForm;
18556   for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
18557     if ((Flags & FlagSet) == FlagSet)
18558       return PPC::AM_DQForm;
18559   for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
18560     if ((Flags & FlagSet) == FlagSet)
18561       return PPC::AM_PrefixDForm;
18562   // If no other forms are selected, return an X-Form as it is the most
18563   // general addressing mode.
18564   return PPC::AM_XForm;
18565 }
18566 
18567 /// Set alignment flags based on whether or not the Frame Index is aligned.
18568 /// Utilized when computing flags for address computation when selecting
18569 /// load and store instructions.
18570 static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
18571                                SelectionDAG &DAG) {
18572   bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
18573   FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
18574   if (!FI)
18575     return;
18576   const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
18577   unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
18578   // If this is (add $FI, $S16Imm), the alignment flags are already set
18579   // based on the immediate. We just need to clear the alignment flags
18580   // if the FI alignment is weaker.
18581   if ((FrameIndexAlign % 4) != 0)
18582     FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
18583   if ((FrameIndexAlign % 16) != 0)
18584     FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
18585   // If the address is a plain FrameIndex, set alignment flags based on
18586   // FI alignment.
18587   if (!IsAdd) {
18588     if ((FrameIndexAlign % 4) == 0)
18589       FlagSet |= PPC::MOF_RPlusSImm16Mult4;
18590     if ((FrameIndexAlign % 16) == 0)
18591       FlagSet |= PPC::MOF_RPlusSImm16Mult16;
18592   }
18593 }
18594 
18595 /// Given a node, compute flags that are used for address computation when
18596 /// selecting load and store instructions. The flags computed are stored in
18597 /// FlagSet. This function takes into account whether the node is a constant,
18598 /// an ADD, an OR, or something else, and computes the address flags accordingly.
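      /// For example, (add %r, 16) with a plain register base sets
      /// PPC::MOF_RPlusSImm16 together with the Mult4/Mult16 alignment flags
      /// (16 is a multiple of both), as well as PPC::MOF_RPlusSImm34.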
18599 static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
18600                                               SelectionDAG &DAG) {
18601   // Set the alignment flags for the node depending on whether the node is
18602   // 4-byte or 16-byte aligned.
18603   auto SetAlignFlagsForImm = [&](uint64_t Imm) {
18604     if ((Imm & 0x3) == 0)
18605       FlagSet |= PPC::MOF_RPlusSImm16Mult4;
18606     if ((Imm & 0xf) == 0)
18607       FlagSet |= PPC::MOF_RPlusSImm16Mult16;
18608   };
18609 
18610   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
18611     // All 32-bit constants can be computed as LIS + Disp.
18612     const APInt &ConstImm = CN->getAPIntValue();
18613     if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
18614       FlagSet |= PPC::MOF_AddrIsSImm32;
18615       SetAlignFlagsForImm(ConstImm.getZExtValue());
18616       setAlignFlagsForFI(N, FlagSet, DAG);
18617     }
18618     if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
18619       FlagSet |= PPC::MOF_RPlusSImm34;
18620     else // Let constant materialization handle large constants.
18621       FlagSet |= PPC::MOF_NotAddNorCst;
18622   } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
18623     // This address can be represented as an addition of:
18624     // - Register + Imm16 (possibly a multiple of 4/16)
18625     // - Register + Imm34
18626     // - Register + PPCISD::Lo
18627     // - Register + Register
18628     // In any case, we won't have to match this as Base + Zero.
18629     SDValue RHS = N.getOperand(1);
18630     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
18631       const APInt &ConstImm = CN->getAPIntValue();
18632       if (ConstImm.isSignedIntN(16)) {
18633         FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
18634         SetAlignFlagsForImm(ConstImm.getZExtValue());
18635         setAlignFlagsForFI(N, FlagSet, DAG);
18636       }
18637       if (ConstImm.isSignedIntN(34))
18638         FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
18639       else
18640         FlagSet |= PPC::MOF_RPlusR; // Register.
18641     } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1))
18642       FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
18643     else
18644       FlagSet |= PPC::MOF_RPlusR;
18645   } else { // The address computation is not a constant or an addition.
18646     setAlignFlagsForFI(N, FlagSet, DAG);
18647     FlagSet |= PPC::MOF_NotAddNorCst;
18648   }
18649 }
18650 
18651 static bool isPCRelNode(SDValue N) {
18652   return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
18653       isValidPCRelNode<ConstantPoolSDNode>(N) ||
18654       isValidPCRelNode<GlobalAddressSDNode>(N) ||
18655       isValidPCRelNode<JumpTableSDNode>(N) ||
18656       isValidPCRelNode<BlockAddressSDNode>(N));
18657 }
18658 
18659 /// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
18660 /// the address flags of the load/store instruction that is to be matched.
18661 unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
18662                                            SelectionDAG &DAG) const {
18663   unsigned FlagSet = PPC::MOF_None;
18664 
18665   // Compute subtarget flags.
18666   if (!Subtarget.hasP9Vector())
18667     FlagSet |= PPC::MOF_SubtargetBeforeP9;
18668   else
18669     FlagSet |= PPC::MOF_SubtargetP9;
18670 
18671   if (Subtarget.hasPrefixInstrs())
18672     FlagSet |= PPC::MOF_SubtargetP10;
18673 
18674   if (Subtarget.hasSPE())
18675     FlagSet |= PPC::MOF_SubtargetSPE;
18676 
18677   // Check if we have a PCRel node and return early.
18678   if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
18679     return FlagSet;
18680 
18681   // If the node is one of the paired load/store intrinsics, compute flags for
18682   // address computation and return early.
18683   unsigned ParentOp = Parent->getOpcode();
18684   if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
18685                                (ParentOp == ISD::INTRINSIC_VOID))) {
18686     unsigned ID = Parent->getConstantOperandVal(1);
18687     if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
18688       SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
18689                              ? Parent->getOperand(2)
18690                              : Parent->getOperand(3);
18691       computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
18692       FlagSet |= PPC::MOF_Vector;
18693       return FlagSet;
18694     }
18695   }
18696 
18697   // Mark this as something we don't want to handle here if it is atomic
18698   // or a pre-increment instruction.
18699   if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
18700     if (LSB->isIndexed())
18701       return PPC::MOF_None;
18702 
18703   // Compute in-memory type flags. This is based on if there are scalars,
18704   // floats or vectors.
18705   const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
18706   assert(MN && "Parent should be a MemSDNode!");
18707   EVT MemVT = MN->getMemoryVT();
18708   unsigned Size = MemVT.getSizeInBits();
18709   if (MemVT.isScalarInteger()) {
18710     assert(Size <= 128 &&
18711            "Not expecting scalar integers larger than 16 bytes!");
18712     if (Size < 32)
18713       FlagSet |= PPC::MOF_SubWordInt;
18714     else if (Size == 32)
18715       FlagSet |= PPC::MOF_WordInt;
18716     else
18717       FlagSet |= PPC::MOF_DoubleWordInt;
18718   } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
18719     if (Size == 128)
18720       FlagSet |= PPC::MOF_Vector;
18721     else if (Size == 256) {
18722       assert(Subtarget.pairedVectorMemops() &&
18723              "256-bit vectors are only available when paired vector memops is "
18724              "enabled!");
18725       FlagSet |= PPC::MOF_Vector;
18726     } else
18727       llvm_unreachable("Not expecting illegal vectors!");
18728   } else { // Floating point type: can be scalar, f128 or vector types.
18729     if (Size == 32 || Size == 64)
18730       FlagSet |= PPC::MOF_ScalarFloat;
18731     else if (MemVT == MVT::f128 || MemVT.isVector())
18732       FlagSet |= PPC::MOF_Vector;
18733     else
18734       llvm_unreachable("Not expecting illegal scalar floats!");
18735   }
18736 
18737   // Compute flags for address computation.
18738   computeFlagsForAddressComputation(N, FlagSet, DAG);
18739 
18740   // Compute type extension flags.
18741   if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
18742     switch (LN->getExtensionType()) {
18743     case ISD::SEXTLOAD:
18744       FlagSet |= PPC::MOF_SExt;
18745       break;
18746     case ISD::EXTLOAD:
18747     case ISD::ZEXTLOAD:
18748       FlagSet |= PPC::MOF_ZExt;
18749       break;
18750     case ISD::NON_EXTLOAD:
18751       FlagSet |= PPC::MOF_NoExt;
18752       break;
18753     }
18754   } else
18755     FlagSet |= PPC::MOF_NoExt;
18756 
18757   // For integers, no extension is the same as zero extension.
18758   // We set the extension mode to zero extension so we don't have
18759   // to add separate entries in AddrModesMap for loads and stores.
18760   if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
18761     FlagSet |= PPC::MOF_ZExt;
18762     FlagSet &= ~PPC::MOF_NoExt;
18763   }
18764 
18765   // If we don't have prefixed instructions, 34-bit constants should be
18766   // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
18767   bool IsNonP1034BitConst =
18768       ((PPC::MOF_RPlusSImm34 | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubtargetP10) &
18769        FlagSet) == PPC::MOF_RPlusSImm34;
18770   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
18771       IsNonP1034BitConst)
18772     FlagSet |= PPC::MOF_NotAddNorCst;
18773 
18774   return FlagSet;
18775 }
18776 
18777 /// SelectForceXFormMode - Given the specified address, force it to be
18778 /// represented as an indexed [r+r] operation (an XForm instruction).
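/// For instance (a sketch of the common cases): an (add %ptr, %idx) address
/// whose RHS is not a 16-bit signed immediate is split so that the X-Form
/// instruction performs the add itself; otherwise the whole address is placed
/// in Base and R0/ZERO is used for the other register operand.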
18779 PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
18780                                                       SDValue &Base,
18781                                                       SelectionDAG &DAG) const {
18782 
18783   PPC::AddrMode Mode = PPC::AM_XForm;
18784   int16_t ForceXFormImm = 0;
18785   if (provablyDisjointOr(DAG, N) &&
18786       !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
18787     Disp = N.getOperand(0);
18788     Base = N.getOperand(1);
18789     return Mode;
18790   }
18791 
18792   // If the address is the result of an add, we will utilize the fact that the
18793   // address calculation includes an implicit add.  However, we can reduce
18794   // register pressure if we do not materialize a constant just for use as the
18795   // index register.  We only get rid of the add if it is not an add of a
18796   // value and a 16-bit signed constant where both operands have a single use.
18797   if (N.getOpcode() == ISD::ADD &&
18798       (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
18799        !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
18800     Disp = N.getOperand(0);
18801     Base = N.getOperand(1);
18802     return Mode;
18803   }
18804 
18805   // Otherwise, use R0 as the base register.
18806   Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18807                          N.getValueType());
18808   Base = N;
18809 
18810   return Mode;
18811 }
18812 
18813 bool PPCTargetLowering::splitValueIntoRegisterParts(
18814     SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
18815     unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
18816   EVT ValVT = Val.getValueType();
18817   // If we are splitting a scalar integer into f64 parts (i.e. so they
18818   // can be placed into VFRC registers), we need to zero extend and
18819   // bitcast the values. This will ensure the value is placed into a
18820   // VSR using direct moves or stack operations as needed.
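  // For instance, an i32 value %v destined for a VFRC register is rewritten
  // as (f64 (bitcast (i64 (zero_extend %v)))) and returned as the single part
  // (an illustrative sketch of the transform below).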
18821   if (PartVT == MVT::f64 &&
18822       (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
18823     Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
18824     Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
18825     Parts[0] = Val;
18826     return true;
18827   }
18828   return false;
18829 }
18830 
18831 SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
18832                                           SelectionDAG &DAG) const {
18833   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18834   TargetLowering::CallLoweringInfo CLI(DAG);
18835   EVT RetVT = Op.getValueType();
18836   Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
18837   SDValue Callee =
18838       DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
18839   bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetTy, false);
18840   TargetLowering::ArgListTy Args;
18841   TargetLowering::ArgListEntry Entry;
18842   for (const SDValue &N : Op->op_values()) {
18843     EVT ArgVT = N.getValueType();
18844     Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
18845     Entry.Node = N;
18846     Entry.Ty = ArgTy;
18847     Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgTy, SignExtend);
18848     Entry.IsZExt = !Entry.IsSExt;
18849     Args.push_back(Entry);
18850   }
18851 
18852   SDValue InChain = DAG.getEntryNode();
18853   SDValue TCChain = InChain;
18854   const Function &F = DAG.getMachineFunction().getFunction();
18855   bool isTailCall =
18856       TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
18857       (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
18858   if (isTailCall)
18859     InChain = TCChain;
18860   CLI.setDebugLoc(SDLoc(Op))
18861       .setChain(InChain)
18862       .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
18863       .setTailCall(isTailCall)
18864       .setSExtResult(SignExtend)
18865       .setZExtResult(!SignExtend)
18866       .setIsPostTypeLegalization(true);
18867   return TLI.LowerCallTo(CLI).first;
18868 }
18869 
18870 SDValue PPCTargetLowering::lowerLibCallBasedOnType(
18871     const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
18872     SelectionDAG &DAG) const {
18873   if (Op.getValueType() == MVT::f32)
18874     return lowerToLibCall(LibCallFloatName, Op, DAG);
18875 
18876   if (Op.getValueType() == MVT::f64)
18877     return lowerToLibCall(LibCallDoubleName, Op, DAG);
18878 
18879   return SDValue();
18880 }
18881 
18882 bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
18883   SDNodeFlags Flags = Op.getNode()->getFlags();
18884   return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
18885          Flags.hasNoNaNs() && Flags.hasNoInfs();
18886 }
18887 
18888 bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
18889   return Op.getNode()->getFlags().hasApproximateFuncs();
18890 }
18891 
18892 bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
18893   return getTargetMachine().Options.PPCGenScalarMASSEntries;
18894 }
18895 
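// Lower a scalar math operation (pow/sin/cos/log/log10/exp below) to the
// corresponding __xl_* MASS entry point. This only fires when the
// PPCGenScalarMASSEntries option is enabled and the node carries the
// approximate-functions fast-math flag; the *_finite variants are chosen when
// the no-NaNs, no-infs and no-signed-zeros flags are also present.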
18896 SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
18897                                             const char *LibCallFloatName,
18898                                             const char *LibCallDoubleNameFinite,
18899                                             const char *LibCallFloatNameFinite,
18900                                             SDValue Op,
18901                                             SelectionDAG &DAG) const {
18902   if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
18903     return SDValue();
18904 
18905   if (!isLowringToMASSFiniteSafe(Op))
18906     return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
18907                                    DAG);
18908 
18909   return lowerLibCallBasedOnType(LibCallFloatNameFinite,
18910                                  LibCallDoubleNameFinite, Op, DAG);
18911 }
18912 
18913 SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
18914   return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
18915                           "__xl_powf_finite", Op, DAG);
18916 }
18917 
18918 SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
18919   return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
18920                           "__xl_sinf_finite", Op, DAG);
18921 }
18922 
18923 SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
18924   return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
18925                           "__xl_cosf_finite", Op, DAG);
18926 }
18927 
18928 SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
18929   return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
18930                           "__xl_logf_finite", Op, DAG);
18931 }
18932 
18933 SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
18934   return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
18935                           "__xl_log10f_finite", Op, DAG);
18936 }
18937 
18938 SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
18939   return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
18940                           "__xl_expf_finite", Op, DAG);
18941 }
18942 
18943 // If we happen to match an aligned D-Form (DS- or DQ-Form), check if the
18944 // Frame Index is adequately aligned. If it is not, reset the mode to X-Form.
18945 static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
18946                                    PPC::AddrMode &Mode) {
18947   if (!isa<FrameIndexSDNode>(N))
18948     return;
18949   if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
18950       (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
18951     Mode = PPC::AM_XForm;
18952 }
18953 
18954 /// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
18955 /// compute the address flags of the node, get the optimal address mode based
18956 /// on the flags, and set the Base and Disp based on the address mode.
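/// For example (illustrative only; see the switch below for all cases): a word
/// load from (add %base, 16) that selects AM_DForm produces Base = %base and
/// Disp = target constant 16, while a PC-relative global on a Power10 target
/// selects AM_PCRel with the entire address in Disp.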
18957 PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
18958                                                        SDValue N, SDValue &Disp,
18959                                                        SDValue &Base,
18960                                                        SelectionDAG &DAG,
18961                                                        MaybeAlign Align) const {
18962   SDLoc DL(Parent);
18963 
18964   // Compute the address flags.
18965   unsigned Flags = computeMOFlags(Parent, N, DAG);
18966 
18967   // Get the optimal address mode based on the Flags.
18968   PPC::AddrMode Mode = getAddrModeForFlags(Flags);
18969 
18970   // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
18971   // Select an X-Form load if it is not.
18972   setXFormForUnalignedFI(N, Flags, Mode);
18973 
18974   // Use the PC-Relative addressing mode if we have a valid PC-Relative node.
18975   if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
18976     assert(Subtarget.isUsingPCRelativeCalls() &&
18977            "Must be using PC-Relative calls when a valid PC-Relative node is "
18978            "present!");
18979     Mode = PPC::AM_PCRel;
18980   }
18981 
18982   // Set Base and Disp accordingly depending on the address mode.
18983   switch (Mode) {
18984   case PPC::AM_DForm:
18985   case PPC::AM_DSForm:
18986   case PPC::AM_DQForm: {
18987     // This is a register plus a 16-bit immediate. The base will be the
18988     // register and the displacement will be the immediate, unless the
18989     // immediate is not sufficiently aligned.
18990     if (Flags & PPC::MOF_RPlusSImm16) {
18991       SDValue Op0 = N.getOperand(0);
18992       SDValue Op1 = N.getOperand(1);
18993       int16_t Imm = Op1->getAsZExtVal();
18994       if (!Align || isAligned(*Align, Imm)) {
18995         Disp = DAG.getSignedTargetConstant(Imm, DL, N.getValueType());
18996         Base = Op0;
18997         if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
18998           Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18999           fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
19000         }
19001         break;
19002       }
19003     }
19004     // This is a register plus the @lo relocation. The base is the register
19005     // and the displacement is the global address.
19006     else if (Flags & PPC::MOF_RPlusLo) {
19007       Disp = N.getOperand(1).getOperand(0); // The global address.
19008       assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
19009              Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
19010              Disp.getOpcode() == ISD::TargetConstantPool ||
19011              Disp.getOpcode() == ISD::TargetJumpTable);
19012       Base = N.getOperand(0);
19013       break;
19014     }
19015     // This is a constant address of at most 32 bits. The base will be
19016     // zero or a load-immediate-shifted value, and the displacement will be
19017     // the low 16 bits of the address.
19018     else if (Flags & PPC::MOF_AddrIsSImm32) {
19019       auto *CN = cast<ConstantSDNode>(N);
19020       EVT CNType = CN->getValueType(0);
19021       uint64_t CNImm = CN->getZExtValue();
19022       // If this address fits entirely in a 16-bit sext immediate field, codegen
19023       // this as "d, 0".
19024       int16_t Imm;
19025       if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
19026         Disp = DAG.getSignedTargetConstant(Imm, DL, CNType);
19027         Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
19028                                CNType);
19029         break;
19030       }
19031       // Handle 32-bit sext immediate with LIS + Addr mode.
19032       if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
19033           (!Align || isAligned(*Align, CNImm))) {
19034         int32_t Addr = (int32_t)CNImm;
19035         // Otherwise, break this down into LIS + Disp.
19036         Disp = DAG.getSignedTargetConstant((int16_t)Addr, DL, MVT::i32);
19037         Base =
19038             DAG.getTargetConstant((Addr - (int16_t)Addr) >> 16, DL, MVT::i32);
19039         uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
19040         Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
19041         break;
19042       }
19043     }
19044     // Otherwise, PPC::MOF_NotAddNorCst is set and the load/store is not foldable.
19045     Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
19046     if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
19047       Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
19048       fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
19049     } else
19050       Base = N;
19051     break;
19052   }
19053   case PPC::AM_PrefixDForm: {
19054     int64_t Imm34 = 0;
19055     unsigned Opcode = N.getOpcode();
19056     if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
19057         (isIntS34Immediate(N.getOperand(1), Imm34))) {
19058       // N is an ADD/OR node whose second operand is a 34-bit signed immediate.
19059       Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
19060       if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
19061         Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
19062       else
19063         Base = N.getOperand(0);
19064     } else if (isIntS34Immediate(N, Imm34)) {
19065       // The address is a 34-bit signed immediate.
19066       Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
19067       Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
19068     }
19069     break;
19070   }
19071   case PPC::AM_PCRel: {
19072     // When selecting PC-Relative instructions, "Base" is not utilized as
19073     // we select the address as [PC+imm].
19074     Disp = N;
19075     break;
19076   }
19077   case PPC::AM_None:
19078     break;
19079   default: { // By default, X-Form is always available to be selected.
19080     // When a frame index is not aligned, we also match by X-Form.
19081     FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
19082     Base = FI ? N : N.getOperand(1);
19083     Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
19084                                 N.getValueType())
19085               : N.getOperand(0);
19086     break;
19087   }
19088   }
19089   return Mode;
19090 }
19091 
19092 CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
19093                                                  bool Return,
19094                                                  bool IsVarArg) const {
19095   switch (CC) {
19096   case CallingConv::Cold:
19097     return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
19098   default:
19099     return CC_PPC64_ELF;
19100   }
19101 }
19102 
19103 bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
19104   return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
19105 }
19106 
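// 128-bit atomicrmw is expanded through the masked-intrinsic path when
// quadword atomics can be inlined; the unsigned wrap/conditional operations
// below are expanded to a cmpxchg loop instead.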
19107 TargetLowering::AtomicExpansionKind
19108 PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
19109   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
19110   if (shouldInlineQuadwordAtomics() && Size == 128)
19111     return AtomicExpansionKind::MaskedIntrinsic;
19112 
19113   switch (AI->getOperation()) {
19114   case AtomicRMWInst::UIncWrap:
19115   case AtomicRMWInst::UDecWrap:
19116   case AtomicRMWInst::USubCond:
19117   case AtomicRMWInst::USubSat:
19118     return AtomicExpansionKind::CmpXChg;
19119   default:
19120     return TargetLowering::shouldExpandAtomicRMWInIR(AI);
19121   }
19122 
19123   llvm_unreachable("unreachable atomicrmw operation");
19124 }
19125 
19126 TargetLowering::AtomicExpansionKind
19127 PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
19128   unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
19129   if (shouldInlineQuadwordAtomics() && Size == 128)
19130     return AtomicExpansionKind::MaskedIntrinsic;
19131   return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
19132 }
19133 
19134 static Intrinsic::ID
19135 getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
19136   switch (BinOp) {
19137   default:
19138     llvm_unreachable("Unexpected AtomicRMW BinOp");
19139   case AtomicRMWInst::Xchg:
19140     return Intrinsic::ppc_atomicrmw_xchg_i128;
19141   case AtomicRMWInst::Add:
19142     return Intrinsic::ppc_atomicrmw_add_i128;
19143   case AtomicRMWInst::Sub:
19144     return Intrinsic::ppc_atomicrmw_sub_i128;
19145   case AtomicRMWInst::And:
19146     return Intrinsic::ppc_atomicrmw_and_i128;
19147   case AtomicRMWInst::Or:
19148     return Intrinsic::ppc_atomicrmw_or_i128;
19149   case AtomicRMWInst::Xor:
19150     return Intrinsic::ppc_atomicrmw_xor_i128;
19151   case AtomicRMWInst::Nand:
19152     return Intrinsic::ppc_atomicrmw_nand_i128;
19153   }
19154 }
19155 
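// Expand a quadword atomicrmw into a call to the matching PPC i128 intrinsic.
// The 128-bit operand is split into two i64 halves, the intrinsic returns the
// old value as an {i64, i64} pair, and the halves are recombined into an i128.
// A rough sketch of the IR emitted for an 'add' (value names are illustrative):
//
//   %incr_lo = trunc i128 %incr to i64
//   %shift   = lshr i128 %incr, 64
//   %incr_hi = trunc i128 %shift to i64
//   %lohi    = call { i64, i64 } @llvm.ppc.atomicrmw.add.i128(
//                  ptr %aligned_addr, i64 %incr_lo, i64 %incr_hi)
//   %lo      = extractvalue { i64, i64 } %lohi, 0
//   %hi      = extractvalue { i64, i64 } %lohi, 1
//   ... both halves are then zero-extended to i128, %hi is shifted left by 64,
//   and the results are or'ed together to form the returned value.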
19156 Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
19157     IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
19158     Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
19159   assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
19160   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
19161   Type *ValTy = Incr->getType();
19162   assert(ValTy->getPrimitiveSizeInBits() == 128);
19163   Type *Int64Ty = Type::getInt64Ty(M->getContext());
19164   Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
19165   Value *IncrHi =
19166       Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
19167   Value *LoHi = Builder.CreateIntrinsic(
19168       getIntrinsicForAtomicRMWBinOp128(AI->getOperation()), {},
19169       {AlignedAddr, IncrLo, IncrHi});
19170   Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
19171   Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
19172   Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
19173   Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
19174   return Builder.CreateOr(
19175       Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
19176 }
19177 
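// Expand a quadword cmpxchg analogously: the compare and new values are split
// into i64 halves, @llvm.ppc.cmpxchg.i128 is called between the leading and
// trailing fences required by the memory ordering, and the returned {i64, i64}
// old value is reassembled into an i128 (a sketch; see the body for details).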
19178 Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
19179     IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
19180     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
19181   assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
19182   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
19183   Type *ValTy = CmpVal->getType();
19184   assert(ValTy->getPrimitiveSizeInBits() == 128);
19185   Function *IntCmpXchg =
19186       Intrinsic::getOrInsertDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
19187   Type *Int64Ty = Type::getInt64Ty(M->getContext());
19188   Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
19189   Value *CmpHi =
19190       Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
19191   Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
19192   Value *NewHi =
19193       Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
19194   emitLeadingFence(Builder, CI, Ord);
19195   Value *LoHi =
19196       Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
19197   emitTrailingFence(Builder, CI, Ord);
19198   Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
19199   Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
19200   Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
19201   Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
19202   return Builder.CreateOr(
19203       Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
19204 }
19205