//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetLowering.h"

namespace llvm {
  class X86Subtarget;
  class X86TargetMachine;

  namespace X86ISD {
  // X86 Specific DAG Nodes
  enum NodeType : unsigned {
    // Start the numbering where the builtin ops leave off.
    FIRST_NUMBER = ISD::BUILTIN_OP_END,

    /// Bit scan forward.
    BSF,
    /// Bit scan reverse.
    BSR,

    /// X86 funnel/double shift i16 instructions. These correspond to
    /// X86::SHLDW and X86::SHRDW instructions which have different amt
    /// modulo rules to generic funnel shifts.
    /// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD.
    FSHL,
    FSHR,
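    // Illustrative reading (an assumption drawn from the note above, not
    // normative):
    //   FSHL(A, B, Amt) ~ high 16 bits of (concat(A, B) << Amt)
    // with Amt masked per the SHLD/SHRD hardware rules rather than reduced
    // modulo 16 as ISD::FSHL would do for i16.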

    /// Bitwise logical AND of floating point values. This corresponds
    /// to X86::ANDPS or X86::ANDPD.
    FAND,

    /// Bitwise logical OR of floating point values. This corresponds
    /// to X86::ORPS or X86::ORPD.
    FOR,

    /// Bitwise logical XOR of floating point values. This corresponds
    /// to X86::XORPS or X86::XORPD.
    FXOR,

    /// Bitwise logical ANDNOT of floating point values. This
    /// corresponds to X86::ANDNPS or X86::ANDNPD.
    FANDN,

    /// These operations represent an abstract X86 call
    /// instruction, which includes a bunch of information. In particular the
    /// operands of these nodes are:
    ///
    ///     #0 - The incoming token chain
    ///     #1 - The callee
    ///     #2 - The number of arg bytes the caller pushes on the stack.
    ///     #3 - The number of arg bytes the callee pops off the stack.
    ///     #4 - The value to pass in AL/AX/EAX (optional)
    ///     #5 - The value to pass in DL/DX/EDX (optional)
    ///
    /// The result values of these nodes are:
    ///
    ///     #0 - The outgoing token chain
    ///     #1 - The first register result value (optional)
    ///     #2 - The second register result value (optional)
    ///
    CALL,
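    // A hedged example of the layout above: for a vararg call on x86-64 SysV,
    // the number of vector registers used is passed in AL, so such a CALL
    // node would carry operand #4 in addition to the chain, callee, and the
    // pushed/popped byte counts.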

    /// Same as call except it adds the NoTrack prefix.
    NT_CALL,

    // Pseudo for an ObjC call that gets emitted together with a special
    // marker instruction.
    CALL_RVMARKER,

    /// X86 compare and logical compare instructions.
    CMP,
    FCMP,
    COMI,
    UCOMI,

    // X86 compare with Intrinsics similar to COMI.
    COMX,
    UCOMX,

    /// X86 bit-test instructions.
    BT,

    /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
    /// operand, usually produced by a CMP instruction.
    SETCC,
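    // Illustrative DAG shape (sketch only, the condition code is a constant
    // operand): materializing (A < B) as an i8 boolean might look like
    //   Flags = X86ISD::CMP A, B
    //   Res   = X86ISD::SETCC CondCode, Flags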

    /// X86 Select
    SELECTS,

    // Same as SETCC except it's materialized with a sbb and the value is all
    // ones or all zeros.
    SETCC_CARRY, // R = carry_bit ? ~0 : 0

    /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
    /// Operands are two FP values to compare; result is a mask of
    /// 0s or 1s. Generally DTRT for C/C++ with NaNs.
    FSETCC,

    /// X86 FP SETCC, similar to above, but with the output as an i1 mask,
    /// and a version with SAE.
    FSETCCM,
    FSETCCM_SAE,

    /// X86 conditional moves. Operand 0 and operand 1 are the two values
    /// to select from. Operand 2 is the condition code, and operand 3 is the
    /// flag operand produced by a CMP or TEST instruction.
    CMOV,
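    // Sketch of the operand order documented above (illustrative only):
    //   Flags = X86ISD::CMP A, B
    //   Res   = X86ISD::CMOV TrueVal, FalseVal, CondCode, Flags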

    /// X86 conditional branches. Operand 0 is the chain operand, operand 1
    /// is the block to branch if condition is true, operand 2 is the
    /// condition code, and operand 3 is the flag operand produced by a CMP
    /// or TEST instruction.
    BRCOND,

    /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
    /// operand 1 is the target address.
    NT_BRIND,

    /// Return with a glue operand. Operand 0 is the chain operand, operand
    /// 1 is the number of bytes of stack to pop.
    RET_GLUE,

    /// Return from interrupt. Operand 0 is the number of bytes to pop.
    IRET,

    /// Repeat fill, corresponds to X86::REP_STOSx.
    REP_STOS,

    /// Repeat move, corresponds to X86::REP_MOVSx.
    REP_MOVS,

    /// On Darwin, this node represents the result of the popl
    /// at function entry, used for PIC code.
    GlobalBaseReg,

    /// A wrapper node for TargetConstantPool, TargetJumpTable,
    /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
    /// MCSymbol and TargetBlockAddress.
    Wrapper,

    /// Special wrapper used under X86-64 PIC mode for RIP
    /// relative displacements.
    WrapperRIP,

    /// Copies a 64-bit value from an MMX vector to the low word
    /// of an XMM vector, with the high word zero filled.
    MOVQ2DQ,

    /// Copies a 64-bit value from the low word of an XMM vector
    /// to an MMX vector.
    MOVDQ2Q,

    /// Copies a 32-bit value from the low word of an MMX
    /// vector to a GPR.
    MMX_MOVD2W,

    /// Copies a GPR into the low 32-bit word of an MMX vector
    /// and zeros out the high word.
    MMX_MOVW2D,

    /// Extract an 8-bit value from a vector and zero extend it to
    /// i32, corresponds to X86::PEXTRB.
    PEXTRB,

    /// Extract a 16-bit value from a vector and zero extend it to
    /// i32, corresponds to X86::PEXTRW.
    PEXTRW,

    /// Insert any element of a 4 x float vector into any element
    /// of a destination 4 x float vector.
    INSERTPS,

    /// Insert the lower 8-bits of a 32-bit value to a vector,
    /// corresponds to X86::PINSRB.
    PINSRB,

    /// Insert the lower 16-bits of a 32-bit value to a vector,
    /// corresponds to X86::PINSRW.
    PINSRW,

    /// Shuffle 16 8-bit values within a vector.
    PSHUFB,

    /// Compute Sum of Absolute Differences.
    PSADBW,
    /// Compute Double Block Packed Sum-Absolute-Differences
    DBPSADBW,

    /// Bitwise Logical AND NOT of Packed FP values.
    ANDNP,

    /// Blend where the selector is an immediate.
    BLENDI,

    /// Dynamic (non-constant condition) vector blend where only the sign bits
    /// of the condition elements are used. This is used to enforce that the
    /// condition mask is not valid for generic VSELECT optimizations. This
    /// is also used to implement the intrinsics.
    /// Operands are in VSELECT order: MASK, TRUE, FALSE
    BLENDV,
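    // Assumed per-element reading of the above (illustrative):
    //   Res[i] = (Mask[i] < 0) ? True[i] : False[i]
    // i.e. only the sign bit of each mask element is tested.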

    /// Combined add and sub on an FP vector.
    ADDSUB,

    // FP vector ops with rounding mode.
    FADD_RND,
    FADDS,
    FADDS_RND,
    FSUB_RND,
    FSUBS,
    FSUBS_RND,
    FMUL_RND,
    FMULS,
    FMULS_RND,
    FDIV_RND,
    FDIVS,
    FDIVS_RND,
    FMAX_SAE,
    FMAXS_SAE,
    FMIN_SAE,
    FMINS_SAE,
    FSQRT_RND,
    FSQRTS,
    FSQRTS_RND,

    // FP vector get exponent.
    FGETEXP,
    FGETEXP_SAE,
    FGETEXPS,
    FGETEXPS_SAE,
    // Extract Normalized Mantissas.
    VGETMANT,
    VGETMANT_SAE,
    VGETMANTS,
    VGETMANTS_SAE,
    // FP Scale.
    SCALEF,
    SCALEF_RND,
    SCALEFS,
    SCALEFS_RND,

    /// Integer horizontal add/sub.
    HADD,
    HSUB,

    /// Floating point horizontal add/sub.
    FHADD,
    FHSUB,

    // Detect Conflicts Within a Vector
    CONFLICT,

    /// Floating point max and min.
    FMAX,
    FMIN,

    /// Commutative FMIN and FMAX.
    FMAXC,
    FMINC,

    /// Scalar intrinsic floating point max and min.
    FMAXS,
    FMINS,

    /// Floating point reciprocal-sqrt and reciprocal approximation.
    /// Note that these typically require refinement
    /// in order to obtain suitable precision.
    FRSQRT,
    FRCP,

    // AVX-512 reciprocal approximations with a little more precision.
    RSQRT14,
    RSQRT14S,
    RCP14,
    RCP14S,

    // Thread Local Storage.
    TLSADDR,

    // Thread Local Storage. A call to get the start address
    // of the TLS block for the current module.
    TLSBASEADDR,

    // Thread Local Storage. A call to an OS-provided thunk at the
    // address from an earlier relocation.
    TLSCALL,

    // Thread Local Storage. A descriptor containing pointers to the
    // code and to the argument used to get the TLS offset for the symbol.
    TLSDESC,

    // Exception Handling helpers.
    EH_RETURN,

    // SjLj exception handling setjmp.
    EH_SJLJ_SETJMP,

    // SjLj exception handling longjmp.
    EH_SJLJ_LONGJMP,

    // SjLj exception handling dispatch.
    EH_SJLJ_SETUP_DISPATCH,

    /// Tail call return. See X86TargetLowering::LowerCall for
    /// the list of operands.
    TC_RETURN,

    // Vector move to low scalar and zero higher vector elements.
    VZEXT_MOVL,

    // Vector integer truncate.
    VTRUNC,
    // Vector integer truncate with unsigned/signed saturation.
    VTRUNCUS,
    VTRUNCS,

    // Masked version of the above. Used when less than a 128-bit result is
    // produced since the mask only applies to the lower elements and can't
    // be represented by a select.
    // SRC, PASSTHRU, MASK
    VMTRUNC,
    VMTRUNCUS,
    VMTRUNCS,

    // Vector FP extend.
    VFPEXT,
    VFPEXT_SAE,
    VFPEXTS,
    VFPEXTS_SAE,

    // Vector FP round.
    VFPROUND,
    // Convert two packed single-precision sources to one packed result.
    VFPROUND2,
    VFPROUND2_RND,
    VFPROUND_RND,
    VFPROUNDS,
    VFPROUNDS_RND,

    // Masked version of above. Used for v2f64->v4f32.
    // SRC, PASSTHRU, MASK
    VMFPROUND,

    // 128-bit vector logical left / right shift
    VSHLDQ,
    VSRLDQ,

    // Vector shift elements
    VSHL,
    VSRL,
    VSRA,

    // Vector variable shift
    VSHLV,
    VSRLV,
    VSRAV,

    // Vector shift elements by immediate
    VSHLI,
    VSRLI,
    VSRAI,

    // Shifts of mask registers.
    KSHIFTL,
    KSHIFTR,

    // Bit rotate by immediate
    VROTLI,
    VROTRI,

    // Vector packed double/float comparison.
    CMPP,

    // Vector integer comparisons.
    PCMPEQ,
    PCMPGT,

    // v8i16 Horizontal minimum and position.
    PHMINPOS,

    MULTISHIFT,

    /// Vector comparison generating mask bits for fp and
    /// integer signed and unsigned data types.
    CMPM,
    // Vector mask comparison generating mask bits for FP values.
    CMPMM,
    // Vector mask comparison with SAE for FP values.
    CMPMM_SAE,

    // Arithmetic operations with FLAGS results.
    ADD,
    SUB,
    ADC,
    SBB,
    SMUL,
    UMUL,
    OR,
    XOR,
    AND,

    // Bit field extract.
    BEXTR,
    BEXTRI,

    // Zero High Bits Starting with Specified Bit Position.
    BZHI,

    // Parallel extract and deposit.
    PDEP,
    PEXT,

    // X86-specific multiply by immediate.
    MUL_IMM,

    // Vector sign bit extraction.
    MOVMSK,

    // Vector bitwise comparisons.
    PTEST,

    // Vector packed fp sign bitwise comparisons.
    TESTP,

    // OR/AND test for masks.
    KORTEST,
    KTEST,

    // ADD for masks.
    KADD,

    // Several flavors of instructions with vector shuffle behaviors.
    // Saturated signed/unsigned packing.
    PACKSS,
    PACKUS,
    // Intra-lane alignr.
    PALIGNR,
    // AVX512 inter-lane alignr.
    VALIGN,
    PSHUFD,
    PSHUFHW,
    PSHUFLW,
    SHUFP,
    // VBMI2 Concat & Shift.
    VSHLD,
    VSHRD,
    VSHLDV,
    VSHRDV,
    // Shuffle Packed Values at 128-bit granularity.
    SHUF128,
    MOVDDUP,
    MOVSHDUP,
    MOVSLDUP,
    MOVLHPS,
    MOVHLPS,
    MOVSD,
    MOVSS,
    MOVSH,
    UNPCKL,
    UNPCKH,
    VPERMILPV,
    VPERMILPI,
    VPERMI,
    VPERM2X128,

    // Variable Permute (VPERM).
    // Res = VPERMV MaskV, V0
    VPERMV,

    // 3-op Variable Permute (VPERMT2).
    // Res = VPERMV3 V0, MaskV, V1
    VPERMV3,

    // Bitwise ternary logic.
    VPTERNLOG,
    // Fix Up Special Packed Float32/64 values.
    VFIXUPIMM,
    VFIXUPIMM_SAE,
    VFIXUPIMMS,
    VFIXUPIMMS_SAE,
    // Range Restriction Calculation For Packed Pairs of Float32/64 values.
    VRANGE,
    VRANGE_SAE,
    VRANGES,
    VRANGES_SAE,
    // Reduce - Perform Reduction Transformation on scalar/packed FP.
    VREDUCE,
    VREDUCE_SAE,
    VREDUCES,
    VREDUCES_SAE,
    // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
    // Also used by the legacy (V)ROUND intrinsics where we mask out the
    // scaling part of the immediate.
    VRNDSCALE,
    VRNDSCALE_SAE,
    VRNDSCALES,
    VRNDSCALES_SAE,
    // Tests types of FP values, for packed types.
    VFPCLASS,
    // Tests types of FP values, for scalar types.
    VFPCLASSS,

    // Broadcast (splat) scalar or element 0 of a vector. If the operand is
    // a vector, this node may change the vector length as part of the splat.
    VBROADCAST,
    // Broadcast mask to vector.
    VBROADCASTM,

    /// SSE4A Extraction and Insertion.
    EXTRQI,
    INSERTQI,

    // XOP arithmetic/logical shifts.
    VPSHA,
    VPSHL,
    // XOP signed/unsigned integer comparisons.
    VPCOM,
    VPCOMU,
    // XOP packed permute bytes.
    VPPERM,
    // XOP two source permutation.
    VPERMIL2,

    // Vector multiply packed unsigned doubleword integers.
    PMULUDQ,
    // Vector multiply packed signed doubleword integers.
    PMULDQ,
    // Vector Multiply Packed Unsigned Integers with Round and Scale.
    MULHRS,

    // Multiply and Add Packed Integers.
    VPMADDUBSW,
    VPMADDWD,

    // AVX512IFMA multiply and add.
    // NOTE: These differ from the instructions themselves: they perform
    // op0 * op1 + op2.
    VPMADD52L,
    VPMADD52H,
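    // e.g. (illustrative, per the note above) VPMADD52L(a, b, c) computes
    // c + lo52(a * b) in each 64-bit lane, while VPMADD52H uses hi52.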

    // VNNI
    VPDPBUSD,
    VPDPBUSDS,
    VPDPWSSD,
    VPDPWSSDS,

    // FMA nodes.
    // We use the target independent ISD::FMA for the non-inverted case.
    FNMADD,
    FMSUB,
    FNMSUB,
    FMADDSUB,
    FMSUBADD,

    // FMA with rounding mode.
    FMADD_RND,
    FNMADD_RND,
    FMSUB_RND,
    FNMSUB_RND,
    FMADDSUB_RND,
    FMSUBADD_RND,

    // AVX512-FP16 complex addition and multiplication.
    VFMADDC,
    VFMADDC_RND,
    VFCMADDC,
    VFCMADDC_RND,

    VFMULC,
    VFMULC_RND,
    VFCMULC,
    VFCMULC_RND,

    VFMADDCSH,
    VFMADDCSH_RND,
    VFCMADDCSH,
    VFCMADDCSH_RND,

    VFMULCSH,
    VFMULCSH_RND,
    VFCMULCSH,
    VFCMULCSH_RND,

    VPDPBSUD,
    VPDPBSUDS,
    VPDPBUUD,
    VPDPBUUDS,
    VPDPBSSD,
    VPDPBSSDS,

    VPDPWSUD,
    VPDPWSUDS,
    VPDPWUSD,
    VPDPWUSDS,
    VPDPWUUD,
    VPDPWUUDS,

    VMINMAX,
    VMINMAX_SAE,
    VMINMAXS,
    VMINMAXS_SAE,

    CVTP2IBS,
    CVTP2IUBS,
    CVTP2IBS_RND,
    CVTP2IUBS_RND,
    CVTTP2IBS,
    CVTTP2IUBS,
    CVTTP2IBS_SAE,
    CVTTP2IUBS_SAE,

    MPSADBW,

    VCVT2PH2BF8,
    VCVT2PH2BF8S,
    VCVT2PH2HF8,
    VCVT2PH2HF8S,
    VCVTBIASPH2BF8,
    VCVTBIASPH2BF8S,
    VCVTBIASPH2HF8,
    VCVTBIASPH2HF8S,
    VCVTPH2BF8,
    VCVTPH2BF8S,
    VCVTPH2HF8,
    VCVTPH2HF8S,
    VMCVTBIASPH2BF8,
    VMCVTBIASPH2BF8S,
    VMCVTBIASPH2HF8,
    VMCVTBIASPH2HF8S,
    VMCVTPH2BF8,
    VMCVTPH2BF8S,
    VMCVTPH2HF8,
    VMCVTPH2HF8S,
    VCVTHF82PH,

    // Compress and expand.
    COMPRESS,
    EXPAND,

    // Bits shuffle
    VPSHUFBITQMB,

    // Convert Unsigned/Signed Integer to Floating-Point Value with rounding
    // mode.
    SINT_TO_FP_RND,
    UINT_TO_FP_RND,
    SCALAR_SINT_TO_FP,
    SCALAR_UINT_TO_FP,
    SCALAR_SINT_TO_FP_RND,
    SCALAR_UINT_TO_FP_RND,

    // Vector float/double to signed/unsigned integer.
    CVTP2SI,
    CVTP2UI,
    CVTP2SI_RND,
    CVTP2UI_RND,
    // Scalar float/double to signed/unsigned integer.
    CVTS2SI,
    CVTS2UI,
    CVTS2SI_RND,
    CVTS2UI_RND,

    // Vector float/double to signed/unsigned integer with truncation.
    CVTTP2SI,
    CVTTP2UI,
    CVTTP2SI_SAE,
    CVTTP2UI_SAE,

    // Saturation-enabled vector float/double to signed/unsigned
    // integer with truncation.
    CVTTP2SIS,
    CVTTP2UIS,
    CVTTP2SIS_SAE,
    CVTTP2UIS_SAE,
    // Masked versions of above. Used for v2f64 to v4i32.
    // SRC, PASSTHRU, MASK
    MCVTTP2SIS,
    MCVTTP2UIS,

    // Scalar float/double to signed/unsigned integer with truncation.
    CVTTS2SI,
    CVTTS2UI,
    CVTTS2SI_SAE,
    CVTTS2UI_SAE,

    // Vector signed/unsigned integer to float/double.
    CVTSI2P,
    CVTUI2P,

    // Scalar float/double to signed/unsigned integer with saturation.
    CVTTS2SIS,
    CVTTS2UIS,
    CVTTS2SIS_SAE,
    CVTTS2UIS_SAE,

    // Masked versions of above. Used for v2f64->v4f32.
    // SRC, PASSTHRU, MASK
    MCVTP2SI,
    MCVTP2UI,
    MCVTTP2SI,
    MCVTTP2UI,
    MCVTSI2P,
    MCVTUI2P,

    // Vector float to bfloat16.
    // Convert packed single data to packed BF16 data
    CVTNEPS2BF16,
    // Masked version of above.
    // SRC, PASSTHRU, MASK
    MCVTNEPS2BF16,

    // Dot product of BF16/FP16 pairs, accumulated into
    // packed single precision.
    DPBF16PS,
    DPFP16PS,

    // A stack checking function call. On Windows it's the _chkstk call.
    DYN_ALLOCA,

    // For allocating variable amounts of stack space when using
    // segmented stacks. Checks whether the current stacklet has enough space,
    // and falls back to heap allocation if not.
    SEG_ALLOCA,

    // For allocating stack space when using stack clash protector.
    // Allocation is performed by block, and each block is probed.
    PROBED_ALLOCA,

    // Memory barriers.
    MFENCE,

    // Get a random integer and indicate whether it is valid in CF.
    RDRAND,

    // Get a NIST SP800-90B & C compliant random integer and
    // indicate whether it is valid in CF.
    RDSEED,

    // Protection keys
    // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
    // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
    // value for ECX.
    RDPKRU,
    WRPKRU,

    // SSE42 string comparisons.
    // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
    // will emit one or two instructions based on which results are used. If
    // both flags and index/mask are used, this allows us to use a single
    // instruction since we won't have to pick an opcode for flags. Instead we
    // can rely on the DAG to CSE everything and decide at isel.
    PCMPISTR,
    PCMPESTR,

    // Test if in transactional execution.
    XTEST,

    // Conversions between float and half-float.
    CVTPS2PH,
    CVTPS2PH_SAE,
    CVTPH2PS,
    CVTPH2PS_SAE,

    // Masked version of above.
    // SRC, RND, PASSTHRU, MASK
    MCVTPS2PH,
    MCVTPS2PH_SAE,

    // Galois Field Arithmetic Instructions
    GF2P8AFFINEINVQB,
    GF2P8AFFINEQB,
    GF2P8MULB,

    // LWP insert record.
    LWPINS,

    // User level wait
    UMWAIT,
    TPAUSE,

    // Enqueue Stores Instructions
    ENQCMD,
    ENQCMDS,

    // For avx512-vp2intersect
    VP2INTERSECT,

    // User level interrupts - testui
    TESTUI,

    // Perform an FP80 add after changing precision control in FPCW.
    FP80_ADD,

    // Conditional compare instructions
    CCMP,
    CTEST,

    /// X86 strict FP compare instructions.
    FIRST_STRICTFP_OPCODE,
    STRICT_FCMP = FIRST_STRICTFP_OPCODE,
    STRICT_FCMPS,

    // Vector packed double/float comparison.
    STRICT_CMPP,

    /// Vector comparison generating mask bits for fp and
    /// integer signed and unsigned data types.
    STRICT_CMPM,

    // Vector float/double to signed/unsigned integer with truncation.
    STRICT_CVTTP2SI,
    STRICT_CVTTP2UI,

    // Vector FP extend.
    STRICT_VFPEXT,

    // Vector FP round.
    STRICT_VFPROUND,

    // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
    // Also used by the legacy (V)ROUND intrinsics where we mask out the
    // scaling part of the immediate.
    STRICT_VRNDSCALE,

    // Vector signed/unsigned integer to float/double.
    STRICT_CVTSI2P,
    STRICT_CVTUI2P,

    // Strict FMA nodes.
    STRICT_FNMADD,
    STRICT_FMSUB,
    STRICT_FNMSUB,

    // Conversions between float and half-float.
    STRICT_CVTPS2PH,
    STRICT_CVTPH2PS,

    // Perform an FP80 add after changing precision control in FPCW.
    STRICT_FP80_ADD,

    /// Floating point max and min.
    STRICT_FMAX,
    STRICT_FMIN,
    LAST_STRICTFP_OPCODE = STRICT_FMIN,

    // Compare and swap.
    FIRST_MEMORY_OPCODE,
    LCMPXCHG_DAG = FIRST_MEMORY_OPCODE,
    LCMPXCHG8_DAG,
    LCMPXCHG16_DAG,
    LCMPXCHG16_SAVE_RBX_DAG,

    /// LOCK-prefixed arithmetic read-modify-write instructions.
    /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
    LADD,
    LSUB,
    LOR,
    LXOR,
    LAND,
    LBTS,
    LBTC,
    LBTR,
    LBTS_RM,
    LBTC_RM,
    LBTR_RM,

    /// RAO arithmetic instructions.
    /// OUTCHAIN = AADD(INCHAIN, PTR, RHS)
    AADD,
    AOR,
    AXOR,
    AAND,

    // Load, scalar_to_vector, and zero extend.
    VZEXT_LOAD,

    // extract_vector_elt, store.
    VEXTRACT_STORE,

    // scalar broadcast from memory.
    VBROADCAST_LOAD,

    // subvector broadcast from memory.
    SUBV_BROADCAST_LOAD,

    // Store FP control word into i16 memory.
    FNSTCW16m,

    // Load FP control word from i16 memory.
    FLDCW16m,

    // Store x87 FPU environment into memory.
    FNSTENVm,

    // Load x87 FPU environment from memory.
    FLDENVm,

    // Custom handling for FP_TO_xINT_SAT
    FP_TO_SINT_SAT,
    FP_TO_UINT_SAT,

    /// This instruction implements FP_TO_SINT with the
    /// integer destination in memory and an FP reg source. This corresponds
    /// to the X86::FIST*m instructions and the rounding mode change stuff. It
    /// has two inputs (token chain and address) and two outputs (int value
    /// and token chain). Memory VT specifies the type to store to.
    FP_TO_INT_IN_MEM,

    /// This instruction implements SINT_TO_FP with the
    /// integer source in memory and FP reg result. This corresponds to the
    /// X86::FILD*m instructions. It has two inputs (token chain and address)
    /// and two outputs (FP value and token chain). The integer source type is
    /// specified by the memory VT.
    FILD,

    /// This instruction implements a fp->int store from FP stack
    /// slots. This corresponds to the fist instruction. It takes a
    /// chain operand, value to store, address, and glue. The memory VT
    /// specifies the type to store as.
    FIST,

    /// This instruction implements an extending load to FP stack slots.
    /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
    /// operand, and ptr to load from. The memory VT specifies the type to
    /// load from.
    FLD,

    /// This instruction implements a truncating store from FP stack
    /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
    /// chain operand, value to store, address, and glue. The memory VT
    /// specifies the type to store as.
    FST,

    /// These instructions grab the address of the next argument
    /// from a va_list. (reads and modifies the va_list in memory)
    VAARG_64,
    VAARG_X32,

    // Vector truncating store with unsigned/signed saturation
    VTRUNCSTOREUS,
    VTRUNCSTORES,
    // Vector truncating masked store with unsigned/signed saturation
    VMTRUNCSTOREUS,
    VMTRUNCSTORES,

    // X86 specific gather and scatter
    MGATHER,
    MSCATTER,

    // Key locker nodes that produce flags.
    AESENC128KL,
    AESDEC128KL,
    AESENC256KL,
    AESDEC256KL,
    AESENCWIDE128KL,
    AESDECWIDE128KL,
    AESENCWIDE256KL,
    AESDECWIDE256KL,

    /// Compare and Add if Condition is Met. Compare the value in operand 2
    /// with the value in memory pointed to by operand 1. If the condition of
    /// operand 4 is met, add the value of operand 3 to m32 and write the new
    /// value back to operand 1. Operand 2 is always updated with the original
    /// value from operand 1.
    CMPCCXADD,

    // Save xmm argument registers to the stack, according to %al. An operator
    // is needed so that this can be expanded with control flow.
    VASTART_SAVE_XMM_REGS,

    // Conditional load/store instructions
    CLOAD,
    CSTORE,
    LAST_MEMORY_OPCODE = CSTORE,
  };
  } // end namespace X86ISD

  namespace X86 {
    /// The current rounding mode is represented in bits 11:10 of the FP
    /// control word (FPCW). These values are the same as the corresponding
    /// constants used for rounding modes in glibc.
    enum RoundingMode {
      rmToNearest   = 0,        // FE_TONEAREST
      rmDownward    = 1 << 10,  // FE_DOWNWARD
      rmUpward      = 2 << 10,  // FE_UPWARD
      rmTowardZero  = 3 << 10,  // FE_TOWARDZERO
      rmMask        = 3 << 10   // Bit mask selecting rounding mode
    };
  }
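
  // Usage sketch (illustrative only, not part of the interface): code that has
  // loaded the x87 control word can classify the rounding mode with the mask:
  //   if ((CW & X86::rmMask) == X86::rmTowardZero) { /* truncation mode */ }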

  /// Define some predicates that are used for node matching.
  namespace X86 {
    /// Returns true if Elt is a constant zero or floating point constant +0.0.
    bool isZeroNode(SDValue Elt);

    /// Returns true if the given offset can fit into the displacement field
    /// of the instruction.
    bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
                                      bool hasSymbolicDisplacement);

    /// Determines whether the callee is required to pop its
    /// own arguments. Callee pop is necessary to support tail calls.
    bool isCalleePop(CallingConv::ID CallingConv,
                     bool is64Bit, bool IsVarArg, bool GuaranteeTCO);

    /// If Op is a constant whose elements are all the same constant or
    /// undefined, return true and return the constant value in \p SplatVal.
    /// If we have undef bits that don't cover an entire element, we treat these
    /// as zero if AllowPartialUndefs is set, else we fail and return false.
    bool isConstantSplat(SDValue Op, APInt &SplatVal,
                         bool AllowPartialUndefs = true);
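
    // Hedged usage sketch (the surrounding names are illustrative):
    //   APInt SplatVal;
    //   if (X86::isConstantSplat(Op, SplatVal) && SplatVal.isSignMask())
    //     ; // treat Op as a splat of the sign-mask constant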

    /// Check if Op is a load operation that could be folded into some other x86
    /// instruction as a memory operand. Example: vpaddd (%rdi), %xmm0, %xmm0.
    bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
                     bool AssumeSingleUse = false);

    /// Check if Op is a load operation that could be folded into a vector splat
    /// instruction as a memory operand. Example: vbroadcastss 16(%rdi), %xmm2.
    bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
                                         const X86Subtarget &Subtarget,
                                         bool AssumeSingleUse = false);

    /// Check if Op is a value that could be used to fold a store into some
    /// other x86 instruction as a memory operand. Ex: pextrb $0, %xmm0, (%rdi).
    bool mayFoldIntoStore(SDValue Op);

    /// Check if Op is an operation that could be folded into a zero extend x86
    /// instruction.
    bool mayFoldIntoZeroExtend(SDValue Op);

    /// True if the target supports the extended frame for async Swift
    /// functions.
    bool isExtendedSwiftAsyncFrameSupported(const X86Subtarget &Subtarget,
                                            const MachineFunction &MF);
  } // end namespace X86

  //===--------------------------------------------------------------------===//
  //  X86 Implementation of the TargetLowering interface
  class X86TargetLowering final : public TargetLowering {
  public:
    explicit X86TargetLowering(const X86TargetMachine &TM,
                               const X86Subtarget &STI);

    unsigned getJumpTableEncoding() const override;
    bool useSoftFloat() const override;

    void markLibCallAttributes(MachineFunction *MF, unsigned CC,
                               ArgListTy &Args) const override;

    MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
      return MVT::i8;
    }

    const MCExpr *
    LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                              const MachineBasicBlock *MBB, unsigned uid,
                              MCContext &Ctx) const override;

    /// Returns relocation base for the given PIC jumptable.
    SDValue getPICJumpTableRelocBase(SDValue Table,
                                     SelectionDAG &DAG) const override;
    const MCExpr *
    getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                 unsigned JTI, MCContext &Ctx) const override;

    /// Return the desired alignment for ByVal aggregate
    /// function arguments in the caller parameter area. For X86, aggregates
    /// that contain SSE vectors are placed at 16-byte boundaries while the
    /// rest are at 4-byte boundaries.
    Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override;

    EVT getOptimalMemOpType(const MemOp &Op,
                            const AttributeList &FuncAttributes) const override;

    /// Returns true if it's safe to use load / store of the
    /// specified type to expand memcpy / memset inline. This is mostly true
    /// for all types except for some special cases. For example, on X86
    /// targets without SSE2 f64 load / store are done with fldl / fstpl which
    /// also does type conversion. Note the specified type doesn't have to be
    /// legal as the hook is used before type legalization.
    bool isSafeMemOpType(MVT VT) const override;

    bool isMemoryAccessFast(EVT VT, Align Alignment) const;

    /// Returns true if the target allows unaligned memory accesses of the
    /// specified type. Returns whether it is "fast" in the last argument.
    bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
                                        MachineMemOperand::Flags Flags,
                                        unsigned *Fast) const override;

    /// This function returns true if the memory access is aligned or if the
    /// target allows this specific unaligned memory access. If the access is
    /// allowed, the optional final parameter returns a relative speed of the
    /// access (as defined by the target).
    bool allowsMemoryAccess(
        LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
        Align Alignment,
        MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
        unsigned *Fast = nullptr) const override;

    bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
                            const MachineMemOperand &MMO,
                            unsigned *Fast) const {
      return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(),
                                MMO.getAlign(), MMO.getFlags(), Fast);
    }

    /// Provide custom lowering hooks for some operations.
    ///
    SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

    /// Replace the results of a node with an illegal result
    /// type with new values built out of custom code.
    ///
    void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                            SelectionDAG &DAG) const override;

    SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

    bool preferABDSToABSWithNSW(EVT VT) const override;

    bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT,
                                   EVT ExtVT) const override;

    bool isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode Cond,
                                           EVT VT) const override;

    /// Return true if the target has native support for
    /// the specified value type and it is 'desirable' to use the type for the
    /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
    /// instruction encodings are longer and some i16 instructions are slow.
    bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;

    /// Return true if the target has native support for the
    /// specified value type and it is 'desirable' to use the type. e.g. On x86
    /// i16 is legal, but undesirable since i16 instruction encodings are longer
    /// and some i16 instructions are slow.
    bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;

    /// Return the preferred fold type: Abs if this is a vector, AddAnd if it's
    /// an integer, None otherwise.
    TargetLowering::AndOrSETCCFoldKind
    isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp,
                                       const SDNode *SETCC0,
                                       const SDNode *SETCC1) const override;

    /// Return the newly negated expression if the cost is not expensive and
    /// set the cost in \p Cost to indicate that if it is cheaper or neutral to
    /// do the negation.
    SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                 bool LegalOperations, bool ForCodeSize,
                                 NegatibleCost &Cost,
                                 unsigned Depth) const override;

    MachineBasicBlock *
    EmitInstrWithCustomInserter(MachineInstr &MI,
                                MachineBasicBlock *MBB) const override;

    /// This method returns the name of a target specific DAG node.
    const char *getTargetNodeName(unsigned Opcode) const override;

    /// Do not merge vector stores after legalization because that may conflict
    /// with x86-specific store splitting optimizations.
    bool mergeStoresAfterLegalization(EVT MemVT) const override {
      return !MemVT.isVector();
    }

    bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                          const MachineFunction &MF) const override;

    bool isCheapToSpeculateCttz(Type *Ty) const override;

    bool isCheapToSpeculateCtlz(Type *Ty) const override;

    bool isCtlzFast() const override;

    bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
      // If the pair to store is a mixture of float and int values, we will
      // save two bitwise instructions and one float-to-int instruction and
      // add one store instruction. There is potentially a more significant
      // benefit because it avoids the float->int domain switch for the input
      // value, so it is more likely a win.
      if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
          (LTy.isInteger() && HTy.isFloatingPoint()))
        return true;
      // If the pair only contains int values, we will save two bitwise
      // instructions and add one store instruction (costing one more
      // store buffer). Since the benefit is less clear, we leave such pairs
      // out until we have a test case proving it is a win.
      return false;
    }

    bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

    bool hasAndNotCompare(SDValue Y) const override;

    bool hasAndNot(SDValue Y) const override;

    bool hasBitTest(SDValue X, SDValue Y) const override;

    bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
        SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
        unsigned OldShiftOpcode, unsigned NewShiftOpcode,
        SelectionDAG &DAG) const override;

    unsigned preferedOpcodeForCmpEqPiecesOfOperand(
        EVT VT, unsigned ShiftOpc, bool MayTransformRotate,
        const APInt &ShiftOrRotateAmt,
        const std::optional<APInt> &AndMask) const override;

    bool preferScalarizeSplat(SDNode *N) const override;

    CondMergingParams
    getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs,
                                  const Value *Rhs) const override;

    bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                           CombineLevel Level) const override;

    bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;

    bool
    shouldTransformSignedTruncationCheck(EVT XVT,
                                         unsigned KeptBits) const override {
      // For vectors, we don't have a preference.
      if (XVT.isVector())
        return false;

      auto VTIsOk = [](EVT VT) -> bool {
        return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
               VT == MVT::i64;
      };

      // We are ok with KeptBitsVT being byte/word/dword, which is what MOVSX
      // supports. XVT will be larger than KeptBitsVT.
      MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
      return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
    }

    ShiftLegalizationStrategy
    preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
                                       unsigned ExpansionFactor) const override;

    bool shouldSplatInsEltVarIndex(EVT VT) const override;

    bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override {
      // Converting to sat variants holds little benefit on X86 as we will just
      // need to saturate the value back using fp arithmetic.
      return Op != ISD::FP_TO_UINT_SAT && isOperationLegalOrCustom(Op, VT);
    }

    bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
      return VT.isScalarInteger();
    }

    /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
    MVT hasFastEqualityCompare(unsigned NumBits) const override;

    /// Return the value type to use for ISD::SETCC.
    EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                           EVT VT) const override;

    bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                      const APInt &DemandedElts,
                                      TargetLoweringOpt &TLO) const override;

    /// Determine which of the bits specified in Mask are known to be either
    /// zero or one and return them in the KnownZero/KnownOne bitsets.
    void computeKnownBitsForTargetNode(const SDValue Op,
                                       KnownBits &Known,
                                       const APInt &DemandedElts,
                                       const SelectionDAG &DAG,
                                       unsigned Depth = 0) const override;

    /// Determine the number of bits in the operation that are sign bits.
    unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                             const APInt &DemandedElts,
                                             const SelectionDAG &DAG,
                                             unsigned Depth) const override;

    bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
                                                 const APInt &DemandedElts,
                                                 APInt &KnownUndef,
                                                 APInt &KnownZero,
                                                 TargetLoweringOpt &TLO,
                                                 unsigned Depth) const override;

    bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op,
                                                    const APInt &DemandedElts,
                                                    unsigned MaskIndex,
                                                    TargetLoweringOpt &TLO,
                                                    unsigned Depth) const;

    bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                           const APInt &DemandedBits,
                                           const APInt &DemandedElts,
                                           KnownBits &Known,
                                           TargetLoweringOpt &TLO,
                                           unsigned Depth) const override;

    SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
        SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
        SelectionDAG &DAG, unsigned Depth) const override;

    bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(
        SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
        bool PoisonOnly, unsigned Depth) const override;

    bool canCreateUndefOrPoisonForTargetNode(
        SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
        bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override;

    bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts,
                                   APInt &UndefElts, const SelectionDAG &DAG,
                                   unsigned Depth) const override;

    bool isTargetCanonicalConstantNode(SDValue Op) const override {
      // Peek through bitcasts/extracts/inserts to see if we have a broadcast
      // vector from memory.
      while (Op.getOpcode() == ISD::BITCAST ||
             Op.getOpcode() == ISD::EXTRACT_SUBVECTOR ||
             (Op.getOpcode() == ISD::INSERT_SUBVECTOR &&
              Op.getOperand(0).isUndef()))
        Op = Op.getOperand(Op.getOpcode() == ISD::INSERT_SUBVECTOR ? 1 : 0);

      return Op.getOpcode() == X86ISD::VBROADCAST_LOAD ||
             TargetLowering::isTargetCanonicalConstantNode(Op);
    }

    const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;

    SDValue unwrapAddress(SDValue N) const override;

    SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;

    bool ExpandInlineAsm(CallInst *CI) const override;

    ConstraintType getConstraintType(StringRef Constraint) const override;

    /// Examine constraint string and operand type and determine a weight value.
    /// The operand object must already have been set up with the operand type.
    ConstraintWeight
      getSingleConstraintMatchWeight(AsmOperandInfo &Info,
                                     const char *Constraint) const override;

    const char *LowerXConstraint(EVT ConstraintVT) const override;

    /// Lower the specified operand into the Ops vector. If it is invalid, don't
    /// add anything to Ops. If hasMemory is true it means one of the asm
    /// constraints of the inline asm instruction being processed is 'm'.
    void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
                                      std::vector<SDValue> &Ops,
                                      SelectionDAG &DAG) const override;

    InlineAsm::ConstraintCode
    getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
      if (ConstraintCode == "v")
        return InlineAsm::ConstraintCode::v;
      return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
    }

    /// Handle Lowering flag assembly outputs.
    SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
                                        const SDLoc &DL,
                                        const AsmOperandInfo &Constraint,
                                        SelectionDAG &DAG) const override;

    /// Given a physical register constraint
    /// (e.g. {edx}), return the register number and the register class for the
    /// register. This should only be used for C_Register constraints. On
    /// error, this returns a register number of 0.
    std::pair<unsigned, const TargetRegisterClass *>
    getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                 StringRef Constraint, MVT VT) const override;

    /// Return true if the addressing mode represented
    /// by AM is legal for this target, for a load/store of the specified type.
    bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
                               Type *Ty, unsigned AS,
                               Instruction *I = nullptr) const override;

    bool addressingModeSupportsTLS(const GlobalValue &GV) const override;

    /// Return true if the specified immediate is a legal
    /// icmp immediate, that is, the target has icmp instructions which can
    /// compare a register against the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalICmpImmediate(int64_t Imm) const override;

    /// Return true if the specified immediate is a legal
    /// add immediate, that is, the target has add instructions which can
    /// add a register and the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalAddImmediate(int64_t Imm) const override;

    bool isLegalStoreImmediate(int64_t Imm) const override;

    /// Add x86-specific opcodes to the default list.
    bool isBinOp(unsigned Opcode) const override;

    /// Returns true if the opcode is a commutative binary operation.
    bool isCommutativeBinOp(unsigned Opcode) const override;

    /// Return true if it's free to truncate a value of
    /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in
    /// register EAX to i16 by referencing its sub-register AX.
    bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
    bool isTruncateFree(EVT VT1, EVT VT2) const override;

    bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;

    /// Return true if any actual instruction that defines a
    /// value of type Ty1 implicitly zero-extends the value to Ty2 in the result
    /// register. This does not necessarily include registers defined in
    /// unknown ways, such as incoming arguments, or copies from unknown
    /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
    /// does not necessarily apply to truncate instructions. e.g. on x86-64,
    /// all instructions that define 32-bit values implicitly zero-extend the
    /// result out to 64 bits.
    bool isZExtFree(Type *Ty1, Type *Ty2) const override;
    bool isZExtFree(EVT VT1, EVT VT2) const override;
    bool isZExtFree(SDValue Val, EVT VT2) const override;

    bool shouldConvertPhiType(Type *From, Type *To) const override;

    /// Return true if folding a vector load into ExtVal (a sign, zero, or any
    /// extend node) is profitable.
    bool isVectorLoadExtDesirable(SDValue) const override;

    /// Return true if an FMA operation is faster than a pair of fmul and fadd
    /// instructions. fmuladd intrinsics will be expanded to FMAs when this
    /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
    bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                    EVT VT) const override;

    /// Return true if it's profitable to narrow operations of type SrcVT to
    /// DestVT. e.g. on x86, it's profitable to narrow from i32 to i8 but not
    /// from i32 to i16.
    bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const override;

    bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
                                              EVT VT) const override;

    /// Given an intrinsic, checks whether on this target the intrinsic will
    /// need to map to a MemIntrinsicNode (touches memory). If this is the
    /// case, it returns true and stores the intrinsic information into the
    /// IntrinsicInfo that was passed to the function.
    bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                            MachineFunction &MF,
                            unsigned Intrinsic) const override;

    /// Returns true if the target can instruction select the
    /// specified FP immediate natively. If false, the legalizer will
    /// materialize the FP immediate as a load from a constant pool.
    bool isFPImmLegal(const APFloat &Imm, EVT VT,
                      bool ForCodeSize) const override;

    /// Targets can use this to indicate that they only support *some*
    /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
    /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
    /// be legal.
    bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;

    /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
    /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
    /// constant pool entry.
    bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;

    /// Returns true if lowering to a jump table is allowed.
    bool areJTsAllowed(const Function *Fn) const override;

    MVT getPreferredSwitchConditionType(LLVMContext &Context,
                                        EVT ConditionVT) const override;

    /// If true, then instruction selection should
    /// seek to shrink the FP constant of the specified type to a smaller type
    /// in order to save space and/or reduce runtime.
    bool ShouldShrinkFPConstant(EVT VT) const override;
1492 
1493     /// Return true if we believe it is correct and profitable to reduce the
1494     /// load node to a smaller type.
1495     bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1496                                EVT NewVT) const override;
1497 
1498     /// Return true if the specified scalar FP type is computed in an SSE
1499     /// register, not on the X87 floating point stack.
1500     bool isScalarFPTypeInSSEReg(EVT VT) const;
1501 
1502     /// Returns true if it is beneficial to convert a load of a constant
1503     /// to just the constant itself.
1504     bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1505                                            Type *Ty) const override;
1506 
1507     bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;
1508 
1509     bool convertSelectOfConstantsToMath(EVT VT) const override;
1510 
1511     bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
1512                                 SDValue C) const override;
1513 
1514     /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1515     /// with this index.
1516     bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1517                                  unsigned Index) const override;
1518 
1519     /// Scalar ops always have equal or better analysis/performance/power than
1520     /// the vector equivalent, so this always makes sense if the scalar op is
1521     /// supported.
1522     bool shouldScalarizeBinop(SDValue) const override;
1523 
1524     /// Extract of a scalar FP value from index 0 of a vector is free.
1525     bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
1526       EVT EltVT = VT.getScalarType();
1527       return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
1528     }
1529 
1530     /// Overflow nodes should get combined/lowered to optimal instructions
1531     /// (they should allow eliminating explicit compares by getting flags from
1532     /// math ops).
1533     bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
1534                               bool MathUsed) const override;
1535 
1536     bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem,
1537                                       unsigned AddrSpace) const override {
1538       // If we can replace more than 2 scalar stores, there will be a reduction
1539       // in instructions even after we add a vector constant load.
1540       return IsZero || NumElem > 2;
1541     }
1542 
1543     bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
1544                                  const SelectionDAG &DAG,
1545                                  const MachineMemOperand &MMO) const override;
1546 
1547     Register getRegisterByName(const char* RegName, LLT VT,
1548                                const MachineFunction &MF) const override;
1549 
1550     /// If a physical register, this returns the register that receives the
1551     /// exception address on entry to an EH pad.
1552     Register
1553     getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1554 
1555     /// If a physical register, this returns the register that receives the
1556     /// exception typeid on entry to a landing pad.
1557     Register
1558     getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1559 
1560     bool needsFixedCatchObjects() const override;
1561 
1562     /// This method returns a target specific FastISel object,
1563     /// or null if the target does not support "fast" ISel.
1564     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1565                              const TargetLibraryInfo *libInfo) const override;
1566 
1567     /// If the target has a standard location for the stack protector cookie,
1568     /// returns the address of that location. Otherwise, returns nullptr.
1569     Value *getIRStackGuard(IRBuilderBase &IRB) const override;
1570 
1571     bool useLoadStackGuardNode(const Module &M) const override;
1572     bool useStackGuardXorFP() const override;
1573     void insertSSPDeclarations(Module &M) const override;
1574     Value *getSDagStackGuard(const Module &M) const override;
1575     Function *getSSPStackGuardCheck(const Module &M) const override;
1576     SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1577                                 const SDLoc &DL) const override;
1578 
    /// Return the location (as a pointer-typed IR value) at which the
    /// SafeStack pointer is stored; on this target it may live at a fixed
    /// offset in a non-standard address space.
1583     Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;
1584 
1585     std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL,
1586                                           SDValue Chain, SDValue Pointer,
1587                                           MachinePointerInfo PtrInfo,
1588                                           Align Alignment,
1589                                           SelectionDAG &DAG) const;
1590 
1591     /// Customize the preferred legalization strategy for certain types.
1592     LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1593 
1594     bool softPromoteHalfType() const override { return true; }
1595 
1596     MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1597                                       EVT VT) const override;
1598 
1599     unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1600                                            CallingConv::ID CC,
1601                                            EVT VT) const override;
1602 
1603     unsigned getVectorTypeBreakdownForCallingConv(
1604         LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
1605         unsigned &NumIntermediates, MVT &RegisterVT) const override;
1606 
1607     bool functionArgumentNeedsConsecutiveRegisters(
1608         Type *Ty, CallingConv::ID CallConv, bool isVarArg,
1609         const DataLayout &DL) const override;
1610 
1611     bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1612 
1613     bool supportSwiftError() const override;
1614 
1615     bool supportKCFIBundles() const override { return true; }
1616 
1617     MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
1618                                 MachineBasicBlock::instr_iterator &MBBI,
1619                                 const TargetInstrInfo *TII) const override;
1620 
1621     bool hasStackProbeSymbol(const MachineFunction &MF) const override;
1622     bool hasInlineStackProbe(const MachineFunction &MF) const override;
1623     StringRef getStackProbeSymbolName(const MachineFunction &MF) const override;
1624 
1625     unsigned getStackProbeSize(const MachineFunction &MF) const;
1626 
1627     bool hasVectorBlend() const override { return true; }
1628 
1629     unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1630 
1631     bool isInlineAsmTargetBranch(const SmallVectorImpl<StringRef> &AsmStrs,
1632                                  unsigned OpNo) const override;
1633 
1634     SDValue visitMaskedLoad(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
1635                             MachineMemOperand *MMO, SDValue &NewLoad,
1636                             SDValue Ptr, SDValue PassThru,
1637                             SDValue Mask) const override;
1638     SDValue visitMaskedStore(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
1639                              MachineMemOperand *MMO, SDValue Ptr, SDValue Val,
1640                              SDValue Mask) const override;
1641 
1642     /// Lower interleaved load(s) into target specific
1643     /// instructions/intrinsics.
1644     bool lowerInterleavedLoad(LoadInst *LI,
1645                               ArrayRef<ShuffleVectorInst *> Shuffles,
1646                               ArrayRef<unsigned> Indices,
1647                               unsigned Factor) const override;
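    // Editor's note on the contract: for Factor == 2, the IR pattern
    //   %wide = load <8 x i32>, ptr %p
    //   %even = shufflevector <8 x i32> %wide, <8 x i32> poison,
    //                         <4 x i32> <i32 0, i32 2, i32 4, i32 6>
    //   %odd  = shufflevector <8 x i32> %wide, <8 x i32> poison,
    //                         <4 x i32> <i32 1, i32 3, i32 5, i32 7>
    // arrives here with Shuffles = {%even, %odd} and Indices = {0, 1}.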
1648 
1649     /// Lower interleaved store(s) into target specific
1650     /// instructions/intrinsics.
1651     bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1652                                unsigned Factor) const override;
1653 
1654     SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr,
1655                                    int JTI, SelectionDAG &DAG) const override;
1656 
1657     Align getPrefLoopAlignment(MachineLoop *ML) const override;
1658 
1659     EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override {
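      // Editor's note (inference): 96 bits is the 80 value bits of x86_fp80
      // rounded up to a 32-bit multiple, matching the type's 12-byte
      // in-memory footprint on 32-bit x86.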
1660       if (VT == MVT::f80)
1661         return EVT::getIntegerVT(Context, 96);
1662       return TargetLoweringBase::getTypeToTransformTo(Context, VT);
1663     }
1664 
1665   protected:
1666     std::pair<const TargetRegisterClass *, uint8_t>
1667     findRepresentativeClass(const TargetRegisterInfo *TRI,
1668                             MVT VT) const override;
1669 
1670   private:
1671     /// Keep a reference to the X86Subtarget around so that we can
1672     /// make the right decision when generating code for different targets.
1673     const X86Subtarget &Subtarget;
1674 
1675     /// A list of legal FP immediates.
1676     std::vector<APFloat> LegalFPImmediates;
1677 
    /// Indicate that this x86 target can instruction-select the specified
    /// FP immediate natively.
1680     void addLegalFPImmediate(const APFloat& Imm) {
1681       LegalFPImmediates.push_back(Imm);
1682     }
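    // Typical use during target setup (editor's example, following the
    // pattern in X86ISelLowering.cpp):
    //   addLegalFPImmediate(APFloat(+0.0f)); // materialized with XORPS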
1683 
1684     SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
1685                             CallingConv::ID CallConv, bool isVarArg,
1686                             const SmallVectorImpl<ISD::InputArg> &Ins,
1687                             const SDLoc &dl, SelectionDAG &DAG,
1688                             SmallVectorImpl<SDValue> &InVals,
1689                             uint32_t *RegMask) const;
1690     SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1691                              const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1692                              const SDLoc &dl, SelectionDAG &DAG,
1693                              const CCValAssign &VA, MachineFrameInfo &MFI,
1694                              unsigned i) const;
1695     SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1696                              const SDLoc &dl, SelectionDAG &DAG,
1697                              const CCValAssign &VA,
1698                              ISD::ArgFlagsTy Flags, bool isByval) const;
1699 
1700     // Call lowering helpers.
1701 
    /// Check whether the call is eligible for tail call optimization.
1704     bool IsEligibleForTailCallOptimization(
1705         TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo,
1706         SmallVectorImpl<CCValAssign> &ArgLocs, bool IsCalleePopSRet) const;
1707     SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1708                                     SDValue Chain, bool IsTailCall,
1709                                     bool Is64Bit, int FPDiff,
1710                                     const SDLoc &dl) const;
1711 
1712     unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1713                                          SelectionDAG &DAG) const;
1714 
1715     unsigned getAddressSpace() const;
1716 
1717     SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned,
1718                             SDValue &Chain) const;
1719     SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const;
1720 
1721     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1722     SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1723     SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1724     SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1725 
1726     unsigned getGlobalWrapperKind(const GlobalValue *GV,
1727                                   const unsigned char OpFlags) const;
1728     SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1729     SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1730     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1731     SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1732     SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1733 
1734     /// Creates target global address or external symbol nodes for calls or
1735     /// other uses.
1736     SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
1737                                   bool ForCall) const;
1738 
1739     SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1740     SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1741     SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1742     SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1743     SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
1744     SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;
1745     SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1746     SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
1747     SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1748     SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
1749     SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1750     SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1751     SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1752     SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1753     SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1754     SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1755     SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1756     SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
1757     SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
1758     SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
1759     SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
1760     SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
1761     SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1762     SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
1763     SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
1764     SDValue LowerGET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
1765     SDValue LowerSET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
1766     SDValue LowerRESET_FPENV(SDValue Op, SelectionDAG &DAG) const;
1767     SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
1768     SDValue LowerWin64_FP_TO_INT128(SDValue Op, SelectionDAG &DAG,
1769                                     SDValue &Chain) const;
1770     SDValue LowerWin64_INT128_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1771     SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
1772     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1773     SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
1774     SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
1775     SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
1776     SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const;
1777 
1778     SDValue
1779     LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1780                          const SmallVectorImpl<ISD::InputArg> &Ins,
1781                          const SDLoc &dl, SelectionDAG &DAG,
1782                          SmallVectorImpl<SDValue> &InVals) const override;
1783     SDValue LowerCall(CallLoweringInfo &CLI,
1784                       SmallVectorImpl<SDValue> &InVals) const override;
1785 
1786     SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1787                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1788                         const SmallVectorImpl<SDValue> &OutVals,
1789                         const SDLoc &dl, SelectionDAG &DAG) const override;
1790 
1791     bool supportSplitCSR(MachineFunction *MF) const override {
1792       return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
1793           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
1794     }
1795     void initializeSplitCSR(MachineBasicBlock *Entry) const override;
1796     void insertCopiesSplitCSR(
1797       MachineBasicBlock *Entry,
1798       const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1799 
1800     bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1801 
1802     bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1803 
1804     EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1805                             ISD::NodeType ExtendKind) const override;
1806 
1807     bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1808                         bool isVarArg,
1809                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1810                         LLVMContext &Context,
1811                         const Type *RetTy) const override;
1812 
1813     const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1814     ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;
1815 
1816     TargetLoweringBase::AtomicExpansionKind
1817     shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
1818     TargetLoweringBase::AtomicExpansionKind
1819     shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
1820     TargetLoweringBase::AtomicExpansionKind
1821     shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
1822     TargetLoweringBase::AtomicExpansionKind
1823     shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const;
1824     void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
1825     void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
1826 
1827     LoadInst *
1828     lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
1829 
1830     bool needsCmpXchgNb(Type *MemType) const;
1831 
1832     void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
1833                                 MachineBasicBlock *DispatchBB, int FI) const;
1834 
1835     // Utility function to emit the low-level va_arg code for X86-64.
1836     MachineBasicBlock *
1837     EmitVAARGWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;
1838 
    /// Utility function to lower a pair of cascaded select (CMOV) pseudo
    /// instructions into a combined branch sequence.
1840     MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
1841                                                  MachineInstr &MI2,
1842                                                  MachineBasicBlock *BB) const;
1843 
1844     MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
1845                                          MachineBasicBlock *BB) const;
1846 
1847     MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
1848                                            MachineBasicBlock *BB) const;
1849 
1850     MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
1851                                             MachineBasicBlock *BB) const;
1852 
1853     MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
1854                                                MachineBasicBlock *BB) const;
1855 
1856     MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
1857                                           MachineBasicBlock *BB) const;
1858 
1859     MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
1860                                                 MachineBasicBlock *BB) const;
1861 
1862     MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
1863                                         MachineBasicBlock *MBB) const;
1864 
1865     void emitSetJmpShadowStackFix(MachineInstr &MI,
1866                                   MachineBasicBlock *MBB) const;
1867 
1868     MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
1869                                          MachineBasicBlock *MBB) const;
1870 
1871     MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
1872                                                  MachineBasicBlock *MBB) const;
1873 
1874     MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
1875                                              MachineBasicBlock *MBB) const;
1876 
1877     MachineBasicBlock *emitPatchableEventCall(MachineInstr &MI,
1878                                               MachineBasicBlock *MBB) const;
1879 
1880     /// Emit flags for the given setcc condition and operands. Also returns the
1881     /// corresponding X86 condition code constant in X86CC.
1882     SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
1883                               const SDLoc &dl, SelectionDAG &DAG,
1884                               SDValue &X86CC) const;
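    // Editor's note: e.g. for (setcc %x, 0, seteq) this would typically emit
    // a TEST of %x against itself and return X86::COND_E in X86CC, letting
    // the caller wire the flags into a SETCC, CMOV, or conditional branch.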
1885 
1886     bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst,
1887                                              SDValue IntPow2) const override;
1888 
1889     /// Check if replacement of SQRT with RSQRT should be disabled.
1890     bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override;
1891 
1892     /// Use rsqrt* to speed up sqrt calculations.
1893     SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
1894                             int &RefinementSteps, bool &UseOneConstNR,
1895                             bool Reciprocal) const override;
1896 
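    // Editor's note: each refinement is a Newton-Raphson step,
    //   x1 = x0 * (1.5 - 0.5 * a * x0 * x0)
    // roughly doubling the ~12 bits of accuracy the hardware RSQRT estimate
    // provides.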
1897     /// Use rcp* to speed up fdiv calculations.
1898     SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
1899                              int &RefinementSteps) const override;
1900 
1901     /// Reassociate floating point divisions into multiply by reciprocal.
1902     unsigned combineRepeatedFPDivisors() const override;
1903 
1904     SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
1905                           SmallVectorImpl<SDNode *> &Created) const override;
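    // Editor's sketch of the classic expansion, e.g. for sdiv i32 %x, 4:
    //   %t = add %x, (srl (sra %x, 31), 30)   ; bias negative dividends
    //   %q = sra %t, 2                        ; shift rounds toward zero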
1906 
1907     SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
1908                     SDValue V2) const;
1909   };
1910 
1911   namespace X86 {
1912     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1913                              const TargetLibraryInfo *libInfo);
1914   } // end namespace X86
1915 
1916   // X86 specific Gather/Scatter nodes.
1917   // The class has the same order of operands as MaskedGatherScatterSDNode for
1918   // convenience.
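  // Operand layout, as implied by the accessors below (editor's summary):
  //   0: chain, 1: pass-thru (gather) / stored value (scatter), 2: mask,
  //   3: base pointer, 4: index, 5: scale.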
1919   class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode {
1920   public:
    // This is intended as a utility and should never be created directly.
1922     X86MaskedGatherScatterSDNode() = delete;
1923     ~X86MaskedGatherScatterSDNode() = delete;
1924 
1925     const SDValue &getBasePtr() const { return getOperand(3); }
1926     const SDValue &getIndex()   const { return getOperand(4); }
1927     const SDValue &getMask()    const { return getOperand(2); }
1928     const SDValue &getScale()   const { return getOperand(5); }
1929 
1930     static bool classof(const SDNode *N) {
1931       return N->getOpcode() == X86ISD::MGATHER ||
1932              N->getOpcode() == X86ISD::MSCATTER;
1933     }
1934   };
1935 
1936   class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
1937   public:
1938     const SDValue &getPassThru() const { return getOperand(1); }
1939 
1940     static bool classof(const SDNode *N) {
1941       return N->getOpcode() == X86ISD::MGATHER;
1942     }
1943   };
1944 
1945   class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
1946   public:
1947     const SDValue &getValue() const { return getOperand(1); }
1948 
1949     static bool classof(const SDNode *N) {
1950       return N->getOpcode() == X86ISD::MSCATTER;
1951     }
1952   };
1953 
1954   /// Generate unpacklo/unpackhi shuffle mask.
1955   void createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, bool Lo,
1956                                bool Unary);
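  // Editor's example: for v4i32, Lo && !Unary yields <0, 4, 1, 5> (the
  // unpcklps interleave), while Lo && Unary yields <0, 0, 1, 1>.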
1957 
1958   /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
1959   /// imposed by AVX and specific to the unary pattern. Example:
1960   /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
1961   /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
1962   void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);
1963 
1964 } // end namespace llvm
1965 
1966 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
1967