1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the interfaces that X86 uses to lower LLVM code into a 10 // selection DAG. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H 15 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H 16 17 #include "llvm/CodeGen/MachineFunction.h" 18 #include "llvm/CodeGen/TargetLowering.h" 19 20 namespace llvm { 21 class X86Subtarget; 22 class X86TargetMachine; 23 24 namespace X86ISD { 25 // X86 Specific DAG Nodes 26 enum NodeType : unsigned { 27 // Start the numbering where the builtin ops leave off. 28 FIRST_NUMBER = ISD::BUILTIN_OP_END, 29 30 /// Bit scan forward. 31 BSF, 32 /// Bit scan reverse. 33 BSR, 34 35 /// X86 funnel/double shift i16 instructions. These correspond to 36 /// X86::SHLDW and X86::SHRDW instructions which have different amt 37 /// modulo rules to generic funnel shifts. 38 /// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD. 39 FSHL, 40 FSHR, 41 42 /// Bitwise logical AND of floating point values. This corresponds 43 /// to X86::ANDPS or X86::ANDPD. 44 FAND, 45 46 /// Bitwise logical OR of floating point values. This corresponds 47 /// to X86::ORPS or X86::ORPD. 48 FOR, 49 50 /// Bitwise logical XOR of floating point values. This corresponds 51 /// to X86::XORPS or X86::XORPD. 52 FXOR, 53 54 /// Bitwise logical ANDNOT of floating point values. This 55 /// corresponds to X86::ANDNPS or X86::ANDNPD. 56 FANDN, 57 58 /// These operations represent an abstract X86 call 59 /// instruction, which includes a bunch of information. In particular the 60 /// operands of these node are: 61 /// 62 /// #0 - The incoming token chain 63 /// #1 - The callee 64 /// #2 - The number of arg bytes the caller pushes on the stack. 65 /// #3 - The number of arg bytes the callee pops off the stack. 66 /// #4 - The value to pass in AL/AX/EAX (optional) 67 /// #5 - The value to pass in DL/DX/EDX (optional) 68 /// 69 /// The result values of these nodes are: 70 /// 71 /// #0 - The outgoing token chain 72 /// #1 - The first register result value (optional) 73 /// #2 - The second register result value (optional) 74 /// 75 CALL, 76 77 /// Same as call except it adds the NoTrack prefix. 78 NT_CALL, 79 80 // Pseudo for a OBJC call that gets emitted together with a special 81 // marker instruction. 82 CALL_RVMARKER, 83 84 /// X86 compare and logical compare instructions. 85 CMP, 86 FCMP, 87 COMI, 88 UCOMI, 89 90 // X86 compare with Intrinsics similar to COMI. 91 COMX, 92 UCOMX, 93 94 /// X86 bit-test instructions. 95 BT, 96 97 /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS 98 /// operand, usually produced by a CMP instruction. 99 SETCC, 100 101 /// X86 Select 102 SELECTS, 103 104 // Same as SETCC except it's materialized with a sbb and the value is all 105 // one's or all zero's. 106 SETCC_CARRY, // R = carry_bit ? ~0 : 0 107 108 /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD. 109 /// Operands are two FP values to compare; result is a mask of 110 /// 0s or 1s. Generally DTRT for C/C++ with NaNs. 
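/// (The comparison predicate is assumed to be carried as an immediate
/// operand using the SSE CMPSS/CMPSD encoding, e.g. 0 = EQ, 1 = LT,
/// 2 = LE, 3 = UNORD, 4 = NEQ.)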
111 FSETCC, 112 113 /// X86 FP SETCC, similar to above, but with output as an i1 mask 114 /// and a version with SAE. 115 FSETCCM, 116 FSETCCM_SAE, 117 118 /// X86 conditional moves. Operand 0 and operand 1 are the two values 119 /// to select from. Operand 2 is the condition code, and operand 3 is the 120 /// flag operand produced by a CMP or TEST instruction. 121 CMOV, 122 123 /// X86 conditional branches. Operand 0 is the chain operand, operand 1 124 /// is the block to branch if condition is true, operand 2 is the 125 /// condition code, and operand 3 is the flag operand produced by a CMP 126 /// or TEST instruction. 127 BRCOND, 128 129 /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and 130 /// operand 1 is the target address. 131 NT_BRIND, 132 133 /// Return with a glue operand. Operand 0 is the chain operand, operand 134 /// 1 is the number of bytes of stack to pop. 135 RET_GLUE, 136 137 /// Return from interrupt. Operand 0 is the number of bytes to pop. 138 IRET, 139 140 /// Repeat fill, corresponds to X86::REP_STOSx. 141 REP_STOS, 142 143 /// Repeat move, corresponds to X86::REP_MOVSx. 144 REP_MOVS, 145 146 /// On Darwin, this node represents the result of the popl 147 /// at function entry, used for PIC code. 148 GlobalBaseReg, 149 150 /// A wrapper node for TargetConstantPool, TargetJumpTable, 151 /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress, 152 /// MCSymbol and TargetBlockAddress. 153 Wrapper, 154 155 /// Special wrapper used under X86-64 PIC mode for RIP 156 /// relative displacements. 157 WrapperRIP, 158 159 /// Copies a 64-bit value from an MMX vector to the low word 160 /// of an XMM vector, with the high word zero filled. 161 MOVQ2DQ, 162 163 /// Copies a 64-bit value from the low word of an XMM vector 164 /// to an MMX vector. 165 MOVDQ2Q, 166 167 /// Copies a 32-bit value from the low word of an MMX 168 /// vector to a GPR. 169 MMX_MOVD2W, 170 171 /// Copies a GPR into the low 32-bit word of an MMX vector 172 /// and zeroes out the high word. 173 MMX_MOVW2D, 174 175 /// Extract an 8-bit value from a vector and zero extend it to 176 /// i32, corresponds to X86::PEXTRB. 177 PEXTRB, 178 179 /// Extract a 16-bit value from a vector and zero extend it to 180 /// i32, corresponds to X86::PEXTRW. 181 PEXTRW, 182 183 /// Insert any element of a 4 x float vector into any element 184 /// of a destination 4 x float vector. 185 INSERTPS, 186 187 /// Insert the lower 8 bits of a 32-bit value into a vector, 188 /// corresponds to X86::PINSRB. 189 PINSRB, 190 191 /// Insert the lower 16 bits of a 32-bit value into a vector, 192 /// corresponds to X86::PINSRW. 193 PINSRW, 194 195 /// Shuffle 16 8-bit values within a vector. 196 PSHUFB, 197 198 /// Compute Sum of Absolute Differences. 199 PSADBW, 200 /// Compute Double Block Packed Sum-Absolute-Differences. 201 DBPSADBW, 202 203 /// Bitwise Logical AND NOT of Packed FP values. 204 ANDNP, 205 206 /// Blend where the selector is an immediate. 207 BLENDI, 208 209 /// Dynamic (non-constant condition) vector blend where only the sign bits 210 /// of the condition elements are used. This is used to enforce that the 211 /// condition mask is not valid for generic VSELECT optimizations. This 212 /// is also used to implement the intrinsics. 213 /// Operands are in VSELECT order: MASK, TRUE, FALSE 214 BLENDV, 215 216 /// Combined add and sub on an FP vector. 217 ADDSUB, 218 219 // FP vector ops with rounding mode.
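// (These are assumed to mirror the AVX-512 embedded-rounding forms: the
// *_RND variants take an extra operand selecting the rounding mode, the
// *_SAE variants suppress exceptions, and the scalar *S forms operate on
// the lowest element only.)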
220 FADD_RND, 221 FADDS, 222 FADDS_RND, 223 FSUB_RND, 224 FSUBS, 225 FSUBS_RND, 226 FMUL_RND, 227 FMULS, 228 FMULS_RND, 229 FDIV_RND, 230 FDIVS, 231 FDIVS_RND, 232 FMAX_SAE, 233 FMAXS_SAE, 234 FMIN_SAE, 235 FMINS_SAE, 236 FSQRT_RND, 237 FSQRTS, 238 FSQRTS_RND, 239 240 // FP vector get exponent. 241 FGETEXP, 242 FGETEXP_SAE, 243 FGETEXPS, 244 FGETEXPS_SAE, 245 // Extract Normalized Mantissas. 246 VGETMANT, 247 VGETMANT_SAE, 248 VGETMANTS, 249 VGETMANTS_SAE, 250 // FP Scale. 251 SCALEF, 252 SCALEF_RND, 253 SCALEFS, 254 SCALEFS_RND, 255 256 /// Integer horizontal add/sub. 257 HADD, 258 HSUB, 259 260 /// Floating point horizontal add/sub. 261 FHADD, 262 FHSUB, 263 264 // Detect Conflicts Within a Vector 265 CONFLICT, 266 267 /// Floating point max and min. 268 FMAX, 269 FMIN, 270 271 /// Commutative FMIN and FMAX. 272 FMAXC, 273 FMINC, 274 275 /// Scalar intrinsic floating point max and min. 276 FMAXS, 277 FMINS, 278 279 /// Floating point reciprocal-sqrt and reciprocal approximation. 280 /// Note that these typically require refinement 281 /// in order to obtain suitable precision. 282 FRSQRT, 283 FRCP, 284 285 // AVX-512 reciprocal approximations with a little more precision. 286 RSQRT14, 287 RSQRT14S, 288 RCP14, 289 RCP14S, 290 291 // Thread Local Storage. 292 TLSADDR, 293 294 // Thread Local Storage. A call to get the start address 295 // of the TLS block for the current module. 296 TLSBASEADDR, 297 298 // Thread Local Storage. When calling to an OS provided 299 // thunk at the address from an earlier relocation. 300 TLSCALL, 301 302 // Thread Local Storage. A descriptor containing pointer to 303 // code and to argument to get the TLS offset for the symbol. 304 TLSDESC, 305 306 // Exception Handling helpers. 307 EH_RETURN, 308 309 // SjLj exception handling setjmp. 310 EH_SJLJ_SETJMP, 311 312 // SjLj exception handling longjmp. 313 EH_SJLJ_LONGJMP, 314 315 // SjLj exception handling dispatch. 316 EH_SJLJ_SETUP_DISPATCH, 317 318 /// Tail call return. See X86TargetLowering::LowerCall for 319 /// the list of operands. 320 TC_RETURN, 321 322 // Vector move to low scalar and zero higher vector elements. 323 VZEXT_MOVL, 324 325 // Vector integer truncate. 326 VTRUNC, 327 // Vector integer truncate with unsigned/signed saturation. 328 VTRUNCUS, 329 VTRUNCS, 330 331 // Masked version of the above. Used when less than a 128-bit result is 332 // produced since the mask only applies to the lower elements and can't 333 // be represented by a select. 334 // SRC, PASSTHRU, MASK 335 VMTRUNC, 336 VMTRUNCUS, 337 VMTRUNCS, 338 339 // Vector FP extend. 340 VFPEXT, 341 VFPEXT_SAE, 342 VFPEXTS, 343 VFPEXTS_SAE, 344 345 // Vector FP round. 346 VFPROUND, 347 // Convert TWO packed single data to one packed data 348 VFPROUND2, 349 VFPROUND2_RND, 350 VFPROUND_RND, 351 VFPROUNDS, 352 VFPROUNDS_RND, 353 354 // Masked version of above. Used for v2f64->v4f32. 355 // SRC, PASSTHRU, MASK 356 VMFPROUND, 357 358 // 128-bit vector logical left / right shift 359 VSHLDQ, 360 VSRLDQ, 361 362 // Vector shift elements 363 VSHL, 364 VSRL, 365 VSRA, 366 367 // Vector variable shift 368 VSHLV, 369 VSRLV, 370 VSRAV, 371 372 // Vector shift elements by immediate 373 VSHLI, 374 VSRLI, 375 VSRAI, 376 377 // Shifts of mask registers. 378 KSHIFTL, 379 KSHIFTR, 380 381 // Bit rotate by immediate 382 VROTLI, 383 VROTRI, 384 385 // Vector packed double/float comparison. 386 CMPP, 387 388 // Vector integer comparisons. 389 PCMPEQ, 390 PCMPGT, 391 392 // v8i16 Horizontal minimum and position. 
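// (PHMINPOSUW semantics: the unsigned minimum word is returned in element
// 0, its index in the low bits of element 1, and the remaining elements
// are zeroed.)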
393 PHMINPOS, 394 395 MULTISHIFT, 396 397 /// Vector comparison generating mask bits for fp and 398 /// integer signed and unsigned data types. 399 CMPM, 400 // Vector mask comparison generating mask bits for FP values. 401 CMPMM, 402 // Vector mask comparison with SAE for FP values. 403 CMPMM_SAE, 404 405 // Arithmetic operations with FLAGS results. 406 ADD, 407 SUB, 408 ADC, 409 SBB, 410 SMUL, 411 UMUL, 412 OR, 413 XOR, 414 AND, 415 416 // Bit field extract. 417 BEXTR, 418 BEXTRI, 419 420 // Zero High Bits Starting with Specified Bit Position. 421 BZHI, 422 423 // Parallel extract and deposit. 424 PDEP, 425 PEXT, 426 427 // X86-specific multiply by immediate. 428 MUL_IMM, 429 430 // Vector sign bit extraction. 431 MOVMSK, 432 433 // Vector bitwise comparisons. 434 PTEST, 435 436 // Vector packed fp sign bitwise comparisons. 437 TESTP, 438 439 // OR/AND test for masks. 440 KORTEST, 441 KTEST, 442 443 // ADD for masks. 444 KADD, 445 446 // Several flavors of instructions with vector shuffle behaviors. 447 // Saturated signed/unnsigned packing. 448 PACKSS, 449 PACKUS, 450 // Intra-lane alignr. 451 PALIGNR, 452 // AVX512 inter-lane alignr. 453 VALIGN, 454 PSHUFD, 455 PSHUFHW, 456 PSHUFLW, 457 SHUFP, 458 // VBMI2 Concat & Shift. 459 VSHLD, 460 VSHRD, 461 VSHLDV, 462 VSHRDV, 463 // Shuffle Packed Values at 128-bit granularity. 464 SHUF128, 465 MOVDDUP, 466 MOVSHDUP, 467 MOVSLDUP, 468 MOVLHPS, 469 MOVHLPS, 470 MOVSD, 471 MOVSS, 472 MOVSH, 473 UNPCKL, 474 UNPCKH, 475 VPERMILPV, 476 VPERMILPI, 477 VPERMI, 478 VPERM2X128, 479 480 // Variable Permute (VPERM). 481 // Res = VPERMV MaskV, V0 482 VPERMV, 483 484 // 3-op Variable Permute (VPERMT2). 485 // Res = VPERMV3 V0, MaskV, V1 486 VPERMV3, 487 488 // Bitwise ternary logic. 489 VPTERNLOG, 490 // Fix Up Special Packed Float32/64 values. 491 VFIXUPIMM, 492 VFIXUPIMM_SAE, 493 VFIXUPIMMS, 494 VFIXUPIMMS_SAE, 495 // Range Restriction Calculation For Packed Pairs of Float32/64 values. 496 VRANGE, 497 VRANGE_SAE, 498 VRANGES, 499 VRANGES_SAE, 500 // Reduce - Perform Reduction Transformation on scalar\packed FP. 501 VREDUCE, 502 VREDUCE_SAE, 503 VREDUCES, 504 VREDUCES_SAE, 505 // RndScale - Round FP Values To Include A Given Number Of Fraction Bits. 506 // Also used by the legacy (V)ROUND intrinsics where we mask out the 507 // scaling part of the immediate. 508 VRNDSCALE, 509 VRNDSCALE_SAE, 510 VRNDSCALES, 511 VRNDSCALES_SAE, 512 // Tests Types Of a FP Values for packed types. 513 VFPCLASS, 514 // Tests Types Of a FP Values for scalar types. 515 VFPCLASSS, 516 517 // Broadcast (splat) scalar or element 0 of a vector. If the operand is 518 // a vector, this node may change the vector length as part of the splat. 519 VBROADCAST, 520 // Broadcast mask to vector. 521 VBROADCASTM, 522 523 /// SSE4A Extraction and Insertion. 524 EXTRQI, 525 INSERTQI, 526 527 // XOP arithmetic/logical shifts. 528 VPSHA, 529 VPSHL, 530 // XOP signed/unsigned integer comparisons. 531 VPCOM, 532 VPCOMU, 533 // XOP packed permute bytes. 534 VPPERM, 535 // XOP two source permutation. 536 VPERMIL2, 537 538 // Vector multiply packed unsigned doubleword integers. 539 PMULUDQ, 540 // Vector multiply packed signed doubleword integers. 541 PMULDQ, 542 // Vector Multiply Packed UnsignedIntegers with Round and Scale. 543 MULHRS, 544 545 // Multiply and Add Packed Integers. 546 VPMADDUBSW, 547 VPMADDWD, 548 549 // AVX512IFMA multiply and add. 550 // NOTE: These are different than the instruction and perform 551 // op0 x op1 + op2. 
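// (Roughly: the multiply is performed on the low 52 bits of each 64-bit
// element; VPMADD52L accumulates the low 52 bits of the 104-bit product
// into op2, VPMADD52H the high 52 bits.)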
552 VPMADD52L, 553 VPMADD52H, 554 555 // VNNI 556 VPDPBUSD, 557 VPDPBUSDS, 558 VPDPWSSD, 559 VPDPWSSDS, 560 561 // FMA nodes. 562 // We use the target independent ISD::FMA for the non-inverted case. 563 FNMADD, 564 FMSUB, 565 FNMSUB, 566 FMADDSUB, 567 FMSUBADD, 568 569 // FMA with rounding mode. 570 FMADD_RND, 571 FNMADD_RND, 572 FMSUB_RND, 573 FNMSUB_RND, 574 FMADDSUB_RND, 575 FMSUBADD_RND, 576 577 // AVX512-FP16 complex addition and multiplication. 578 VFMADDC, 579 VFMADDC_RND, 580 VFCMADDC, 581 VFCMADDC_RND, 582 583 VFMULC, 584 VFMULC_RND, 585 VFCMULC, 586 VFCMULC_RND, 587 588 VFMADDCSH, 589 VFMADDCSH_RND, 590 VFCMADDCSH, 591 VFCMADDCSH_RND, 592 593 VFMULCSH, 594 VFMULCSH_RND, 595 VFCMULCSH, 596 VFCMULCSH_RND, 597 598 VPDPBSUD, 599 VPDPBSUDS, 600 VPDPBUUD, 601 VPDPBUUDS, 602 VPDPBSSD, 603 VPDPBSSDS, 604 605 VPDPWSUD, 606 VPDPWSUDS, 607 VPDPWUSD, 608 VPDPWUSDS, 609 VPDPWUUD, 610 VPDPWUUDS, 611 612 VMINMAX, 613 VMINMAX_SAE, 614 VMINMAXS, 615 VMINMAXS_SAE, 616 617 CVTP2IBS, 618 CVTP2IUBS, 619 CVTP2IBS_RND, 620 CVTP2IUBS_RND, 621 CVTTP2IBS, 622 CVTTP2IUBS, 623 CVTTP2IBS_SAE, 624 CVTTP2IUBS_SAE, 625 626 MPSADBW, 627 628 VCVT2PH2BF8, 629 VCVT2PH2BF8S, 630 VCVT2PH2HF8, 631 VCVT2PH2HF8S, 632 VCVTBIASPH2BF8, 633 VCVTBIASPH2BF8S, 634 VCVTBIASPH2HF8, 635 VCVTBIASPH2HF8S, 636 VCVTPH2BF8, 637 VCVTPH2BF8S, 638 VCVTPH2HF8, 639 VCVTPH2HF8S, 640 VMCVTBIASPH2BF8, 641 VMCVTBIASPH2BF8S, 642 VMCVTBIASPH2HF8, 643 VMCVTBIASPH2HF8S, 644 VMCVTPH2BF8, 645 VMCVTPH2BF8S, 646 VMCVTPH2HF8, 647 VMCVTPH2HF8S, 648 VCVTHF82PH, 649 650 // Compress and expand. 651 COMPRESS, 652 EXPAND, 653 654 // Bits shuffle 655 VPSHUFBITQMB, 656 657 // Convert Unsigned/Integer to Floating-Point Value with rounding mode. 658 SINT_TO_FP_RND, 659 UINT_TO_FP_RND, 660 SCALAR_SINT_TO_FP, 661 SCALAR_UINT_TO_FP, 662 SCALAR_SINT_TO_FP_RND, 663 SCALAR_UINT_TO_FP_RND, 664 665 // Vector float/double to signed/unsigned integer. 666 CVTP2SI, 667 CVTP2UI, 668 CVTP2SI_RND, 669 CVTP2UI_RND, 670 // Scalar float/double to signed/unsigned integer. 671 CVTS2SI, 672 CVTS2UI, 673 CVTS2SI_RND, 674 CVTS2UI_RND, 675 676 // Vector float/double to signed/unsigned integer with truncation. 677 CVTTP2SI, 678 CVTTP2UI, 679 CVTTP2SI_SAE, 680 CVTTP2UI_SAE, 681 682 // Saturation enabled Vector float/double to signed/unsigned 683 // integer with truncation. 684 CVTTP2SIS, 685 CVTTP2UIS, 686 CVTTP2SIS_SAE, 687 CVTTP2UIS_SAE, 688 // Masked versions of above. Used for v2f64 to v4i32. 689 // SRC, PASSTHRU, MASK 690 MCVTTP2SIS, 691 MCVTTP2UIS, 692 693 // Scalar float/double to signed/unsigned integer with truncation. 694 CVTTS2SI, 695 CVTTS2UI, 696 CVTTS2SI_SAE, 697 CVTTS2UI_SAE, 698 699 // Vector signed/unsigned integer to float/double. 700 CVTSI2P, 701 CVTUI2P, 702 703 // Scalar float/double to signed/unsigned integer with saturation. 704 CVTTS2SIS, 705 CVTTS2UIS, 706 CVTTS2SIS_SAE, 707 CVTTS2UIS_SAE, 708 709 // Masked versions of above. Used for v2f64->v4f32. 710 // SRC, PASSTHRU, MASK 711 MCVTP2SI, 712 MCVTP2UI, 713 MCVTTP2SI, 714 MCVTTP2UI, 715 MCVTSI2P, 716 MCVTUI2P, 717 718 // Vector float to bfloat16. 719 // Convert packed single data to packed BF16 data 720 CVTNEPS2BF16, 721 // Masked version of above. 722 // SRC, PASSTHRU, MASK 723 MCVTNEPS2BF16, 724 725 // Dot product of BF16/FP16 pairs to accumulated into 726 // packed single precision. 727 DPBF16PS, 728 DPFP16PS, 729 730 // A stack checking function call. On Windows it's _chkstk call. 731 DYN_ALLOCA, 732 733 // For allocating variable amounts of stack space when using 734 // segmented stacks. 
Check if the current stacklet has enough space, and 735 // falls back to heap allocation if not. 736 SEG_ALLOCA, 737 738 // For allocating stack space when using stack clash protector. 739 // Allocation is performed by block, and each block is probed. 740 PROBED_ALLOCA, 741 742 // Memory barriers. 743 MFENCE, 744 745 // Get a random integer and indicate whether it is valid in CF. 746 RDRAND, 747 748 // Get a NIST SP800-90B & C compliant random integer and 749 // indicate whether it is valid in CF. 750 RDSEED, 751 752 // Protection keys 753 // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX. 754 // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is 755 // value for ECX. 756 RDPKRU, 757 WRPKRU, 758 759 // SSE42 string comparisons. 760 // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG 761 // will emit one or two instructions based on which results are used. If 762 // flags and index/mask this allows us to use a single instruction since 763 // we won't have to pick and opcode for flags. Instead we can rely on the 764 // DAG to CSE everything and decide at isel. 765 PCMPISTR, 766 PCMPESTR, 767 768 // Test if in transactional execution. 769 XTEST, 770 771 // Conversions between float and half-float. 772 CVTPS2PH, 773 CVTPS2PH_SAE, 774 CVTPH2PS, 775 CVTPH2PS_SAE, 776 777 // Masked version of above. 778 // SRC, RND, PASSTHRU, MASK 779 MCVTPS2PH, 780 MCVTPS2PH_SAE, 781 782 // Galois Field Arithmetic Instructions 783 GF2P8AFFINEINVQB, 784 GF2P8AFFINEQB, 785 GF2P8MULB, 786 787 // LWP insert record. 788 LWPINS, 789 790 // User level wait 791 UMWAIT, 792 TPAUSE, 793 794 // Enqueue Stores Instructions 795 ENQCMD, 796 ENQCMDS, 797 798 // For avx512-vp2intersect 799 VP2INTERSECT, 800 801 // User level interrupts - testui 802 TESTUI, 803 804 // Perform an FP80 add after changing precision control in FPCW. 805 FP80_ADD, 806 807 // Conditional compare instructions 808 CCMP, 809 CTEST, 810 811 /// X86 strict FP compare instructions. 812 FIRST_STRICTFP_OPCODE, 813 STRICT_FCMP = FIRST_STRICTFP_OPCODE, 814 STRICT_FCMPS, 815 816 // Vector packed double/float comparison. 817 STRICT_CMPP, 818 819 /// Vector comparison generating mask bits for fp and 820 /// integer signed and unsigned data types. 821 STRICT_CMPM, 822 823 // Vector float/double to signed/unsigned integer with truncation. 824 STRICT_CVTTP2SI, 825 STRICT_CVTTP2UI, 826 827 // Vector FP extend. 828 STRICT_VFPEXT, 829 830 // Vector FP round. 831 STRICT_VFPROUND, 832 833 // RndScale - Round FP Values To Include A Given Number Of Fraction Bits. 834 // Also used by the legacy (V)ROUND intrinsics where we mask out the 835 // scaling part of the immediate. 836 STRICT_VRNDSCALE, 837 838 // Vector signed/unsigned integer to float/double. 839 STRICT_CVTSI2P, 840 STRICT_CVTUI2P, 841 842 // Strict FMA nodes. 843 STRICT_FNMADD, 844 STRICT_FMSUB, 845 STRICT_FNMSUB, 846 847 // Conversions between float and half-float. 848 STRICT_CVTPS2PH, 849 STRICT_CVTPH2PS, 850 851 // Perform an FP80 add after changing precision control in FPCW. 852 STRICT_FP80_ADD, 853 854 /// Floating point max and min. 855 STRICT_FMAX, 856 STRICT_FMIN, 857 LAST_STRICTFP_OPCODE = STRICT_FMIN, 858 859 // Compare and swap. 860 FIRST_MEMORY_OPCODE, 861 LCMPXCHG_DAG = FIRST_MEMORY_OPCODE, 862 LCMPXCHG8_DAG, 863 LCMPXCHG16_DAG, 864 LCMPXCHG16_SAVE_RBX_DAG, 865 866 /// LOCK-prefixed arithmetic read-modify-write instructions. 
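/// Typically used when only the EFLAGS result of an atomic
/// read-modify-write is needed, so a plain LOCK-prefixed instruction can
/// be emitted instead of a CMPXCHG loop.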
867 /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS) 868 LADD, 869 LSUB, 870 LOR, 871 LXOR, 872 LAND, 873 LBTS, 874 LBTC, 875 LBTR, 876 LBTS_RM, 877 LBTC_RM, 878 LBTR_RM, 879 880 /// RAO arithmetic instructions. 881 /// OUTCHAIN = AADD(INCHAIN, PTR, RHS) 882 AADD, 883 AOR, 884 AXOR, 885 AAND, 886 887 // Load, scalar_to_vector, and zero extend. 888 VZEXT_LOAD, 889 890 // extract_vector_elt, store. 891 VEXTRACT_STORE, 892 893 // scalar broadcast from memory. 894 VBROADCAST_LOAD, 895 896 // subvector broadcast from memory. 897 SUBV_BROADCAST_LOAD, 898 899 // Store FP control word into i16 memory. 900 FNSTCW16m, 901 902 // Load FP control word from i16 memory. 903 FLDCW16m, 904 905 // Store x87 FPU environment into memory. 906 FNSTENVm, 907 908 // Load x87 FPU environment from memory. 909 FLDENVm, 910 911 // Custom handling for FP_TO_xINT_SAT 912 FP_TO_SINT_SAT, 913 FP_TO_UINT_SAT, 914 915 /// This instruction implements FP_TO_SINT with the 916 /// integer destination in memory and a FP reg source. This corresponds 917 /// to the X86::FIST*m instructions and the rounding mode change stuff. It 918 /// has two inputs (token chain and address) and two outputs (int value 919 /// and token chain). Memory VT specifies the type to store to. 920 FP_TO_INT_IN_MEM, 921 922 /// This instruction implements SINT_TO_FP with the 923 /// integer source in memory and FP reg result. This corresponds to the 924 /// X86::FILD*m instructions. It has two inputs (token chain and address) 925 /// and two outputs (FP value and token chain). The integer source type is 926 /// specified by the memory VT. 927 FILD, 928 929 /// This instruction implements a fp->int store from FP stack 930 /// slots. This corresponds to the fist instruction. It takes a 931 /// chain operand, value to store, address, and glue. The memory VT 932 /// specifies the type to store as. 933 FIST, 934 935 /// This instruction implements an extending load to FP stack slots. 936 /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain 937 /// operand, and ptr to load from. The memory VT specifies the type to 938 /// load from. 939 FLD, 940 941 /// This instruction implements a truncating store from FP stack 942 /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a 943 /// chain operand, value to store, address, and glue. The memory VT 944 /// specifies the type to store as. 945 FST, 946 947 /// These instructions grab the address of the next argument 948 /// from a va_list. (reads and modifies the va_list in memory) 949 VAARG_64, 950 VAARG_X32, 951 952 // Vector truncating store with unsigned/signed saturation 953 VTRUNCSTOREUS, 954 VTRUNCSTORES, 955 // Vector truncating masked store with unsigned/signed saturation 956 VMTRUNCSTOREUS, 957 VMTRUNCSTORES, 958 959 // X86 specific gather and scatter 960 MGATHER, 961 MSCATTER, 962 963 // Key locker nodes that produce flags. 964 AESENC128KL, 965 AESDEC128KL, 966 AESENC256KL, 967 AESDEC256KL, 968 AESENCWIDE128KL, 969 AESDECWIDE128KL, 970 AESENCWIDE256KL, 971 AESDECWIDE256KL, 972 973 /// Compare and Add if Condition is Met. Compare value in operand 2 with 974 /// value in memory of operand 1. If condition of operand 4 is met, add 975 /// value operand 3 to m32 and write new value in operand 1. Operand 2 is 976 /// always updated with the original value from operand 1. 977 CMPCCXADD, 978 979 // Save xmm argument registers to the stack, according to %al. An operator 980 // is needed so that this can be expanded with control flow. 
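// (On the x86-64 SysV ABI, %al carries the number of vector registers used
// by a varargs call; the expansion tests it and spills the XMM argument
// registers only when needed.)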
981 VASTART_SAVE_XMM_REGS, 982 983 // Conditional load/store instructions. 984 CLOAD, 985 CSTORE, 986 LAST_MEMORY_OPCODE = CSTORE, 987 }; 988 } // end namespace X86ISD 989 990 namespace X86 { 991 /// Current rounding mode is represented in bits 11:10 of the FP control word. 992 /// These values are the same as the corresponding constants for rounding mode 993 /// used in glibc. 994 enum RoundingMode { 995 rmToNearest = 0, // FE_TONEAREST 996 rmDownward = 1 << 10, // FE_DOWNWARD 997 rmUpward = 2 << 10, // FE_UPWARD 998 rmTowardZero = 3 << 10, // FE_TOWARDZERO 999 rmMask = 3 << 10 // Bit mask selecting rounding mode 1000 }; 1001 } 1002 1003 /// Define some predicates that are used for node matching. 1004 namespace X86 { 1005 /// Returns true if Elt is a constant zero or floating point constant +0.0. 1006 bool isZeroNode(SDValue Elt); 1007 1008 /// Returns true if the given offset can 1009 /// fit into the displacement field of the instruction. 1010 bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, 1011 bool hasSymbolicDisplacement); 1012 1013 /// Determines whether the callee is required to pop its 1014 /// own arguments. Callee pop is necessary to support tail calls. 1015 bool isCalleePop(CallingConv::ID CallingConv, 1016 bool is64Bit, bool IsVarArg, bool GuaranteeTCO); 1017 1018 /// If Op is a constant whose elements are all the same constant or 1019 /// undefined, return true and return the constant value in \p SplatVal. 1020 /// If we have undef bits that don't cover an entire element, we treat these 1021 /// as zero if AllowPartialUndefs is set, else we fail and return false. 1022 bool isConstantSplat(SDValue Op, APInt &SplatVal, 1023 bool AllowPartialUndefs = true); 1024 1025 /// Check if Op is a load operation that could be folded into some other x86 1026 /// instruction as a memory operand. Example: vpaddd (%rdi), %xmm0, %xmm0. 1027 bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget, 1028 bool AssumeSingleUse = false); 1029 1030 /// Check if Op is a load operation that could be folded into a vector splat 1031 /// instruction as a memory operand. Example: vbroadcastss 16(%rdi), %xmm2. 1032 bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT, 1033 const X86Subtarget &Subtarget, 1034 bool AssumeSingleUse = false); 1035 1036 /// Check if Op is a value that could be used to fold a store into some 1037 /// other x86 instruction as a memory operand. Ex: pextrb $0, %xmm0, (%rdi). 1038 bool mayFoldIntoStore(SDValue Op); 1039 1040 /// Check if Op is an operation that could be folded into a zero extend x86 1041 /// instruction. 1042 bool mayFoldIntoZeroExtend(SDValue Op); 1043 1044 /// True if the target supports the extended frame for async Swift 1045 /// functions.
1046 bool isExtendedSwiftAsyncFrameSupported(const X86Subtarget &Subtarget, 1047 const MachineFunction &MF); 1048 } // end namespace X86 1049 1050 //===--------------------------------------------------------------------===// 1051 // X86 Implementation of the TargetLowering interface 1052 class X86TargetLowering final : public TargetLowering { 1053 public: 1054 explicit X86TargetLowering(const X86TargetMachine &TM, 1055 const X86Subtarget &STI); 1056 1057 unsigned getJumpTableEncoding() const override; 1058 bool useSoftFloat() const override; 1059 1060 void markLibCallAttributes(MachineFunction *MF, unsigned CC, 1061 ArgListTy &Args) const override; 1062 1063 MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override { 1064 return MVT::i8; 1065 } 1066 1067 const MCExpr * 1068 LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, 1069 const MachineBasicBlock *MBB, unsigned uid, 1070 MCContext &Ctx) const override; 1071 1072 /// Returns relocation base for the given PIC jumptable. 1073 SDValue getPICJumpTableRelocBase(SDValue Table, 1074 SelectionDAG &DAG) const override; 1075 const MCExpr * 1076 getPICJumpTableRelocBaseExpr(const MachineFunction *MF, 1077 unsigned JTI, MCContext &Ctx) const override; 1078 1079 /// Return the desired alignment for ByVal aggregate 1080 /// function arguments in the caller parameter area. For X86, aggregates 1081 /// that contain SSE vectors are placed at 16-byte boundaries while the rest 1082 /// are at 4-byte boundaries. 1083 Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override; 1084 1085 EVT getOptimalMemOpType(const MemOp &Op, 1086 const AttributeList &FuncAttributes) const override; 1087 1088 /// Returns true if it's safe to use load / store of the 1089 /// specified type to expand memcpy / memset inline. This is mostly true 1090 /// for all types except for some special cases. For example, on X86 1091 /// targets without SSE2 f64 load / store are done with fldl / fstpl which 1092 /// also does type conversion. Note the specified type doesn't have to be 1093 /// legal as the hook is used before type legalization. 1094 bool isSafeMemOpType(MVT VT) const override; 1095 1096 bool isMemoryAccessFast(EVT VT, Align Alignment) const; 1097 1098 /// Returns true if the target allows unaligned memory accesses of the 1099 /// specified type. Returns whether it is "fast" in the last argument. 1100 bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, 1101 MachineMemOperand::Flags Flags, 1102 unsigned *Fast) const override; 1103 1104 /// This function returns true if the memory access is aligned or if the 1105 /// target allows this specific unaligned memory access. If the access is 1106 /// allowed, the optional final parameter returns a relative speed of the 1107 /// access (as defined by the target). 1108 bool allowsMemoryAccess( 1109 LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace, 1110 Align Alignment, 1111 MachineMemOperand::Flags Flags = MachineMemOperand::MONone, 1112 unsigned *Fast = nullptr) const override; 1113 1114 bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, 1115 const MachineMemOperand &MMO, 1116 unsigned *Fast) const { 1117 return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(), 1118 MMO.getAlign(), MMO.getFlags(), Fast); 1119 } 1120 1121 /// Provide custom lowering hooks for some operations.
1122 /// 1123 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; 1124 1125 /// Replace the results of a node with an illegal result 1126 /// type with new values built out of custom code. 1127 /// 1128 void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, 1129 SelectionDAG &DAG) const override; 1130 1131 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; 1132 1133 bool preferABDSToABSWithNSW(EVT VT) const override; 1134 1135 bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT, 1136 EVT ExtVT) const override; 1137 1138 bool isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode Cond, 1139 EVT VT) const override; 1140 1141 /// Return true if the target has native support for 1142 /// the specified value type and it is 'desirable' to use the type for the 1143 /// given node type. e.g. On x86 i16 is legal, but undesirable since i16 1144 /// instruction encodings are longer and some i16 instructions are slow. 1145 bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override; 1146 1147 /// Return true if the target has native support for the 1148 /// specified value type and it is 'desirable' to use the type. e.g. On x86 1149 /// i16 is legal, but undesirable since i16 instruction encodings are longer 1150 /// and some i16 instructions are slow. 1151 bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override; 1152 1153 /// Return the preferred fold type: Abs if this is a vector, AddAnd if it's an 1154 /// integer, None otherwise. 1155 TargetLowering::AndOrSETCCFoldKind 1156 isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp, 1157 const SDNode *SETCC0, 1158 const SDNode *SETCC1) const override; 1159 1160 /// Return the newly negated expression if the cost is not expensive, and 1161 /// set \p Cost to indicate whether it is cheaper or neutral to 1162 /// do the negation. 1163 SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, 1164 bool LegalOperations, bool ForCodeSize, 1165 NegatibleCost &Cost, 1166 unsigned Depth) const override; 1167 1168 MachineBasicBlock * 1169 EmitInstrWithCustomInserter(MachineInstr &MI, 1170 MachineBasicBlock *MBB) const override; 1171 1172 /// This method returns the name of a target specific DAG node. 1173 const char *getTargetNodeName(unsigned Opcode) const override; 1174 1175 /// Do not merge vector stores after legalization because that may conflict 1176 /// with x86-specific store splitting optimizations. 1177 bool mergeStoresAfterLegalization(EVT MemVT) const override { 1178 return !MemVT.isVector(); 1179 } 1180 1181 bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, 1182 const MachineFunction &MF) const override; 1183 1184 bool isCheapToSpeculateCttz(Type *Ty) const override; 1185 1186 bool isCheapToSpeculateCtlz(Type *Ty) const override; 1187 1188 bool isCtlzFast() const override; 1189 1190 bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override { 1191 // If the pair to store is a mixture of float and int values, we will 1192 // save two bitwise instructions and one float-to-int instruction and 1193 // add one store instruction. There is potentially a more 1194 // significant benefit because it avoids the float->int domain switch 1195 // for the input value. So it is more likely a win.
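// For example, merging an {i32, f32} pair into one i64 store would need
// the f32 moved to a GPR (movd) plus shift/or before the store; keeping
// two scalar stores avoids that domain crossing.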
1196 if ((LTy.isFloatingPoint() && HTy.isInteger()) || 1197 (LTy.isInteger() && HTy.isFloatingPoint())) 1198 return true; 1199 // If the pair only contains int values, we will save two bitwise 1200 // instructions and add one store instruction (costing one more 1201 // store buffer entry). Since the benefit is less clear-cut, we leave 1202 // such pairs out until we have a testcase proving it is a win. 1203 return false; 1204 } 1205 1206 bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override; 1207 1208 bool hasAndNotCompare(SDValue Y) const override; 1209 1210 bool hasAndNot(SDValue Y) const override; 1211 1212 bool hasBitTest(SDValue X, SDValue Y) const override; 1213 1214 bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( 1215 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, 1216 unsigned OldShiftOpcode, unsigned NewShiftOpcode, 1217 SelectionDAG &DAG) const override; 1218 1219 unsigned preferedOpcodeForCmpEqPiecesOfOperand( 1220 EVT VT, unsigned ShiftOpc, bool MayTransformRotate, 1221 const APInt &ShiftOrRotateAmt, 1222 const std::optional<APInt> &AndMask) const override; 1223 1224 bool preferScalarizeSplat(SDNode *N) const override; 1225 1226 CondMergingParams 1227 getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs, 1228 const Value *Rhs) const override; 1229 1230 bool shouldFoldConstantShiftPairToMask(const SDNode *N, 1231 CombineLevel Level) const override; 1232 1233 bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override; 1234 1235 bool 1236 shouldTransformSignedTruncationCheck(EVT XVT, 1237 unsigned KeptBits) const override { 1238 // For vectors, we don't have a preference. 1239 if (XVT.isVector()) 1240 return false; 1241 1242 auto VTIsOk = [](EVT VT) -> bool { 1243 return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || 1244 VT == MVT::i64; 1245 }; 1246 1247 // We are ok with KeptBitsVT being byte/word/dword, which is what MOVSX supports. 1248 // XVT will be larger than KeptBitsVT. 1249 MVT KeptBitsVT = MVT::getIntegerVT(KeptBits); 1250 return VTIsOk(XVT) && VTIsOk(KeptBitsVT); 1251 } 1252 1253 ShiftLegalizationStrategy 1254 preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, 1255 unsigned ExpansionFactor) const override; 1256 1257 bool shouldSplatInsEltVarIndex(EVT VT) const override; 1258 1259 bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override { 1260 // Converting to sat variants holds little benefit on X86 as we will just 1261 // need to saturate the value back using fp arithmetic. 1262 return Op != ISD::FP_TO_UINT_SAT && isOperationLegalOrCustom(Op, VT); 1263 } 1264 1265 bool convertSetCCLogicToBitwiseLogic(EVT VT) const override { 1266 return VT.isScalarInteger(); 1267 } 1268 1269 /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST. 1270 MVT hasFastEqualityCompare(unsigned NumBits) const override; 1271 1272 /// Return the value type to use for ISD::SETCC. 1273 EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, 1274 EVT VT) const override; 1275 1276 bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, 1277 const APInt &DemandedElts, 1278 TargetLoweringOpt &TLO) const override; 1279 1280 /// Determine which of the bits specified in Mask are known to be either 1281 /// zero or one and return them in the KnownZero/KnownOne bitsets.
1282 void computeKnownBitsForTargetNode(const SDValue Op, 1283 KnownBits &Known, 1284 const APInt &DemandedElts, 1285 const SelectionDAG &DAG, 1286 unsigned Depth = 0) const override; 1287 1288 /// Determine the number of bits in the operation that are sign bits. 1289 unsigned ComputeNumSignBitsForTargetNode(SDValue Op, 1290 const APInt &DemandedElts, 1291 const SelectionDAG &DAG, 1292 unsigned Depth) const override; 1293 1294 bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op, 1295 const APInt &DemandedElts, 1296 APInt &KnownUndef, 1297 APInt &KnownZero, 1298 TargetLoweringOpt &TLO, 1299 unsigned Depth) const override; 1300 1301 bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op, 1302 const APInt &DemandedElts, 1303 unsigned MaskIndex, 1304 TargetLoweringOpt &TLO, 1305 unsigned Depth) const; 1306 1307 bool SimplifyDemandedBitsForTargetNode(SDValue Op, 1308 const APInt &DemandedBits, 1309 const APInt &DemandedElts, 1310 KnownBits &Known, 1311 TargetLoweringOpt &TLO, 1312 unsigned Depth) const override; 1313 1314 SDValue SimplifyMultipleUseDemandedBitsForTargetNode( 1315 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, 1316 SelectionDAG &DAG, unsigned Depth) const override; 1317 1318 bool isGuaranteedNotToBeUndefOrPoisonForTargetNode( 1319 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, 1320 bool PoisonOnly, unsigned Depth) const override; 1321 1322 bool canCreateUndefOrPoisonForTargetNode( 1323 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, 1324 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override; 1325 1326 bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, 1327 APInt &UndefElts, const SelectionDAG &DAG, 1328 unsigned Depth) const override; 1329 1330 bool isTargetCanonicalConstantNode(SDValue Op) const override { 1331 // Peek through bitcasts/extracts/inserts to see if we have a broadcast 1332 // vector from memory. 1333 while (Op.getOpcode() == ISD::BITCAST || 1334 Op.getOpcode() == ISD::EXTRACT_SUBVECTOR || 1335 (Op.getOpcode() == ISD::INSERT_SUBVECTOR && 1336 Op.getOperand(0).isUndef())) 1337 Op = Op.getOperand(Op.getOpcode() == ISD::INSERT_SUBVECTOR ? 1 : 0); 1338 1339 return Op.getOpcode() == X86ISD::VBROADCAST_LOAD || 1340 TargetLowering::isTargetCanonicalConstantNode(Op); 1341 } 1342 1343 const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override; 1344 1345 SDValue unwrapAddress(SDValue N) const override; 1346 1347 SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const; 1348 1349 bool ExpandInlineAsm(CallInst *CI) const override; 1350 1351 ConstraintType getConstraintType(StringRef Constraint) const override; 1352 1353 /// Examine constraint string and operand type and determine a weight value. 1354 /// The operand object must already have been set up with the operand type. 1355 ConstraintWeight 1356 getSingleConstraintMatchWeight(AsmOperandInfo &Info, 1357 const char *Constraint) const override; 1358 1359 const char *LowerXConstraint(EVT ConstraintVT) const override; 1360 1361 /// Lower the specified operand into the Ops vector. If it is invalid, don't 1362 /// add anything to Ops. If hasMemory is true it means one of the asm 1363 /// constraint of the inline asm instruction being processed is 'm'. 
1364 void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, 1365 std::vector<SDValue> &Ops, 1366 SelectionDAG &DAG) const override; 1367 1368 InlineAsm::ConstraintCode 1369 getInlineAsmMemConstraint(StringRef ConstraintCode) const override { 1370 if (ConstraintCode == "v") 1371 return InlineAsm::ConstraintCode::v; 1372 return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); 1373 } 1374 1375 /// Handle Lowering flag assembly outputs. 1376 SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, 1377 const SDLoc &DL, 1378 const AsmOperandInfo &Constraint, 1379 SelectionDAG &DAG) const override; 1380 1381 /// Given a physical register constraint 1382 /// (e.g. {edx}), return the register number and the register class for the 1383 /// register. This should only be used for C_Register constraints. On 1384 /// error, this returns a register number of 0. 1385 std::pair<unsigned, const TargetRegisterClass *> 1386 getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, 1387 StringRef Constraint, MVT VT) const override; 1388 1389 /// Return true if the addressing mode represented 1390 /// by AM is legal for this target, for a load/store of the specified type. 1391 bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, 1392 Type *Ty, unsigned AS, 1393 Instruction *I = nullptr) const override; 1394 1395 bool addressingModeSupportsTLS(const GlobalValue &GV) const override; 1396 1397 /// Return true if the specified immediate is legal 1398 /// icmp immediate, that is the target has icmp instructions which can 1399 /// compare a register against the immediate without having to materialize 1400 /// the immediate into a register. 1401 bool isLegalICmpImmediate(int64_t Imm) const override; 1402 1403 /// Return true if the specified immediate is legal 1404 /// add immediate, that is the target has add instructions which can 1405 /// add a register and the immediate without having to materialize 1406 /// the immediate into a register. 1407 bool isLegalAddImmediate(int64_t Imm) const override; 1408 1409 bool isLegalStoreImmediate(int64_t Imm) const override; 1410 1411 /// Add x86-specific opcodes to the default list. 1412 bool isBinOp(unsigned Opcode) const override; 1413 1414 /// Returns true if the opcode is a commutative binary operation. 1415 bool isCommutativeBinOp(unsigned Opcode) const override; 1416 1417 /// Return true if it's free to truncate a value of 1418 /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in 1419 /// register EAX to i16 by referencing its sub-register AX. 1420 bool isTruncateFree(Type *Ty1, Type *Ty2) const override; 1421 bool isTruncateFree(EVT VT1, EVT VT2) const override; 1422 1423 bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override; 1424 1425 /// Return true if any actual instruction that defines a 1426 /// value of type Ty1 implicit zero-extends the value to Ty2 in the result 1427 /// register. This does not necessarily include registers defined in 1428 /// unknown ways, such as incoming arguments, or copies from unknown 1429 /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this 1430 /// does not necessarily apply to truncate instructions. e.g. on x86-64, 1431 /// all instructions that define 32-bit values implicit zero-extend the 1432 /// result out to 64 bits. 
1433 bool isZExtFree(Type *Ty1, Type *Ty2) const override; 1434 bool isZExtFree(EVT VT1, EVT VT2) const override; 1435 bool isZExtFree(SDValue Val, EVT VT2) const override; 1436 1437 bool shouldConvertPhiType(Type *From, Type *To) const override; 1438 1439 /// Return true if folding a vector load into ExtVal (a sign, zero, or any 1440 /// extend node) is profitable. 1441 bool isVectorLoadExtDesirable(SDValue) const override; 1442 1443 /// Return true if an FMA operation is faster than a pair of fmul and fadd 1444 /// instructions. fmuladd intrinsics will be expanded to FMAs when this 1445 /// method returns true, otherwise fmuladd is expanded to fmul + fadd. 1446 bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, 1447 EVT VT) const override; 1448 1449 /// Return true if it's profitable to narrow operations of type SrcVT to 1450 /// DestVT. e.g. on x86, it's profitable to narrow from i32 to i8 but not 1451 /// from i32 to i16. 1452 bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const override; 1453 1454 bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, 1455 EVT VT) const override; 1456 1457 /// Given an intrinsic, checks if on the target the intrinsic will need to map 1458 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns 1459 /// true and stores the intrinsic information into the IntrinsicInfo that was 1460 /// passed to the function. 1461 bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, 1462 MachineFunction &MF, 1463 unsigned Intrinsic) const override; 1464 1465 /// Returns true if the target can instruction select the 1466 /// specified FP immediate natively. If false, the legalizer will 1467 /// materialize the FP immediate as a load from a constant pool. 1468 bool isFPImmLegal(const APFloat &Imm, EVT VT, 1469 bool ForCodeSize) const override; 1470 1471 /// Targets can use this to indicate that they only support *some* 1472 /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a 1473 /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to 1474 /// be legal. 1475 bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override; 1476 1477 /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there 1478 /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a 1479 /// constant pool entry. 1480 bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override; 1481 1482 /// Returns true if lowering to a jump table is allowed. 1483 bool areJTsAllowed(const Function *Fn) const override; 1484 1485 MVT getPreferredSwitchConditionType(LLVMContext &Context, 1486 EVT ConditionVT) const override; 1487 1488 /// If true, then instruction selection should 1489 /// seek to shrink the FP constant of the specified type to a smaller type 1490 /// in order to save space and / or reduce runtime. 1491 bool ShouldShrinkFPConstant(EVT VT) const override; 1492 1493 /// Return true if we believe it is correct and profitable to reduce the 1494 /// load node to a smaller type. 1495 bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, 1496 EVT NewVT) const override; 1497 1498 /// Return true if the specified scalar FP type is computed in an SSE 1499 /// register, not on the X87 floating point stack. 1500 bool isScalarFPTypeInSSEReg(EVT VT) const; 1501 1502 /// Returns true if it is beneficial to convert a load of a constant 1503 /// to just the constant itself. 
1504 bool shouldConvertConstantLoadToIntImm(const APInt &Imm, 1505 Type *Ty) const override; 1506 1507 bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override; 1508 1509 bool convertSelectOfConstantsToMath(EVT VT) const override; 1510 1511 bool decomposeMulByConstant(LLVMContext &Context, EVT VT, 1512 SDValue C) const override; 1513 1514 /// Return true if EXTRACT_SUBVECTOR is cheap for this result type 1515 /// with this index. 1516 bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, 1517 unsigned Index) const override; 1518 1519 /// Scalar ops always have equal or better analysis/performance/power than 1520 /// the vector equivalent, so this always makes sense if the scalar op is 1521 /// supported. 1522 bool shouldScalarizeBinop(SDValue) const override; 1523 1524 /// Extract of a scalar FP value from index 0 of a vector is free. 1525 bool isExtractVecEltCheap(EVT VT, unsigned Index) const override { 1526 EVT EltVT = VT.getScalarType(); 1527 return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0; 1528 } 1529 1530 /// Overflow nodes should get combined/lowered to optimal instructions 1531 /// (they should allow eliminating explicit compares by getting flags from 1532 /// math ops). 1533 bool shouldFormOverflowOp(unsigned Opcode, EVT VT, 1534 bool MathUsed) const override; 1535 1536 bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem, 1537 unsigned AddrSpace) const override { 1538 // If we can replace more than 2 scalar stores, there will be a reduction 1539 // in instructions even after we add a vector constant load. 1540 return IsZero || NumElem > 2; 1541 } 1542 1543 bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, 1544 const SelectionDAG &DAG, 1545 const MachineMemOperand &MMO) const override; 1546 1547 Register getRegisterByName(const char* RegName, LLT VT, 1548 const MachineFunction &MF) const override; 1549 1550 /// If a physical register, this returns the register that receives the 1551 /// exception address on entry to an EH pad. 1552 Register 1553 getExceptionPointerRegister(const Constant *PersonalityFn) const override; 1554 1555 /// If a physical register, this returns the register that receives the 1556 /// exception typeid on entry to a landing pad. 1557 Register 1558 getExceptionSelectorRegister(const Constant *PersonalityFn) const override; 1559 1560 bool needsFixedCatchObjects() const override; 1561 1562 /// This method returns a target specific FastISel object, 1563 /// or null if the target does not support "fast" ISel. 1564 FastISel *createFastISel(FunctionLoweringInfo &funcInfo, 1565 const TargetLibraryInfo *libInfo) const override; 1566 1567 /// If the target has a standard location for the stack protector cookie, 1568 /// returns the address of that location. Otherwise, returns nullptr. 1569 Value *getIRStackGuard(IRBuilderBase &IRB) const override; 1570 1571 bool useLoadStackGuardNode(const Module &M) const override; 1572 bool useStackGuardXorFP() const override; 1573 void insertSSPDeclarations(Module &M) const override; 1574 Value *getSDagStackGuard(const Module &M) const override; 1575 Function *getSSPStackGuardCheck(const Module &M) const override; 1576 SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val, 1577 const SDLoc &DL) const override; 1578 1579 1580 /// Return true if the target stores SafeStack pointer at a fixed offset in 1581 /// some non-standard address space, and populates the address space and 1582 /// offset as appropriate. 
1583 Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override; 1584 1585 std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL, 1586 SDValue Chain, SDValue Pointer, 1587 MachinePointerInfo PtrInfo, 1588 Align Alignment, 1589 SelectionDAG &DAG) const; 1590 1591 /// Customize the preferred legalization strategy for certain types. 1592 LegalizeTypeAction getPreferredVectorAction(MVT VT) const override; 1593 1594 bool softPromoteHalfType() const override { return true; } 1595 1596 MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, 1597 EVT VT) const override; 1598 1599 unsigned getNumRegistersForCallingConv(LLVMContext &Context, 1600 CallingConv::ID CC, 1601 EVT VT) const override; 1602 1603 unsigned getVectorTypeBreakdownForCallingConv( 1604 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, 1605 unsigned &NumIntermediates, MVT &RegisterVT) const override; 1606 1607 bool functionArgumentNeedsConsecutiveRegisters( 1608 Type *Ty, CallingConv::ID CallConv, bool isVarArg, 1609 const DataLayout &DL) const override; 1610 1611 bool isIntDivCheap(EVT VT, AttributeList Attr) const override; 1612 1613 bool supportSwiftError() const override; 1614 1615 bool supportKCFIBundles() const override { return true; } 1616 1617 MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB, 1618 MachineBasicBlock::instr_iterator &MBBI, 1619 const TargetInstrInfo *TII) const override; 1620 1621 bool hasStackProbeSymbol(const MachineFunction &MF) const override; 1622 bool hasInlineStackProbe(const MachineFunction &MF) const override; 1623 StringRef getStackProbeSymbolName(const MachineFunction &MF) const override; 1624 1625 unsigned getStackProbeSize(const MachineFunction &MF) const; 1626 1627 bool hasVectorBlend() const override { return true; } 1628 1629 unsigned getMaxSupportedInterleaveFactor() const override { return 4; } 1630 1631 bool isInlineAsmTargetBranch(const SmallVectorImpl<StringRef> &AsmStrs, 1632 unsigned OpNo) const override; 1633 1634 SDValue visitMaskedLoad(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, 1635 MachineMemOperand *MMO, SDValue &NewLoad, 1636 SDValue Ptr, SDValue PassThru, 1637 SDValue Mask) const override; 1638 SDValue visitMaskedStore(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, 1639 MachineMemOperand *MMO, SDValue Ptr, SDValue Val, 1640 SDValue Mask) const override; 1641 1642 /// Lower interleaved load(s) into target specific 1643 /// instructions/intrinsics. 1644 bool lowerInterleavedLoad(LoadInst *LI, 1645 ArrayRef<ShuffleVectorInst *> Shuffles, 1646 ArrayRef<unsigned> Indices, 1647 unsigned Factor) const override; 1648 1649 /// Lower interleaved store(s) into target specific 1650 /// instructions/intrinsics. 
1651 bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, 1652 unsigned Factor) const override; 1653 1654 SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr, 1655 int JTI, SelectionDAG &DAG) const override; 1656 1657 Align getPrefLoopAlignment(MachineLoop *ML) const override; 1658 1659 EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override { 1660 if (VT == MVT::f80) 1661 return EVT::getIntegerVT(Context, 96); 1662 return TargetLoweringBase::getTypeToTransformTo(Context, VT); 1663 } 1664 1665 protected: 1666 std::pair<const TargetRegisterClass *, uint8_t> 1667 findRepresentativeClass(const TargetRegisterInfo *TRI, 1668 MVT VT) const override; 1669 1670 private: 1671 /// Keep a reference to the X86Subtarget around so that we can 1672 /// make the right decision when generating code for different targets. 1673 const X86Subtarget &Subtarget; 1674 1675 /// A list of legal FP immediates. 1676 std::vector<APFloat> LegalFPImmediates; 1677 1678 /// Indicate that this x86 target can instruction 1679 /// select the specified FP immediate natively. 1680 void addLegalFPImmediate(const APFloat& Imm) { 1681 LegalFPImmediates.push_back(Imm); 1682 } 1683 1684 SDValue LowerCallResult(SDValue Chain, SDValue InGlue, 1685 CallingConv::ID CallConv, bool isVarArg, 1686 const SmallVectorImpl<ISD::InputArg> &Ins, 1687 const SDLoc &dl, SelectionDAG &DAG, 1688 SmallVectorImpl<SDValue> &InVals, 1689 uint32_t *RegMask) const; 1690 SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, 1691 const SmallVectorImpl<ISD::InputArg> &ArgInfo, 1692 const SDLoc &dl, SelectionDAG &DAG, 1693 const CCValAssign &VA, MachineFrameInfo &MFI, 1694 unsigned i) const; 1695 SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, 1696 const SDLoc &dl, SelectionDAG &DAG, 1697 const CCValAssign &VA, 1698 ISD::ArgFlagsTy Flags, bool isByval) const; 1699 1700 // Call lowering helpers. 1701 1702 /// Check whether the call is eligible for tail call optimization. Targets 1703 /// that want to do tail call optimization should implement this function. 
1704 bool IsEligibleForTailCallOptimization( 1705 TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo, 1706 SmallVectorImpl<CCValAssign> &ArgLocs, bool IsCalleePopSRet) const; 1707 SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr, 1708 SDValue Chain, bool IsTailCall, 1709 bool Is64Bit, int FPDiff, 1710 const SDLoc &dl) const; 1711 1712 unsigned GetAlignedArgumentStackSize(unsigned StackSize, 1713 SelectionDAG &DAG) const; 1714 1715 unsigned getAddressSpace() const; 1716 1717 SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, 1718 SDValue &Chain) const; 1719 SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const; 1720 1721 SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; 1722 SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const; 1723 SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; 1724 SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; 1725 1726 unsigned getGlobalWrapperKind(const GlobalValue *GV, 1727 const unsigned char OpFlags) const; 1728 SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; 1729 SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; 1730 SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; 1731 SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; 1732 SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; 1733 1734 /// Creates target global address or external symbol nodes for calls or 1735 /// other uses. 1736 SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG, 1737 bool ForCall) const; 1738 1739 SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; 1740 SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; 1741 SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; 1742 SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; 1743 SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const; 1744 SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const; 1745 SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; 1746 SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const; 1747 SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; 1748 SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; 1749 SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; 1750 SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; 1751 SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; 1752 SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; 1753 SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; 1754 SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const; 1755 SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; 1756 SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const; 1757 SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; 1758 SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; 1759 SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; 1760 SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const; 1761 SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; 1762 SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; 1763 SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; 1764 SDValue LowerGET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const; 1765 SDValue LowerSET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const; 1766 SDValue LowerRESET_FPENV(SDValue Op, SelectionDAG &DAG) const; 1767 SDValue 
    SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerWin64_FP_TO_INT128(SDValue Op, SelectionDAG &DAG,
                                    SDValue &Chain) const;
    SDValue LowerWin64_INT128_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const;

    SDValue
    LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
                         const SDLoc &dl, SelectionDAG &DAG,
                         SmallVectorImpl<SDValue> &InVals) const override;
    SDValue LowerCall(CallLoweringInfo &CLI,
                      SmallVectorImpl<SDValue> &InVals) const override;

    SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        const SmallVectorImpl<SDValue> &OutVals,
                        const SDLoc &dl, SelectionDAG &DAG) const override;

    bool supportSplitCSR(MachineFunction *MF) const override {
      return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
             MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
    }
    void initializeSplitCSR(MachineBasicBlock *Entry) const override;
    void insertCopiesSplitCSR(
        MachineBasicBlock *Entry,
        const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

    bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;

    bool mayBeEmittedAsTailCall(const CallInst *CI) const override;

    EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
                            ISD::NodeType ExtendKind) const override;

    bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                        bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        LLVMContext &Context,
                        const Type *RetTy) const override;

    const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
    ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;

    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const;
    void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
    void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;

    LoadInst *
    lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;

    bool needsCmpXchgNb(Type *MemType) const;

    void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
                                MachineBasicBlock *DispatchBB, int FI) const;

    // Utility function to emit the low-level va_arg code for X86-64.
    MachineBasicBlock *
    EmitVAARGWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;

    /// Utility function to lower a pair of cascaded CMOV (select)
    /// pseudo-instructions.
    MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
                                                 MachineInstr &MI2,
                                                 MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
                                         MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
                                               MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
                                                MachineBasicBlock *BB) const;

    MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const;

    void emitSetJmpShadowStackFix(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                         MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
                                                 MachineBasicBlock *MBB) const;

    MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
                                             MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitPatchableEventCall(MachineInstr &MI,
                                              MachineBasicBlock *MBB) const;

    /// Emit flags for the given setcc condition and operands. Also returns the
    /// corresponding X86 condition code constant in X86CC.
    SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SDValue &X86CC) const;

    bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst,
                                             SDValue IntPow2) const override;

    /// Check if replacement of SQRT with RSQRT should be disabled.
    bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override;

    /// Use rsqrt* to speed up sqrt calculations.
    SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                            int &RefinementSteps, bool &UseOneConstNR,
                            bool Reciprocal) const override;

    /// Use rcp* to speed up fdiv calculations.
    SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                             int &RefinementSteps) const override;

    /// Reassociate floating point divisions into multiply by reciprocal.
    unsigned combineRepeatedFPDivisors() const override;

    SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                          SmallVectorImpl<SDNode *> &Created) const override;

    SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
                    SDValue V2) const;
  };

  namespace X86 {
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo);
  } // end namespace X86

  // X86 specific Gather/Scatter nodes.
  // The class has the same order of operands as MaskedGatherScatterSDNode for
  // convenience.
  class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode {
  public:
    // This is intended as a utility and should never be directly created.
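    //
    // Operand order (shared with MaskedGatherScatterSDNode), as reflected by
    // the accessors below:
    //   #0 - chain
    //   #1 - pass-through value (gather) or stored value (scatter)
    //   #2 - mask, #3 - base pointer, #4 - index, #5 - scale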
    X86MaskedGatherScatterSDNode() = delete;
    ~X86MaskedGatherScatterSDNode() = delete;

    const SDValue &getBasePtr() const { return getOperand(3); }
    const SDValue &getIndex() const { return getOperand(4); }
    const SDValue &getMask() const { return getOperand(2); }
    const SDValue &getScale() const { return getOperand(5); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER ||
             N->getOpcode() == X86ISD::MSCATTER;
    }
  };

  class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
  public:
    const SDValue &getPassThru() const { return getOperand(1); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER;
    }
  };

  class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
  public:
    const SDValue &getValue() const { return getOperand(1); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MSCATTER;
    }
  };

  /// Generate unpacklo/unpackhi shuffle mask.
  void createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, bool Lo,
                               bool Unary);

  /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
  /// imposed by AVX and specific to the unary pattern. Example:
  /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
  /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
  void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);

} // end namespace llvm

#endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H