//===-- X86InstrSSE.td - SSE Instruction Set ---------------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 SSE instruction set, defining the instructions,
// and properties of the instructions which are needed for code generation,
// machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

/// OpndItins - Bundles the two itinerary classes of an operation (the `rr`
/// and `rm` instruction forms) together with the foldable SchedWrite consumed
/// through `itins.Sched` / `itins.Sched.Folded`.  `Sched` defaults to
/// WriteFAdd; individual defs override it with `let Sched = ... in`.
class OpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm> {
  InstrItinClass rr = arg_rr;
  InstrItinClass rm = arg_rm;
  // InstrSchedModel info.
  X86FoldableSchedWrite Sched = WriteFAdd;
}

/// SizeItins - Pairs two OpndItins records: `s` and `d` (the defs below fill
/// them with the F32 and F64 bundles respectively).
class SizeItins<OpndItins arg_s, OpndItins arg_d> {
  OpndItins s = arg_s;
  OpndItins d = arg_d;
}


/// ShiftOpndItins - Like OpndItins but with a third itinerary, `ri`, and no
/// SchedWrite field.
class ShiftOpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm,
                     InstrItinClass arg_ri> {
  InstrItinClass rr = arg_rr;
  InstrItinClass rm = arg_rm;
  InstrItinClass ri = arg_ri;
}


// scalar
let Sched = WriteFAdd in {
def SSE_ALU_F32S : OpndItins<
  IIC_SSE_ALU_F32S_RR, IIC_SSE_ALU_F32S_RM
>;

def SSE_ALU_F64S : OpndItins<
  IIC_SSE_ALU_F64S_RR, IIC_SSE_ALU_F64S_RM
>;
}

def SSE_ALU_ITINS_S : SizeItins<
  SSE_ALU_F32S, SSE_ALU_F64S
>;

let Sched = WriteFMul in {
// The F32 bundle uses the F32 memory itinerary (it previously named the F64
// one, which looks like a copy-paste slip).
// NOTE(review): IIC_SSE_MUL_F32S_RM is declared outside this file - confirm.
def SSE_MUL_F32S : OpndItins<
  IIC_SSE_MUL_F32S_RR, IIC_SSE_MUL_F32S_RM
>;

def SSE_MUL_F64S : OpndItins<
  IIC_SSE_MUL_F64S_RR, IIC_SSE_MUL_F64S_RM
>;
}

def SSE_MUL_ITINS_S : SizeItins<
  SSE_MUL_F32S, SSE_MUL_F64S
>;

let Sched = WriteFDiv in {
// F32 memory itinerary, matching the F32 register itinerary.
// NOTE(review): IIC_SSE_DIV_F32S_RM is declared outside this file - confirm.
def SSE_DIV_F32S : OpndItins<
  IIC_SSE_DIV_F32S_RR, IIC_SSE_DIV_F32S_RM
>;

def SSE_DIV_F64S : OpndItins<
  IIC_SSE_DIV_F64S_RR, IIC_SSE_DIV_F64S_RM
>;
}

def SSE_DIV_ITINS_S : SizeItins<
  SSE_DIV_F32S, SSE_DIV_F64S
>;

// parallel
let Sched = WriteFAdd in {
def SSE_ALU_F32P : OpndItins<
  IIC_SSE_ALU_F32P_RR, IIC_SSE_ALU_F32P_RM
>;

def SSE_ALU_F64P : OpndItins<
  IIC_SSE_ALU_F64P_RR, IIC_SSE_ALU_F64P_RM
>;
}

def SSE_ALU_ITINS_P : SizeItins<
  SSE_ALU_F32P, SSE_ALU_F64P
>;

let Sched = WriteFMul in {
// F32 memory itinerary, matching the F32 register itinerary.
// NOTE(review): IIC_SSE_MUL_F32P_RM is declared outside this file - confirm.
def SSE_MUL_F32P : OpndItins<
  IIC_SSE_MUL_F32P_RR, IIC_SSE_MUL_F32P_RM
>;

def SSE_MUL_F64P : OpndItins<
  IIC_SSE_MUL_F64P_RR, IIC_SSE_MUL_F64P_RM
>;
}

def SSE_MUL_ITINS_P : SizeItins<
  SSE_MUL_F32P, SSE_MUL_F64P
>;

let Sched = WriteFDiv in {
// F32 memory itinerary, matching the F32 register itinerary.
// NOTE(review): IIC_SSE_DIV_F32P_RM is declared outside this file - confirm.
def SSE_DIV_F32P : OpndItins<
  IIC_SSE_DIV_F32P_RR, IIC_SSE_DIV_F32P_RM
>;

def SSE_DIV_F64P : OpndItins<
  IIC_SSE_DIV_F64P_RR, IIC_SSE_DIV_F64P_RM
>;
}

def SSE_DIV_ITINS_P : SizeItins<
  SSE_DIV_F32P, SSE_DIV_F64P
>;

// Same itineraries as SSE_BIT_ITINS_P below, but scheduled as WriteVecLogic.
let Sched = WriteVecLogic in
def SSE_VEC_BIT_ITINS_P : OpndItins<
  IIC_SSE_BIT_P_RR, IIC_SSE_BIT_P_RM
>;

// No `let Sched` here, so this keeps the OpndItins default (WriteFAdd).
def SSE_BIT_ITINS_P : OpndItins<
  IIC_SSE_BIT_P_RR, IIC_SSE_BIT_P_RM
>;

let Sched = WriteVecALU in {
def SSE_INTALU_ITINS_P : OpndItins<
  IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
>;

def SSE_INTALUQ_ITINS_P : OpndItins<
  IIC_SSE_INTALUQ_P_RR, IIC_SSE_INTALUQ_P_RM
>;
}

let Sched = WriteVecIMul in
def SSE_INTMUL_ITINS_P : OpndItins<
  IIC_SSE_INTMUL_P_RR, IIC_SSE_INTMUL_P_RM
>;

def SSE_INTSHIFT_ITINS_P : ShiftOpndItins<
  IIC_SSE_INTSH_P_RR, IIC_SSE_INTSH_P_RM, IIC_SSE_INTSH_P_RI
>;

def SSE_MOVA_ITINS : OpndItins<
  IIC_SSE_MOVA_P_RR, IIC_SSE_MOVA_P_RM
>;

def SSE_MOVU_ITINS : OpndItins<
  IIC_SSE_MOVU_P_RR, IIC_SSE_MOVU_P_RM
>;

def SSE_DPPD_ITINS : OpndItins<
  IIC_SSE_DPPD_RR, IIC_SSE_DPPD_RM
>;

// The DPPS bundle uses the DPPS memory itinerary (it previously named the
// DPPD one, which looks like a copy-paste slip).
// NOTE(review): IIC_SSE_DPPS_RM is declared outside this file - confirm.
def SSE_DPPS_ITINS : OpndItins<
  IIC_SSE_DPPS_RR, IIC_SSE_DPPS_RM
>;

// Generic ALU itineraries; keeps the OpndItins default SchedWrite.
def DEFAULT_ITINS : OpndItins<
  IIC_ALU_NONMEM, IIC_ALU_MEM
>;

def SSE_EXTRACT_ITINS : OpndItins<
  IIC_SSE_EXTRACTPS_RR, IIC_SSE_EXTRACTPS_RM
>;

def SSE_INSERT_ITINS : OpndItins<
  IIC_SSE_INSERTPS_RR, IIC_SSE_INSERTPS_RM
>;

let Sched = WriteMPSAD in
def SSE_MPSADBW_ITINS : OpndItins<
  IIC_SSE_MPSADBW_RR, IIC_SSE_MPSADBW_RM
>;

let Sched = WriteVecIMul in
def SSE_PMULLD_ITINS : OpndItins<
  IIC_SSE_PMULLD_RR, IIC_SSE_PMULLD_RM
>;

// Definitions for backward compatibility.
// The instructions mapped on these definitions use a different itinerary
// than the actual scheduling model.
// Each bundle below reuses an existing itinerary pair (generic ALU or SSE
// integer ALU) and only overrides the SchedWrite.
let Sched = WriteShuffle in
def DEFAULT_ITINS_SHUFFLESCHED : OpndItins<
  IIC_ALU_NONMEM, IIC_ALU_MEM
>;

let Sched = WriteVecIMul in
def DEFAULT_ITINS_VECIMULSCHED : OpndItins<
  IIC_ALU_NONMEM, IIC_ALU_MEM
>;

let Sched = WriteShuffle in
def SSE_INTALU_ITINS_SHUFF_P : OpndItins<
  IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
>;

let Sched = WriteMPSAD in
def DEFAULT_ITINS_MPSADSCHED : OpndItins<
  IIC_ALU_NONMEM, IIC_ALU_MEM
>;

let Sched = WriteFBlend in
def DEFAULT_ITINS_FBLENDSCHED : OpndItins<
  IIC_ALU_NONMEM, IIC_ALU_MEM
>;

let Sched = WriteBlend in
def DEFAULT_ITINS_BLENDSCHED : OpndItins<
  IIC_ALU_NONMEM, IIC_ALU_MEM
>;

let Sched = WriteVarBlend in
def DEFAULT_ITINS_VARBLENDSCHED : OpndItins<
  IIC_ALU_NONMEM, IIC_ALU_MEM
>;

let Sched = WriteFBlend in
def SSE_INTALU_ITINS_FBLEND_P : OpndItins<
  IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
>;

let Sched = WriteBlend in
def SSE_INTALU_ITINS_BLEND_P : OpndItins<
  IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 Instructions Classes
//===----------------------------------------------------------------------===//

/// sse12_fp_scalar - SSE 1 & 2 scalar instructions class
///
/// Emits `rr` (register source) and `rm` (source loaded from x86memop) forms
/// selecting `OpNode`.  Is2Addr picks the two-operand assembly string when
/// set and the three-operand string otherwise.  `rr` is marked commutable;
/// `rm` is scheduled with the folded-load variant of itins.Sched.
multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           RegisterClass RC, X86MemOperand x86memop,
                           OpndItins itins,
                           bit Is2Addr = 1> {
  let isCommutable = 1 in {
    def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>,
       Sched<[itins.Sched]>;
  }
  def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>,
       Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
///
/// Emits `rr_Int` / `rm_Int`, both isCodeGenOnly.  The selected intrinsic is
/// looked up by name: "int_x86_sse" # SSEVer # "_" # OpcodeStr # FPSizeStr.
/// `asm` is the mnemonic used in the assembly string; the memory form matches
/// its operand through the `mem_cpat` complex pattern.
multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
                               string asm, string SSEVer, string FPSizeStr,
                               Operand memopr, ComplexPattern mem_cpat,
                               OpndItins itins,
                               bit Is2Addr = 1> {
let isCodeGenOnly = 1 in {
  def rr_Int : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (!cast<Intrinsic>(
                 !strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr))
             RC:$src1, RC:$src2))], itins.rr>,
       Sched<[itins.Sched]>;
  def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
       !if(Is2Addr,
           !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse",
                                          SSEVer, "_", OpcodeStr, FPSizeStr))
             RC:$src1, mem_cpat:$src2))], itins.rm>,
       Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}

/// sse12_fp_packed - SSE 1 & 2 packed instructions class
///
/// Emits commutable `rr` and load-folding `rm` forms in execution domain `d`.
/// The memory operand is matched through `mem_frag`; `rm` is explicitly
/// marked mayLoad.
multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           RegisterClass RC, ValueType vt,
                           X86MemOperand x86memop, PatFrag mem_frag,
                           Domain d, OpndItins itins, bit Is2Addr = 1> {
  let isCommutable = 1 in
    def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>,
       Sched<[itins.Sched]>;
  let mayLoad = 1 in
    def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
          itins.rm, d>,
       Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
///
/// Unlike the classes above, the selection patterns are supplied by the
/// caller (`pat_rr`, `pat_rm`) and may be empty lists, so both forms spell
/// out their instruction flags instead of relying on pattern inference.
/// Both forms use NoItinerary and the WriteVecLogic / WriteVecLogicLd
/// scheduling classes.
multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
                                      string OpcodeStr, X86MemOperand x86memop,
                                      list<dag> pat_rr, list<dag> pat_rm,
                                      bit Is2Addr = 1> {
  let isCommutable = 1, hasSideEffects = 0 in
    def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       pat_rr, NoItinerary, d>,
       Sched<[WriteVecLogic]>;
  // Mirror the rr form: with an empty pat_rm there is no pattern from which
  // the load / side-effect flags could be inferred, so state them explicitly.
  let hasSideEffects = 0, mayLoad = 1 in
    def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       pat_rm, NoItinerary, d>,
       Sched<[WriteVecLogicLd, ReadAfterLd]>;
}

//===----------------------------------------------------------------------===//
// Non-instruction patterns
//===----------------------------------------------------------------------===//

// A vector extract of the first f32/f64 position is a subregister copy
def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
          (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32)>;
def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
          (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64)>;

// A 128-bit subvector extract from the first 256-bit vector position
// is a subregister copy that needs no instruction.
// Index-0 extract_subvector of a VR256 value: select to EXTRACT_SUBREG on
// sub_xmm, one pattern per 128-bit / 256-bit type pair.

// 32-bit elements
def : Pat<(v4i32 (extract_subvector (v8i32 VR256:$src), (iPTR 0))),
          (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm))>;
def : Pat<(v4f32 (extract_subvector (v8f32 VR256:$src), (iPTR 0))),
          (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm))>;

// 64-bit elements
def : Pat<(v2i64 (extract_subvector (v4i64 VR256:$src), (iPTR 0))),
          (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm))>;
def : Pat<(v2f64 (extract_subvector (v4f64 VR256:$src), (iPTR 0))),
          (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm))>;

// 16-bit and 8-bit elements
def : Pat<(v8i16 (extract_subvector (v16i16 VR256:$src), (iPTR 0))),
          (v8i16 (EXTRACT_SUBREG (v16i16 VR256:$src), sub_xmm))>;
def : Pat<(v16i8 (extract_subvector (v32i8 VR256:$src), (iPTR 0))),
          (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src), sub_xmm))>;

// A 128-bit subvector insert to the first 256-bit vector position
// is a subregister copy that needs no instruction.
// Index-0 insert_subvector into undef: select to INSERT_SUBREG of the VR128
// source into an IMPLICIT_DEF of the matching 256-bit type.
let AddedComplexity = 25 in { // to give priority over vinsertf128rm
  def : Pat<(insert_subvector undef, (v2i64 VR128:$src), (iPTR 0)),
            (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
  def : Pat<(insert_subvector undef, (v2f64 VR128:$src), (iPTR 0)),
            (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
  def : Pat<(insert_subvector undef, (v4i32 VR128:$src), (iPTR 0)),
            (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
  def : Pat<(insert_subvector undef, (v4f32 VR128:$src), (iPTR 0)),
            (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
  def : Pat<(insert_subvector undef, (v8i16 VR128:$src), (iPTR 0)),
            (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
  def : Pat<(insert_subvector undef, (v16i8 VR128:$src), (iPTR 0)),
            (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
}

// Implicitly promote a 32-bit scalar to a vector.
// Both result types are selected to a copy into VR128.
def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
          (COPY_TO_REGCLASS FR32:$src, VR128)>;
def : Pat<(v8f32 (scalar_to_vector FR32:$src)),
          (COPY_TO_REGCLASS FR32:$src, VR128)>;
// Implicitly promote a 64-bit scalar to a vector.
// Both result types are selected to a copy into VR128.
def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
          (COPY_TO_REGCLASS FR64:$src, VR128)>;
def : Pat<(v4f64 (scalar_to_vector FR64:$src)),
          (COPY_TO_REGCLASS FR64:$src, VR128)>;

// Bitcasts between 128-bit vector types. Return the original type since
// no instruction is needed for the conversion
let Predicates = [HasSSE2] in {
  // -> v2i64
  def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;

  // -> v4i32
  def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;

  // -> v8i16
  def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;

  // -> v16i8
  def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;

  // -> v4f32
  def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;

  // -> v2f64
  def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
}

// Bitcasts between 256-bit vector types. Return the original type since
// no instruction is needed for the conversion
let Predicates = [HasAVX] in {
  // -> v4f64
  def : Pat<(v4f64  (bitconvert (v8f32  VR256:$src))), (v4f64  VR256:$src)>;
  def : Pat<(v4f64  (bitconvert (v8i32  VR256:$src))), (v4f64  VR256:$src)>;
  def : Pat<(v4f64  (bitconvert (v4i64  VR256:$src))), (v4f64  VR256:$src)>;
  def : Pat<(v4f64  (bitconvert (v16i16 VR256:$src))), (v4f64  VR256:$src)>;
  def : Pat<(v4f64  (bitconvert (v32i8  VR256:$src))), (v4f64  VR256:$src)>;

  // -> v8f32
  def : Pat<(v8f32  (bitconvert (v8i32  VR256:$src))), (v8f32  VR256:$src)>;
  def : Pat<(v8f32  (bitconvert (v4i64  VR256:$src))), (v8f32  VR256:$src)>;
  def : Pat<(v8f32  (bitconvert (v4f64  VR256:$src))), (v8f32  VR256:$src)>;
  def : Pat<(v8f32  (bitconvert (v32i8  VR256:$src))), (v8f32  VR256:$src)>;
  def : Pat<(v8f32  (bitconvert (v16i16 VR256:$src))), (v8f32  VR256:$src)>;

  // -> v4i64
  def : Pat<(v4i64  (bitconvert (v8f32  VR256:$src))), (v4i64  VR256:$src)>;
  def : Pat<(v4i64  (bitconvert (v8i32  VR256:$src))), (v4i64  VR256:$src)>;
  def : Pat<(v4i64  (bitconvert (v4f64  VR256:$src))), (v4i64  VR256:$src)>;
  def : Pat<(v4i64  (bitconvert (v32i8  VR256:$src))), (v4i64  VR256:$src)>;
  def : Pat<(v4i64  (bitconvert (v16i16 VR256:$src))), (v4i64  VR256:$src)>;

  // -> v32i8
  def : Pat<(v32i8  (bitconvert (v4f64  VR256:$src))), (v32i8  VR256:$src)>;
  def : Pat<(v32i8  (bitconvert (v4i64  VR256:$src))), (v32i8  VR256:$src)>;
  def : Pat<(v32i8  (bitconvert (v8f32  VR256:$src))), (v32i8  VR256:$src)>;
  def : Pat<(v32i8  (bitconvert (v8i32  VR256:$src))), (v32i8  VR256:$src)>;
  def : Pat<(v32i8  (bitconvert (v16i16 VR256:$src))), (v32i8  VR256:$src)>;

  // -> v8i32
  def : Pat<(v8i32  (bitconvert (v32i8  VR256:$src))), (v8i32  VR256:$src)>;
  def : Pat<(v8i32  (bitconvert (v16i16 VR256:$src))), (v8i32  VR256:$src)>;
  def : Pat<(v8i32  (bitconvert (v8f32  VR256:$src))), (v8i32  VR256:$src)>;
  def : Pat<(v8i32  (bitconvert (v4i64  VR256:$src))), (v8i32  VR256:$src)>;
  def : Pat<(v8i32  (bitconvert (v4f64  VR256:$src))), (v8i32  VR256:$src)>;

  // -> v16i16
  def : Pat<(v16i16 (bitconvert (v8f32  VR256:$src))), (v16i16 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v8i32  VR256:$src))), (v16i16 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v4i64  VR256:$src))), (v16i16 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v4f64  VR256:$src))), (v16i16 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v32i8  VR256:$src))), (v16i16 VR256:$src)>;
}

// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
// Scalar FP zero pseudos: opcode 0, no inputs, empty assembly string.
// FsFLD0SS materializes fp32imm0 in FR32 (SSE1); FsFLD0SD materializes
// fpimm0 in FR64 (SSE2).
let isPseudo = 1, SchedRW = [WriteZero] in
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1 in {
  let Predicates = [HasSSE1] in
  def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
                   [(set FR32:$dst, fp32imm0)]>;

  let Predicates = [HasSSE2] in
  def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
                   [(set FR64:$dst, fpimm0)]>;
}

//===----------------------------------------------------------------------===//
// AVX & SSE - Zero/One Vectors
//===----------------------------------------------------------------------===//

// Alias instruction that maps zero vector to pxor / xorp* for sse.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDepsFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
// V_SET0: 128-bit all-zeros pseudo (opcode 0, no inputs, empty asm string),
// matched directly on the v4f32 zero vector.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero] in
def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
               [(set VR128:$dst, (v4f32 immAllZerosV))]>;

// Every other 128-bit zero vector type reuses V_SET0.
def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
def : Pat<(v16i8 immAllZerosV), (V_SET0)>;


// The same as done above but for AVX.  The 256-bit AVX1 ISA doesn't support PI,
// and doesn't need it because on sandy bridge the register is set to zero
// at the rename stage without using any execution unit, so SET0PSY
// and SET0PDY can be used for vector int instructions without penalty
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX], SchedRW = [WriteZero] in
def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
                 [(set VR256:$dst, (v8f32 immAllZerosV))]>;

let Predicates = [HasAVX] in {
  def : Pat<(v4f64 immAllZerosV), (AVX_SET0)>;
}

// With AVX2 the 256-bit integer zero vectors also select AVX_SET0.
let Predicates = [HasAVX2] in {
  def : Pat<(v4i64 immAllZerosV), (AVX_SET0)>;
  def : Pat<(v8i32 immAllZerosV), (AVX_SET0)>;
  def : Pat<(v16i16 immAllZerosV), (AVX_SET0)>;
  def : Pat<(v32i8 immAllZerosV), (AVX_SET0)>;
}

// AVX1 has no support for 256-bit integer instructions, but since the 128-bit
// VPXOR instruction writes zero to its upper part, it's safe to build zeros.
let Predicates = [HasAVX1Only] in {
  // v32i8
  def : Pat<(v32i8 immAllZerosV),
            (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>;
  def : Pat<(bc_v32i8 (v8f32 immAllZerosV)),
            (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>;

  // v16i16
  def : Pat<(v16i16 immAllZerosV),
            (SUBREG_TO_REG (i16 0), (V_SET0), sub_xmm)>;
  def : Pat<(bc_v16i16 (v8f32 immAllZerosV)),
            (SUBREG_TO_REG (i16 0), (V_SET0), sub_xmm)>;

  // v8i32
  def : Pat<(v8i32 immAllZerosV),
            (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>;
  def : Pat<(bc_v8i32 (v8f32 immAllZerosV)),
            (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>;

  // v4i64
  def : Pat<(v4i64 immAllZerosV),
            (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;
  def : Pat<(bc_v4i64 (v8f32 immAllZerosV)),
            (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;
}

// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-ones value if folding it would be beneficial.
// 128-bit all-ones pseudo; its pattern matches the v4i32 all-ones vector.
let SchedRW = [WriteZero], isPseudo = 1, canFoldAsLoad = 1,
    isAsCheapAsAMove = 1, isReMaterializable = 1 in
def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "",
                     [(set VR128:$dst, (v4i32 immAllOnesV))]>;

// 256-bit counterpart (v8i32 all-ones), additionally gated on AVX2.
let SchedRW = [WriteZero], isPseudo = 1, canFoldAsLoad = 1,
    isAsCheapAsAMove = 1, isReMaterializable = 1,
    Predicates = [HasAVX2] in
def AVX2_SETALLONES : I<0, Pseudo, (outs VR256:$dst), (ins), "",
                        [(set VR256:$dst, (v8i32 immAllOnesV))]>;


//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move FP Scalar Instructions
//
// Move Instructions. Register-to-register movss/movsd is not used for FR32/64
// register copies because it's a partial register update; Register-to-register
// movss/movsd is not modeled as an INSERT_SUBREG because INSERT_SUBREG requires
// that the insert be implementable in terms of a copy, and as just mentioned,
// we don't use movss/movsd for copies.
547f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===// 548f4a2713aSLionel Sambuc 549f4a2713aSLionel Sambucmulticlass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt, 550f4a2713aSLionel Sambuc X86MemOperand x86memop, string base_opc, 551f4a2713aSLionel Sambuc string asm_opr> { 552f4a2713aSLionel Sambuc def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst), 553f4a2713aSLionel Sambuc (ins VR128:$src1, RC:$src2), 554f4a2713aSLionel Sambuc !strconcat(base_opc, asm_opr), 555f4a2713aSLionel Sambuc [(set VR128:$dst, (vt (OpNode VR128:$src1, 556f4a2713aSLionel Sambuc (scalar_to_vector RC:$src2))))], 557*0a6a1f1dSLionel Sambuc IIC_SSE_MOV_S_RR>, Sched<[WriteFShuffle]>; 558f4a2713aSLionel Sambuc 559f4a2713aSLionel Sambuc // For the disassembler 560*0a6a1f1dSLionel Sambuc let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in 561f4a2713aSLionel Sambuc def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), 562f4a2713aSLionel Sambuc (ins VR128:$src1, RC:$src2), 563f4a2713aSLionel Sambuc !strconcat(base_opc, asm_opr), 564*0a6a1f1dSLionel Sambuc [], IIC_SSE_MOV_S_RR>, Sched<[WriteFShuffle]>; 565f4a2713aSLionel Sambuc} 566f4a2713aSLionel Sambuc 567f4a2713aSLionel Sambucmulticlass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt, 568f4a2713aSLionel Sambuc X86MemOperand x86memop, string OpcodeStr> { 569f4a2713aSLionel Sambuc // AVX 570f4a2713aSLionel Sambuc defm V#NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr, 571f4a2713aSLionel Sambuc "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, 572f4a2713aSLionel Sambuc VEX_4V, VEX_LIG; 573f4a2713aSLionel Sambuc 574f4a2713aSLionel Sambuc def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), 575f4a2713aSLionel Sambuc !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 576f4a2713aSLionel Sambuc [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, 577f4a2713aSLionel Sambuc VEX, VEX_LIG, Sched<[WriteStore]>; 578f4a2713aSLionel Sambuc // SSE1 & 2 
579f4a2713aSLionel Sambuc let Constraints = "$src1 = $dst" in { 580f4a2713aSLionel Sambuc defm NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr, 581f4a2713aSLionel Sambuc "\t{$src2, $dst|$dst, $src2}">; 582f4a2713aSLionel Sambuc } 583f4a2713aSLionel Sambuc 584f4a2713aSLionel Sambuc def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), 585f4a2713aSLionel Sambuc !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 586f4a2713aSLionel Sambuc [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, 587f4a2713aSLionel Sambuc Sched<[WriteStore]>; 588f4a2713aSLionel Sambuc} 589f4a2713aSLionel Sambuc 590f4a2713aSLionel Sambuc// Loading from memory automatically zeroing upper bits. 591f4a2713aSLionel Sambucmulticlass sse12_move_rm<RegisterClass RC, X86MemOperand x86memop, 592f4a2713aSLionel Sambuc PatFrag mem_pat, string OpcodeStr> { 593f4a2713aSLionel Sambuc def V#NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), 594f4a2713aSLionel Sambuc !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 595f4a2713aSLionel Sambuc [(set RC:$dst, (mem_pat addr:$src))], 596f4a2713aSLionel Sambuc IIC_SSE_MOV_S_RM>, VEX, VEX_LIG, Sched<[WriteLoad]>; 597f4a2713aSLionel Sambuc def NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), 598f4a2713aSLionel Sambuc !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 599f4a2713aSLionel Sambuc [(set RC:$dst, (mem_pat addr:$src))], 600f4a2713aSLionel Sambuc IIC_SSE_MOV_S_RM>, Sched<[WriteLoad]>; 601f4a2713aSLionel Sambuc} 602f4a2713aSLionel Sambuc 603f4a2713aSLionel Sambucdefm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss">, XS; 604f4a2713aSLionel Sambucdefm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd">, XD; 605f4a2713aSLionel Sambuc 606f4a2713aSLionel Sambuclet canFoldAsLoad = 1, isReMaterializable = 1 in { 607f4a2713aSLionel Sambuc defm MOVSS : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS; 608f4a2713aSLionel Sambuc 609f4a2713aSLionel Sambuc let AddedComplexity = 20 in 
610f4a2713aSLionel Sambuc defm MOVSD : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD; 611f4a2713aSLionel Sambuc} 612f4a2713aSLionel Sambuc 613f4a2713aSLionel Sambuc// Patterns 614f4a2713aSLionel Sambuclet Predicates = [UseAVX] in { 615f4a2713aSLionel Sambuc let AddedComplexity = 20 in { 616f4a2713aSLionel Sambuc // MOVSSrm zeros the high parts of the register; represent this 617f4a2713aSLionel Sambuc // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0 618f4a2713aSLionel Sambuc def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), 619f4a2713aSLionel Sambuc (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>; 620f4a2713aSLionel Sambuc def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), 621f4a2713aSLionel Sambuc (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>; 622f4a2713aSLionel Sambuc def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), 623f4a2713aSLionel Sambuc (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>; 624f4a2713aSLionel Sambuc 625f4a2713aSLionel Sambuc // MOVSDrm zeros the high parts of the register; represent this 626f4a2713aSLionel Sambuc // with SUBREG_TO_REG. 
The AVX versions also write: DST[255:128] <- 0 627f4a2713aSLionel Sambuc def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), 628f4a2713aSLionel Sambuc (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>; 629f4a2713aSLionel Sambuc def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), 630f4a2713aSLionel Sambuc (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>; 631f4a2713aSLionel Sambuc def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), 632f4a2713aSLionel Sambuc (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>; 633f4a2713aSLionel Sambuc def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), 634f4a2713aSLionel Sambuc (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>; 635f4a2713aSLionel Sambuc def : Pat<(v2f64 (X86vzload addr:$src)), 636f4a2713aSLionel Sambuc (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>; 637f4a2713aSLionel Sambuc 638f4a2713aSLionel Sambuc // Represent the same patterns above but in the form they appear for 639f4a2713aSLionel Sambuc // 256-bit types 640f4a2713aSLionel Sambuc def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, 641f4a2713aSLionel Sambuc (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))), 642f4a2713aSLionel Sambuc (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>; 643f4a2713aSLionel Sambuc def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, 644f4a2713aSLionel Sambuc (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))), 645f4a2713aSLionel Sambuc (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>; 646f4a2713aSLionel Sambuc def : Pat<(v4f64 (X86vzmovl (insert_subvector undef, 647f4a2713aSLionel Sambuc (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))), 648f4a2713aSLionel Sambuc (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>; 649f4a2713aSLionel Sambuc } 650f4a2713aSLionel Sambuc def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, 651f4a2713aSLionel Sambuc (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))), 652f4a2713aSLionel Sambuc (SUBREG_TO_REG (i64 
0), (VMOVSDrm addr:$src), sub_xmm)>; 653f4a2713aSLionel Sambuc 654f4a2713aSLionel Sambuc // Extract and store. 655f4a2713aSLionel Sambuc def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), 656f4a2713aSLionel Sambuc addr:$dst), 657f4a2713aSLionel Sambuc (VMOVSSmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32))>; 658f4a2713aSLionel Sambuc def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), 659f4a2713aSLionel Sambuc addr:$dst), 660f4a2713aSLionel Sambuc (VMOVSDmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64))>; 661f4a2713aSLionel Sambuc 662f4a2713aSLionel Sambuc // Shuffle with VMOVSS 663f4a2713aSLionel Sambuc def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)), 664f4a2713aSLionel Sambuc (VMOVSSrr (v4i32 VR128:$src1), 665f4a2713aSLionel Sambuc (COPY_TO_REGCLASS (v4i32 VR128:$src2), FR32))>; 666f4a2713aSLionel Sambuc def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)), 667f4a2713aSLionel Sambuc (VMOVSSrr (v4f32 VR128:$src1), 668f4a2713aSLionel Sambuc (COPY_TO_REGCLASS (v4f32 VR128:$src2), FR32))>; 669f4a2713aSLionel Sambuc 670f4a2713aSLionel Sambuc // 256-bit variants 671f4a2713aSLionel Sambuc def : Pat<(v8i32 (X86Movss VR256:$src1, VR256:$src2)), 672f4a2713aSLionel Sambuc (SUBREG_TO_REG (i32 0), 673f4a2713aSLionel Sambuc (VMOVSSrr (EXTRACT_SUBREG (v8i32 VR256:$src1), sub_xmm), 674f4a2713aSLionel Sambuc (EXTRACT_SUBREG (v8i32 VR256:$src2), sub_xmm)), 675f4a2713aSLionel Sambuc sub_xmm)>; 676f4a2713aSLionel Sambuc def : Pat<(v8f32 (X86Movss VR256:$src1, VR256:$src2)), 677f4a2713aSLionel Sambuc (SUBREG_TO_REG (i32 0), 678f4a2713aSLionel Sambuc (VMOVSSrr (EXTRACT_SUBREG (v8f32 VR256:$src1), sub_xmm), 679f4a2713aSLionel Sambuc (EXTRACT_SUBREG (v8f32 VR256:$src2), sub_xmm)), 680f4a2713aSLionel Sambuc sub_xmm)>; 681f4a2713aSLionel Sambuc 682f4a2713aSLionel Sambuc // Shuffle with VMOVSD 683f4a2713aSLionel Sambuc def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)), 684f4a2713aSLionel Sambuc (VMOVSDrr VR128:$src1, 
(COPY_TO_REGCLASS VR128:$src2, FR64))>; 685f4a2713aSLionel Sambuc def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)), 686f4a2713aSLionel Sambuc (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 687f4a2713aSLionel Sambuc def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)), 688f4a2713aSLionel Sambuc (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 689f4a2713aSLionel Sambuc def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)), 690f4a2713aSLionel Sambuc (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 691f4a2713aSLionel Sambuc 692f4a2713aSLionel Sambuc // 256-bit variants 693f4a2713aSLionel Sambuc def : Pat<(v4i64 (X86Movsd VR256:$src1, VR256:$src2)), 694f4a2713aSLionel Sambuc (SUBREG_TO_REG (i32 0), 695f4a2713aSLionel Sambuc (VMOVSDrr (EXTRACT_SUBREG (v4i64 VR256:$src1), sub_xmm), 696f4a2713aSLionel Sambuc (EXTRACT_SUBREG (v4i64 VR256:$src2), sub_xmm)), 697f4a2713aSLionel Sambuc sub_xmm)>; 698f4a2713aSLionel Sambuc def : Pat<(v4f64 (X86Movsd VR256:$src1, VR256:$src2)), 699f4a2713aSLionel Sambuc (SUBREG_TO_REG (i32 0), 700f4a2713aSLionel Sambuc (VMOVSDrr (EXTRACT_SUBREG (v4f64 VR256:$src1), sub_xmm), 701f4a2713aSLionel Sambuc (EXTRACT_SUBREG (v4f64 VR256:$src2), sub_xmm)), 702f4a2713aSLionel Sambuc sub_xmm)>; 703f4a2713aSLionel Sambuc 704f4a2713aSLionel Sambuc // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem 705f4a2713aSLionel Sambuc // is during lowering, where it's not possible to recognize the fold cause 706f4a2713aSLionel Sambuc // it has two uses through a bitcast. One use disappears at isel time and the 707f4a2713aSLionel Sambuc // fold opportunity reappears. 
708f4a2713aSLionel Sambuc def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)), 709f4a2713aSLionel Sambuc (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 710f4a2713aSLionel Sambuc def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)), 711f4a2713aSLionel Sambuc (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 712f4a2713aSLionel Sambuc def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)), 713f4a2713aSLionel Sambuc (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 714f4a2713aSLionel Sambuc def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)), 715f4a2713aSLionel Sambuc (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 716f4a2713aSLionel Sambuc} 717f4a2713aSLionel Sambuc 718f4a2713aSLionel Sambuclet Predicates = [UseSSE1] in { 719*0a6a1f1dSLionel Sambuc let Predicates = [NoSSE41], AddedComplexity = 15 in { 720f4a2713aSLionel Sambuc // Move scalar to XMM zero-extended, zeroing a VR128 then do a 721f4a2713aSLionel Sambuc // MOVSS to the lower bits. 722f4a2713aSLionel Sambuc def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))), 723f4a2713aSLionel Sambuc (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>; 724f4a2713aSLionel Sambuc def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), 725f4a2713aSLionel Sambuc (MOVSSrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>; 726f4a2713aSLionel Sambuc def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), 727f4a2713aSLionel Sambuc (MOVSSrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>; 728f4a2713aSLionel Sambuc } 729f4a2713aSLionel Sambuc 730f4a2713aSLionel Sambuc let AddedComplexity = 20 in { 731f4a2713aSLionel Sambuc // MOVSSrm already zeros the high parts of the register. 
732f4a2713aSLionel Sambuc def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), 733f4a2713aSLionel Sambuc (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>; 734f4a2713aSLionel Sambuc def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), 735f4a2713aSLionel Sambuc (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>; 736f4a2713aSLionel Sambuc def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), 737f4a2713aSLionel Sambuc (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>; 738f4a2713aSLionel Sambuc } 739f4a2713aSLionel Sambuc 740f4a2713aSLionel Sambuc // Extract and store. 741f4a2713aSLionel Sambuc def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), 742f4a2713aSLionel Sambuc addr:$dst), 743f4a2713aSLionel Sambuc (MOVSSmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR32))>; 744f4a2713aSLionel Sambuc 745f4a2713aSLionel Sambuc // Shuffle with MOVSS 746f4a2713aSLionel Sambuc def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)), 747f4a2713aSLionel Sambuc (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>; 748f4a2713aSLionel Sambuc def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)), 749f4a2713aSLionel Sambuc (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>; 750f4a2713aSLionel Sambuc} 751f4a2713aSLionel Sambuc 752f4a2713aSLionel Sambuclet Predicates = [UseSSE2] in { 753*0a6a1f1dSLionel Sambuc let Predicates = [NoSSE41], AddedComplexity = 15 in { 754f4a2713aSLionel Sambuc // Move scalar to XMM zero-extended, zeroing a VR128 then do a 755f4a2713aSLionel Sambuc // MOVSD to the lower bits. 756f4a2713aSLionel Sambuc def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))), 757f4a2713aSLionel Sambuc (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>; 758f4a2713aSLionel Sambuc } 759f4a2713aSLionel Sambuc 760f4a2713aSLionel Sambuc let AddedComplexity = 20 in { 761f4a2713aSLionel Sambuc // MOVSDrm already zeros the high parts of the register. 
762f4a2713aSLionel Sambuc def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), 763f4a2713aSLionel Sambuc (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>; 764f4a2713aSLionel Sambuc def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), 765f4a2713aSLionel Sambuc (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>; 766f4a2713aSLionel Sambuc def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), 767f4a2713aSLionel Sambuc (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>; 768f4a2713aSLionel Sambuc def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), 769f4a2713aSLionel Sambuc (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>; 770f4a2713aSLionel Sambuc def : Pat<(v2f64 (X86vzload addr:$src)), 771f4a2713aSLionel Sambuc (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>; 772f4a2713aSLionel Sambuc } 773f4a2713aSLionel Sambuc 774f4a2713aSLionel Sambuc // Extract and store. 775f4a2713aSLionel Sambuc def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), 776f4a2713aSLionel Sambuc addr:$dst), 777f4a2713aSLionel Sambuc (MOVSDmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR64))>; 778f4a2713aSLionel Sambuc 779f4a2713aSLionel Sambuc // Shuffle with MOVSD 780f4a2713aSLionel Sambuc def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)), 781f4a2713aSLionel Sambuc (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 782f4a2713aSLionel Sambuc def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)), 783f4a2713aSLionel Sambuc (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 784f4a2713aSLionel Sambuc def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)), 785f4a2713aSLionel Sambuc (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 786f4a2713aSLionel Sambuc def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)), 787f4a2713aSLionel Sambuc (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 788f4a2713aSLionel Sambuc 789f4a2713aSLionel Sambuc // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem 
790f4a2713aSLionel Sambuc // is during lowering, where it's not possible to recognize the fold cause 791f4a2713aSLionel Sambuc // it has two uses through a bitcast. One use disappears at isel time and the 792f4a2713aSLionel Sambuc // fold opportunity reappears. 793f4a2713aSLionel Sambuc def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)), 794f4a2713aSLionel Sambuc (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 795f4a2713aSLionel Sambuc def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)), 796f4a2713aSLionel Sambuc (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 797f4a2713aSLionel Sambuc def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)), 798f4a2713aSLionel Sambuc (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 799f4a2713aSLionel Sambuc def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)), 800f4a2713aSLionel Sambuc (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 801f4a2713aSLionel Sambuc} 802f4a2713aSLionel Sambuc 803f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===// 804f4a2713aSLionel Sambuc// SSE 1 & 2 - Move Aligned/Unaligned FP Instructions 805f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===// 806f4a2713aSLionel Sambuc 807f4a2713aSLionel Sambucmulticlass sse12_mov_packed<bits<8> opc, RegisterClass RC, 808f4a2713aSLionel Sambuc X86MemOperand x86memop, PatFrag ld_frag, 809f4a2713aSLionel Sambuc string asm, Domain d, 810f4a2713aSLionel Sambuc OpndItins itins, 811f4a2713aSLionel Sambuc bit IsReMaterializable = 1> { 812*0a6a1f1dSLionel Sambuclet hasSideEffects = 0 in 813f4a2713aSLionel Sambuc def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), 814f4a2713aSLionel Sambuc !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>, 815*0a6a1f1dSLionel Sambuc Sched<[WriteFShuffle]>; 816f4a2713aSLionel Sambuclet canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in 817f4a2713aSLionel 
Sambuc def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), 818f4a2713aSLionel Sambuc !strconcat(asm, "\t{$src, $dst|$dst, $src}"), 819f4a2713aSLionel Sambuc [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>, 820f4a2713aSLionel Sambuc Sched<[WriteLoad]>; 821f4a2713aSLionel Sambuc} 822f4a2713aSLionel Sambuc 823*0a6a1f1dSLionel Sambuclet Predicates = [HasAVX, NoVLX] in { 824f4a2713aSLionel Sambucdefm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, 825f4a2713aSLionel Sambuc "movaps", SSEPackedSingle, SSE_MOVA_ITINS>, 826*0a6a1f1dSLionel Sambuc PS, VEX; 827f4a2713aSLionel Sambucdefm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, 828f4a2713aSLionel Sambuc "movapd", SSEPackedDouble, SSE_MOVA_ITINS>, 829*0a6a1f1dSLionel Sambuc PD, VEX; 830f4a2713aSLionel Sambucdefm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, 831f4a2713aSLionel Sambuc "movups", SSEPackedSingle, SSE_MOVU_ITINS>, 832*0a6a1f1dSLionel Sambuc PS, VEX; 833f4a2713aSLionel Sambucdefm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, 834f4a2713aSLionel Sambuc "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>, 835*0a6a1f1dSLionel Sambuc PD, VEX; 836f4a2713aSLionel Sambuc 837f4a2713aSLionel Sambucdefm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32, 838f4a2713aSLionel Sambuc "movaps", SSEPackedSingle, SSE_MOVA_ITINS>, 839*0a6a1f1dSLionel Sambuc PS, VEX, VEX_L; 840f4a2713aSLionel Sambucdefm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, 841f4a2713aSLionel Sambuc "movapd", SSEPackedDouble, SSE_MOVA_ITINS>, 842*0a6a1f1dSLionel Sambuc PD, VEX, VEX_L; 843f4a2713aSLionel Sambucdefm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, 844f4a2713aSLionel Sambuc "movups", SSEPackedSingle, SSE_MOVU_ITINS>, 845*0a6a1f1dSLionel Sambuc PS, VEX, VEX_L; 846f4a2713aSLionel Sambucdefm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, 847f4a2713aSLionel Sambuc "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 
0>, 848*0a6a1f1dSLionel Sambuc PD, VEX, VEX_L; 849*0a6a1f1dSLionel Sambuc} 850*0a6a1f1dSLionel Sambuc 851*0a6a1f1dSLionel Sambuclet Predicates = [UseSSE1] in { 852f4a2713aSLionel Sambucdefm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, 853f4a2713aSLionel Sambuc "movaps", SSEPackedSingle, SSE_MOVA_ITINS>, 854*0a6a1f1dSLionel Sambuc PS; 855f4a2713aSLionel Sambucdefm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, 856f4a2713aSLionel Sambuc "movups", SSEPackedSingle, SSE_MOVU_ITINS>, 857*0a6a1f1dSLionel Sambuc PS; 858*0a6a1f1dSLionel Sambuc} 859*0a6a1f1dSLionel Sambuclet Predicates = [UseSSE2] in { 860*0a6a1f1dSLionel Sambucdefm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, 861*0a6a1f1dSLionel Sambuc "movapd", SSEPackedDouble, SSE_MOVA_ITINS>, 862*0a6a1f1dSLionel Sambuc PD; 863f4a2713aSLionel Sambucdefm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, 864f4a2713aSLionel Sambuc "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>, 865*0a6a1f1dSLionel Sambuc PD; 866*0a6a1f1dSLionel Sambuc} 867f4a2713aSLionel Sambuc 868*0a6a1f1dSLionel Sambuclet SchedRW = [WriteStore], Predicates = [HasAVX, NoVLX] in { 869f4a2713aSLionel Sambucdef VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 870f4a2713aSLionel Sambuc "movaps\t{$src, $dst|$dst, $src}", 871f4a2713aSLionel Sambuc [(alignedstore (v4f32 VR128:$src), addr:$dst)], 872f4a2713aSLionel Sambuc IIC_SSE_MOVA_P_MR>, VEX; 873f4a2713aSLionel Sambucdef VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 874f4a2713aSLionel Sambuc "movapd\t{$src, $dst|$dst, $src}", 875f4a2713aSLionel Sambuc [(alignedstore (v2f64 VR128:$src), addr:$dst)], 876f4a2713aSLionel Sambuc IIC_SSE_MOVA_P_MR>, VEX; 877f4a2713aSLionel Sambucdef VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 878f4a2713aSLionel Sambuc "movups\t{$src, $dst|$dst, $src}", 879f4a2713aSLionel Sambuc [(store (v4f32 VR128:$src), addr:$dst)], 880f4a2713aSLionel 
Sambuc IIC_SSE_MOVU_P_MR>, VEX; 881f4a2713aSLionel Sambucdef VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 882f4a2713aSLionel Sambuc "movupd\t{$src, $dst|$dst, $src}", 883f4a2713aSLionel Sambuc [(store (v2f64 VR128:$src), addr:$dst)], 884f4a2713aSLionel Sambuc IIC_SSE_MOVU_P_MR>, VEX; 885f4a2713aSLionel Sambucdef VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), 886f4a2713aSLionel Sambuc "movaps\t{$src, $dst|$dst, $src}", 887f4a2713aSLionel Sambuc [(alignedstore256 (v8f32 VR256:$src), addr:$dst)], 888f4a2713aSLionel Sambuc IIC_SSE_MOVA_P_MR>, VEX, VEX_L; 889f4a2713aSLionel Sambucdef VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), 890f4a2713aSLionel Sambuc "movapd\t{$src, $dst|$dst, $src}", 891f4a2713aSLionel Sambuc [(alignedstore256 (v4f64 VR256:$src), addr:$dst)], 892f4a2713aSLionel Sambuc IIC_SSE_MOVA_P_MR>, VEX, VEX_L; 893f4a2713aSLionel Sambucdef VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), 894f4a2713aSLionel Sambuc "movups\t{$src, $dst|$dst, $src}", 895f4a2713aSLionel Sambuc [(store (v8f32 VR256:$src), addr:$dst)], 896f4a2713aSLionel Sambuc IIC_SSE_MOVU_P_MR>, VEX, VEX_L; 897f4a2713aSLionel Sambucdef VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), 898f4a2713aSLionel Sambuc "movupd\t{$src, $dst|$dst, $src}", 899f4a2713aSLionel Sambuc [(store (v4f64 VR256:$src), addr:$dst)], 900f4a2713aSLionel Sambuc IIC_SSE_MOVU_P_MR>, VEX, VEX_L; 901f4a2713aSLionel Sambuc} // SchedRW 902f4a2713aSLionel Sambuc 903f4a2713aSLionel Sambuc// For disassembler 904*0a6a1f1dSLionel Sambuclet isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, 905*0a6a1f1dSLionel Sambuc SchedRW = [WriteFShuffle] in { 906f4a2713aSLionel Sambuc def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst), 907f4a2713aSLionel Sambuc (ins VR128:$src), 908f4a2713aSLionel Sambuc "movaps\t{$src, $dst|$dst, $src}", [], 909f4a2713aSLionel Sambuc IIC_SSE_MOVA_P_RR>, 
VEX; 910f4a2713aSLionel Sambuc def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst), 911f4a2713aSLionel Sambuc (ins VR128:$src), 912f4a2713aSLionel Sambuc "movapd\t{$src, $dst|$dst, $src}", [], 913f4a2713aSLionel Sambuc IIC_SSE_MOVA_P_RR>, VEX; 914f4a2713aSLionel Sambuc def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst), 915f4a2713aSLionel Sambuc (ins VR128:$src), 916f4a2713aSLionel Sambuc "movups\t{$src, $dst|$dst, $src}", [], 917f4a2713aSLionel Sambuc IIC_SSE_MOVU_P_RR>, VEX; 918f4a2713aSLionel Sambuc def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst), 919f4a2713aSLionel Sambuc (ins VR128:$src), 920f4a2713aSLionel Sambuc "movupd\t{$src, $dst|$dst, $src}", [], 921f4a2713aSLionel Sambuc IIC_SSE_MOVU_P_RR>, VEX; 922f4a2713aSLionel Sambuc def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst), 923f4a2713aSLionel Sambuc (ins VR256:$src), 924f4a2713aSLionel Sambuc "movaps\t{$src, $dst|$dst, $src}", [], 925f4a2713aSLionel Sambuc IIC_SSE_MOVA_P_RR>, VEX, VEX_L; 926f4a2713aSLionel Sambuc def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst), 927f4a2713aSLionel Sambuc (ins VR256:$src), 928f4a2713aSLionel Sambuc "movapd\t{$src, $dst|$dst, $src}", [], 929f4a2713aSLionel Sambuc IIC_SSE_MOVA_P_RR>, VEX, VEX_L; 930f4a2713aSLionel Sambuc def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst), 931f4a2713aSLionel Sambuc (ins VR256:$src), 932f4a2713aSLionel Sambuc "movups\t{$src, $dst|$dst, $src}", [], 933f4a2713aSLionel Sambuc IIC_SSE_MOVU_P_RR>, VEX, VEX_L; 934f4a2713aSLionel Sambuc def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst), 935f4a2713aSLionel Sambuc (ins VR256:$src), 936f4a2713aSLionel Sambuc "movupd\t{$src, $dst|$dst, $src}", [], 937f4a2713aSLionel Sambuc IIC_SSE_MOVU_P_RR>, VEX, VEX_L; 938f4a2713aSLionel Sambuc} 939f4a2713aSLionel Sambuc 940f4a2713aSLionel Sambuclet Predicates = [HasAVX] in { 941f4a2713aSLionel Sambucdef : Pat<(v8i32 (X86vzmovl 942f4a2713aSLionel Sambuc (insert_subvector undef, (v4i32 
VR128:$src), (iPTR 0)))), 943f4a2713aSLionel Sambuc (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; 944f4a2713aSLionel Sambucdef : Pat<(v4i64 (X86vzmovl 945f4a2713aSLionel Sambuc (insert_subvector undef, (v2i64 VR128:$src), (iPTR 0)))), 946f4a2713aSLionel Sambuc (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; 947f4a2713aSLionel Sambucdef : Pat<(v8f32 (X86vzmovl 948f4a2713aSLionel Sambuc (insert_subvector undef, (v4f32 VR128:$src), (iPTR 0)))), 949f4a2713aSLionel Sambuc (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; 950f4a2713aSLionel Sambucdef : Pat<(v4f64 (X86vzmovl 951f4a2713aSLionel Sambuc (insert_subvector undef, (v2f64 VR128:$src), (iPTR 0)))), 952f4a2713aSLionel Sambuc (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; 953f4a2713aSLionel Sambuc} 954f4a2713aSLionel Sambuc 955f4a2713aSLionel Sambuc 956f4a2713aSLionel Sambucdef : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src), 957f4a2713aSLionel Sambuc (VMOVUPSYmr addr:$dst, VR256:$src)>; 958f4a2713aSLionel Sambucdef : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src), 959f4a2713aSLionel Sambuc (VMOVUPDYmr addr:$dst, VR256:$src)>; 960f4a2713aSLionel Sambuc 961f4a2713aSLionel Sambuclet SchedRW = [WriteStore] in { 962f4a2713aSLionel Sambucdef MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 963f4a2713aSLionel Sambuc "movaps\t{$src, $dst|$dst, $src}", 964f4a2713aSLionel Sambuc [(alignedstore (v4f32 VR128:$src), addr:$dst)], 965f4a2713aSLionel Sambuc IIC_SSE_MOVA_P_MR>; 966f4a2713aSLionel Sambucdef MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 967f4a2713aSLionel Sambuc "movapd\t{$src, $dst|$dst, $src}", 968f4a2713aSLionel Sambuc [(alignedstore (v2f64 VR128:$src), addr:$dst)], 969f4a2713aSLionel Sambuc IIC_SSE_MOVA_P_MR>; 970f4a2713aSLionel Sambucdef MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 971f4a2713aSLionel Sambuc "movups\t{$src, $dst|$dst, $src}", 972f4a2713aSLionel Sambuc [(store 
(v4f32 VR128:$src), addr:$dst)],
                   IIC_SSE_MOVU_P_MR>;
// movupd register-to-memory form; selected for a plain store of a v2f64 value.
def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movupd\t{$src, $dst|$dst, $src}",
                   [(store (v2f64 VR128:$src), addr:$dst)], IIC_SSE_MOVU_P_MR>;
} // SchedRW

// For the disassembler: register-to-register forms that use the MRMDestReg
// encodings (0x29 / 0x11). They have no selection pattern.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
    SchedRW = [WriteFShuffle] in {
  def MOVAPSrr_REV : PSI<0x29, MRMDestReg,
                         (outs VR128:$dst), (ins VR128:$src),
                         "movaps\t{$src, $dst|$dst, $src}", [],
                         IIC_SSE_MOVA_P_RR>;
  def MOVAPDrr_REV : PDI<0x29, MRMDestReg,
                         (outs VR128:$dst), (ins VR128:$src),
                         "movapd\t{$src, $dst|$dst, $src}", [],
                         IIC_SSE_MOVA_P_RR>;
  def MOVUPSrr_REV : PSI<0x11, MRMDestReg,
                         (outs VR128:$dst), (ins VR128:$src),
                         "movups\t{$src, $dst|$dst, $src}", [],
                         IIC_SSE_MOVU_P_RR>;
  def MOVUPDrr_REV : PDI<0x11, MRMDestReg,
                         (outs VR128:$dst), (ins VR128:$src),
                         "movupd\t{$src, $dst|$dst, $src}", [],
                         IIC_SSE_MOVU_P_RR>;
}

// Unaligned-store intrinsics map onto the (V)MOVUPS / (V)MOVUPD store forms.
let Predicates = [HasAVX] in {
  def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src),
            (VMOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src),
            (VMOVUPDmr addr:$dst, VR128:$src)>;
}

let Predicates = [UseSSE1] in {
  def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src),
            (MOVUPSmr addr:$dst, VR128:$src)>;
}
let Predicates = [UseSSE2] in {
  def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src),
            (MOVUPDmr addr:$dst, VR128:$src)>;
}

// Use vmovaps/vmovups for AVX integer load/store.
let Predicates = [HasAVX, NoVLX] in {
  // 128-bit loads.
  def : Pat<(alignedloadv2i64 addr:$src), (VMOVAPSrm addr:$src)>;
  def : Pat<(loadv2i64 addr:$src), (VMOVUPSrm addr:$src)>;

  // 128-bit aligned stores, one pattern per integer vector type.
  def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
            (VMOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
            (VMOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
            (VMOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
            (VMOVAPSmr addr:$dst, VR128:$src)>;
  // 128-bit plain stores.
  def : Pat<(store (v2i64 VR128:$src), addr:$dst),
            (VMOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v4i32 VR128:$src), addr:$dst),
            (VMOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
            (VMOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
            (VMOVUPSmr addr:$dst, VR128:$src)>;

  // 256-bit loads.
  def : Pat<(alignedloadv4i64 addr:$src), (VMOVAPSYrm addr:$src)>;
  def : Pat<(loadv4i64 addr:$src), (VMOVUPSYrm addr:$src)>;
  // 256-bit aligned stores.
  def : Pat<(alignedstore256 (v4i64 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore256 (v8i32 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore256 (v16i16 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore256 (v32i8 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  // 256-bit plain stores.
  def : Pat<(store (v4i64 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v8i32 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v16i16 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v32i8 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;

  // Special patterns for storing subvector extracts of the lower 128 bits.
  // It's cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr.
  def : Pat<(alignedstore (v2f64 (extract_subvector (v4f64 VR256:$src),
                                                     (iPTR 0))),
                          addr:$dst),
            (VMOVAPDmr addr:$dst,
                       (v2f64 (EXTRACT_SUBREG VR256:$src, sub_xmm)))>;
  def : Pat<(alignedstore (v4f32 (extract_subvector (v8f32 VR256:$src),
                                                     (iPTR 0))),
                          addr:$dst),
            (VMOVAPSmr addr:$dst,
                       (v4f32 (EXTRACT_SUBREG VR256:$src, sub_xmm)))>;
  def : Pat<(alignedstore (v2i64 (extract_subvector (v4i64 VR256:$src),
                                                     (iPTR 0))),
                          addr:$dst),
            (VMOVAPDmr addr:$dst,
                       (v2i64 (EXTRACT_SUBREG VR256:$src, sub_xmm)))>;
  def : Pat<(alignedstore (v4i32 (extract_subvector (v8i32 VR256:$src),
                                                     (iPTR 0))),
                          addr:$dst),
            (VMOVAPSmr addr:$dst,
                       (v4i32 (EXTRACT_SUBREG VR256:$src, sub_xmm)))>;
  def : Pat<(alignedstore (v8i16 (extract_subvector (v16i16 VR256:$src),
                                                     (iPTR 0))),
                          addr:$dst),
            (VMOVAPSmr addr:$dst,
                       (v8i16 (EXTRACT_SUBREG VR256:$src, sub_xmm)))>;
  def : Pat<(alignedstore (v16i8 (extract_subvector (v32i8 VR256:$src),
                                                     (iPTR 0))),
                          addr:$dst),
            (VMOVAPSmr addr:$dst,
                       (v16i8 (EXTRACT_SUBREG VR256:$src, sub_xmm)))>;

  // Same subvector-extract stores, plain (non-aligned) store node.
  def : Pat<(store (v2f64 (extract_subvector (v4f64 VR256:$src), (iPTR 0))),
                   addr:$dst),
            (VMOVUPDmr addr:$dst,
                       (v2f64 (EXTRACT_SUBREG VR256:$src, sub_xmm)))>;
  def : Pat<(store (v4f32 (extract_subvector (v8f32 VR256:$src), (iPTR 0))),
                   addr:$dst),
            (VMOVUPSmr addr:$dst,
                       (v4f32 (EXTRACT_SUBREG VR256:$src, sub_xmm)))>;
  def : Pat<(store (v2i64 (extract_subvector (v4i64 VR256:$src), (iPTR 0))),
                   addr:$dst),
            (VMOVUPDmr addr:$dst,
                       (v2i64 (EXTRACT_SUBREG VR256:$src, sub_xmm)))>;
  def : Pat<(store (v4i32 (extract_subvector (v8i32 VR256:$src), (iPTR 0))),
                   addr:$dst),
            (VMOVUPSmr addr:$dst,
                       (v4i32 (EXTRACT_SUBREG VR256:$src, sub_xmm)))>;
  def : Pat<(store (v8i16 (extract_subvector (v16i16 VR256:$src), (iPTR 0))),
                   addr:$dst),
            (VMOVUPSmr addr:$dst,
                       (v8i16 (EXTRACT_SUBREG VR256:$src, sub_xmm)))>;
  def : Pat<(store (v16i8 (extract_subvector (v32i8 VR256:$src), (iPTR 0))),
                   addr:$dst),
            (VMOVUPSmr addr:$dst,
                       (v16i8 (EXTRACT_SUBREG VR256:$src, sub_xmm)))>;
}

// Use movaps / movups for SSE integer load / store (one byte shorter).
// The instructions selected below are then converted to MOVDQA/MOVDQU
// during the SSE domain pass.
let Predicates = [UseSSE1] in {
  // Loads.
  def : Pat<(alignedloadv2i64 addr:$src), (MOVAPSrm addr:$src)>;
  def : Pat<(loadv2i64 addr:$src), (MOVUPSrm addr:$src)>;

  // Aligned stores.
  def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  // Plain stores.
  def : Pat<(store (v2i64 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v4i32 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
}

// Alias instructions to load FR32 or FR64 from f128mem using movaps/movapd.
// Upper bits are disregarded. FIXME: Set encoding to pseudo!
let canFoldAsLoad = 1, isReMaterializable = 1, isCodeGenOnly = 1,
    SchedRW = [WriteLoad] in {
  def FsVMOVAPSrm : VPSI<0x28, MRMSrcMem, (outs FR32:$dst),
                         (ins f128mem:$src),
                         "movaps\t{$src, $dst|$dst, $src}",
                         [(set FR32:$dst, (alignedloadfsf32 addr:$src))],
                         IIC_SSE_MOVA_P_RM>, VEX;
  def FsVMOVAPDrm : VPDI<0x28, MRMSrcMem, (outs FR64:$dst),
                         (ins f128mem:$src),
                         "movapd\t{$src, $dst|$dst, $src}",
                         [(set FR64:$dst, (alignedloadfsf64 addr:$src))],
                         IIC_SSE_MOVA_P_RM>, VEX;
  def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst),
                       (ins f128mem:$src),
                       "movaps\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (alignedloadfsf32 addr:$src))],
                       IIC_SSE_MOVA_P_RM>;
  def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst),
                       (ins f128mem:$src),
                       "movapd\t{$src, $dst|$dst, $src}",
                       [(set FR64:$dst, (alignedloadfsf64 addr:$src))],
                       IIC_SSE_MOVA_P_RM>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Low packed FP Instructions
//===----------------------------------------------------------------------===//

// Defines the PSrm / PDrm memory-source forms shared by the low and high
// packed moves.
//   opc             - opcode byte used by both forms.
//   psnode, pdnode  - selection nodes matched by the PS and PD form.
//   base_opc        - mnemonic stem; "s" or "d" is appended per form.
//   asm_opr         - operand part of the assembly string.
//   itin            - itinerary class given to both forms.
multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode psnode, SDNode pdnode,
                                      string base_opc, string asm_opr,
                                      InstrItinClass itin> {
  def PSrm : PI<opc, MRMSrcMem,
                (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
                !strconcat(base_opc, "s", asm_opr),
                [(set VR128:$dst,
                   (psnode VR128:$src1,
                      (bc_v4f32 (v2f64 (scalar_to_vector
                                          (loadf64 addr:$src2))))))],
                itin, SSEPackedSingle>, PS,
             Sched<[WriteFShuffleLd, ReadAfterLd]>;

  def PDrm : PI<opc, MRMSrcMem,
                (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
                !strconcat(base_opc, "d", asm_opr),
                [(set VR128:$dst,
                   (v2f64 (pdnode VR128:$src1,
                             (scalar_to_vector (loadf64 addr:$src2)))))],
                itin, SSEPackedDouble>, PD,
             Sched<[WriteFShuffleLd, ReadAfterLd]>;
}

// Instantiates the base multiclass twice: a V-prefixed VEX_4V variant with a
// three-operand assembly string, and a two-operand variant whose $src1 is
// tied to $dst.
multiclass sse12_mov_hilo_packed<bits<8>opc, SDNode psnode, SDNode pdnode,
                                 string base_opc, InstrItinClass itin> {
  defm V#NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  itin>, VEX_4V;

  let Constraints = "$src1 = $dst" in
  defm NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
                "\t{$src2, $dst|$dst, $src2}",
                itin>;
}

let AddedComplexity = 20 in
defm MOVL : sse12_mov_hilo_packed<0x12, X86Movlps, X86Movlpd, "movlp",
                                  IIC_SSE_MOV_LH>;

// Stores of element 0 of the source viewed as v2f64.
let SchedRW = [WriteStore] in {
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movlps\t{$src, $dst|$dst, $src}",
                     [(store (f64 (vector_extract
                                     (bc_v2f64 (v4f32 VR128:$src)),
                                     (iPTR 0))),
                             addr:$dst)],
                     IIC_SSE_MOV_LH>, VEX;
def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movlpd\t{$src, $dst|$dst, $src}",
                     [(store (f64 (vector_extract (v2f64 VR128:$src),
                                                  (iPTR 0))),
                             addr:$dst)],
                     IIC_SSE_MOV_LH>, VEX;
def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movlps\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract
                                   (bc_v2f64 (v4f32 VR128:$src)),
                                   (iPTR 0))),
                           addr:$dst)],
                   IIC_SSE_MOV_LH>;
def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movlpd\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract (v2f64 VR128:$src),
                                                (iPTR 0))),
                           addr:$dst)],
                   IIC_SSE_MOV_LH>;
} // SchedRW

let Predicates = [HasAVX] in {
  // Shuffle with VMOVLPS
  def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
            (VMOVLPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
            (VMOVLPSrm VR128:$src1, addr:$src2)>;

  // Shuffle with VMOVLPD
  def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
            (VMOVLPDrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
            (VMOVLPDrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2f64 (X86Movsd VR128:$src1,
                      (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
            (VMOVLPDrm VR128:$src1, addr:$src2)>;

  // Store patterns: a load / shuffle / store back to the same address
  // becomes a single low-half store.
  def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),
                   addr:$src1),
            (VMOVLPSmr addr:$src1, VR128:$src2)>;
  def : Pat<(store (v4i32 (X86Movlps (bc_v4i32 (loadv2i64 addr:$src1)),
                                     VR128:$src2)),
                   addr:$src1),
            (VMOVLPSmr addr:$src1, VR128:$src2)>;
  def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),
                   addr:$src1),
            (VMOVLPDmr addr:$src1, VR128:$src2)>;
  def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),
                   addr:$src1),
            (VMOVLPDmr addr:$src1, VR128:$src2)>;
}

let Predicates = [UseSSE1] in {
  // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
  def : Pat<(store (i64 (vector_extract (bc_v2i64 (v4f32 VR128:$src2)),
                                        (iPTR 0))),
                   addr:$src1),
            (MOVLPSmr addr:$src1, VR128:$src2)>;

  // Shuffle with MOVLPS
  def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
            (MOVLPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
            (MOVLPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86Movlps VR128:$src1,
               (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
            (MOVLPSrm VR128:$src1, addr:$src2)>;

  // Store patterns
  def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),
                   addr:$src1),
            (MOVLPSmr addr:$src1, VR128:$src2)>;
  def : Pat<(store (v4i32 (X86Movlps (bc_v4i32 (loadv2i64 addr:$src1)),
                                     VR128:$src2)),
                   addr:$src1),
            (MOVLPSmr addr:$src1, VR128:$src2)>;
}

let Predicates = [UseSSE2] in {
  // Shuffle with MOVLPD
  def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
            (MOVLPDrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
            (MOVLPDrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2f64 (X86Movsd VR128:$src1,
                      (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
            (MOVLPDrm VR128:$src1, addr:$src2)>;

  // Store patterns
  def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),
                   addr:$src1),
            (MOVLPDmr addr:$src1, VR128:$src2)>;
  def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),
                   addr:$src1),
            (MOVLPDmr addr:$src1, VR128:$src2)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Hi packed FP Instructions
//===----------------------------------------------------------------------===//

let AddedComplexity = 20 in
defm MOVH : sse12_mov_hilo_packed<0x16, X86Movlhps, X86Movlhpd, "movhp",
                                  IIC_SSE_MOV_LH>;

let SchedRW = [WriteStore] in {
// v2f64 extract element 1 is always custom lowered to unpack high to low
// and extract element 0, so the non-store version isn't too horrible.
def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movhps\t{$src, $dst|$dst, $src}",
                     [(store (f64 (vector_extract
                                     (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
                                                (bc_v2f64 (v4f32 VR128:$src))),
                                     (iPTR 0))),
                             addr:$dst)],
                     IIC_SSE_MOV_LH>, VEX;
def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movhpd\t{$src, $dst|$dst, $src}",
                     [(store (f64 (vector_extract
                                     (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
                                     (iPTR 0))),
                             addr:$dst)],
                     IIC_SSE_MOV_LH>, VEX;
def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movhps\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract
                                   (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
                                              (bc_v2f64 (v4f32 VR128:$src))),
                                   (iPTR 0))),
                           addr:$dst)],
                   IIC_SSE_MOV_LH>;
def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movhpd\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract
                                   (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
                                   (iPTR 0))),
                           addr:$dst)],
                   IIC_SSE_MOV_LH>;
} // SchedRW

let Predicates = [HasAVX] in {
  // VMOVHPS patterns
  def : Pat<(X86Movlhps VR128:$src1,
               (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
            (VMOVHPSrm VR128:$src1, addr:$src2)>;
  // NOTE(review): this pattern bitcasts through bc_v4i32 while the matching
  // UseSSE1 pattern further down uses bc_v4f32 - confirm which is intended.
  def : Pat<(X86Movlhps VR128:$src1,
               (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
            (VMOVHPSrm VR128:$src1, addr:$src2)>;

  // VMOVHPD patterns

  // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here. The
  // problem is during lowering, where it's not possible to recognize the load
  // fold because it has two uses through a bitcast. One use disappears at
  // isel time and the fold opportunity reappears.
  def : Pat<(v2f64 (X86Unpckl VR128:$src1,
                      (scalar_to_vector (loadf64 addr:$src2)))),
            (VMOVHPDrm VR128:$src1, addr:$src2)>;
  // Also handle an i64 load because that may get selected as a faster way to
  // load the data.
  def : Pat<(v2f64 (X86Unpckl VR128:$src1,
                      (bc_v2f64 (v2i64 (scalar_to_vector
                                          (loadi64 addr:$src2)))))),
            (VMOVHPDrm VR128:$src1, addr:$src2)>;

  def : Pat<(store (f64 (vector_extract
                           (v2f64 (X86VPermilpi VR128:$src, (i8 1))),
                           (iPTR 0))),
                   addr:$dst),
            (VMOVHPDmr addr:$dst, VR128:$src)>;
}

let Predicates = [UseSSE1] in {
  // MOVHPS patterns
  def : Pat<(X86Movlhps VR128:$src1,
               (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
            (MOVHPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86Movlhps VR128:$src1,
               (bc_v4f32 (v2i64 (X86vzload addr:$src2)))),
            (MOVHPSrm VR128:$src1, addr:$src2)>;
}

let Predicates = [UseSSE2] in {
  // MOVHPD patterns

  // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here. The
  // problem is during lowering, where it's not possible to recognize the load
  // fold because it has two uses through a bitcast. One use disappears at
  // isel time and the fold opportunity reappears.
  def : Pat<(v2f64 (X86Unpckl VR128:$src1,
                      (scalar_to_vector (loadf64 addr:$src2)))),
            (MOVHPDrm VR128:$src1, addr:$src2)>;
  // Also handle an i64 load because that may get selected as a faster way to
  // load the data.
  def : Pat<(v2f64 (X86Unpckl VR128:$src1,
                      (bc_v2f64 (v2i64 (scalar_to_vector
                                          (loadi64 addr:$src2)))))),
            (MOVHPDrm VR128:$src1, addr:$src2)>;

  def : Pat<(store (f64 (vector_extract
                           (v2f64 (X86Shufp VR128:$src, VR128:$src, (i8 1))),
                           (iPTR 0))),
                   addr:$dst),
            (MOVHPDmr addr:$dst, VR128:$src)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//

// Three-operand VEX forms.
let AddedComplexity = 20, Predicates = [UseAVX] in {
  def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src2),
                        "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        [(set VR128:$dst,
                           (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
                        IIC_SSE_MOV_LH>,
                   VEX_4V, Sched<[WriteFShuffle]>;
  def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src2),
                        "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        [(set VR128:$dst,
                           (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))],
                        IIC_SSE_MOV_LH>,
                   VEX_4V, Sched<[WriteFShuffle]>;
}
// Two-operand forms with $src1 tied to $dst.
let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
  def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
                      (ins VR128:$src1, VR128:$src2),
                      "movlhps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                         (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
                      IIC_SSE_MOV_LH>, Sched<[WriteFShuffle]>;
  def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
                      (ins VR128:$src1, VR128:$src2),
                      "movhlps\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                         (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))],
                      IIC_SSE_MOV_LH>, Sched<[WriteFShuffle]>;
}

let Predicates = [UseAVX] in {
  // MOVLHPS patterns
  def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
            (VMOVLHPSrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
            (VMOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;

  // MOVHLPS patterns
  def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)),
            (VMOVHLPSrr VR128:$src1, VR128:$src2)>;
}

let Predicates = [UseSSE1] in {
  // MOVLHPS patterns
  def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
            (MOVLHPSrr VR128:$src1, VR128:$src2)>;
  def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
            (MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;

  // MOVHLPS patterns
  def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)),
            (MOVHLPSrr VR128:$src1, VR128:$src2)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Conversion Instructions
//===----------------------------------------------------------------------===//

// Itinerary bundles for the conversion instructions. SSE_CVT_PD sets no Sched
// override, so it keeps the default declared by OpndItins.
def SSE_CVT_PD : OpndItins<IIC_SSE_CVT_PD_RR, IIC_SSE_CVT_PD_RM>;

let Sched = WriteCvtI2F in {
def SSE_CVT_PS : OpndItins<IIC_SSE_CVT_PS_RR, IIC_SSE_CVT_PS_RM>;

def SSE_CVT_Scalar : OpndItins<IIC_SSE_CVT_Scalar_RR, IIC_SSE_CVT_Scalar_RM>;
}

let Sched = WriteCvtF2I in {
def SSE_CVT_SS2SI_32 : OpndItins<
  IIC_SSE_CVT_SS2SI32_RR, IIC_SSE_CVT_SS2SI32_RM
>;

def SSE_CVT_SS2SI_64 : OpndItins<
  IIC_SSE_CVT_SS2SI64_RR, IIC_SSE_CVT_SS2SI64_RM
>;

def SSE_CVT_SD2SI : OpndItins<IIC_SSE_CVT_SD2SI_RR, IIC_SSE_CVT_SD2SI_RM>;
}

// Scalar conversion with selection patterns: rr applies OpNode to a register
// source, rm applies it to ld_frag of a memory source.
multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                       SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
                       string asm, OpndItins itins> {
  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
              [(set DstRC:$dst, (OpNode SrcRC:$src))],
              itins.rr>,
           Sched<[itins.Sched]>;
  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
              [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))],
              itins.rm>,
           Sched<[itins.Sched.Folded]>;
}

// Packed conversion without selection patterns; d is the execution domain.
multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                       X86MemOperand x86memop, string asm, Domain d,
                       OpndItins itins> {
let hasSideEffects = 0 in {
  def rr : I<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
             [], itins.rr, d>,
           Sched<[itins.Sched]>;
  let mayLoad = 1 in
  def rm : I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
             [], itins.rm, d>,
           Sched<[itins.Sched.Folded]>;
}
}

// AVX scalar conversion without selection patterns. Both forms take an extra
// DstRC:$src1 input ahead of the value being converted.
multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                          X86MemOperand x86memop, string asm> {
let hasSideEffects = 0, Predicates = [UseAVX] in {
  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst),
              (ins DstRC:$src1, SrcRC:$src),
              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
           Sched<[WriteCvtI2F]>;
  let mayLoad = 1 in
  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins DstRC:$src1, x86memop:$src),
              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
           Sched<[WriteCvtI2FLd, ReadAfterLd]>;
} // hasSideEffects = 0
}

let Predicates = [UseAVX] in {
defm VCVTTSS2SI   : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
                                "cvttss2si\t{$src, $dst|$dst, $src}",
                                SSE_CVT_SS2SI_32>,
                    XS, VEX, VEX_LIG;
defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
                                "cvttss2si\t{$src, $dst|$dst, $src}",
                                SSE_CVT_SS2SI_64>,
                    XS, VEX, VEX_W, VEX_LIG;
defm VCVTTSD2SI   : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
                                "cvttsd2si\t{$src, $dst|$dst, $src}",
                                SSE_CVT_SD2SI>,
                    XD, VEX, VEX_LIG;
defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
                                "cvttsd2si\t{$src, $dst|$dst, $src}",
                                SSE_CVT_SD2SI>,
                    XD, VEX, VEX_W, VEX_LIG;

// Spellings with the optional {l} / {q} mnemonic suffix.
def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SIrr GR32:$dst, FR32:$src), 0>;
def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SIrm GR32:$dst, f32mem:$src), 0>;
def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SIrr GR32:$dst, FR64:$src), 0>;
def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SIrm GR32:$dst, f64mem:$src), 0>;
def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SI64rr GR64:$dst, FR32:$src), 0>;
def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SI64rm GR64:$dst, f32mem:$src), 0>;
def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SI64rr GR64:$dst, FR64:$src), 0>;
def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SI64rm GR64:$dst, f64mem:$src), 0>;
}
// The assembler can recognize rr 64-bit instructions by seeing a rxx
// register, but the same isn't true when only memory operands are used, so
// explicit "l" and "q" assembly forms are provided where appropriate.
defm VCVTSI2SS   : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss{l}">,
                   XS, VEX_4V, VEX_LIG;
defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">,
                   XS, VEX_4V, VEX_W, VEX_LIG;
defm VCVTSI2SD   : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">,
                   XD, VEX_4V, VEX_LIG;
defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">,
                   XD, VEX_4V, VEX_W, VEX_LIG;

let Predicates = [UseAVX] in {
  // Suffix-less spellings of the 32-bit memory forms.
  // NOTE(review): the vcvtsi2ss alias names FR64 operands although VCVTSI2SS
  // is instantiated above with FR32 as its destination class - confirm.
  def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                  (VCVTSI2SSrm FR64:$dst, FR64:$src1, i32mem:$src), 0>;
  def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
                  (VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src), 0>;

  // sint_to_fp of a loaded integer; $src1 is filled with IMPLICIT_DEF.
  def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
            (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
            (VCVTSI2SS64rm (f32 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
            (VCVTSI2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
            (VCVTSI2SD64rm (f64 (IMPLICIT_DEF)), addr:$src)>;

  def : Pat<(f32 (sint_to_fp GR32:$src)),
            (VCVTSI2SSrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
  def :
Pat<(f32 (sint_to_fp GR64:$src)), 1583f4a2713aSLionel Sambuc (VCVTSI2SS64rr (f32 (IMPLICIT_DEF)), GR64:$src)>; 1584f4a2713aSLionel Sambuc def : Pat<(f64 (sint_to_fp GR32:$src)), 1585f4a2713aSLionel Sambuc (VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>; 1586f4a2713aSLionel Sambuc def : Pat<(f64 (sint_to_fp GR64:$src)), 1587f4a2713aSLionel Sambuc (VCVTSI2SD64rr (f64 (IMPLICIT_DEF)), GR64:$src)>; 1588f4a2713aSLionel Sambuc} 1589f4a2713aSLionel Sambuc 1590f4a2713aSLionel Sambucdefm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, 1591f4a2713aSLionel Sambuc "cvttss2si\t{$src, $dst|$dst, $src}", 1592f4a2713aSLionel Sambuc SSE_CVT_SS2SI_32>, XS; 1593f4a2713aSLionel Sambucdefm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32, 1594f4a2713aSLionel Sambuc "cvttss2si\t{$src, $dst|$dst, $src}", 1595f4a2713aSLionel Sambuc SSE_CVT_SS2SI_64>, XS, REX_W; 1596f4a2713aSLionel Sambucdefm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, 1597f4a2713aSLionel Sambuc "cvttsd2si\t{$src, $dst|$dst, $src}", 1598f4a2713aSLionel Sambuc SSE_CVT_SD2SI>, XD; 1599f4a2713aSLionel Sambucdefm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64, 1600f4a2713aSLionel Sambuc "cvttsd2si\t{$src, $dst|$dst, $src}", 1601f4a2713aSLionel Sambuc SSE_CVT_SD2SI>, XD, REX_W; 1602f4a2713aSLionel Sambucdefm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32, 1603f4a2713aSLionel Sambuc "cvtsi2ss{l}\t{$src, $dst|$dst, $src}", 1604f4a2713aSLionel Sambuc SSE_CVT_Scalar>, XS; 1605f4a2713aSLionel Sambucdefm CVTSI2SS64 : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64, 1606f4a2713aSLionel Sambuc "cvtsi2ss{q}\t{$src, $dst|$dst, $src}", 1607f4a2713aSLionel Sambuc SSE_CVT_Scalar>, XS, REX_W; 1608f4a2713aSLionel Sambucdefm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32, 1609f4a2713aSLionel Sambuc "cvtsi2sd{l}\t{$src, $dst|$dst, $src}", 1610f4a2713aSLionel Sambuc SSE_CVT_Scalar>, XD; 
// Signed 64-bit integer (GR64 register or i64mem load) -> scalar double,
// selected through sint_to_fp.  Opcode 0x2A with the XD prefix and REX_W.
defm CVTSI2SD64 :
  sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
              "cvtsi2sd{q}\t{$src, $dst|$dst, $src}", SSE_CVT_Scalar>,
  XD, REX_W;

// Explicitly suffixed spellings of the truncating scalar fp -> int converts.
// Every alias passes 0 as the last InstAlias argument.
// 32-bit destination ("l" suffix).
def : InstAlias<
  "cvttss2si{l}\t{$src, $dst|$dst, $src}",
  (CVTTSS2SIrr GR32:$dst, FR32:$src), 0>;
def : InstAlias<
  "cvttss2si{l}\t{$src, $dst|$dst, $src}",
  (CVTTSS2SIrm GR32:$dst, f32mem:$src), 0>;
def : InstAlias<
  "cvttsd2si{l}\t{$src, $dst|$dst, $src}",
  (CVTTSD2SIrr GR32:$dst, FR64:$src), 0>;
def : InstAlias<
  "cvttsd2si{l}\t{$src, $dst|$dst, $src}",
  (CVTTSD2SIrm GR32:$dst, f64mem:$src), 0>;
// 64-bit destination ("q" suffix).
def : InstAlias<
  "cvttss2si{q}\t{$src, $dst|$dst, $src}",
  (CVTTSS2SI64rr GR64:$dst, FR32:$src), 0>;
def : InstAlias<
  "cvttss2si{q}\t{$src, $dst|$dst, $src}",
  (CVTTSS2SI64rm GR64:$dst, f32mem:$src), 0>;
def : InstAlias<
  "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
  (CVTTSD2SI64rr GR64:$dst, FR64:$src), 0>;
def : InstAlias<
  "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
  (CVTTSD2SI64rm GR64:$dst, f64mem:$src), 0>;

// Unsuffixed int -> fp converts with a memory source map to the 32-bit
// (i32mem) instruction forms.
def : InstAlias<
  "cvtsi2ss\t{$src, $dst|$dst, $src}",
  (CVTSI2SSrm FR64:$dst, i32mem:$src), 0>;
def : InstAlias<
  "cvtsi2sd\t{$src, $dst|$dst, $src}",
  (CVTSI2SDrm FR64:$dst, i32mem:$src), 0>;

// Conversion Instructions Intrinsics - Match intrinsics which expect MM
// and/or XMM operand(s).
// sse12_cvt_sint - Two-operand conversion selected from an intrinsic.
//   opc       - opcode byte.
//   SrcRC     - register class of the source in the rr form.
//   DstRC     - register class of the result.
//   Int       - intrinsic matched by both forms.
//   memop     - memory operand type used by the rm form.
//   mem_cpat  - ComplexPattern that matches the memory source of the rm form.
//   asm       - mnemonic; the operand string is appended here.
//   itins     - itineraries (rr/rm) and scheduling class (Sched / Sched.Folded).
multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                  Intrinsic Int, Operand memop, ComplexPattern mem_cpat,
                  string asm, OpndItins itins> {
  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>,
           Sched<[itins.Sched]>;
  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>,
           Sched<[itins.Sched.Folded]>;
}

// sse12_cvt_sint_3addr - Intrinsic conversion that also takes the destination
// register class as a first input ($src1).
//   x86memop  - memory operand type of $src2 in the rm form.
//   ld_frag   - load fragment wrapped around addr:$src2 in the rm pattern.
//   Is2Addr   - 1 (default): asm string prints "$src2, $dst" only.
//               0: asm string prints all three operands.
// The remaining parameters have the same meaning as in sse12_cvt_sint.
multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
                    RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
                    PatFrag ld_frag, string asm, OpndItins itins,
                    bit Is2Addr = 1> {
  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
              !if(Is2Addr,
                  !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
                  !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
              [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))],
              itins.rr>, Sched<[itins.Sched]>;
  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins DstRC:$src1, x86memop:$src2),
              !if(Is2Addr,
                  !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
                  !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
              [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))],
              itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

// cvtsd2si: scalar double (XMM register or sdmem) -> GR32 / GR64 via the
// int_x86_sse2_cvtsd2si* intrinsics.  VEX-encoded forms first, then SSE.
let Predicates = [UseAVX] in {
defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32,
                  int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si",
                  SSE_CVT_SD2SI>, XD, VEX, VEX_LIG;
defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
                    int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si",
                    SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG;
}
defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
                 sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD;
defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
                   sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD, REX_W;


// Intrinsic (VR128-typed) forms of cvtsi2ss / cvtsi2sd.
let isCodeGenOnly = 1 in {
  // VEX forms pass Is2Addr = 0, so all three operands are printed.
  let Predicates = [UseAVX] in {
  defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
            int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}",
            SSE_CVT_Scalar, 0>, XS, VEX_4V;
  defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
            int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}",
            SSE_CVT_Scalar, 0>, XS, VEX_4V,
            VEX_W;
  defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
            int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}",
            SSE_CVT_Scalar, 0>, XD, VEX_4V;
  defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
            int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}",
            SSE_CVT_Scalar, 0>, XD,
            VEX_4V, VEX_W;
  }
  // SSE forms: $src1 is tied to $dst and only two operands are printed.
  let Constraints = "$src1 = $dst" in {
    defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
                          int_x86_sse_cvtsi2ss, i32mem, loadi32,
                          "cvtsi2ss{l}", SSE_CVT_Scalar>, XS;
    defm Int_CVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
                          int_x86_sse_cvtsi642ss, i64mem, loadi64,
                          "cvtsi2ss{q}", SSE_CVT_Scalar>, XS, REX_W;
    defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
                          int_x86_sse2_cvtsi2sd, i32mem, loadi32,
                          "cvtsi2sd{l}", SSE_CVT_Scalar>, XD;
    defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
                          int_x86_sse2_cvtsi642sd, i64mem, loadi64,
                          "cvtsi2sd{q}", SSE_CVT_Scalar>, XD, REX_W;
  }
} // isCodeGenOnly = 1

/// SSE 1 Only

// Aliases for intrinsics
let isCodeGenOnly = 1 in {
let Predicates = [UseAVX] in {
defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
                                    ssmem, sse_load_f32, "cvttss2si",
                                    SSE_CVT_SS2SI_32>, XS, VEX;
defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
                                   int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
                                   "cvttss2si", SSE_CVT_SS2SI_64>,
                                   XS, VEX, VEX_W;
defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
                                    sdmem, sse_load_f64, "cvttsd2si",
                                    SSE_CVT_SD2SI>, XD, VEX;
defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
                                  int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
                                  "cvttsd2si", SSE_CVT_SD2SI>,
                                  XD, VEX, VEX_W;
}
defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
                                    ssmem, sse_load_f32, "cvttss2si",
                                    SSE_CVT_SS2SI_32>, XS;
defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
                                   int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
                                   "cvttss2si", SSE_CVT_SS2SI_64>, XS, REX_W;
defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
                                    sdmem, sse_load_f64, "cvttsd2si",
                                    SSE_CVT_SD2SI>, XD;
defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
                                  int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
                                  "cvttsd2si", SSE_CVT_SD2SI>, XD, REX_W;
} // isCodeGenOnly = 1

// cvtss2si: scalar single (XMM register or ssmem) -> GR32 / GR64 via the
// int_x86_sse_cvtss2si* intrinsics.
let Predicates = [UseAVX] in {
defm VCVTSS2SI   : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
                                  ssmem, sse_load_f32, "cvtss2si",
                                  SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG;
defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
                                  ssmem, sse_load_f32, "cvtss2si",
                                  SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG;
}
defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
                               ssmem, sse_load_f32, "cvtss2si",
                               SSE_CVT_SS2SI_32>, XS;
defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
                                 ssmem, sse_load_f32, "cvtss2si",
                                 SSE_CVT_SS2SI_64>, XS, REX_W;

// Packed doubleword -> packed single conversions (128- and 256-bit).
defm VCVTDQ2PS  : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
                               "vcvtdq2ps\t{$src, $dst|$dst, $src}",
                               SSEPackedSingle, SSE_CVT_PS>,
                               PS, VEX, Requires<[HasAVX]>;
defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, i256mem,
                               "vcvtdq2ps\t{$src, $dst|$dst, $src}",
                               SSEPackedSingle, SSE_CVT_PS>,
                               PS, VEX, VEX_L, Requires<[HasAVX]>;

defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
                            "cvtdq2ps\t{$src, $dst|$dst, $src}",
                            SSEPackedSingle, SSE_CVT_PS>,
                            PS, Requires<[UseSSE2]>;

// Explicitly suffixed ("l" / "q") spellings of the non-truncating scalar
// fp -> int converts.  All of them pass 0 as the last InstAlias argument.
let Predicates = [UseAVX] in {
def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SIrr GR32:$dst, VR128:$src), 0>;
def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SIrm GR32:$dst, ssmem:$src), 0>;
def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SIrr GR32:$dst, VR128:$src), 0>;
def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SIrm GR32:$dst, sdmem:$src), 0>;
def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SI64rr GR64:$dst, VR128:$src), 0>;
def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SI64rm GR64:$dst, ssmem:$src), 0>;
def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SI64rr GR64:$dst, VR128:$src), 0>;
def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SI64rm GR64:$dst, sdmem:$src), 0>;
}

def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSS2SIrr GR32:$dst, VR128:$src), 0>;
def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSS2SIrm GR32:$dst, ssmem:$src), 0>;
def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSD2SIrr GR32:$dst, VR128:$src), 0>;
def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSD2SIrm GR32:$dst, sdmem:$src), 0>;
def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSS2SI64rr GR64:$dst, VR128:$src), 0>;
def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSS2SI64rm GR64:$dst, ssmem:$src), 0>;
def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSD2SI64rr GR64:$dst, VR128:$src), 0>;
// The trailing 0 was missing on this alias only; it is supplied so the alias
// is treated the same way as the fifteen suffixed aliases above.
def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSD2SI64rm GR64:$dst, sdmem:$src), 0>;

/// SSE 2 Only

// Convert scalar double to scalar single
// The AVX FR32/FR64 forms carry no selection pattern of their own; the rr form
// is selected through the explicit fround Pat that follows them.
let hasSideEffects = 0, Predicates = [UseAVX] in {
def VCVTSD2SSrr  : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
                       (ins FR64:$src1, FR64:$src2),
                      "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
                      IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG,
                      Sched<[WriteCvtF2F]>;
let mayLoad = 1 in
def VCVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst),
                       (ins FR64:$src1, f64mem:$src2),
                      "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [], IIC_SSE_CVT_Scalar_RM>,
                      XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG,
                      Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}

// The same register is passed for both source operands of the VEX form.
def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
          Requires<[UseAVX]>;

def CVTSD2SSrr  : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
                      "cvtsd2ss\t{$src, $dst|$dst, $src}",
                      [(set FR32:$dst, (fround FR64:$src))],
                      IIC_SSE_CVT_Scalar_RR>, Sched<[WriteCvtF2F]>;
def CVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
                      "cvtsd2ss\t{$src, $dst|$dst, $src}",
                      [(set FR32:$dst, (fround (loadf64 addr:$src)))],
                      IIC_SSE_CVT_Scalar_RM>,
                      XD,
                  Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>;

// Intrinsic (VR128-typed) forms of cvtsd2ss.
let isCodeGenOnly = 1 in {
def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                       "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
                       IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[UseAVX]>,
                       Sched<[WriteCvtF2F]>;
// $src2 is a memory operand (sdmem), so the format is MRMSrcMem.  It was
// previously declared MRMSrcReg, which does not match the operand list.
def Int_VCVTSD2SSrm: I<0x5A, MRMSrcMem,
                       (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
                       "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set VR128:$dst, (int_x86_sse2_cvtsd2ss
                                          VR128:$src1, sse_load_f64:$src2))],
                       IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[UseAVX]>,
                       Sched<[WriteCvtF2FLd, ReadAfterLd]>;

let Constraints = "$src1 = $dst" in {
def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                       "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
                       IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>,
                       Sched<[WriteCvtF2F]>;
// Memory-source form: MRMSrcMem (was MRMSrcReg), for the same reason as
// Int_VCVTSD2SSrm.
def Int_CVTSD2SSrm: I<0x5A, MRMSrcMem,
                       (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
                       "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
                       [(set VR128:$dst, (int_x86_sse2_cvtsd2ss
                                          VR128:$src1, sse_load_f64:$src2))],
                       IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>,
                       Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}
} // isCodeGenOnly = 1

// Convert scalar single to scalar double
// SSE2 instructions with XS prefix
let hasSideEffects = 0, Predicates = [UseAVX] in {
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
                    (ins FR32:$src1, FR32:$src2),
                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [], IIC_SSE_CVT_Scalar_RR>,
                    XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG,
                    Sched<[WriteCvtF2F]>;
let mayLoad = 1 in
def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
                    (ins FR32:$src1, f32mem:$src2),
                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [], IIC_SSE_CVT_Scalar_RM>,
                    XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>,
                    Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}

// Selection patterns for the AVX f32 -> f64 extend.  The rm patterns pass
// IMPLICIT_DEF as $src1.
def : Pat<(f64 (fextend FR32:$src)),
    (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[UseAVX]>;
def : Pat<(fextend (loadf32 addr:$src)),
    (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX]>;

// extloadf32: folded-load form under OptForSize, separate VMOVSSrm load
// followed by the rr convert under OptForSpeed.
def : Pat<(extloadf32 addr:$src),
    (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>,
    Requires<[UseAVX, OptForSize]>;
def : Pat<(extloadf32 addr:$src),
    (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
    Requires<[UseAVX, OptForSpeed]>;

// SSE2 scalar single -> scalar double (opcode 0x5A, XS prefix).
// Register source: selected directly from fextend.
def CVTSS2SDrr :
  I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
    "cvtss2sd\t{$src, $dst|$dst, $src}",
    [(set FR64:$dst, (fextend FR32:$src))],
    IIC_SSE_CVT_Scalar_RR>,
  XS, Requires<[UseSSE2]>, Sched<[WriteCvtF2F]>;

// Memory source: selected from extloadf32, and only under OptForSize.
def CVTSS2SDrm :
  I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
    "cvtss2sd\t{$src, $dst|$dst, $src}",
    [(set FR64:$dst, (extloadf32 addr:$src))],
    IIC_SSE_CVT_Scalar_RM>,
  XS, Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>;

// extload f32 -> f64.  This matches load+fextend because we have a hack in
// the isel (PreprocessForFPConvert) that can introduce loads after dag
// combine.
// Since these loads aren't folded into the fextend, we have to match it
// explicitly here.
1930f4a2713aSLionel Sambucdef : Pat<(fextend (loadf32 addr:$src)), 1931f4a2713aSLionel Sambuc (CVTSS2SDrm addr:$src)>, Requires<[UseSSE2]>; 1932f4a2713aSLionel Sambucdef : Pat<(extloadf32 addr:$src), 1933f4a2713aSLionel Sambuc (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[UseSSE2, OptForSpeed]>; 1934f4a2713aSLionel Sambuc 1935*0a6a1f1dSLionel Sambuclet isCodeGenOnly = 1 in { 1936f4a2713aSLionel Sambucdef Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg, 1937f4a2713aSLionel Sambuc (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), 1938f4a2713aSLionel Sambuc "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", 1939f4a2713aSLionel Sambuc [(set VR128:$dst, 1940f4a2713aSLionel Sambuc (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))], 1941f4a2713aSLionel Sambuc IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[UseAVX]>, 1942f4a2713aSLionel Sambuc Sched<[WriteCvtF2F]>; 1943f4a2713aSLionel Sambucdef Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem, 1944f4a2713aSLionel Sambuc (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), 1945f4a2713aSLionel Sambuc "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", 1946f4a2713aSLionel Sambuc [(set VR128:$dst, 1947f4a2713aSLionel Sambuc (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))], 1948f4a2713aSLionel Sambuc IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[UseAVX]>, 1949f4a2713aSLionel Sambuc Sched<[WriteCvtF2FLd, ReadAfterLd]>; 1950f4a2713aSLionel Sambuclet Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix 1951f4a2713aSLionel Sambucdef Int_CVTSS2SDrr: I<0x5A, MRMSrcReg, 1952f4a2713aSLionel Sambuc (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), 1953f4a2713aSLionel Sambuc "cvtss2sd\t{$src2, $dst|$dst, $src2}", 1954f4a2713aSLionel Sambuc [(set VR128:$dst, 1955f4a2713aSLionel Sambuc (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))], 1956f4a2713aSLionel Sambuc IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>, 1957f4a2713aSLionel Sambuc Sched<[WriteCvtF2F]>; 1958f4a2713aSLionel Sambucdef Int_CVTSS2SDrm: I<0x5A, MRMSrcMem, 
1959f4a2713aSLionel Sambuc (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), 1960f4a2713aSLionel Sambuc "cvtss2sd\t{$src2, $dst|$dst, $src2}", 1961f4a2713aSLionel Sambuc [(set VR128:$dst, 1962f4a2713aSLionel Sambuc (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))], 1963f4a2713aSLionel Sambuc IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>, 1964f4a2713aSLionel Sambuc Sched<[WriteCvtF2FLd, ReadAfterLd]>; 1965f4a2713aSLionel Sambuc} 1966*0a6a1f1dSLionel Sambuc} // isCodeGenOnly = 1 1967f4a2713aSLionel Sambuc 1968f4a2713aSLionel Sambuc// Convert packed single/double fp to doubleword 1969f4a2713aSLionel Sambucdef VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 1970f4a2713aSLionel Sambuc "cvtps2dq\t{$src, $dst|$dst, $src}", 1971f4a2713aSLionel Sambuc [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], 1972f4a2713aSLionel Sambuc IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>; 1973f4a2713aSLionel Sambucdef VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 1974f4a2713aSLionel Sambuc "cvtps2dq\t{$src, $dst|$dst, $src}", 1975f4a2713aSLionel Sambuc [(set VR128:$dst, 1976f4a2713aSLionel Sambuc (int_x86_sse2_cvtps2dq (loadv4f32 addr:$src)))], 1977f4a2713aSLionel Sambuc IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>; 1978f4a2713aSLionel Sambucdef VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), 1979f4a2713aSLionel Sambuc "cvtps2dq\t{$src, $dst|$dst, $src}", 1980f4a2713aSLionel Sambuc [(set VR256:$dst, 1981f4a2713aSLionel Sambuc (int_x86_avx_cvt_ps2dq_256 VR256:$src))], 1982f4a2713aSLionel Sambuc IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>; 1983f4a2713aSLionel Sambucdef VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), 1984f4a2713aSLionel Sambuc "cvtps2dq\t{$src, $dst|$dst, $src}", 1985f4a2713aSLionel Sambuc [(set VR256:$dst, 1986f4a2713aSLionel Sambuc (int_x86_avx_cvt_ps2dq_256 (loadv8f32 addr:$src)))], 1987f4a2713aSLionel Sambuc IIC_SSE_CVT_PS_RM>, VEX, 
VEX_L, Sched<[WriteCvtF2ILd]>; 1988f4a2713aSLionel Sambucdef CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 1989f4a2713aSLionel Sambuc "cvtps2dq\t{$src, $dst|$dst, $src}", 1990f4a2713aSLionel Sambuc [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], 1991f4a2713aSLionel Sambuc IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>; 1992f4a2713aSLionel Sambucdef CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 1993f4a2713aSLionel Sambuc "cvtps2dq\t{$src, $dst|$dst, $src}", 1994f4a2713aSLionel Sambuc [(set VR128:$dst, 1995f4a2713aSLionel Sambuc (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))], 1996f4a2713aSLionel Sambuc IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>; 1997f4a2713aSLionel Sambuc 1998f4a2713aSLionel Sambuc 1999f4a2713aSLionel Sambuc// Convert Packed Double FP to Packed DW Integers 2000f4a2713aSLionel Sambuclet Predicates = [HasAVX] in { 2001f4a2713aSLionel Sambuc// The assembler can recognize rr 256-bit instructions by seeing a ymm 2002f4a2713aSLionel Sambuc// register, but the same isn't true when using memory operands instead. 2003f4a2713aSLionel Sambuc// Provide other assembly rr and rm forms to address this explicitly. 
// 128-bit source form of the AVX packed double -> packed dword conversion.
def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "vcvtpd2dq\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
                  VEX, Sched<[WriteCvtF2I]>;

// XMM only
def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
                (VCVTPD2DQrr VR128:$dst, VR128:$src), 0>;
def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       "vcvtpd2dqx\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cvtpd2dq (loadv2f64 addr:$src)))]>,
                   VEX, Sched<[WriteCvtF2ILd]>;

// YMM only
def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
                       "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>,
                   VEX, VEX_L, Sched<[WriteCvtF2I]>;
def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
                       "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_avx_cvt_pd2dq_256 (loadv4f64 addr:$src)))]>,
                   VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}",
                (VCVTPD2DQYrr VR128:$dst, VR256:$src), 0>;
} // Predicates = [HasAVX] (the 'let' is opened just before VCVTPD2DQrr)

def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                     "cvtpd2dq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))],
                     IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2ILd]>;
def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     "cvtpd2dq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))],
                     IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>;

// Convert with truncation packed single/double fp to doubleword
// SSE2 packed instructions with XS prefix
def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "cvttps2dq\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (int_x86_sse2_cvttps2dq VR128:$src))],
                         IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                         "cvttps2dq\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (int_x86_sse2_cvttps2dq (loadv4f32 addr:$src)))],
                         IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                          "cvttps2dq\t{$src, $dst|$dst, $src}",
                          [(set VR256:$dst,
                            (int_x86_avx_cvtt_ps2dq_256 VR256:$src))],
                          IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                          "cvttps2dq\t{$src, $dst|$dst, $src}",
                          [(set VR256:$dst,
                            (int_x86_avx_cvtt_ps2dq_256 (loadv8f32 addr:$src)))],
                          IIC_SSE_CVT_PS_RM>, VEX, VEX_L,
                    Sched<[WriteCvtF2ILd]>;

def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvttps2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))],
                       IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>;
def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       "cvttps2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))],
                       IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;

// Select the VEX-encoded dword <-> single conversions for the generic
// sint_to_fp / fp_to_sint nodes and for the cvtdq2ps intrinsic.
let Predicates = [HasAVX] in {
  def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
            (VCVTDQ2PSrr VR128:$src)>;
  def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))),
            (VCVTDQ2PSrm addr:$src)>;

  def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src),
            (VCVTDQ2PSrr VR128:$src)>;
  def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (loadv2i64 addr:$src))),
            (VCVTDQ2PSrm addr:$src)>;

  def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
            (VCVTTPS2DQrr VR128:$src)>;
  def : Pat<(v4i32 (fp_to_sint (loadv4f32 addr:$src))),
            (VCVTTPS2DQrm addr:$src)>;

  def : Pat<(v8f32 (sint_to_fp (v8i32 VR256:$src))),
            (VCVTDQ2PSYrr VR256:$src)>;
  def : Pat<(v8f32 (sint_to_fp (bc_v8i32 (loadv4i64 addr:$src)))),
            (VCVTDQ2PSYrm addr:$src)>;

  def : Pat<(v8i32 (fp_to_sint (v8f32 VR256:$src))),
            (VCVTTPS2DQYrr VR256:$src)>;
  def : Pat<(v8i32 (fp_to_sint (loadv8f32 addr:$src))),
            (VCVTTPS2DQYrm addr:$src)>;
}

// Same selections for the legacy-encoded 128-bit instructions; the memory
// forms use the memop fragments instead of the load fragments.
let Predicates = [UseSSE2] in {
  def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
            (CVTDQ2PSrr VR128:$src)>;
  def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
            (CVTDQ2PSrm addr:$src)>;

  def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src),
            (CVTDQ2PSrr VR128:$src)>;
  def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (memopv2i64 addr:$src))),
            (CVTDQ2PSrm addr:$src)>;

  def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
            (CVTTPS2DQrr VR128:$src)>;
  def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))),
            (CVTTPS2DQrm addr:$src)>;
}

def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvttpd2dq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_sse2_cvttpd2dq VR128:$src))],
                        IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2I]>;

// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.

// XMM only
def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}",
                (VCVTTPD2DQrr VR128:$dst, VR128:$src), 0>;
def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                         "cvttpd2dqx\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (int_x86_sse2_cvttpd2dq (loadv2f64 addr:$src)))],
                         IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2ILd]>;

// YMM only
def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
                         "cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (int_x86_avx_cvtt_pd2dq_256 VR256:$src))],
                         IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
                         "cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (int_x86_avx_cvtt_pd2dq_256 (loadv4f64 addr:$src)))],
                         IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}",
                (VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0>;

let Predicates = [HasAVX] in {
  def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
            (VCVTTPD2DQYrr VR256:$src)>;
  def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))),
            (VCVTTPD2DQYrm addr:$src)>;
} // Predicates = [HasAVX]

def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "cvttpd2dq\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))],
                      IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>;
def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
                      "cvttpd2dq\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (int_x86_sse2_cvttpd2dq (memopv2f64 addr:$src)))],
                      IIC_SSE_CVT_PD_RM>,
                  Sched<[WriteCvtF2ILd]>;

// Convert packed single to packed double
let Predicates = [HasAVX] in {
// SSE2 instructions without OpSize prefix
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    "vcvtps2pd\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
                    IIC_SSE_CVT_PD_RR>, PS, VEX, Sched<[WriteCvtF2F]>;
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                    "vcvtps2pd\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
                    IIC_SSE_CVT_PD_RM>, PS, VEX, Sched<[WriteCvtF2FLd]>;
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
                     "vcvtps2pd\t{$src, $dst|$dst, $src}",
                     [(set VR256:$dst,
                       (int_x86_avx_cvt_ps2_pd_256 VR128:$src))],
                     IIC_SSE_CVT_PD_RR>, PS, VEX, VEX_L, Sched<[WriteCvtF2F]>;
def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
                     "vcvtps2pd\t{$src, $dst|$dst, $src}",
                     [(set VR256:$dst,
                       (int_x86_avx_cvt_ps2_pd_256 (loadv4f32 addr:$src)))],
                     IIC_SSE_CVT_PD_RM>, PS, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
}

let Predicates = [UseSSE2] in {
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                   "cvtps2pd\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
                   IIC_SSE_CVT_PD_RR>, PS, Sched<[WriteCvtF2F]>;
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                   "cvtps2pd\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
                   IIC_SSE_CVT_PD_RM>, PS, Sched<[WriteCvtF2FLd]>;
}

// Convert Packed DW Integers to Packed Double FP
let Predicates = [HasAVX] in {
// The 64-bit memory form has no selection pattern, so its load behaviour is
// stated explicitly.
let hasSideEffects = 0, mayLoad = 1 in
def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                       "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                       []>, VEX, Sched<[WriteCvtI2FLd]>;
def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX,
                  Sched<[WriteCvtI2F]>;
def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
                        "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                        [(set VR256:$dst,
                          (int_x86_avx_cvtdq2_pd_256
                           (bitconvert (loadv2i64 addr:$src))))]>, VEX, VEX_L,
                   Sched<[WriteCvtI2FLd]>;
def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
                        "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                        [(set VR256:$dst,
                          (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L,
                   Sched<[WriteCvtI2F]>;
}

// The memory form takes the _RM itinerary class and the register form the _RR
// one, matching every other rr/rm pair in this section (the two classes were
// previously swapped between these records).
let hasSideEffects = 0, mayLoad = 1 in
def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                      "cvtdq2pd\t{$src, $dst|$dst, $src}", [],
                      IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtI2FLd]>;
def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "cvtdq2pd\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))],
                      IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtI2F]>;

// AVX 256-bit register conversion intrinsics
let Predicates = [HasAVX] in {
  def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))),
            (VCVTDQ2PDYrr VR128:$src)>;
  def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))),
            (VCVTDQ2PDYrm addr:$src)>;
} // Predicates = [HasAVX]

// Convert packed double to packed single
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvtpd2ps\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
                       IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2F]>;

// XMM only
def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
                (VCVTPD2PSrr VR128:$dst, VR128:$src), 0>;
def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                        "cvtpd2psx\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_sse2_cvtpd2ps (loadv2f64 addr:$src)))],
                        IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2FLd]>;

// YMM only
def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
                        "cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_avx_cvt_pd2_ps_256 VR256:$src))],
                        IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2F]>;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
                        "cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_avx_cvt_pd2_ps_256 (loadv4f64 addr:$src)))],
                        IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}",
                (VCVTPD2PSYrr VR128:$dst, VR256:$src), 0>;

def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     "cvtpd2ps\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
                     IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2F]>;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                     "cvtpd2ps\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))],
                     IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2FLd]>;


// AVX 256-bit register conversion intrinsics
// FIXME: Migrate SSE conversion intrinsics matching to use patterns as below
// whenever possible to avoid declaring two versions of each one.
let Predicates = [HasAVX] in {
  def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src),
            (VCVTDQ2PSYrr VR256:$src)>;
  def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (loadv4i64 addr:$src))),
            (VCVTDQ2PSYrm addr:$src)>;

  // Match fround and fextend for 128/256-bit conversions
  def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),
            (VCVTPD2PSrr VR128:$src)>;
  def : Pat<(v4f32 (X86vfpround (loadv2f64 addr:$src))),
            (VCVTPD2PSXrm addr:$src)>;
  def : Pat<(v4f32 (fround (v4f64 VR256:$src))),
            (VCVTPD2PSYrr VR256:$src)>;
  def : Pat<(v4f32 (fround (loadv4f64 addr:$src))),
            (VCVTPD2PSYrm addr:$src)>;

  def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
            (VCVTPS2PDrr VR128:$src)>;
  def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
            (VCVTPS2PDYrr VR128:$src)>;
  def : Pat<(v4f64 (extloadv4f32 addr:$src)),
            (VCVTPS2PDYrm addr:$src)>;
}

let Predicates = [UseSSE2] in {
  // Match fround and fextend for 128 conversions
  def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),
            (CVTPD2PSrr VR128:$src)>;
  def : Pat<(v4f32 (X86vfpround (memopv2f64 addr:$src))),
            (CVTPD2PSrm addr:$src)>;

  def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
            (CVTPS2PDrr VR128:$src)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Compare Instructions
//===----------------------------------------------------------------------===//

// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
//
// Each defm produces four records (opcode 0xC2):
//   rr / rm         - condition given through the CC operand; selected via
//                     OpNode with the immLeaf-constrained $cc.
//   rr_alt / rm_alt - condition given as a raw i8imm; no pattern, assembler
//                     parser only.
// RC and x86memop are the register class and memory operand of the sources,
// VT / ld_frag type the pattern operands, asm / asm_alt are the two assembly
// strings, and itins supplies the itinerary classes (rr, rm) and the Sched
// write used by all four records.
multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
                            Operand CC, SDNode OpNode, ValueType VT,
                            PatFrag ld_frag, string asm, string asm_alt,
                            OpndItins itins, ImmLeaf immLeaf> {
  def rr : SIi8<0xC2, MRMSrcReg,
                (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
                [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, immLeaf:$cc))],
                itins.rr>, Sched<[itins.Sched]>;
  def rm : SIi8<0xC2, MRMSrcMem,
                (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
                [(set RC:$dst, (OpNode (VT RC:$src1),
                                       (ld_frag addr:$src2), immLeaf:$cc))],
                itins.rm>,
           Sched<[itins.Sched.Folded, ReadAfterLd]>;

  // Accept explicit immediate argument form instead of comparison code.
  // The itinerary classes come from 'itins', as for rr/rm above, so an
  // instantiation with a non-F32 itinerary (e.g. CMPSD with SSE_ALU_F64S) is
  // described the same way in both of its spellings.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rr_alt : SIi8<0xC2, MRMSrcReg, (outs RC:$dst),
                      (ins RC:$src1, RC:$src2, i8imm:$cc), asm_alt, [],
                      itins.rr>, Sched<[itins.Sched]>;
    let mayLoad = 1 in
    def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst),
                      (ins RC:$src1, x86memop:$src2, i8imm:$cc), asm_alt, [],
                      itins.rm>,
                 Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}

defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, AVXCC, X86cmps, f32, loadf32,
                 "cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                 SSE_ALU_F32S, i8immZExt5>, XS, VEX_4V, VEX_LIG;
defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, AVXCC, X86cmps, f64, loadf64,
                 "cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                 SSE_ALU_F32S, i8immZExt5>, // same latency as 32 bit compare
                 XD, VEX_4V, VEX_LIG;

let Constraints = "$src1 = $dst" in {
  defm CMPSS : sse12_cmp_scalar<FR32, f32mem, SSECC, X86cmps, f32, loadf32,
                  "cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
                  "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}", SSE_ALU_F32S,
                  i8immZExt3>, XS;
  defm CMPSD : sse12_cmp_scalar<FR64, f64mem, SSECC, X86cmps, f64, loadf64,
                  "cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
                  "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
                  SSE_ALU_F64S, i8immZExt3>, XD;
}

// sse12_cmp_scalar_int - scalar compares matched through an intrinsic (Int)
// whose register operands are full VR128 values. Only the CC-operand forms
// (rr, rm) are produced; the memory form matches a plain 'load'.
multiclass sse12_cmp_scalar_int<X86MemOperand x86memop, Operand CC,
                                Intrinsic Int, string asm, OpndItins itins,
                                ImmLeaf immLeaf> {
  def rr : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
                (ins VR128:$src1, VR128:$src, CC:$cc), asm,
                [(set VR128:$dst, (Int VR128:$src1,
                                       VR128:$src, immLeaf:$cc))],
                itins.rr>,
           Sched<[itins.Sched]>;
  def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
                (ins VR128:$src1, x86memop:$src, CC:$cc), asm,
                [(set VR128:$dst, (Int VR128:$src1,
                                       (load addr:$src), immLeaf:$cc))],
                itins.rm>,
           Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

let isCodeGenOnly = 1 in {
  // Aliases to match intrinsics which expect XMM operand(s).
  defm Int_VCMPSS : sse12_cmp_scalar_int<f32mem, AVXCC, int_x86_sse_cmp_ss,
                       "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                       SSE_ALU_F32S, i8immZExt5>,
                       XS, VEX_4V;
  defm Int_VCMPSD : sse12_cmp_scalar_int<f64mem, AVXCC, int_x86_sse2_cmp_sd,
                       "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}",
                       SSE_ALU_F32S, i8immZExt5>, // same latency as f32
                       XD, VEX_4V;
  let Constraints = "$src1 = $dst" in {
    defm Int_CMPSS : sse12_cmp_scalar_int<f32mem, SSECC, int_x86_sse_cmp_ss,
                         "cmp${cc}ss\t{$src, $dst|$dst, $src}",
                         SSE_ALU_F32S, i8immZExt3>, XS;
    defm Int_CMPSD : sse12_cmp_scalar_int<f64mem, SSECC, int_x86_sse2_cmp_sd,
                         "cmp${cc}sd\t{$src, $dst|$dst, $src}",
                         SSE_ALU_F64S, i8immZExt3>,
                         XD;
  } // Constraints = "$src1 = $dst"
} // isCodeGenOnly = 1


// sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS
//
// Produces an rr and an rm record with no register outputs; the pattern
// result is written to EFLAGS. OpcodeStr is the mnemonic; the operand list is
// appended here.
multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
                         ValueType vt, X86MemOperand x86memop,
                         PatFrag ld_frag, string OpcodeStr> {
  def rr: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
             [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))],
             IIC_SSE_COMIS_RR>,
          Sched<[WriteFAdd]>;
  def rm: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
             [(set EFLAGS, (OpNode (vt RC:$src1),
                                   (ld_frag addr:$src2)))],
             IIC_SSE_COMIS_RM>,
          Sched<[WriteFAddLd, ReadAfterLd]>;
}

let Defs = [EFLAGS] in {
  defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
                                "ucomiss">, PS, VEX, VEX_LIG;
  defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
                                "ucomisd">, PD, VEX, VEX_LIG;
  // The plain (V)COMIS* records carry no selection pattern: Pattern is
  // overridden to the empty list and OpNode is passed as 'undef'.
  let Pattern = []<dag> in {
    defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
                                 "comiss">, PS, VEX, VEX_LIG;
    defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
                                 "comisd">, PD, VEX, VEX_LIG;
  }

  let isCodeGenOnly = 1 in {
    defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
                                      load, "ucomiss">, PS, VEX;
    defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
                                      load, "ucomisd">, PD, VEX;

    defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem,
                                     load, "comiss">, PS, VEX;
    defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem,
                                     load, "comisd">, PD, VEX;
  }
  defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
                               "ucomiss">, PS;
  defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
                               "ucomisd">, PD;

  let Pattern = []<dag> in {
    defm COMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
                                "comiss">, PS;
    defm COMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
                                "comisd">, PD;
  }

  let isCodeGenOnly = 1 in {
    defm Int_UCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
                                     load, "ucomiss">, PS;
    defm Int_UCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
                                     load, "ucomisd">, PD;

    defm Int_COMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load,
                                    "comiss">, PS;
    defm Int_COMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, load,
                                    "comisd">, PD;
  }
} // Defs = [EFLAGS]

// sse12_cmp_packed - sse 1 & 2 compare packed instructions
//
// Each defm produces four records (opcode 0xC2):
//   rri / rmi         - condition given through the CC operand; selected via
//                       the intrinsic Int with the immLeaf-constrained $cc.
//   rri_alt / rmi_alt - condition given as a raw i8imm; no pattern, assembler
//                       parser only.
// 'd' is the execution domain passed to PIi8; 'itins' supplies the itinerary
// classes and defaults to SSE_ALU_F32P.
multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
                            Operand CC, Intrinsic Int, string asm,
                            string asm_alt, Domain d, ImmLeaf immLeaf,
                            OpndItins itins = SSE_ALU_F32P> {
  def rri : PIi8<0xC2, MRMSrcReg,
                 (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
                 [(set RC:$dst, (Int RC:$src1, RC:$src2, immLeaf:$cc))],
                 itins.rr, d>,
            Sched<[WriteFAdd]>;
  def rmi : PIi8<0xC2, MRMSrcMem,
                 (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
                 [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), immLeaf:$cc))],
                 itins.rm, d>,
            Sched<[WriteFAddLd, ReadAfterLd]>;

  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rri_alt : PIi8<0xC2, MRMSrcReg,
                       (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
                       asm_alt, [], itins.rr, d>, Sched<[WriteFAdd]>;
    // Pattern-less memory form: the load has to be declared explicitly, as
    // the scalar rm_alt record does.
    let mayLoad = 1 in
    def rmi_alt : PIi8<0xC2, MRMSrcMem,
                       (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
                       asm_alt, [], itins.rm, d>,
                  Sched<[WriteFAddLd, ReadAfterLd]>;
  }
}

defm VCMPPS : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse_cmp_ps,
               "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
               SSEPackedSingle, i8immZExt5>, PS, VEX_4V;
defm VCMPPD : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse2_cmp_pd,
               "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
               SSEPackedDouble, i8immZExt5>, PD, VEX_4V;
defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_ps_256,
               "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
               SSEPackedSingle, i8immZExt5>, PS, VEX_4V, VEX_L;
defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_pd_256,
               "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
               SSEPackedDouble, i8immZExt5>, PD, VEX_4V, VEX_L;
let Constraints = "$src1 = $dst" in {
  defm CMPPS : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse_cmp_ps,
                 "cmp${cc}ps\t{$src2, $dst|$dst, $src2}",
                 "cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
                 SSEPackedSingle, i8immZExt5, SSE_ALU_F32P>, PS;
  defm CMPPD : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse2_cmp_pd,
                 "cmp${cc}pd\t{$src2, $dst|$dst, $src2}",
                 "cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
                 SSEPackedDouble, i8immZExt5, SSE_ALU_F64P>, PD;
}

// Select the packed compares for the X86cmpp node (the records above only
// match the intrinsics).
let Predicates = [HasAVX] in {
def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
          (VCMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
          (VCMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
          (VCMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
          (VCMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;

def : Pat<(v8i32 (X86cmpp (v8f32 VR256:$src1), VR256:$src2, imm:$cc)),
          (VCMPPSYrri (v8f32 VR256:$src1), (v8f32 VR256:$src2), imm:$cc)>;
def : Pat<(v8i32 (X86cmpp (v8f32 VR256:$src1), (memop addr:$src2), imm:$cc)),
          (VCMPPSYrmi (v8f32 VR256:$src1), addr:$src2, imm:$cc)>;
def : Pat<(v4i64 (X86cmpp (v4f64 VR256:$src1), VR256:$src2, imm:$cc)),
          (VCMPPDYrri VR256:$src1, VR256:$src2, imm:$cc)>;
def : Pat<(v4i64 (X86cmpp (v4f64 VR256:$src1), (memop addr:$src2), imm:$cc)),
          (VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>;
}

let Predicates = [UseSSE1] in {
def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
          (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
          (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
}

let Predicates = [UseSSE2] in {
def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
          (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
          (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Shuffle
// Instructions
//===----------------------------------------------------------------------===//

/// sse12_shuffle - sse 1 & 2 fp shuffle instructions
///
/// Produces a memory-source (rmi) and a register-source (rri) record, both
/// with opcode 0xC6 and an i8 immediate as the third source. Both select the
/// X86Shufp node at value type vt; the memory form loads its second operand
/// through mem_frag.
multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
                         ValueType vt, string asm, PatFrag mem_frag,
                         Domain d> {
  def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
                   (ins RC:$src1, x86memop:$src2, i8imm:$src3), asm,
                   [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
                                       (i8 imm:$src3))))], IIC_SSE_SHUFP, d>,
            Sched<[WriteFShuffleLd, ReadAfterLd]>;
  def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
                 (ins RC:$src1, RC:$src2, i8imm:$src3), asm,
                 [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
                                     (i8 imm:$src3))))], IIC_SSE_SHUFP, d>,
            Sched<[WriteFShuffle]>;
}

// VEX-encoded (three-operand) forms, 128-bit and 256-bit.
defm VSHUFPS  : sse12_shuffle<VR128, f128mem, v4f32,
           "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           loadv4f32, SSEPackedSingle>, PS, VEX_4V;
defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
           "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           loadv8f32, SSEPackedSingle>, PS, VEX_4V, VEX_L;
defm VSHUFPD  : sse12_shuffle<VR128, f128mem, v2f64,
           "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           loadv2f64, SSEPackedDouble>, PD, VEX_4V;
defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
           "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           loadv4f64, SSEPackedDouble>, PD, VEX_4V, VEX_L;

// Two-address forms: the first source is tied to the destination.
let Constraints = "$src1 = $dst" in {
  defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
                    "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                    memopv4f32, SSEPackedSingle>, PS;
  defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
                    "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                    memopv2f64, SSEPackedDouble>, PD;
}

// Integer-typed X86Shufp nodes are selected onto the same FP shuffle
// instructions.
let Predicates = [HasAVX] in {
  def : Pat<(v4i32 (X86Shufp VR128:$src1,
                       (bc_v4i32 (loadv2i64 addr:$src2)), (i8 imm:$imm))),
            (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
  def : Pat<(v4i32 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
            (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;

  def : Pat<(v2i64 (X86Shufp VR128:$src1,
                       (loadv2i64 addr:$src2), (i8 imm:$imm))),
            (VSHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
  def : Pat<(v2i64 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
            (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;

  // 256-bit patterns
  def : Pat<(v8i32 (X86Shufp VR256:$src1, VR256:$src2, (i8 imm:$imm))),
            (VSHUFPSYrri VR256:$src1, VR256:$src2, imm:$imm)>;
  def : Pat<(v8i32 (X86Shufp VR256:$src1,
                      (bc_v8i32 (loadv4i64 addr:$src2)), (i8 imm:$imm))),
            (VSHUFPSYrmi VR256:$src1, addr:$src2, imm:$imm)>;

  def : Pat<(v4i64 (X86Shufp VR256:$src1, VR256:$src2, (i8 imm:$imm))),
            (VSHUFPDYrri VR256:$src1, VR256:$src2, imm:$imm)>;
  def : Pat<(v4i64 (X86Shufp VR256:$src1,
                              (loadv4i64 addr:$src2), (i8 imm:$imm))),
            (VSHUFPDYrmi VR256:$src1, addr:$src2, imm:$imm)>;
}

let Predicates = [UseSSE1] in {
  def : Pat<(v4i32 (X86Shufp VR128:$src1,
                       (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
            (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
  def : Pat<(v4i32 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
            (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
}

let Predicates = [UseSSE2] in {
  // Generic SHUFPD patterns
  def : Pat<(v2i64 (X86Shufp VR128:$src1,
                       (memopv2i64 addr:$src2), (i8 imm:$imm))),
            (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
  def : Pat<(v2i64 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
            (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Unpack FP Instructions
//===----------------------------------------------------------------------===//

/// sse12_unpack_interleave - sse 1 & 2 fp
/// unpack and interleave
///
/// Produces a register-register (rr) and a register-memory (rm) record with
/// opcode opc. Both select OpNode at value type vt; the memory form loads its
/// second operand through mem_frag. Both use the IIC_SSE_UNPCK itinerary.
multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
                                   PatFrag mem_frag, RegisterClass RC,
                                   X86MemOperand x86memop, string asm,
                                   Domain d> {
    def rr : PI<opc, MRMSrcReg,
                (outs RC:$dst), (ins RC:$src1, RC:$src2),
                asm, [(set RC:$dst,
                           (vt (OpNode RC:$src1, RC:$src2)))],
                           IIC_SSE_UNPCK, d>, Sched<[WriteFShuffle]>;
    def rm : PI<opc, MRMSrcMem,
                (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
                asm, [(set RC:$dst,
                           (vt (OpNode RC:$src1,
                                       (mem_frag addr:$src2))))],
                                       IIC_SSE_UNPCK, d>,
             Sched<[WriteFShuffleLd, ReadAfterLd]>;
}

// VEX-encoded 128-bit forms.
defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, loadv4f32,
      VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     SSEPackedSingle>, PS, VEX_4V;
defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, loadv2f64,
      VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     SSEPackedDouble>, PD, VEX_4V;
defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, loadv4f32,
      VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     SSEPackedSingle>, PS, VEX_4V;
defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, loadv2f64,
      VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     SSEPackedDouble>, PD, VEX_4V;

// VEX-encoded 256-bit forms.
defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, loadv8f32,
      VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     SSEPackedSingle>, PS, VEX_4V, VEX_L;
defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, loadv4f64,
      VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     SSEPackedDouble>, PD, VEX_4V, VEX_L;
defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, loadv8f32,
      VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     SSEPackedSingle>, PS, VEX_4V, VEX_L;
defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, loadv4f64,
      VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     SSEPackedDouble>, PD, VEX_4V, VEX_L;

let Constraints = "$src1 = $dst" in {
  defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32,
        VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
                       SSEPackedSingle>, PS;
  defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memopv2f64,
        VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
                       SSEPackedDouble>, PD;
  defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memopv4f32,
        VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
                       SSEPackedSingle>, PS;
  defm UNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memopv2f64,
        VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}",
                       SSEPackedDouble>, PD;
} // Constraints = "$src1 = $dst"

// With AVX1 only, 256-bit integer unpacks are selected onto the FP unpack
// instructions.
let Predicates = [HasAVX1Only] in {
  def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)))),
            (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
            (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)))),
            (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
            (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;

  def : Pat<(v4i64 (X86Unpckl VR256:$src1, (loadv4i64 addr:$src2))),
            (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
  def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)),
            (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v4i64 (X86Unpckh VR256:$src1, (loadv4i64 addr:$src2))),
            (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
  def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)),
            (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
}

let Predicates = [HasAVX] in {
  // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
  // problem is during lowering, where it's not possible to recognize the load
  // fold because it has two uses through a bitcast. One use disappears at isel
  // time and the fold opportunity reappears.
  def : Pat<(v2f64 (X86Movddup VR128:$src)),
            (VUNPCKLPDrr VR128:$src, VR128:$src)>;
}

let Predicates = [UseSSE2] in {
  // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
  // problem is during lowering, where it's not possible to recognize the load
  // fold because it has two uses through a bitcast. One use disappears at isel
  // time and the fold opportunity reappears.
  def : Pat<(v2f64 (X86Movddup VR128:$src)),
            (UNPCKLPDrr VR128:$src, VR128:$src)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Extract Floating-Point Sign mask
//===----------------------------------------------------------------------===//

/// sse12_extr_sign_mask - sse 1 & 2 extract floating-point sign mask
///
/// Produces a single register-source record (rr) with opcode 0x50 that writes
/// a GR32orGR64 destination and is selected through the intrinsic Int applied
/// to the RC source. The "\t{$src, $dst|$dst, $src}" operand string is
/// appended to asm here, so callers pass only the mnemonic.
multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
                                Domain d> {
  def rr : PI<0x50, MRMSrcReg, (outs GR32orGR64:$dst), (ins RC:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
              [(set GR32orGR64:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>,
              Sched<[WriteVecLogic]>;
}

let Predicates = [HasAVX] in {
  defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps,
                                        "movmskps", SSEPackedSingle>, PS, VEX;
  defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
                                        "movmskpd", SSEPackedDouble>, PD, VEX;
  defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256,
                                        "movmskps", SSEPackedSingle>, PS,
                                        VEX, VEX_L;
  defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256,
                                        "movmskpd", SSEPackedDouble>, PD,
                                        VEX, VEX_L;

  // X86fgetsign on a scalar FP register: copy the value into VR128 and run
  // the mask extraction. For an i64 result the 32-bit result is wrapped with
  // SUBREG_TO_REG.
  def : Pat<(i32 (X86fgetsign FR32:$src)),
            (VMOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
  def : Pat<(i64 (X86fgetsign FR32:$src)),
            (SUBREG_TO_REG (i64 0),
             (VMOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128)), sub_32bit)>;
  def : Pat<(i32 (X86fgetsign FR64:$src)),
            (VMOVMSKPDrr (COPY_TO_REGCLASS FR64:$src, VR128))>;
  def : Pat<(i64 (X86fgetsign FR64:$src)),
            (SUBREG_TO_REG (i64 0),
             (VMOVMSKPDrr (COPY_TO_REGCLASS FR64:$src, VR128)), sub_32bit)>;
}

defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
                                     SSEPackedSingle>, PS;
defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
                                     SSEPackedDouble>, PD;

def : Pat<(i32 (X86fgetsign FR32:$src)),
          (MOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128))>,
      Requires<[UseSSE1]>;
// Remaining non-VEX X86fgetsign patterns: the scalar is copied into VR128 and
// fed to MOVMSKPS/MOVMSKPD; i64 results wrap the 32-bit result with
// SUBREG_TO_REG.
def : Pat<(i64 (X86fgetsign FR32:$src)),
          (SUBREG_TO_REG (i64 0),
           (MOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128)), sub_32bit)>,
      Requires<[UseSSE1]>;
def : Pat<(i32 (X86fgetsign FR64:$src)),
          (MOVMSKPDrr (COPY_TO_REGCLASS FR64:$src, VR128))>,
      Requires<[UseSSE2]>;
def : Pat<(i64 (X86fgetsign FR64:$src)),
          (SUBREG_TO_REG (i64 0),
           (MOVMSKPDrr (COPY_TO_REGCLASS FR64:$src, VR128)), sub_32bit)>,
      Requires<[UseSSE2]>;

//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Logical Instructions
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in { // SSE integer instructions

/// PDI_binop_rm - Simple SSE2 binary operator.
///
/// Produces a register-register (rr) and a register-memory (rm) record for
/// opcode opc, selecting OpNode at value type OpVT.
///   Is2Addr      - picks the two-operand assembly string when set, the
///                  three-operand string otherwise.
///   IsCommutable - applied to the rr record only (the `let` below has no
///                  braces, so it covers just the next def).
///   memop_frag   - load fragment for the memory operand; its result is
///                  passed through bitconvert before reaching OpNode.
///   itins        - supplies the rr/rm itineraries and the scheduling class
///                  (Sched for rr, Sched.Folded for rm).
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                        ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
                        X86MemOperand x86memop, OpndItins itins,
                        bit IsCommutable, bit Is2Addr> {
  let isCommutable = IsCommutable in
  def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>,
       Sched<[itins.Sched]>;
  def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpVT (OpNode RC:$src1,
                                     (bitconvert (memop_frag addr:$src2)))))],
                                     itins.rm>,
       Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
} // ExeDomain = SSEPackedInt

/// PDI_binop_all - Instantiate PDI_binop_rm for every encoding of one
/// operation:
///   V#NAME   - VEX 128-bit, three-operand, requires HasAVX and NoVLX.
///   NAME     - 128-bit two-address form with $src1 tied to $dst.
///   V#NAME#Y - VEX 256-bit, three-operand, requires HasAVX2 and NoVLX.
/// The VEX forms prepend "v" to OpcodeStr.
multiclass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode,
                         ValueType OpVT128, ValueType OpVT256,
                         OpndItins itins, bit IsCommutable = 0> {
let Predicates = [HasAVX, NoVLX] in
  defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128,
                    VR128, loadv2i64, i128mem, itins, IsCommutable, 0>, VEX_4V;

let Constraints = "$src1 = $dst" in
  defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128,
                           memopv2i64, i128mem, itins, IsCommutable, 1>;

let Predicates = [HasAVX2, NoVLX] in
  defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode,
                               OpVT256, VR256, loadv4i64, i256mem, itins,
                               IsCommutable, 0>, VEX_4V, VEX_L;
}

// These are ordered here for pattern ordering requirements with the fp versions

defm PAND  : PDI_binop_all<0xDB, "pand", and, v2i64, v4i64,
                           SSE_VEC_BIT_ITINS_P, 1>;
defm POR   : PDI_binop_all<0xEB, "por", or, v2i64, v4i64,
                           SSE_VEC_BIT_ITINS_P, 1>;
defm PXOR  : PDI_binop_all<0xEF, "pxor", xor, v2i64, v4i64,
                           SSE_VEC_BIT_ITINS_P, 1>;
defm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64,
                           SSE_VEC_BIT_ITINS_P, 0>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Logical Instructions
//===----------------------------------------------------------------------===//

/// sse12_fp_alias_pack_logical - SSE 1 & 2 aliased packed FP logical ops
///
/// Instantiates sse12_fp_packed on the scalar register classes FR32 / FR64
/// while still using 128-bit memory operands (f128mem). The V-prefixed
/// records are VEX_4V and pass a trailing 0 to sse12_fp_packed (presumably its
/// two-address flag; that multiclass is defined elsewhere - confirm there).
/// The unprefixed records tie $src1 to $dst.
multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr,
                                       SDNode OpNode, OpndItins itins> {
  defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
              FR32, f32, f128mem, memopfsf32, SSEPackedSingle, itins, 0>,
              PS, VEX_4V;

  defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
        FR64, f64, f128mem, memopfsf64, SSEPackedDouble, itins, 0>,
        PD, VEX_4V;

  let Constraints = "$src1 = $dst" in {
    defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, FR32,
                f32, f128mem, memopfsf32, SSEPackedSingle, itins>,
                PS;

    defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, FR64,
                f64, f128mem, memopfsf64, SSEPackedDouble, itins>,
                PD;
  }
}

// Alias bitwise logical operations using SSE logical ops on packed FP values.
// Code-generation-only scalar aliases of the packed FP logical instructions.
let isCodeGenOnly = 1 in {
  defm FsAND  : sse12_fp_alias_pack_logical<0x54, "and", X86fand,
                SSE_BIT_ITINS_P>;
  defm FsOR   : sse12_fp_alias_pack_logical<0x56, "or", X86for,
                SSE_BIT_ITINS_P>;
  defm FsXOR  : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor,
                SSE_BIT_ITINS_P>;

  let isCommutable = 0 in
    defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", X86fandn,
                  SSE_BIT_ITINS_P>;
}

/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
///
/// Instantiates sse12_fp_packed_logical_rm (defined elsewhere) six times:
/// VEX 256-bit PS/PD and VEX 128-bit PS/PD under HasAVX + NoVLX, then the
/// two-address 128-bit PS/PD forms. Each instantiation passes two pattern
/// lists, one with a register second source and one with a loaded second
/// source. All patterns are written on the integer vector types (v4i64 /
/// v2i64), with bc_* casts on the FP-typed operands.
multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode> {
  let Predicates = [HasAVX, NoVLX] in {
  defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
        !strconcat(OpcodeStr, "ps"), f256mem,
        [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))],
        [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
                           (loadv4i64 addr:$src2)))], 0>, PS, VEX_4V, VEX_L;

  defm V#NAME#PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
        !strconcat(OpcodeStr, "pd"), f256mem,
        [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
                                  (bc_v4i64 (v4f64 VR256:$src2))))],
        [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
                                  (loadv4i64 addr:$src2)))], 0>,
                                  PD, VEX_4V, VEX_L;

  // In AVX no need to add a pattern for 128-bit logical rr ps, because they
  // are all promoted to v2i64, and the patterns are covered by the int
  // version. This is needed in SSE only, because v2i64 isn't supported on
  // SSE1, but only on SSE2.
  defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
       !strconcat(OpcodeStr, "ps"), f128mem, [],
       [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
                                 (loadv2i64 addr:$src2)))], 0>, PS, VEX_4V;

  defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
       !strconcat(OpcodeStr, "pd"), f128mem,
       [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
                                 (bc_v2i64 (v2f64 VR128:$src2))))],
       [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
                                 (loadv2i64 addr:$src2)))], 0>,
                                                 PD, VEX_4V;
  }

  let Constraints = "$src1 = $dst" in {
    defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
         !strconcat(OpcodeStr, "ps"), f128mem,
         [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))],
         [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
                                   (memopv2i64 addr:$src2)))]>, PS;

    defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
         !strconcat(OpcodeStr, "pd"), f128mem,
         [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
                                   (bc_v2i64 (v2f64 VR128:$src2))))],
         [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
                                   (memopv2i64 addr:$src2)))]>, PD;
  }
}

defm AND  : sse12_fp_packed_logical<0x54, "and", and>;
defm OR   : sse12_fp_packed_logical<0x56, "or", or>;
defm XOR  : sse12_fp_packed_logical<0x57, "xor", xor>;
let isCommutable = 0 in
  defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp>;

// AVX1 requires type coercions in order to fold loads directly into logical
// operations.
let Predicates = [HasAVX1Only] in {
  def : Pat<(bc_v8f32 (and VR256:$src1, (loadv4i64 addr:$src2))),
            (VANDPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(bc_v8f32 (or VR256:$src1, (loadv4i64 addr:$src2))),
            (VORPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(bc_v8f32 (xor VR256:$src1, (loadv4i64 addr:$src2))),
            (VXORPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(bc_v8f32 (X86andnp VR256:$src1, (loadv4i64 addr:$src2))),
            (VANDNPSYrm VR256:$src1, addr:$src2)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Arithmetic Instructions
//===----------------------------------------------------------------------===//

/// basic_sse12_fp_binop_xxx - SSE 1 & 2 binops come in
both scalar and 2997f4a2713aSLionel Sambuc/// vector forms. 2998f4a2713aSLionel Sambuc/// 2999f4a2713aSLionel Sambuc/// In addition, we also have a special variant of the scalar form here to 3000f4a2713aSLionel Sambuc/// represent the associated intrinsic operation. This form is unlike the 3001f4a2713aSLionel Sambuc/// plain scalar form, in that it takes an entire vector (instead of a scalar) 3002f4a2713aSLionel Sambuc/// and leaves the top elements unmodified (therefore these cannot be commuted). 3003f4a2713aSLionel Sambuc/// 3004f4a2713aSLionel Sambuc/// These three forms can each be reg+reg or reg+mem. 3005f4a2713aSLionel Sambuc/// 3006f4a2713aSLionel Sambuc 3007f4a2713aSLionel Sambuc/// FIXME: once all 256-bit intrinsics are matched, cleanup and refactor those 3008f4a2713aSLionel Sambuc/// classes below 3009f4a2713aSLionel Sambucmulticlass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr, 3010f4a2713aSLionel Sambuc SDNode OpNode, SizeItins itins> { 3011*0a6a1f1dSLionel Sambuc let Predicates = [HasAVX, NoVLX] in { 3012f4a2713aSLionel Sambuc defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, 3013f4a2713aSLionel Sambuc VR128, v4f32, f128mem, loadv4f32, 3014*0a6a1f1dSLionel Sambuc SSEPackedSingle, itins.s, 0>, PS, VEX_4V; 3015f4a2713aSLionel Sambuc defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, 3016f4a2713aSLionel Sambuc VR128, v2f64, f128mem, loadv2f64, 3017*0a6a1f1dSLionel Sambuc SSEPackedDouble, itins.d, 0>, PD, VEX_4V; 3018f4a2713aSLionel Sambuc 3019f4a2713aSLionel Sambuc defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), 3020f4a2713aSLionel Sambuc OpNode, VR256, v8f32, f256mem, loadv8f32, 3021*0a6a1f1dSLionel Sambuc SSEPackedSingle, itins.s, 0>, PS, VEX_4V, VEX_L; 3022f4a2713aSLionel Sambuc defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), 3023f4a2713aSLionel Sambuc OpNode, VR256, v4f64, f256mem, loadv4f64, 3024*0a6a1f1dSLionel Sambuc SSEPackedDouble, itins.d, 0>, PD, 
VEX_4V, VEX_L; 3025*0a6a1f1dSLionel Sambuc } 3026f4a2713aSLionel Sambuc 3027f4a2713aSLionel Sambuc let Constraints = "$src1 = $dst" in { 3028f4a2713aSLionel Sambuc defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128, 3029f4a2713aSLionel Sambuc v4f32, f128mem, memopv4f32, SSEPackedSingle, 3030*0a6a1f1dSLionel Sambuc itins.s>, PS; 3031f4a2713aSLionel Sambuc defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128, 3032f4a2713aSLionel Sambuc v2f64, f128mem, memopv2f64, SSEPackedDouble, 3033*0a6a1f1dSLionel Sambuc itins.d>, PD; 3034f4a2713aSLionel Sambuc } 3035f4a2713aSLionel Sambuc} 3036f4a2713aSLionel Sambuc 3037f4a2713aSLionel Sambucmulticlass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode, 3038f4a2713aSLionel Sambuc SizeItins itins> { 3039f4a2713aSLionel Sambuc defm V#NAME#SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), 3040f4a2713aSLionel Sambuc OpNode, FR32, f32mem, itins.s, 0>, XS, VEX_4V, VEX_LIG; 3041f4a2713aSLionel Sambuc defm V#NAME#SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"), 3042f4a2713aSLionel Sambuc OpNode, FR64, f64mem, itins.d, 0>, XD, VEX_4V, VEX_LIG; 3043f4a2713aSLionel Sambuc 3044f4a2713aSLionel Sambuc let Constraints = "$src1 = $dst" in { 3045f4a2713aSLionel Sambuc defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), 3046f4a2713aSLionel Sambuc OpNode, FR32, f32mem, itins.s>, XS; 3047f4a2713aSLionel Sambuc defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"), 3048f4a2713aSLionel Sambuc OpNode, FR64, f64mem, itins.d>, XD; 3049f4a2713aSLionel Sambuc } 3050f4a2713aSLionel Sambuc} 3051f4a2713aSLionel Sambuc 3052f4a2713aSLionel Sambucmulticlass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr, 3053f4a2713aSLionel Sambuc SizeItins itins> { 3054f4a2713aSLionel Sambuc defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128, 3055f4a2713aSLionel Sambuc !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32, 3056f4a2713aSLionel Sambuc itins.s, 0>, XS, 
VEX_4V, VEX_LIG; 3057f4a2713aSLionel Sambuc defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128, 3058f4a2713aSLionel Sambuc !strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64, 3059f4a2713aSLionel Sambuc itins.d, 0>, XD, VEX_4V, VEX_LIG; 3060f4a2713aSLionel Sambuc 3061f4a2713aSLionel Sambuc let Constraints = "$src1 = $dst" in { 3062f4a2713aSLionel Sambuc defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128, 3063f4a2713aSLionel Sambuc !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32, 3064f4a2713aSLionel Sambuc itins.s>, XS; 3065f4a2713aSLionel Sambuc defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128, 3066f4a2713aSLionel Sambuc !strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64, 3067f4a2713aSLionel Sambuc itins.d>, XD; 3068f4a2713aSLionel Sambuc } 3069f4a2713aSLionel Sambuc} 3070f4a2713aSLionel Sambuc 3071f4a2713aSLionel Sambuc// Binary Arithmetic instructions 3072f4a2713aSLionel Sambucdefm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>, 3073f4a2713aSLionel Sambuc basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>, 3074f4a2713aSLionel Sambuc basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S>; 3075f4a2713aSLionel Sambucdefm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>, 3076f4a2713aSLionel Sambuc basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S>, 3077f4a2713aSLionel Sambuc basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S>; 3078f4a2713aSLionel Sambuclet isCommutable = 0 in { 3079f4a2713aSLionel Sambuc defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>, 3080f4a2713aSLionel Sambuc basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>, 3081f4a2713aSLionel Sambuc basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S>; 3082f4a2713aSLionel Sambuc defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>, 3083f4a2713aSLionel Sambuc basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S>, 3084f4a2713aSLionel Sambuc 
basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S>; 3085f4a2713aSLionel Sambuc defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>, 3086f4a2713aSLionel Sambuc basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>, 3087f4a2713aSLionel Sambuc basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S>; 3088f4a2713aSLionel Sambuc defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>, 3089f4a2713aSLionel Sambuc basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>, 3090f4a2713aSLionel Sambuc basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S>; 3091f4a2713aSLionel Sambuc} 3092f4a2713aSLionel Sambuc 3093f4a2713aSLionel Sambuclet isCodeGenOnly = 1 in { 3094f4a2713aSLionel Sambuc defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>, 3095f4a2713aSLionel Sambuc basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S>; 3096f4a2713aSLionel Sambuc defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>, 3097f4a2713aSLionel Sambuc basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S>; 3098f4a2713aSLionel Sambuc} 3099f4a2713aSLionel Sambuc 3100*0a6a1f1dSLionel Sambuc// Patterns used to select SSE scalar fp arithmetic instructions from 3101*0a6a1f1dSLionel Sambuc// a scalar fp operation followed by a blend. 3102*0a6a1f1dSLionel Sambuc// 3103*0a6a1f1dSLionel Sambuc// These patterns know, for example, how to select an ADDSS from a 3104*0a6a1f1dSLionel Sambuc// float add plus vector insert. 3105*0a6a1f1dSLionel Sambuc// 3106*0a6a1f1dSLionel Sambuc// The effect is that the backend no longer emits unnecessary vector 3107*0a6a1f1dSLionel Sambuc// insert instructions immediately after SSE scalar fp instructions 3108*0a6a1f1dSLionel Sambuc// like addss or mulss. 
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     A[0] += B[0];
//     return A;
//   }
//
// previously we generated:
//   addss %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// we now generate:
//   addss %xmm1, %xmm0

/// scalar_math_f32_movss_pat - Emit one selection pattern that matches
///   (X86Movss $dst, (scalar_to_vector (Op (vector_extract $dst, 0), $src)))
/// on v4f32 and selects the instruction named OpcPrefix # "SSrr_Int".
///
///   Op        - scalar fp node applied to element 0 of $dst and FR32:$src.
///   OpcPrefix - stem of the selected instruction's record name, e.g. "ADD"
///               selects ADDSSrr_Int.
///
/// The four arithmetic ops share this exact shape, so it is written once and
/// instantiated per op instead of being copied.
multiclass scalar_math_f32_movss_pat<SDNode Op, string OpcPrefix> {
  def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
              (Op (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                  FR32:$src))))),
            // The selected instruction is given two VR128 operands, so the
            // FR32 source is first copied into the VR128 register class.
            (!cast<Instruction>(!strconcat(OpcPrefix, "SSrr_Int"))
               v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
}

let Predicates = [UseSSE1] in {
  defm : scalar_math_f32_movss_pat<fadd, "ADD">;
  defm : scalar_math_f32_movss_pat<fsub, "SUB">;
  defm : scalar_math_f32_movss_pat<fmul, "MUL">;
  defm : scalar_math_f32_movss_pat<fdiv, "DIV">;
}

/// scalar_math_f64_movsd_pat - Emit one selection pattern that matches
///   (X86Movsd $dst, (scalar_to_vector (Op (vector_extract $dst, 0), $src)))
/// on v2f64 and selects the instruction named OpcPrefix # "SDrr_Int".
///
///   Op        - scalar fp node applied to element 0 of $dst and FR64:$src.
///   OpcPrefix - stem of the selected instruction's record name, e.g. "ADD"
///               selects ADDSDrr_Int and "VADD" selects VADDSDrr_Int.
multiclass scalar_math_f64_movsd_pat<SDNode Op, string OpcPrefix> {
  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector
              (Op (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
                  FR64:$src))))),
            (!cast<Instruction>(!strconcat(OpcPrefix, "SDrr_Int"))
               v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
}

/// scalar_math_insert_blend_pats - Emit the four selection patterns in which
/// the scalar result of Op is put back into $dst by X86insertps or X86Blendi
/// instead of X86Movss/X86Movsd:
///   1. v4f32, X86insertps with immediate 0        -> OpcPrefix # "SSrr_Int"
///   2. v4f32, X86Blendi   with immediate 1        -> OpcPrefix # "SSrr_Int"
///   3. v2f64, X86Blendi   with immediate 1        -> OpcPrefix # "SDrr_Int"
///   4. v2f64, X86Blendi   with the two vector operands swapped and
///      immediate 2                                -> OpcPrefix # "SDrr_Int"
///
///   Op        - scalar fp node applied to element 0 of $dst and $src.
///   OpcPrefix - stem of the selected instruction's record name ("ADD",
///               "VADD", ...).
multiclass scalar_math_insert_blend_pats<SDNode Op, string OpcPrefix> {
  // 1. Single precision, re-inserted with insertps.
  def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
              (Op (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                  FR32:$src))), (iPTR 0))),
            (!cast<Instruction>(!strconcat(OpcPrefix, "SSrr_Int"))
               v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;

  // 2. Single precision, re-inserted with a blend.
  def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
              (Op (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                  FR32:$src))), (i8 1))),
            (!cast<Instruction>(!strconcat(OpcPrefix, "SSrr_Int"))
               v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;

  // 3. Double precision, re-inserted with a blend.
  def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector
              (Op (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
                  FR64:$src))), (i8 1))),
            (!cast<Instruction>(!strconcat(OpcPrefix, "SDrr_Int"))
               v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;

  // 4. Double precision, blend written with its vector operands swapped.
  def : Pat<(v2f64 (X86Blendi (v2f64 (scalar_to_vector
              (Op (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
                  FR64:$src))), (v2f64 VR128:$dst), (i8 2))),
            (!cast<Instruction>(!strconcat(OpcPrefix, "SDrr_Int"))
               v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
}

let Predicates = [UseSSE2] in {
  // SSE2 patterns to select scalar double-precision fp arithmetic instructions
  defm : scalar_math_f64_movsd_pat<fadd, "ADD">;
  defm : scalar_math_f64_movsd_pat<fsub, "SUB">;
  defm : scalar_math_f64_movsd_pat<fmul, "MUL">;
  defm : scalar_math_f64_movsd_pat<fdiv, "DIV">;
}

let Predicates = [UseSSE41] in {
  // If the subtarget has SSE4.1 but not AVX, the vector insert instruction is
  // lowered into a X86insertps or a X86Blendi rather than a X86Movss. When
  // selecting SSE scalar fp arithmetic instructions, make sure that we
  // correctly match them.
  defm : scalar_math_insert_blend_pats<fadd, "ADD">;
  defm : scalar_math_insert_blend_pats<fsub, "SUB">;
  defm : scalar_math_insert_blend_pats<fmul, "MUL">;
  defm : scalar_math_insert_blend_pats<fdiv, "DIV">;
}

let Predicates = [HasAVX] in {
  // The following patterns select AVX Scalar single/double precision fp
  // arithmetic instructions.
  defm : scalar_math_f64_movsd_pat<fadd, "VADD">;
  defm : scalar_math_f64_movsd_pat<fsub, "VSUB">;
  defm : scalar_math_f64_movsd_pat<fmul, "VMUL">;
  defm : scalar_math_f64_movsd_pat<fdiv, "VDIV">;

  defm : scalar_math_insert_blend_pats<fadd, "VADD">;
  defm : scalar_math_insert_blend_pats<fsub, "VSUB">;
  defm : scalar_math_insert_blend_pats<fmul, "VMUL">;
  defm : scalar_math_insert_blend_pats<fdiv, "VDIV">;
}

// Patterns used to select SSE scalar fp arithmetic instructions from
// a vector packed single/double fp operation followed by a vector insert.
//
// The effect is that the backend converts the packed fp instruction
// followed by a vector insert into a single SSE scalar fp instruction.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     __m128 C = A + B;
//     return (__m128) {C[0], A[1], A[2], A[3]};
//   }
//
// previously we generated:
//   addps %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// we now generate:
//   addss %xmm1, %xmm0

/// packed_math_f32_movss_pat - Emit one selection pattern that matches
///   (X86Movss $dst, (Op $dst, $src))
/// on v4f32 and selects the instruction named OpcPrefix # "SSrr_Int".
///
///   Op        - packed fp node whose first operand is the same $dst that
///               X86Movss merges into.
///   OpcPrefix - stem of the selected instruction's record name ("ADD",
///               "VADD", ...).
multiclass packed_math_f32_movss_pat<SDNode Op, string OpcPrefix> {
  def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
              (Op (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
            (!cast<Instruction>(!strconcat(OpcPrefix, "SSrr_Int"))
               v4f32:$dst, v4f32:$src)>;
}

/// packed_math_f64_movsd_pat - Double-precision counterpart of
/// packed_math_f32_movss_pat: matches (X86Movsd $dst, (Op $dst, $src)) on
/// v2f64 and selects OpcPrefix # "SDrr_Int".
multiclass packed_math_f64_movsd_pat<SDNode Op, string OpcPrefix> {
  def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
              (Op (v2f64 VR128:$dst), (v2f64 VR128:$src)))),
            (!cast<Instruction>(!strconcat(OpcPrefix, "SDrr_Int"))
               v2f64:$dst, v2f64:$src)>;
}

/// packed_math_blend_pats - Emit the three selection patterns in which the
/// packed result of Op is merged back into $dst by X86Blendi:
///   1. v4f32, immediate 1                          -> OpcPrefix # "SSrr_Int"
///   2. v2f64, immediate 1                          -> OpcPrefix # "SDrr_Int"
///   3. v2f64, vector operands swapped, immediate 2 -> OpcPrefix # "SDrr_Int"
///
///   Op        - packed fp node whose first operand is the blended $dst.
///   OpcPrefix - stem of the selected instruction's record name ("ADD",
///               "VADD", ...).
multiclass packed_math_blend_pats<SDNode Op, string OpcPrefix> {
  // 1. Single precision.
  def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
              (Op (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
            (!cast<Instruction>(!strconcat(OpcPrefix, "SSrr_Int"))
               v4f32:$dst, v4f32:$src)>;

  // 2. Double precision.
  def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst),
              (Op (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))),
            (!cast<Instruction>(!strconcat(OpcPrefix, "SDrr_Int"))
               v2f64:$dst, v2f64:$src)>;

  // 3. Double precision, blend written with its vector operands swapped.
  def : Pat<(v2f64 (X86Blendi (Op (v2f64 VR128:$dst), (v2f64 VR128:$src)),
              (v2f64 VR128:$dst), (i8 2))),
            (!cast<Instruction>(!strconcat(OpcPrefix, "SDrr_Int"))
               v2f64:$dst, v2f64:$src)>;
}

let Predicates = [UseSSE1] in {
  defm : packed_math_f32_movss_pat<fadd, "ADD">;
  defm : packed_math_f32_movss_pat<fsub, "SUB">;
  defm : packed_math_f32_movss_pat<fmul, "MUL">;
  defm : packed_math_f32_movss_pat<fdiv, "DIV">;
}

let Predicates = [UseSSE2] in {
  // SSE2 patterns to select scalar double-precision fp arithmetic instructions
  // from a packed double-precision fp instruction plus movsd.
  defm : packed_math_f64_movsd_pat<fadd, "ADD">;
  defm : packed_math_f64_movsd_pat<fsub, "SUB">;
  defm : packed_math_f64_movsd_pat<fmul, "MUL">;
  defm : packed_math_f64_movsd_pat<fdiv, "DIV">;
}

let Predicates = [UseSSE41] in {
  // With SSE4.1 we may see these operations using X86Blendi rather than
  // X86Movs{s,d}.
  defm : packed_math_blend_pats<fadd, "ADD">;
  defm : packed_math_blend_pats<fsub, "SUB">;
  defm : packed_math_blend_pats<fmul, "MUL">;
  defm : packed_math_blend_pats<fdiv, "DIV">;
}

let Predicates = [HasAVX] in {
  // The following patterns select AVX Scalar single/double precision fp
  // arithmetic instructions from a packed single/double precision fp
  // instruction plus movss/movsd.
  defm : packed_math_f32_movss_pat<fadd, "VADD">;
  defm : packed_math_f32_movss_pat<fsub, "VSUB">;
  defm : packed_math_f32_movss_pat<fmul, "VMUL">;
  defm : packed_math_f32_movss_pat<fdiv, "VDIV">;

  defm : packed_math_f64_movsd_pat<fadd, "VADD">;
  defm : packed_math_f64_movsd_pat<fsub, "VSUB">;
  defm : packed_math_f64_movsd_pat<fmul, "VMUL">;
  defm : packed_math_f64_movsd_pat<fdiv, "VDIV">;

  // Also handle X86Blendi-based patterns.
  defm : packed_math_blend_pats<fadd, "VADD">;
  defm : packed_math_blend_pats<fsub, "VSUB">;
  defm : packed_math_blend_pats<fmul, "VMUL">;
  defm : packed_math_blend_pats<fdiv, "VDIV">;
}

/// Unop Arithmetic
/// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation. This form is unlike the
/// plain scalar form, in that it takes an entire vector (instead of a
/// scalar) and leaves the top elements undefined.
///
/// And, we have a special variant form for a full-vector intrinsic form.

// Itinerary pairs (register form, memory form) for the unary FP operations.
// Each record overrides the OpndItins default Sched with the scheduling
// class of its operation.

// Square root.
def SSE_SQRTPS : OpndItins<IIC_SSE_SQRTPS_RR, IIC_SSE_SQRTPS_RM> {
  let Sched = WriteFSqrt;
}
def SSE_SQRTSS : OpndItins<IIC_SSE_SQRTSS_RR, IIC_SSE_SQRTSS_RM> {
  let Sched = WriteFSqrt;
}
def SSE_SQRTPD : OpndItins<IIC_SSE_SQRTPD_RR, IIC_SSE_SQRTPD_RM> {
  let Sched = WriteFSqrt;
}
def SSE_SQRTSD : OpndItins<IIC_SSE_SQRTSD_RR, IIC_SSE_SQRTSD_RM> {
  let Sched = WriteFSqrt;
}

// Reciprocal square root.
def SSE_RSQRTPS : OpndItins<IIC_SSE_RSQRTPS_RR, IIC_SSE_RSQRTPS_RM> {
  let Sched = WriteFRsqrt;
}
def SSE_RSQRTSS : OpndItins<IIC_SSE_RSQRTSS_RR, IIC_SSE_RSQRTSS_RM> {
  let Sched = WriteFRsqrt;
}

// Reciprocal.
def SSE_RCPP : OpndItins<IIC_SSE_RCPP_RR, IIC_SSE_RCPP_RM> {
  let Sched = WriteFRcp;
}
def SSE_RCPS : OpndItins<IIC_SSE_RCPS_RR, IIC_SSE_RCPS_RM> {
  let Sched = WriteFRcp;
}

/// sse1_fp_unop_s - SSE1 unops in scalar form
/// For the non-AVX defs, we need $src1 to be tied to $dst because
/// the HW instructions are 2 operand / destructive.
multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          OpndItins itins> {
// Parameters:
//   opc       - opcode byte shared by every form defined here.
//   OpcodeStr - mnemonic stem; "ss" (and a leading "v" for the AVX forms)
//               is appended to build the assembly string.
//   OpNode    - DAG node matched by the plain scalar SSE forms.
//   itins     - itinerary pair (rr/rm) plus scheduling class (Sched).

// AVX forms: three operand (VEX_4V). No selection pattern is attached here.
let Predicates = [HasAVX], hasSideEffects = 0 in {
  def V#NAME#SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst),
                      (ins FR32:$src1, FR32:$src2),
                      !strconcat("v", OpcodeStr,
                                 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>;
  let mayLoad = 1 in {
  def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
                      (ins FR32:$src1,f32mem:$src2),
                      !strconcat("v", OpcodeStr,
                                 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      []>, VEX_4V, VEX_LIG,
                   Sched<[itins.Sched.Folded, ReadAfterLd]>;
  let isCodeGenOnly = 1 in
  def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
                      (ins VR128:$src1, ssmem:$src2),
                      !strconcat("v", OpcodeStr,
                                 "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      []>, VEX_4V, VEX_LIG,
                      Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}

  // Plain scalar register form. itins.rr is passed so that this form carries
  // an itinerary like SSm / SSr_Int / SSm_Int below (and like SDr in
  // sse2_fp_unop_s) instead of falling back to the class default.
  def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
                !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                [(set FR32:$dst, (OpNode FR32:$src))], itins.rr>,
            Sched<[itins.Sched]>;
  // For scalar unary operations, fold a load into the operation
  // only in OptForSize mode. It eliminates an instruction, but it also
  // eliminates a whole-register clobber (the load), so it introduces a
  // partial register update condition.
  def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
                !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
            Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>;
  // Whole-vector (VR128) forms. $src1 is tied to $dst and only $src2 is
  // printed in the assembly string; no pattern is attached here.
  let isCodeGenOnly = 1, Constraints = "$src1 = $dst" in {
    def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
                      (ins VR128:$src1, VR128:$src2),
                      !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                      [], itins.rr>, Sched<[itins.Sched]>;
    let mayLoad = 1, hasSideEffects = 0 in
    def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
                      (ins VR128:$src1, ssmem:$src2),
                      !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
                      [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}

/// sse1_fp_unop_p - SSE1 unops in packed form.
multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          OpndItins itins> {
  // AVX forms: 128-bit (VEX) and 256-bit (VEX, VEX_L), each with a register
  // and a memory source.
  let Predicates = [HasAVX] in {
    def V#NAME#PSr
      : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
            "v"#OpcodeStr#"ps\t{$src, $dst|$dst, $src}",
            [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))],
            itins.rr>,
        VEX, Sched<[itins.Sched]>;
    def V#NAME#PSm
      : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
            "v"#OpcodeStr#"ps\t{$src, $dst|$dst, $src}",
            [(set VR128:$dst, (OpNode (loadv4f32 addr:$src)))],
            itins.rm>,
        VEX, Sched<[itins.Sched.Folded]>;
    def V#NAME#PSYr
      : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
            "v"#OpcodeStr#"ps\t{$src, $dst|$dst, $src}",
            [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))],
            itins.rr>,
        VEX, VEX_L, Sched<[itins.Sched]>;
    def V#NAME#PSYm
      : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
            "v"#OpcodeStr#"ps\t{$src, $dst|$dst, $src}",
            [(set VR256:$dst, (OpNode (loadv8f32 addr:$src)))],
            itins.rm>,
        VEX, VEX_L, Sched<[itins.Sched.Folded]>;
  }

  // Non-AVX 128-bit forms. The memory form matches memopv4f32 where the
  // AVX form above matches loadv4f32.
  def PSr
    : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
          OpcodeStr#"ps\t{$src, $dst|$dst, $src}",
          [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))],
          itins.rr>,
      Sched<[itins.Sched]>;
  def PSm
    : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
          OpcodeStr#"ps\t{$src, $dst|$dst, $src}",
          [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))],
          itins.rm>,
      Sched<[itins.Sched.Folded]>;
}

/// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms.
multiclass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr,
                              Intrinsic V4F32Int, Intrinsic V8F32Int,
                              OpndItins itins> {
  // Every def in this multiclass is isCodeGenOnly. V4F32Int is matched by
  // the 128-bit forms, V8F32Int by the 256-bit forms.
  let isCodeGenOnly = 1 in {
    let Predicates = [HasAVX] in {
      def V#NAME#PSr_Int
        : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
              "v"#OpcodeStr#"ps\t{$src, $dst|$dst, $src}",
              [(set VR128:$dst, (V4F32Int VR128:$src))],
              itins.rr>,
          VEX, Sched<[itins.Sched]>;
      def V#NAME#PSm_Int
        : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
              "v"#OpcodeStr#"ps\t{$src, $dst|$dst, $src}",
              [(set VR128:$dst, (V4F32Int (loadv4f32 addr:$src)))],
              itins.rm>,
          VEX, Sched<[itins.Sched.Folded]>;
      def V#NAME#PSYr_Int
        : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
              "v"#OpcodeStr#"ps\t{$src, $dst|$dst, $src}",
              [(set VR256:$dst, (V8F32Int VR256:$src))],
              itins.rr>,
          VEX, VEX_L, Sched<[itins.Sched]>;
      def V#NAME#PSYm_Int
        : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
              "v"#OpcodeStr#"ps\t{$src, $dst|$dst, $src}",
              [(set VR256:$dst, (V8F32Int (loadv8f32 addr:$src)))],
              itins.rm>,
          VEX, VEX_L, Sched<[itins.Sched.Folded]>;
    }

    def PSr_Int
      : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
            OpcodeStr#"ps\t{$src, $dst|$dst, $src}",
            [(set VR128:$dst, (V4F32Int VR128:$src))],
            itins.rr>,
        Sched<[itins.Sched]>;
    def PSm_Int
      : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
            OpcodeStr#"ps\t{$src, $dst|$dst, $src}",
            [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))],
            itins.rm>,
        Sched<[itins.Sched.Folded]>;
  } // isCodeGenOnly = 1
}

/// sse2_fp_unop_s - SSE2 unops in scalar form.
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
                          SDNode OpNode, Intrinsic F64Int, OpndItins itins> {
  // AVX forms: three operand (VEX_4V), defined without selection patterns.
  let Predicates = [HasAVX], hasSideEffects = 0 in {
    def V#NAME#SDr
      : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
            "v"#OpcodeStr#"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
            []>,
        VEX_4V, VEX_LIG, Sched<[itins.Sched]>;

    let mayLoad = 1 in
    def V#NAME#SDm
      : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f64mem:$src2),
            "v"#OpcodeStr#"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
            []>,
        VEX_4V, VEX_LIG, Sched<[itins.Sched.Folded, ReadAfterLd]>;

    let mayLoad = 1, isCodeGenOnly = 1 in
    def V#NAME#SDm_Int
      : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
            "v"#OpcodeStr#"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
            []>,
        VEX_4V, VEX_LIG, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }

  // Plain scalar SSE2 forms selected from OpNode.
  def SDr
    : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
          OpcodeStr#"sd\t{$src, $dst|$dst, $src}",
          [(set FR64:$dst, (OpNode FR64:$src))],
          itins.rr>,
      Sched<[itins.Sched]>;
  // See the comments in sse1_fp_unop_s for why this is OptForSize.
  def SDm
    : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
        OpcodeStr#"sd\t{$src, $dst|$dst, $src}",
        [(set FR64:$dst, (OpNode (load addr:$src)))],
        itins.rm>,
      XD, Requires<[UseSSE2, OptForSize]>, Sched<[itins.Sched.Folded]>;

  // Whole-vector forms selected from the F64Int intrinsic.
  let isCodeGenOnly = 1 in {
    def SDr_Int
      : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
            OpcodeStr#"sd\t{$src, $dst|$dst, $src}",
            [(set VR128:$dst, (F64Int VR128:$src))],
            itins.rr>,
        Sched<[itins.Sched]>;
    def SDm_Int
      : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
            OpcodeStr#"sd\t{$src, $dst|$dst, $src}",
            [(set VR128:$dst, (F64Int sse_load_f64:$src))],
            itins.rm>,
        Sched<[itins.Sched.Folded]>;
  }
}

/// sse2_fp_unop_p - SSE2 unops in vector forms.
multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
                          SDNode OpNode, OpndItins itins> {
  // AVX forms: 128-bit (VEX) and 256-bit (VEX, VEX_L), each with a register
  // and a memory source.
  let Predicates = [HasAVX] in {
    def V#NAME#PDr
      : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
            "v"#OpcodeStr#"pd\t{$src, $dst|$dst, $src}",
            [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))],
            itins.rr>,
        VEX, Sched<[itins.Sched]>;
    def V#NAME#PDm
      : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
            "v"#OpcodeStr#"pd\t{$src, $dst|$dst, $src}",
            [(set VR128:$dst, (OpNode (loadv2f64 addr:$src)))],
            itins.rm>,
        VEX, Sched<[itins.Sched.Folded]>;
    def V#NAME#PDYr
      : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
            "v"#OpcodeStr#"pd\t{$src, $dst|$dst, $src}",
            [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))],
            itins.rr>,
        VEX, VEX_L, Sched<[itins.Sched]>;
    def V#NAME#PDYm
      : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
            "v"#OpcodeStr#"pd\t{$src, $dst|$dst, $src}",
            [(set VR256:$dst, (OpNode (loadv4f64 addr:$src)))],
            itins.rm>,
        VEX, VEX_L, Sched<[itins.Sched.Folded]>;
  }

  // Non-AVX 128-bit forms. The memory form matches memopv2f64 where the
  // AVX form above matches loadv2f64.
  def PDr
    : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
          OpcodeStr#"pd\t{$src, $dst|$dst, $src}",
          [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))],
          itins.rr>,
      Sched<[itins.Sched]>;
  def PDm
    : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
          OpcodeStr#"pd\t{$src, $dst|$dst, $src}",
          [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))],
          itins.rm>,
      Sched<[itins.Sched.Folded]>;
}

// Square root.
defm SQRT
  : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSS>,
    sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPS>,
    sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd, SSE_SQRTSD>,
    sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPD>;

// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SSE_RSQRTSS>,
             sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_RSQRTPS>,
             sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps,
                                int_x86_avx_rsqrt_ps_256, SSE_RSQRTPS>;
defm RCP   : sse1_fp_unop_s<0x53, "rcp", X86frcp, SSE_RCPS>,
             sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>,
             sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps,
                                int_x86_avx_rcp_ps_256, SSE_RCPP>;

// Scalar selection patterns for the AVX forms. The first source operand of
// the three-operand instruction is supplied as IMPLICIT_DEF.
//
// The predicate list is given only by the enclosing 'let': a file-scope
// 'let' is applied after the base classes of a record, so a per-pattern
// Requires<...> inside such a block would be overridden and have no effect.
// Register patterns and load-folding patterns therefore live in separate
// blocks so that OptForSize really applies to the latter.
let Predicates = [UseAVX] in {
  def : Pat<(f32 (fsqrt FR32:$src)),
            (VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>;
  def : Pat<(f64 (fsqrt FR64:$src)),
            (VSQRTSDr (f64 (IMPLICIT_DEF)), FR64:$src)>;
  def : Pat<(f32 (X86frsqrt FR32:$src)),
            (VRSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>;
  def : Pat<(f32 (X86frcp FR32:$src)),
            (VRCPSSr (f32 (IMPLICIT_DEF)), FR32:$src)>;
}

// Fold the load only when optimizing for size; see the comment in
// sse1_fp_unop_s for the reasoning.
let Predicates = [UseAVX, OptForSize] in {
  def : Pat<(f32 (fsqrt (load addr:$src))),
            (VSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f64 (fsqrt (load addr:$src))),
            (VSQRTSDm (f64 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f32 (X86frsqrt (load addr:$src))),
            (VRSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f32 (X86frcp (load addr:$src))),
            (VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>;
}

// Scalar square-root intrinsics: the register form goes through the FR32 /
// FR64 instruction with register-class copies, the load form uses the
// *_Int memory instruction.
let Predicates = [UseAVX] in {
  def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
            (COPY_TO_REGCLASS (VSQRTSSr (f32 (IMPLICIT_DEF)),
                                        (COPY_TO_REGCLASS VR128:$src, FR32)),
                              VR128)>;
  def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src),
            (VSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;

  def : Pat<(int_x86_sse2_sqrt_sd VR128:$src),
            (COPY_TO_REGCLASS (VSQRTSDr (f64 (IMPLICIT_DEF)),
                                        (COPY_TO_REGCLASS VR128:$src, FR64)),
                              VR128)>;
  def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
            (VSQRTSDm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
}

// Scalar reciprocal / reciprocal-square-root intrinsics, same scheme.
let Predicates = [HasAVX] in {
  def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
            (COPY_TO_REGCLASS (VRSQRTSSr (f32 (IMPLICIT_DEF)),
                                         (COPY_TO_REGCLASS VR128:$src, FR32)),
                              VR128)>;
  def : Pat<(int_x86_sse_rsqrt_ss sse_load_f32:$src),
            (VRSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;

  def : Pat<(int_x86_sse_rcp_ss VR128:$src),
            (COPY_TO_REGCLASS (VRCPSSr (f32 (IMPLICIT_DEF)),
                                       (COPY_TO_REGCLASS VR128:$src, FR32)),
                              VR128)>;
  def : Pat<(int_x86_sse_rcp_ss sse_load_f32:$src),
            (VRCPSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
}

// These are unary operations, but they are modeled as having 2 source operands
// because the high elements of the destination are unchanged in SSE.
let Predicates = [UseSSE1] in {
  def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
            (RSQRTSSr_Int VR128:$src, VR128:$src)>;
  def : Pat<(int_x86_sse_rcp_ss VR128:$src),
            (RCPSSr_Int VR128:$src, VR128:$src)>;
  def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
            (SQRTSSr_Int VR128:$src, VR128:$src)>;
}

// There is no f64 version of the reciprocal approximation instructions.

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Non-temporal stores
//===----------------------------------------------------------------------===//

let AddedComplexity = 400 in { // Prefer non-temporal versions
let SchedRW = [WriteStore] in {

// VEX-encoded forms, 128-bit then 256-bit.
let Predicates = [HasAVX, NoVLX] in {
def VMOVNTPSmr
  : VPSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
         "movntps\t{$src, $dst|$dst, $src}",
         [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)],
         IIC_SSE_MOVNT>,
    VEX;
def VMOVNTPDmr
  : VPDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
         "movntpd\t{$src, $dst|$dst, $src}",
         [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)],
         IIC_SSE_MOVNT>,
    VEX;

let ExeDomain = SSEPackedInt in
def VMOVNTDQmr
  : VPDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
         "movntdq\t{$src, $dst|$dst, $src}",
         [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)],
         IIC_SSE_MOVNT>,
    VEX;

def VMOVNTPSYmr
  : VPSI<0x2B, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
         "movntps\t{$src, $dst|$dst, $src}",
         [(alignednontemporalstore (v8f32 VR256:$src), addr:$dst)],
         IIC_SSE_MOVNT>,
    VEX, VEX_L;
def VMOVNTPDYmr
  : VPDI<0x2B, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
         "movntpd\t{$src, $dst|$dst, $src}",
         [(alignednontemporalstore (v4f64 VR256:$src), addr:$dst)],
         IIC_SSE_MOVNT>,
    VEX, VEX_L;
let ExeDomain = SSEPackedInt in
def VMOVNTDQYmr
  : VPDI<0xE7, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
         "movntdq\t{$src, $dst|$dst, $src}",
         [(alignednontemporalstore (v4i64 VR256:$src), addr:$dst)],
         IIC_SSE_MOVNT>,
    VEX, VEX_L;
}

// Non-VEX 128-bit forms.
def MOVNTPSmr
  : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
        "movntps\t{$src, $dst|$dst, $src}",
        [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)],
        IIC_SSE_MOVNT>;
def MOVNTPDmr
  : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
        "movntpd\t{$src, $dst|$dst, $src}",
        [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)],
        IIC_SSE_MOVNT>;

let ExeDomain = SSEPackedInt in
def MOVNTDQmr
  : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
        "movntdq\t{$src, $dst|$dst, $src}",
        [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)],
        IIC_SSE_MOVNT>;

// There is no AVX form for instructions below this point
def MOVNTImr
  : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
      "movnti{l}\t{$src, $dst|$dst, $src}",
      [(nontemporalstore (i32 GR32:$src), addr:$dst)],
      IIC_SSE_MOVNT>,
    PS, Requires<[HasSSE2]>;
def MOVNTI_64mr
  : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
       "movnti{q}\t{$src, $dst|$dst, $src}",
       [(nontemporalstore (i64 GR64:$src), addr:$dst)],
       IIC_SSE_MOVNT>,
    PS, Requires<[HasSSE2]>;
} // SchedRW = [WriteStore]

// Aligned non-temporal stores of v4i32 are selected to the movntps forms.
let Predicates = [HasAVX, NoVLX] in {
  def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
            (VMOVNTPSmr addr:$dst, VR128:$src)>;
}

def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
          (MOVNTPSmr addr:$dst, VR128:$src)>;

} // AddedComplexity

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Prefetch and memory fence
//===----------------------------------------------------------------------===//

// Prefetch intrinsic.
3928f4a2713aSLionel Sambuclet Predicates = [HasSSE1], SchedRW = [WriteLoad] in { 3929f4a2713aSLionel Sambucdef PREFETCHT0 : I<0x18, MRM1m, (outs), (ins i8mem:$src), 3930f4a2713aSLionel Sambuc "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))], 3931f4a2713aSLionel Sambuc IIC_SSE_PREFETCH>, TB; 3932f4a2713aSLionel Sambucdef PREFETCHT1 : I<0x18, MRM2m, (outs), (ins i8mem:$src), 3933f4a2713aSLionel Sambuc "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))], 3934f4a2713aSLionel Sambuc IIC_SSE_PREFETCH>, TB; 3935f4a2713aSLionel Sambucdef PREFETCHT2 : I<0x18, MRM3m, (outs), (ins i8mem:$src), 3936f4a2713aSLionel Sambuc "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))], 3937f4a2713aSLionel Sambuc IIC_SSE_PREFETCH>, TB; 3938f4a2713aSLionel Sambucdef PREFETCHNTA : I<0x18, MRM0m, (outs), (ins i8mem:$src), 3939f4a2713aSLionel Sambuc "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))], 3940f4a2713aSLionel Sambuc IIC_SSE_PREFETCH>, TB; 3941f4a2713aSLionel Sambuc} 3942f4a2713aSLionel Sambuc 3943*0a6a1f1dSLionel Sambuc// FIXME: How should flush instruction be modeled? 3944f4a2713aSLionel Sambuclet SchedRW = [WriteLoad] in { 3945f4a2713aSLionel Sambuc// Flush cache 3946f4a2713aSLionel Sambucdef CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src), 3947f4a2713aSLionel Sambuc "clflush\t$src", [(int_x86_sse2_clflush addr:$src)], 3948f4a2713aSLionel Sambuc IIC_SSE_PREFETCH>, TB, Requires<[HasSSE2]>; 3949*0a6a1f1dSLionel Sambuc} 3950f4a2713aSLionel Sambuc 3951*0a6a1f1dSLionel Sambuclet SchedRW = [WriteNop] in { 3952f4a2713aSLionel Sambuc// Pause. This "instruction" is encoded as "rep; nop", so even though it 3953f4a2713aSLionel Sambuc// was introduced with SSE2, it's backward compatible. 
def PAUSE : I<0x90, RawFrm, (outs), (ins),
    "pause", [(int_x86_sse2_pause)], IIC_SSE_PAUSE>,
    OBXS, Requires<[HasSSE2]>;
}

// Load, store, and memory fence
// The three fences share opcode 0xAE and differ only in their Format
// (MRM_F8 / MRM_E8 / MRM_F0). SFENCE requires SSE1; LFENCE and MFENCE
// require SSE2.
let SchedRW = [WriteFence] in {
def SFENCE : I<0xAE, MRM_F8, (outs), (ins),
    "sfence", [(int_x86_sse_sfence)], IIC_SSE_SFENCE>,
    TB, Requires<[HasSSE1]>;
def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
    "lfence", [(int_x86_sse2_lfence)], IIC_SSE_LFENCE>,
    TB, Requires<[HasSSE2]>;
def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
    "mfence", [(int_x86_sse2_mfence)], IIC_SSE_MFENCE>,
    TB, Requires<[HasSSE2]>;
} // SchedRW

// The X86 fence nodes select to the same instructions as the intrinsics.
def : Pat<(X86SFence), (SFENCE)>;
def : Pat<(X86LFence), (LFENCE)>;
def : Pat<(X86MFence), (MFENCE)>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Load/Store MXCSR register
//===----------------------------------------------------------------------===//

// VEX-encoded forms.
def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
    "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
    IIC_SSE_LDMXCSR>,
    VEX, Sched<[WriteLoad]>;
def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
    "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
    IIC_SSE_STMXCSR>,
    VEX, Sched<[WriteStore]>;

// Legacy-encoded forms, matched only under UseSSE1.
let Predicates = [UseSSE1] in {
def LDMXCSR : I<0xAE, MRM2m, (outs), (ins i32mem:$src),
    "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
    IIC_SSE_LDMXCSR>,
    TB, Sched<[WriteLoad]>;
def STMXCSR : I<0xAE, MRM3m, (outs), (ins i32mem:$dst),
    "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
    IIC_SSE_STMXCSR>,
    TB, Sched<[WriteStore]>;
}

//===---------------------------------------------------------------------===//
// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in { // SSE integer instructions

// VEX register-to-register moves. They carry no selection pattern, so the
// side-effect flag is stated explicitly.
let hasSideEffects = 0, SchedRW = [WriteMove] in {
def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
    "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
    VEX;
def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
    "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
    VEX, VEX_L;
def VMOVDQUrr : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
    "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
    VEX;
def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
    "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
    VEX, VEX_L;
}

// For Disassembler
// Same moves expressed with opcode 0x7F / MRMDestReg. isCodeGenOnly together
// with ForceDisassemble keeps them visible to the disassembler only.
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
    SchedRW = [WriteMove] in {
def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
    "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
    VEX;
def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
    "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
    VEX, VEX_L;
def VMOVDQUrr_REV : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
    "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
    VEX;
def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
    "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
    VEX, VEX_L;
}

// VEX loads: foldable, rematerializable, and free of side effects.
let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
    hasSideEffects = 0, SchedRW = [WriteLoad] in {
def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
    "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>,
    VEX;
def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
    "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>,
    VEX, VEX_L;
let Predicates = [HasAVX] in {
// Unaligned VEX loads. These are built on the plain I class, so the "v"
// mnemonic prefix, the XS prefix and the HasAVX predicate are spelled out.
def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
    "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_RM>,
    XS, VEX;
def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
    "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_RM>,
    XS, VEX, VEX_L;
}
}

// VEX stores (aligned via VPDI, unaligned via I + XS under HasAVX).
let mayStore = 1, hasSideEffects = 0, SchedRW = [WriteStore] in {
def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
    (ins i128mem:$dst, VR128:$src),
    "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>,
    VEX;
def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
    (ins i256mem:$dst, VR256:$src),
    "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>,
    VEX, VEX_L;
let Predicates = [HasAVX] in {
def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
    "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_MR>,
    XS, VEX;
def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
    "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_MR>,
    XS, VEX, VEX_L;
}
}

let SchedRW = [WriteMove] in {
// Legacy register-to-register moves. Both have an empty pattern list, so
// there is nothing to infer hasSideEffects from and it must be set by hand.
// MOVDQUrr used to sit outside this scope and was therefore conservatively
// treated as having side effects, unlike MOVDQArr, MOVDQUrr_REV and the VEX
// VMOVDQUrr form.
let hasSideEffects = 0 in {
def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
    "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>;

def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
    "movdqu\t{$src, $dst|$dst, $src}",
    [], IIC_SSE_MOVU_P_RR>,
    XS, Requires<[UseSSE2]>;
}

// For Disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
    "movdqa\t{$src, $dst|$dst, $src}", [],
    IIC_SSE_MOVA_P_RR>;

def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
    "movdqu\t{$src, $dst|$dst, $src}",
    [], IIC_SSE_MOVU_P_RR>,
    XS, Requires<[UseSSE2]>;
}
} // SchedRW

// Legacy loads. The selection patterns are deliberately left commented out
// inside the (therefore empty) pattern lists.
let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
    hasSideEffects = 0, SchedRW = [WriteLoad] in {
def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
    "movdqa\t{$src, $dst|$dst, $src}",
    [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/],
    IIC_SSE_MOVA_P_RM>;
def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
    "movdqu\t{$src, $dst|$dst, $src}",
    [/*(set VR128:$dst, (loadv2i64 addr:$src))*/],
    IIC_SSE_MOVU_P_RM>,
    XS, Requires<[UseSSE2]>;
}

let mayStore = 1, hasSideEffects = 0, SchedRW = [WriteStore] in {
// Legacy stores. As with the loads, the selection patterns are left commented
// out inside the pattern lists.
def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
    "movdqa\t{$src, $dst|$dst, $src}",
    [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/],
    IIC_SSE_MOVA_P_MR>;
def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
    "movdqu\t{$src, $dst|$dst, $src}",
    [/*(store (v2i64 VR128:$src), addr:$dst)*/],
    IIC_SSE_MOVU_P_MR>,
    XS, Requires<[UseSSE2]>;
}

} // ExeDomain = SSEPackedInt

// The unaligned-store intrinsics map onto the unaligned integer stores:
// VEX forms under HasAVX, the legacy form under UseSSE2.
let Predicates = [HasAVX] in {
  def : Pat<(int_x86_sse2_storeu_dq addr:$dst, VR128:$src),
            (VMOVDQUmr addr:$dst, VR128:$src)>;
  def : Pat<(int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src),
            (VMOVDQUYmr addr:$dst, VR256:$src)>;
}
let Predicates = [UseSSE2] in {
  def : Pat<(int_x86_sse2_storeu_dq addr:$dst, VR128:$src),
            (MOVDQUmr addr:$dst, VR128:$src)>;
}

//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Arithmetic Instructions
//===---------------------------------------------------------------------===//

// Itinerary bundle for the multiply-add style ops: the same itinerary class
// serves both the rr and rm forms, scheduled as WriteVecIMul.
let Sched = WriteVecIMul in
def SSE_PMADD : OpndItins<IIC_SSE_PMADD, IIC_SSE_PMADD>;

let ExeDomain = SSEPackedInt in { // SSE integer instructions

/// PDI_binop_rm_int - Intrinsic-based SSE2 binary operator. Emits a
/// register-register (rr) and a register-memory (rm) form. Is2Addr selects
/// between the two-operand and the three-operand assembly string, and the
/// memory operand is loaded through memop_frag and bitconverted.
multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
                            RegisterClass RC, PatFrag memop_frag,
                            X86MemOperand x86memop,
                            OpndItins itins,
                            bit IsCommutable = 0,
                            bit Is2Addr = 1> {
  // Only the rr form is marked commutable.
  let isCommutable = IsCommutable in
  def rr : PDI<opc, MRMSrcReg,
      (outs RC:$dst), (ins RC:$src1, RC:$src2),
      !if(Is2Addr,
          !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
          !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
      [(set RC:$dst, (IntId RC:$src1, RC:$src2))],
      itins.rr>,
      Sched<[itins.Sched]>;
  def rm : PDI<opc, MRMSrcMem,
      (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
      !if(Is2Addr,
          !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
          !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
      [(set RC:$dst, (IntId RC:$src1, (bitconvert (memop_frag addr:$src2))))],
      itins.rm>,
      Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

/// PDI_binop_all_int - Instantiates PDI_binop_rm_int for the 128-bit VEX form
/// (V#NAME), the legacy SSE form (NAME) and the 256-bit VEX form (V#NAME#Y).
multiclass PDI_binop_all_int<bits<8> opc, string OpcodeStr, Intrinsic IntId128,
                             Intrinsic IntId256, OpndItins itins,
                             bit IsCommutable = 0> {
let Predicates = [HasAVX] in
  defm V#NAME : PDI_binop_rm_int<opc, !strconcat("v", OpcodeStr), IntId128,
                                 VR128, loadv2i64, i128mem, itins,
                                 IsCommutable, 0>,
                VEX_4V;

let Constraints = "$src1 = $dst" in
  defm NAME : PDI_binop_rm_int<opc, OpcodeStr, IntId128, VR128, memopv2i64,
                               i128mem, itins, IsCommutable, 1>;

let Predicates = [HasAVX2] in
  defm V#NAME#Y : PDI_binop_rm_int<opc, !strconcat("v", OpcodeStr), IntId256,
                                   VR256, loadv4i64, i256mem, itins,
                                   IsCommutable, 0>,
                  VEX_4V, VEX_L;
}

/// PDI_binop_rmi - Packed shift with three forms:
///   rr - shift count in a 128-bit register (opcode opc, node OpNode)
///   rm - shift count loaded from 128-bit memory (opcode opc, node OpNode)
///   ri - shift count as an 8-bit immediate (opcode opc2 / ImmForm, OpNode2)
/// Is2Addr selects between the two- and three-operand assembly strings.
multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
                         string OpcodeStr, SDNode OpNode,
                         SDNode OpNode2, RegisterClass RC,
                         ValueType DstVT, ValueType SrcVT, PatFrag bc_frag,
                         ShiftOpndItins itins,
                         bit Is2Addr = 1> {
  // src2 is always 128-bit
  def rr : PDI<opc, MRMSrcReg,
      (outs RC:$dst), (ins RC:$src1, VR128:$src2),
      !if(Is2Addr,
          !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
          !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
      [(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))],
      itins.rr>,
      Sched<[WriteVecShift]>;
  def rm : PDI<opc, MRMSrcMem,
      (outs RC:$dst), (ins RC:$src1, i128mem:$src2),
      !if(Is2Addr,
          !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
          !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
      [(set RC:$dst, (DstVT (OpNode RC:$src1,
                             (bc_frag (memopv2i64 addr:$src2)))))],
      itins.rm>,
      Sched<[WriteVecShiftLd, ReadAfterLd]>;
  def ri : PDIi8<opc2, ImmForm,
      (outs RC:$dst), (ins RC:$src1, i8imm:$src2),
      !if(Is2Addr,
          !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
          !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
      [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i8 imm:$src2))))],
      itins.ri>,
      Sched<[WriteVecShift]>;
}

/// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types
///
/// The rr/rm itinerary classes from itins are forwarded to PDI, matching
/// PDI_binop_rm_int and PDI_binop_rmi. Previously only itins.Sched was used
/// and both forms silently fell back to the default itinerary.
multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         ValueType DstVT, ValueType SrcVT, RegisterClass RC,
                         PatFrag memop_frag, X86MemOperand x86memop,
                         OpndItins itins,
                         bit IsCommutable = 0, bit Is2Addr = 1> {
  // Only the rr form is marked commutable.
  let isCommutable = IsCommutable in
  def rr : PDI<opc, MRMSrcReg,
      (outs RC:$dst), (ins RC:$src1, RC:$src2),
      !if(Is2Addr,
          !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
          !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
      [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))],
      itins.rr>,
      Sched<[itins.Sched]>;
  def rm : PDI<opc, MRMSrcMem,
      (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
      !if(Is2Addr,
          !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
          !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
      [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1),
                                    (bitconvert (memop_frag addr:$src2)))))],
      itins.rm>,
      Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
} // ExeDomain = SSEPackedInt

// SDNode-based forms. Arguments are: opcode, mnemonic, node, 128-bit type,
// 256-bit type, itineraries, and the commutable flag.
defm PADDB   : PDI_binop_all<0xFC, "paddb", add, v16i8, v32i8,
                             SSE_INTALU_ITINS_P, 1>;
defm PADDW   : PDI_binop_all<0xFD, "paddw", add, v8i16, v16i16,
                             SSE_INTALU_ITINS_P, 1>;
defm PADDD   : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32,
                             SSE_INTALU_ITINS_P, 1>;
defm PADDQ   : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64,
                             SSE_INTALUQ_ITINS_P, 1>;
defm PMULLW  : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16,
                             SSE_INTMUL_ITINS_P, 1>;
defm PMULHUW : PDI_binop_all<0xE4, "pmulhuw", mulhu, v8i16, v16i16,
                             SSE_INTMUL_ITINS_P, 1>;
defm PMULHW  : PDI_binop_all<0xE5, "pmulhw", mulhs, v8i16, v16i16,
                             SSE_INTMUL_ITINS_P, 1>;
defm PSUBB   : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8,
                             SSE_INTALU_ITINS_P, 0>;
defm PSUBW   : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16,
                             SSE_INTALU_ITINS_P, 0>;
defm PSUBD   : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32,
                             SSE_INTALU_ITINS_P, 0>;
defm PSUBQ   : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64,
                             SSE_INTALUQ_ITINS_P, 0>;
defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8,
                             SSE_INTALU_ITINS_P, 0>;
defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16,
                             SSE_INTALU_ITINS_P, 0>;
defm PMINUB  : PDI_binop_all<0xDA, "pminub", X86umin, v16i8, v32i8,
                             SSE_INTALU_ITINS_P, 1>;
defm PMINSW  : PDI_binop_all<0xEA, "pminsw", X86smin, v8i16, v16i16,
                             SSE_INTALU_ITINS_P, 1>;
defm PMAXUB  : PDI_binop_all<0xDE, "pmaxub", X86umax, v16i8, v32i8,
                             SSE_INTALU_ITINS_P, 1>;
defm PMAXSW  : PDI_binop_all<0xEE, "pmaxsw", X86smax, v8i16, v16i16,
                             SSE_INTALU_ITINS_P, 1>;

// Intrinsic forms
defm PSUBSB  : PDI_binop_all_int<0xE8, "psubsb", int_x86_sse2_psubs_b,
                                 int_x86_avx2_psubs_b, SSE_INTALU_ITINS_P, 0>;
defm PSUBSW  : PDI_binop_all_int<0xE9, "psubsw", int_x86_sse2_psubs_w,
                                 int_x86_avx2_psubs_w, SSE_INTALU_ITINS_P, 0>;
defm PADDSB  : PDI_binop_all_int<0xEC, "paddsb", int_x86_sse2_padds_b,
                                 int_x86_avx2_padds_b, SSE_INTALU_ITINS_P, 1>;
defm PADDSW  : PDI_binop_all_int<0xED, "paddsw", int_x86_sse2_padds_w,
                                 int_x86_avx2_padds_w, SSE_INTALU_ITINS_P, 1>;
defm PADDUSB : PDI_binop_all_int<0xDC, "paddusb", int_x86_sse2_paddus_b,
                                 int_x86_avx2_paddus_b, SSE_INTALU_ITINS_P, 1>;
defm PADDUSW : PDI_binop_all_int<0xDD, "paddusw", int_x86_sse2_paddus_w,
                                 int_x86_avx2_paddus_w, SSE_INTALU_ITINS_P, 1>;
defm PMADDWD : PDI_binop_all_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd,
                                 int_x86_avx2_pmadd_wd, SSE_PMADD, 1>;
defm PAVGB   : PDI_binop_all_int<0xE0, "pavgb", int_x86_sse2_pavg_b,
                                 int_x86_avx2_pavg_b, SSE_INTALU_ITINS_P, 1>;
defm PAVGW   : PDI_binop_all_int<0xE3, "pavgw", int_x86_sse2_pavg_w,
                                 int_x86_avx2_pavg_w, SSE_INTALU_ITINS_P, 1>;
defm PSADBW  : PDI_binop_all_int<0xF6, "psadbw", int_x86_sse2_psad_bw,
                                 int_x86_avx2_psad_bw, SSE_PMADD, 1>;

// PMULUDQ: source and destination element types differ (v4i32 -> v2i64,
// v8i32 -> v4i64), hence PDI_binop_rm2.
let Predicates = [HasAVX] in
defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128,
                              loadv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>,
                VEX_4V;
let Predicates = [HasAVX2] in
defm VPMULUDQY : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v4i64, v8i32,
                               VR256, loadv4i64, i256mem,
                               SSE_INTMUL_ITINS_P, 1, 0>,
                 VEX_4V, VEX_L;
let Constraints = "$src1 = $dst" in
defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128,
                             memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1>;

//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Logical Instructions
//===---------------------------------------------------------------------===//

// 128-bit VEX shifts (three-operand form, Is2Addr = 0).
let Predicates = [HasAVX] in {
defm VPSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
                            VR128, v8i16, v8i16, bc_v8i16,
                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
defm VPSLLD : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli,
                            VR128, v4i32, v4i32, bc_v4i32,
                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
defm VPSLLQ : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli,
                            VR128, v2i64, v2i64, bc_v2i64,
                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;

defm VPSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
                            VR128, v8i16, v8i16, bc_v8i16,
                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
defm VPSRLD : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli,
                            VR128, v4i32, v4i32, bc_v4i32,
                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
defm VPSRLQ : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli,
                            VR128, v2i64, v2i64, bc_v2i64,
                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;

defm VPSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
                            VR128, v8i16, v8i16, bc_v8i16,
                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
                            VR128, v4i32, v4i32, bc_v4i32,
                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;

let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
  // 128-bit logical shifts.
// Whole-register byte shifts by an immediate. The itinerary class
// IIC_SSE_INTSHDQ_P_RI is passed explicitly so the VEX forms agree with the
// legacy PSLLDQri / PSRLDQri definitions, which already use it.
def VPSLLDQri : PDIi8<0x73, MRM7r,
    (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
    "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
    [(set VR128:$dst,
      (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))],
    IIC_SSE_INTSHDQ_P_RI>,
    VEX_4V;
def VPSRLDQri : PDIi8<0x73, MRM3r,
    (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
    "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
    [(set VR128:$dst,
      (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))],
    IIC_SSE_INTSHDQ_P_RI>,
    VEX_4V;
  // PSRADQri doesn't exist in SSE[1-3].
}
} // Predicates = [HasAVX]

// 256-bit VEX shifts. The destination and first source are 256-bit, while
// the shift-count operand type (SrcVT / bc_frag) stays 128-bit.
let Predicates = [HasAVX2] in {
defm VPSLLWY : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
                             VR256, v16i16, v8i16, bc_v8i16,
                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
defm VPSLLDY : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli,
                             VR256, v8i32, v4i32, bc_v4i32,
                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
defm VPSLLQY : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli,
                             VR256, v4i64, v2i64, bc_v2i64,
                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;

defm VPSRLWY : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
                             VR256, v16i16, v8i16, bc_v8i16,
                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
defm VPSRLDY : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli,
                             VR256, v8i32, v4i32, bc_v4i32,
                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
defm VPSRLQY : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli,
                             VR256, v4i64, v2i64, bc_v2i64,
                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;

defm VPSRAWY : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
                             VR256, v16i16, v8i16, bc_v8i16,
                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
                             VR256, v8i32, v4i32, bc_v4i32,
                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;

let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
  // 256-bit logical shifts.
  // Same itinerary class as the 128-bit and legacy byte shifts.
  def VPSLLDQYri : PDIi8<0x73, MRM7r,
      (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2),
      "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set VR256:$dst,
        (int_x86_avx2_psll_dq_bs VR256:$src1, imm:$src2))],
      IIC_SSE_INTSHDQ_P_RI>,
      VEX_4V, VEX_L;
  def VPSRLDQYri : PDIi8<0x73, MRM3r,
      (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2),
      "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set VR256:$dst,
        (int_x86_avx2_psrl_dq_bs VR256:$src1, imm:$src2))],
      IIC_SSE_INTSHDQ_P_RI>,
      VEX_4V, VEX_L;
  // PSRADQYri doesn't exist in SSE[1-3].
}
} // Predicates = [HasAVX2]

// Legacy SSE2 shifts: two-operand form (Is2Addr left at its default), with
// the destination tied to the first source.
let Constraints = "$src1 = $dst" in {
defm PSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli,
                           VR128, v8i16, v8i16, bc_v8i16,
                           SSE_INTSHIFT_ITINS_P>;
defm PSLLD : PDI_binop_rmi<0xF2, 0x72, MRM6r, "pslld", X86vshl, X86vshli,
                           VR128, v4i32, v4i32, bc_v4i32,
                           SSE_INTSHIFT_ITINS_P>;
defm PSLLQ : PDI_binop_rmi<0xF3, 0x73, MRM6r, "psllq", X86vshl, X86vshli,
                           VR128, v2i64, v2i64, bc_v2i64,
                           SSE_INTSHIFT_ITINS_P>;

defm PSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "psrlw", X86vsrl, X86vsrli,
                           VR128, v8i16, v8i16, bc_v8i16,
                           SSE_INTSHIFT_ITINS_P>;
defm PSRLD : PDI_binop_rmi<0xD2, 0x72, MRM2r, "psrld", X86vsrl, X86vsrli,
                           VR128, v4i32, v4i32, bc_v4i32,
                           SSE_INTSHIFT_ITINS_P>;
defm PSRLQ : PDI_binop_rmi<0xD3, 0x73, MRM2r, "psrlq", X86vsrl, X86vsrli,
                           VR128, v2i64, v2i64, bc_v2i64,
                           SSE_INTSHIFT_ITINS_P>;

defm PSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "psraw", X86vsra, X86vsrai,
                           VR128, v8i16, v8i16, bc_v8i16,
                           SSE_INTSHIFT_ITINS_P>;
defm PSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai,
                           VR128, v4i32, v4i32, bc_v4i32,
                           SSE_INTSHIFT_ITINS_P>;

let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
  // 128-bit logical shifts.
  def PSLLDQri : PDIi8<0x73, MRM7r,
      (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
      "pslldq\t{$src2, $dst|$dst, $src2}",
      [(set VR128:$dst,
        (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))],
      IIC_SSE_INTSHDQ_P_RI>;
  def PSRLDQri : PDIi8<0x73, MRM3r,
      (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
      "psrldq\t{$src2, $dst|$dst, $src2}",
      [(set VR128:$dst,
        (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))],
      IIC_SSE_INTSHDQ_P_RI>;
  // PSRADQri doesn't exist in SSE[1-3].
}
} // Constraints = "$src1 = $dst"

// The non-_bs shift intrinsics and X86fsrl are lowered onto the byte-shift
// instructions, with the shift amount passed through BYTE_imm.
let Predicates = [HasAVX] in {
  def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
            (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
  def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
            (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
  def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
            (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;

  // Shift up / down and insert zeros.
4459f4a2713aSLionel Sambuc def : Pat<(v2i64 (X86vshldq VR128:$src, (i8 imm:$amt))), 4460f4a2713aSLionel Sambuc (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt))>; 4461f4a2713aSLionel Sambuc def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))), 4462f4a2713aSLionel Sambuc (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt))>; 4463f4a2713aSLionel Sambuc} 4464f4a2713aSLionel Sambuc 4465f4a2713aSLionel Sambuclet Predicates = [HasAVX2] in { 4466f4a2713aSLionel Sambuc def : Pat<(int_x86_avx2_psll_dq VR256:$src1, imm:$src2), 4467f4a2713aSLionel Sambuc (VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2))>; 4468f4a2713aSLionel Sambuc def : Pat<(int_x86_avx2_psrl_dq VR256:$src1, imm:$src2), 4469f4a2713aSLionel Sambuc (VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2))>; 4470f4a2713aSLionel Sambuc} 4471f4a2713aSLionel Sambuc 4472f4a2713aSLionel Sambuclet Predicates = [UseSSE2] in { 4473f4a2713aSLionel Sambuc def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2), 4474f4a2713aSLionel Sambuc (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>; 4475f4a2713aSLionel Sambuc def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2), 4476f4a2713aSLionel Sambuc (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>; 4477f4a2713aSLionel Sambuc def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)), 4478f4a2713aSLionel Sambuc (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>; 4479f4a2713aSLionel Sambuc 4480f4a2713aSLionel Sambuc // Shift up / down and insert zero's. 
4481f4a2713aSLionel Sambuc def : Pat<(v2i64 (X86vshldq VR128:$src, (i8 imm:$amt))), 4482f4a2713aSLionel Sambuc (PSLLDQri VR128:$src, (BYTE_imm imm:$amt))>; 4483f4a2713aSLionel Sambuc def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))), 4484f4a2713aSLionel Sambuc (PSRLDQri VR128:$src, (BYTE_imm imm:$amt))>; 4485f4a2713aSLionel Sambuc} 4486f4a2713aSLionel Sambuc 4487f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===// 4488f4a2713aSLionel Sambuc// SSE2 - Packed Integer Comparison Instructions 4489f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===// 4490f4a2713aSLionel Sambuc 4491f4a2713aSLionel Sambucdefm PCMPEQB : PDI_binop_all<0x74, "pcmpeqb", X86pcmpeq, v16i8, v32i8, 4492f4a2713aSLionel Sambuc SSE_INTALU_ITINS_P, 1>; 4493f4a2713aSLionel Sambucdefm PCMPEQW : PDI_binop_all<0x75, "pcmpeqw", X86pcmpeq, v8i16, v16i16, 4494f4a2713aSLionel Sambuc SSE_INTALU_ITINS_P, 1>; 4495f4a2713aSLionel Sambucdefm PCMPEQD : PDI_binop_all<0x76, "pcmpeqd", X86pcmpeq, v4i32, v8i32, 4496f4a2713aSLionel Sambuc SSE_INTALU_ITINS_P, 1>; 4497f4a2713aSLionel Sambucdefm PCMPGTB : PDI_binop_all<0x64, "pcmpgtb", X86pcmpgt, v16i8, v32i8, 4498f4a2713aSLionel Sambuc SSE_INTALU_ITINS_P, 0>; 4499f4a2713aSLionel Sambucdefm PCMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16, 4500f4a2713aSLionel Sambuc SSE_INTALU_ITINS_P, 0>; 4501f4a2713aSLionel Sambucdefm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32, 4502f4a2713aSLionel Sambuc SSE_INTALU_ITINS_P, 0>; 4503f4a2713aSLionel Sambuc 4504f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===// 4505f4a2713aSLionel Sambuc// SSE2 - Packed Integer Shuffle Instructions 4506f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===// 4507f4a2713aSLionel Sambuc 4508f4a2713aSLionel Sambuclet ExeDomain = SSEPackedInt in { 4509f4a2713aSLionel 
Sambucmulticlass sse2_pshuffle<string OpcodeStr, ValueType vt128, ValueType vt256, 4510f4a2713aSLionel Sambuc SDNode OpNode> { 4511f4a2713aSLionel Sambuclet Predicates = [HasAVX] in { 4512f4a2713aSLionel Sambuc def V#NAME#ri : Ii8<0x70, MRMSrcReg, (outs VR128:$dst), 4513f4a2713aSLionel Sambuc (ins VR128:$src1, i8imm:$src2), 4514f4a2713aSLionel Sambuc !strconcat("v", OpcodeStr, 4515f4a2713aSLionel Sambuc "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4516f4a2713aSLionel Sambuc [(set VR128:$dst, 4517f4a2713aSLionel Sambuc (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))], 4518f4a2713aSLionel Sambuc IIC_SSE_PSHUF_RI>, VEX, Sched<[WriteShuffle]>; 4519f4a2713aSLionel Sambuc def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), 4520f4a2713aSLionel Sambuc (ins i128mem:$src1, i8imm:$src2), 4521f4a2713aSLionel Sambuc !strconcat("v", OpcodeStr, 4522f4a2713aSLionel Sambuc "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4523f4a2713aSLionel Sambuc [(set VR128:$dst, 4524f4a2713aSLionel Sambuc (vt128 (OpNode (bitconvert (loadv2i64 addr:$src1)), 4525f4a2713aSLionel Sambuc (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, VEX, 4526f4a2713aSLionel Sambuc Sched<[WriteShuffleLd]>; 4527f4a2713aSLionel Sambuc} 4528f4a2713aSLionel Sambuc 4529f4a2713aSLionel Sambuclet Predicates = [HasAVX2] in { 4530f4a2713aSLionel Sambuc def V#NAME#Yri : Ii8<0x70, MRMSrcReg, (outs VR256:$dst), 4531f4a2713aSLionel Sambuc (ins VR256:$src1, i8imm:$src2), 4532f4a2713aSLionel Sambuc !strconcat("v", OpcodeStr, 4533f4a2713aSLionel Sambuc "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4534f4a2713aSLionel Sambuc [(set VR256:$dst, 4535f4a2713aSLionel Sambuc (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))], 4536f4a2713aSLionel Sambuc IIC_SSE_PSHUF_RI>, VEX, VEX_L, Sched<[WriteShuffle]>; 4537f4a2713aSLionel Sambuc def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst), 4538f4a2713aSLionel Sambuc (ins i256mem:$src1, i8imm:$src2), 4539f4a2713aSLionel Sambuc !strconcat("v", OpcodeStr, 4540f4a2713aSLionel Sambuc "\t{$src2, $src1, 
$dst|$dst, $src1, $src2}"), 4541f4a2713aSLionel Sambuc [(set VR256:$dst, 4542f4a2713aSLionel Sambuc (vt256 (OpNode (bitconvert (loadv4i64 addr:$src1)), 4543f4a2713aSLionel Sambuc (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, VEX, VEX_L, 4544f4a2713aSLionel Sambuc Sched<[WriteShuffleLd]>; 4545f4a2713aSLionel Sambuc} 4546f4a2713aSLionel Sambuc 4547f4a2713aSLionel Sambuclet Predicates = [UseSSE2] in { 4548f4a2713aSLionel Sambuc def ri : Ii8<0x70, MRMSrcReg, 4549f4a2713aSLionel Sambuc (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), 4550f4a2713aSLionel Sambuc !strconcat(OpcodeStr, 4551f4a2713aSLionel Sambuc "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4552f4a2713aSLionel Sambuc [(set VR128:$dst, 4553f4a2713aSLionel Sambuc (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))], 4554f4a2713aSLionel Sambuc IIC_SSE_PSHUF_RI>, Sched<[WriteShuffle]>; 4555f4a2713aSLionel Sambuc def mi : Ii8<0x70, MRMSrcMem, 4556f4a2713aSLionel Sambuc (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), 4557f4a2713aSLionel Sambuc !strconcat(OpcodeStr, 4558f4a2713aSLionel Sambuc "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4559f4a2713aSLionel Sambuc [(set VR128:$dst, 4560f4a2713aSLionel Sambuc (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)), 4561f4a2713aSLionel Sambuc (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, 4562*0a6a1f1dSLionel Sambuc Sched<[WriteShuffleLd, ReadAfterLd]>; 4563f4a2713aSLionel Sambuc} 4564f4a2713aSLionel Sambuc} 4565f4a2713aSLionel Sambuc} // ExeDomain = SSEPackedInt 4566f4a2713aSLionel Sambuc 4567*0a6a1f1dSLionel Sambucdefm PSHUFD : sse2_pshuffle<"pshufd", v4i32, v8i32, X86PShufd>, PD; 4568f4a2713aSLionel Sambucdefm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, v16i16, X86PShufhw>, XS; 4569f4a2713aSLionel Sambucdefm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw>, XD; 4570f4a2713aSLionel Sambuc 4571f4a2713aSLionel Sambuclet Predicates = [HasAVX] in { 4572f4a2713aSLionel Sambuc def : Pat<(v4f32 (X86PShufd (loadv4f32 addr:$src1), (i8 imm:$imm))), 4573f4a2713aSLionel Sambuc 
(VPSHUFDmi addr:$src1, imm:$imm)>; 4574f4a2713aSLionel Sambuc def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))), 4575f4a2713aSLionel Sambuc (VPSHUFDri VR128:$src1, imm:$imm)>; 4576f4a2713aSLionel Sambuc} 4577f4a2713aSLionel Sambuc 4578f4a2713aSLionel Sambuclet Predicates = [UseSSE2] in { 4579f4a2713aSLionel Sambuc def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))), 4580f4a2713aSLionel Sambuc (PSHUFDmi addr:$src1, imm:$imm)>; 4581f4a2713aSLionel Sambuc def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))), 4582f4a2713aSLionel Sambuc (PSHUFDri VR128:$src1, imm:$imm)>; 4583f4a2713aSLionel Sambuc} 4584f4a2713aSLionel Sambuc 4585f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===// 4586*0a6a1f1dSLionel Sambuc// Packed Integer Pack Instructions (SSE & AVX) 4587*0a6a1f1dSLionel Sambuc//===---------------------------------------------------------------------===// 4588*0a6a1f1dSLionel Sambuc 4589*0a6a1f1dSLionel Sambuclet ExeDomain = SSEPackedInt in { 4590*0a6a1f1dSLionel Sambucmulticlass sse2_pack<bits<8> opc, string OpcodeStr, ValueType OutVT, 4591*0a6a1f1dSLionel Sambuc ValueType ArgVT, SDNode OpNode, PatFrag bc_frag, 4592*0a6a1f1dSLionel Sambuc bit Is2Addr = 1> { 4593*0a6a1f1dSLionel Sambuc def rr : PDI<opc, MRMSrcReg, 4594*0a6a1f1dSLionel Sambuc (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), 4595*0a6a1f1dSLionel Sambuc !if(Is2Addr, 4596*0a6a1f1dSLionel Sambuc !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 4597*0a6a1f1dSLionel Sambuc !strconcat(OpcodeStr, 4598*0a6a1f1dSLionel Sambuc "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4599*0a6a1f1dSLionel Sambuc [(set VR128:$dst, 4600*0a6a1f1dSLionel Sambuc (OutVT (OpNode (ArgVT VR128:$src1), VR128:$src2)))]>, 4601*0a6a1f1dSLionel Sambuc Sched<[WriteShuffle]>; 4602*0a6a1f1dSLionel Sambuc def rm : PDI<opc, MRMSrcMem, 4603*0a6a1f1dSLionel Sambuc (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), 4604*0a6a1f1dSLionel Sambuc !if(Is2Addr, 
4605*0a6a1f1dSLionel Sambuc !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 4606*0a6a1f1dSLionel Sambuc !strconcat(OpcodeStr, 4607*0a6a1f1dSLionel Sambuc "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4608*0a6a1f1dSLionel Sambuc [(set VR128:$dst, 4609*0a6a1f1dSLionel Sambuc (OutVT (OpNode VR128:$src1, 4610*0a6a1f1dSLionel Sambuc (bc_frag (memopv2i64 addr:$src2)))))]>, 4611*0a6a1f1dSLionel Sambuc Sched<[WriteShuffleLd, ReadAfterLd]>; 4612*0a6a1f1dSLionel Sambuc} 4613*0a6a1f1dSLionel Sambuc 4614*0a6a1f1dSLionel Sambucmulticlass sse2_pack_y<bits<8> opc, string OpcodeStr, ValueType OutVT, 4615*0a6a1f1dSLionel Sambuc ValueType ArgVT, SDNode OpNode, PatFrag bc_frag> { 4616*0a6a1f1dSLionel Sambuc def Yrr : PDI<opc, MRMSrcReg, 4617*0a6a1f1dSLionel Sambuc (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), 4618*0a6a1f1dSLionel Sambuc !strconcat(OpcodeStr, 4619*0a6a1f1dSLionel Sambuc "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4620*0a6a1f1dSLionel Sambuc [(set VR256:$dst, 4621*0a6a1f1dSLionel Sambuc (OutVT (OpNode (ArgVT VR256:$src1), VR256:$src2)))]>, 4622*0a6a1f1dSLionel Sambuc Sched<[WriteShuffle]>; 4623*0a6a1f1dSLionel Sambuc def Yrm : PDI<opc, MRMSrcMem, 4624*0a6a1f1dSLionel Sambuc (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), 4625*0a6a1f1dSLionel Sambuc !strconcat(OpcodeStr, 4626*0a6a1f1dSLionel Sambuc "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4627*0a6a1f1dSLionel Sambuc [(set VR256:$dst, 4628*0a6a1f1dSLionel Sambuc (OutVT (OpNode VR256:$src1, 4629*0a6a1f1dSLionel Sambuc (bc_frag (memopv4i64 addr:$src2)))))]>, 4630*0a6a1f1dSLionel Sambuc Sched<[WriteShuffleLd, ReadAfterLd]>; 4631*0a6a1f1dSLionel Sambuc} 4632*0a6a1f1dSLionel Sambuc 4633*0a6a1f1dSLionel Sambucmulticlass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT, 4634*0a6a1f1dSLionel Sambuc ValueType ArgVT, SDNode OpNode, PatFrag bc_frag, 4635*0a6a1f1dSLionel Sambuc bit Is2Addr = 1> { 4636*0a6a1f1dSLionel Sambuc def rr : SS48I<opc, MRMSrcReg, 4637*0a6a1f1dSLionel Sambuc (outs VR128:$dst), 
(ins VR128:$src1, VR128:$src2), 4638*0a6a1f1dSLionel Sambuc !if(Is2Addr, 4639*0a6a1f1dSLionel Sambuc !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 4640*0a6a1f1dSLionel Sambuc !strconcat(OpcodeStr, 4641*0a6a1f1dSLionel Sambuc "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4642*0a6a1f1dSLionel Sambuc [(set VR128:$dst, 4643*0a6a1f1dSLionel Sambuc (OutVT (OpNode (ArgVT VR128:$src1), VR128:$src2)))]>, 4644*0a6a1f1dSLionel Sambuc Sched<[WriteShuffle]>; 4645*0a6a1f1dSLionel Sambuc def rm : SS48I<opc, MRMSrcMem, 4646*0a6a1f1dSLionel Sambuc (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), 4647*0a6a1f1dSLionel Sambuc !if(Is2Addr, 4648*0a6a1f1dSLionel Sambuc !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 4649*0a6a1f1dSLionel Sambuc !strconcat(OpcodeStr, 4650*0a6a1f1dSLionel Sambuc "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4651*0a6a1f1dSLionel Sambuc [(set VR128:$dst, 4652*0a6a1f1dSLionel Sambuc (OutVT (OpNode VR128:$src1, 4653*0a6a1f1dSLionel Sambuc (bc_frag (memopv2i64 addr:$src2)))))]>, 4654*0a6a1f1dSLionel Sambuc Sched<[WriteShuffleLd, ReadAfterLd]>; 4655*0a6a1f1dSLionel Sambuc} 4656*0a6a1f1dSLionel Sambuc 4657*0a6a1f1dSLionel Sambucmulticlass sse4_pack_y<bits<8> opc, string OpcodeStr, ValueType OutVT, 4658*0a6a1f1dSLionel Sambuc ValueType ArgVT, SDNode OpNode, PatFrag bc_frag> { 4659*0a6a1f1dSLionel Sambuc def Yrr : SS48I<opc, MRMSrcReg, 4660*0a6a1f1dSLionel Sambuc (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), 4661*0a6a1f1dSLionel Sambuc !strconcat(OpcodeStr, 4662*0a6a1f1dSLionel Sambuc "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4663*0a6a1f1dSLionel Sambuc [(set VR256:$dst, 4664*0a6a1f1dSLionel Sambuc (OutVT (OpNode (ArgVT VR256:$src1), VR256:$src2)))]>, 4665*0a6a1f1dSLionel Sambuc Sched<[WriteShuffle]>; 4666*0a6a1f1dSLionel Sambuc def Yrm : SS48I<opc, MRMSrcMem, 4667*0a6a1f1dSLionel Sambuc (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), 4668*0a6a1f1dSLionel Sambuc !strconcat(OpcodeStr, 4669*0a6a1f1dSLionel Sambuc "\t{$src2, $src1, 
$dst|$dst, $src1, $src2}"), 4670*0a6a1f1dSLionel Sambuc [(set VR256:$dst, 4671*0a6a1f1dSLionel Sambuc (OutVT (OpNode VR256:$src1, 4672*0a6a1f1dSLionel Sambuc (bc_frag (memopv4i64 addr:$src2)))))]>, 4673*0a6a1f1dSLionel Sambuc Sched<[WriteShuffleLd, ReadAfterLd]>; 4674*0a6a1f1dSLionel Sambuc} 4675*0a6a1f1dSLionel Sambuc 4676*0a6a1f1dSLionel Sambuclet Predicates = [HasAVX] in { 4677*0a6a1f1dSLionel Sambuc defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss, 4678*0a6a1f1dSLionel Sambuc bc_v8i16, 0>, VEX_4V; 4679*0a6a1f1dSLionel Sambuc defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss, 4680*0a6a1f1dSLionel Sambuc bc_v4i32, 0>, VEX_4V; 4681*0a6a1f1dSLionel Sambuc 4682*0a6a1f1dSLionel Sambuc defm VPACKUSWB : sse2_pack<0x67, "vpackuswb", v16i8, v8i16, X86Packus, 4683*0a6a1f1dSLionel Sambuc bc_v8i16, 0>, VEX_4V; 4684*0a6a1f1dSLionel Sambuc defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus, 4685*0a6a1f1dSLionel Sambuc bc_v4i32, 0>, VEX_4V; 4686*0a6a1f1dSLionel Sambuc} 4687*0a6a1f1dSLionel Sambuc 4688*0a6a1f1dSLionel Sambuclet Predicates = [HasAVX2] in { 4689*0a6a1f1dSLionel Sambuc defm VPACKSSWB : sse2_pack_y<0x63, "vpacksswb", v32i8, v16i16, X86Packss, 4690*0a6a1f1dSLionel Sambuc bc_v16i16>, VEX_4V, VEX_L; 4691*0a6a1f1dSLionel Sambuc defm VPACKSSDW : sse2_pack_y<0x6B, "vpackssdw", v16i16, v8i32, X86Packss, 4692*0a6a1f1dSLionel Sambuc bc_v8i32>, VEX_4V, VEX_L; 4693*0a6a1f1dSLionel Sambuc 4694*0a6a1f1dSLionel Sambuc defm VPACKUSWB : sse2_pack_y<0x67, "vpackuswb", v32i8, v16i16, X86Packus, 4695*0a6a1f1dSLionel Sambuc bc_v16i16>, VEX_4V, VEX_L; 4696*0a6a1f1dSLionel Sambuc defm VPACKUSDW : sse4_pack_y<0x2B, "vpackusdw", v16i16, v8i32, X86Packus, 4697*0a6a1f1dSLionel Sambuc bc_v8i32>, VEX_4V, VEX_L; 4698*0a6a1f1dSLionel Sambuc} 4699*0a6a1f1dSLionel Sambuc 4700*0a6a1f1dSLionel Sambuclet Constraints = "$src1 = $dst" in { 4701*0a6a1f1dSLionel Sambuc defm PACKSSWB : sse2_pack<0x63, "packsswb", v16i8, v8i16, X86Packss, 
4702*0a6a1f1dSLionel Sambuc bc_v8i16>; 4703*0a6a1f1dSLionel Sambuc defm PACKSSDW : sse2_pack<0x6B, "packssdw", v8i16, v4i32, X86Packss, 4704*0a6a1f1dSLionel Sambuc bc_v4i32>; 4705*0a6a1f1dSLionel Sambuc 4706*0a6a1f1dSLionel Sambuc defm PACKUSWB : sse2_pack<0x67, "packuswb", v16i8, v8i16, X86Packus, 4707*0a6a1f1dSLionel Sambuc bc_v8i16>; 4708*0a6a1f1dSLionel Sambuc 4709*0a6a1f1dSLionel Sambuc let Predicates = [HasSSE41] in 4710*0a6a1f1dSLionel Sambuc defm PACKUSDW : sse4_pack<0x2B, "packusdw", v8i16, v4i32, X86Packus, 4711*0a6a1f1dSLionel Sambuc bc_v4i32>; 4712*0a6a1f1dSLionel Sambuc} 4713*0a6a1f1dSLionel Sambuc} // ExeDomain = SSEPackedInt 4714*0a6a1f1dSLionel Sambuc 4715*0a6a1f1dSLionel Sambuc//===---------------------------------------------------------------------===// 4716f4a2713aSLionel Sambuc// SSE2 - Packed Integer Unpack Instructions 4717f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===// 4718f4a2713aSLionel Sambuc 4719f4a2713aSLionel Sambuclet ExeDomain = SSEPackedInt in { 4720f4a2713aSLionel Sambucmulticlass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt, 4721f4a2713aSLionel Sambuc SDNode OpNode, PatFrag bc_frag, bit Is2Addr = 1> { 4722f4a2713aSLionel Sambuc def rr : PDI<opc, MRMSrcReg, 4723f4a2713aSLionel Sambuc (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), 4724f4a2713aSLionel Sambuc !if(Is2Addr, 4725f4a2713aSLionel Sambuc !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), 4726f4a2713aSLionel Sambuc !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4727f4a2713aSLionel Sambuc [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))], 4728f4a2713aSLionel Sambuc IIC_SSE_UNPCK>, Sched<[WriteShuffle]>; 4729f4a2713aSLionel Sambuc def rm : PDI<opc, MRMSrcMem, 4730f4a2713aSLionel Sambuc (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), 4731f4a2713aSLionel Sambuc !if(Is2Addr, 4732f4a2713aSLionel Sambuc !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), 4733f4a2713aSLionel 
Sambuc !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4734f4a2713aSLionel Sambuc [(set VR128:$dst, (OpNode VR128:$src1, 4735f4a2713aSLionel Sambuc (bc_frag (memopv2i64 4736f4a2713aSLionel Sambuc addr:$src2))))], 4737f4a2713aSLionel Sambuc IIC_SSE_UNPCK>, 4738f4a2713aSLionel Sambuc Sched<[WriteShuffleLd, ReadAfterLd]>; 4739f4a2713aSLionel Sambuc} 4740f4a2713aSLionel Sambuc 4741f4a2713aSLionel Sambucmulticlass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt, 4742f4a2713aSLionel Sambuc SDNode OpNode, PatFrag bc_frag> { 4743f4a2713aSLionel Sambuc def Yrr : PDI<opc, MRMSrcReg, 4744f4a2713aSLionel Sambuc (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), 4745f4a2713aSLionel Sambuc !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4746f4a2713aSLionel Sambuc [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>, 4747f4a2713aSLionel Sambuc Sched<[WriteShuffle]>; 4748f4a2713aSLionel Sambuc def Yrm : PDI<opc, MRMSrcMem, 4749f4a2713aSLionel Sambuc (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), 4750f4a2713aSLionel Sambuc !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4751f4a2713aSLionel Sambuc [(set VR256:$dst, (OpNode VR256:$src1, 4752f4a2713aSLionel Sambuc (bc_frag (memopv4i64 addr:$src2))))]>, 4753f4a2713aSLionel Sambuc Sched<[WriteShuffleLd, ReadAfterLd]>; 4754f4a2713aSLionel Sambuc} 4755f4a2713aSLionel Sambuc 4756f4a2713aSLionel Sambuclet Predicates = [HasAVX] in { 4757f4a2713aSLionel Sambuc defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl, 4758f4a2713aSLionel Sambuc bc_v16i8, 0>, VEX_4V; 4759f4a2713aSLionel Sambuc defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl, 4760f4a2713aSLionel Sambuc bc_v8i16, 0>, VEX_4V; 4761f4a2713aSLionel Sambuc defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl, 4762f4a2713aSLionel Sambuc bc_v4i32, 0>, VEX_4V; 4763f4a2713aSLionel Sambuc defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl, 
4764f4a2713aSLionel Sambuc bc_v2i64, 0>, VEX_4V; 4765f4a2713aSLionel Sambuc 4766f4a2713aSLionel Sambuc defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh, 4767f4a2713aSLionel Sambuc bc_v16i8, 0>, VEX_4V; 4768f4a2713aSLionel Sambuc defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh, 4769f4a2713aSLionel Sambuc bc_v8i16, 0>, VEX_4V; 4770f4a2713aSLionel Sambuc defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh, 4771f4a2713aSLionel Sambuc bc_v4i32, 0>, VEX_4V; 4772f4a2713aSLionel Sambuc defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh, 4773f4a2713aSLionel Sambuc bc_v2i64, 0>, VEX_4V; 4774f4a2713aSLionel Sambuc} 4775f4a2713aSLionel Sambuc 4776f4a2713aSLionel Sambuclet Predicates = [HasAVX2] in { 4777f4a2713aSLionel Sambuc defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl, 4778f4a2713aSLionel Sambuc bc_v32i8>, VEX_4V, VEX_L; 4779f4a2713aSLionel Sambuc defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Unpckl, 4780f4a2713aSLionel Sambuc bc_v16i16>, VEX_4V, VEX_L; 4781f4a2713aSLionel Sambuc defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl, 4782f4a2713aSLionel Sambuc bc_v8i32>, VEX_4V, VEX_L; 4783f4a2713aSLionel Sambuc defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl, 4784f4a2713aSLionel Sambuc bc_v4i64>, VEX_4V, VEX_L; 4785f4a2713aSLionel Sambuc 4786f4a2713aSLionel Sambuc defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh, 4787f4a2713aSLionel Sambuc bc_v32i8>, VEX_4V, VEX_L; 4788f4a2713aSLionel Sambuc defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh, 4789f4a2713aSLionel Sambuc bc_v16i16>, VEX_4V, VEX_L; 4790f4a2713aSLionel Sambuc defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh, 4791f4a2713aSLionel Sambuc bc_v8i32>, VEX_4V, VEX_L; 4792f4a2713aSLionel Sambuc defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh, 4793f4a2713aSLionel Sambuc bc_v4i64>, VEX_4V, 
VEX_L; 4794f4a2713aSLionel Sambuc} 4795f4a2713aSLionel Sambuc 4796f4a2713aSLionel Sambuclet Constraints = "$src1 = $dst" in { 4797f4a2713aSLionel Sambuc defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl, 4798f4a2713aSLionel Sambuc bc_v16i8>; 4799f4a2713aSLionel Sambuc defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl, 4800f4a2713aSLionel Sambuc bc_v8i16>; 4801f4a2713aSLionel Sambuc defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl, 4802f4a2713aSLionel Sambuc bc_v4i32>; 4803f4a2713aSLionel Sambuc defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl, 4804f4a2713aSLionel Sambuc bc_v2i64>; 4805f4a2713aSLionel Sambuc 4806f4a2713aSLionel Sambuc defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh, 4807f4a2713aSLionel Sambuc bc_v16i8>; 4808f4a2713aSLionel Sambuc defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh, 4809f4a2713aSLionel Sambuc bc_v8i16>; 4810f4a2713aSLionel Sambuc defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh, 4811f4a2713aSLionel Sambuc bc_v4i32>; 4812f4a2713aSLionel Sambuc defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh, 4813f4a2713aSLionel Sambuc bc_v2i64>; 4814f4a2713aSLionel Sambuc} 4815f4a2713aSLionel Sambuc} // ExeDomain = SSEPackedInt 4816f4a2713aSLionel Sambuc 4817f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===// 4818f4a2713aSLionel Sambuc// SSE2 - Packed Integer Extract and Insert 4819f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===// 4820f4a2713aSLionel Sambuc 4821f4a2713aSLionel Sambuclet ExeDomain = SSEPackedInt in { 4822f4a2713aSLionel Sambucmulticlass sse2_pinsrw<bit Is2Addr = 1> { 4823f4a2713aSLionel Sambuc def rri : Ii8<0xC4, MRMSrcReg, 4824f4a2713aSLionel Sambuc (outs VR128:$dst), (ins VR128:$src1, 4825f4a2713aSLionel Sambuc GR32orGR64:$src2, i32i8imm:$src3), 4826f4a2713aSLionel Sambuc !if(Is2Addr, 4827f4a2713aSLionel 
Sambuc "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", 4828f4a2713aSLionel Sambuc "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 4829f4a2713aSLionel Sambuc [(set VR128:$dst, 4830f4a2713aSLionel Sambuc (X86pinsrw VR128:$src1, GR32orGR64:$src2, imm:$src3))], 4831f4a2713aSLionel Sambuc IIC_SSE_PINSRW>, Sched<[WriteShuffle]>; 4832f4a2713aSLionel Sambuc def rmi : Ii8<0xC4, MRMSrcMem, 4833f4a2713aSLionel Sambuc (outs VR128:$dst), (ins VR128:$src1, 4834f4a2713aSLionel Sambuc i16mem:$src2, i32i8imm:$src3), 4835f4a2713aSLionel Sambuc !if(Is2Addr, 4836f4a2713aSLionel Sambuc "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", 4837f4a2713aSLionel Sambuc "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 4838f4a2713aSLionel Sambuc [(set VR128:$dst, 4839f4a2713aSLionel Sambuc (X86pinsrw VR128:$src1, (extloadi16 addr:$src2), 4840f4a2713aSLionel Sambuc imm:$src3))], IIC_SSE_PINSRW>, 4841f4a2713aSLionel Sambuc Sched<[WriteShuffleLd, ReadAfterLd]>; 4842f4a2713aSLionel Sambuc} 4843f4a2713aSLionel Sambuc 4844f4a2713aSLionel Sambuc// Extract 4845f4a2713aSLionel Sambuclet Predicates = [HasAVX] in 4846f4a2713aSLionel Sambucdef VPEXTRWri : Ii8<0xC5, MRMSrcReg, 4847f4a2713aSLionel Sambuc (outs GR32orGR64:$dst), (ins VR128:$src1, i32i8imm:$src2), 4848f4a2713aSLionel Sambuc "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4849f4a2713aSLionel Sambuc [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1), 4850*0a6a1f1dSLionel Sambuc imm:$src2))]>, PD, VEX, 4851f4a2713aSLionel Sambuc Sched<[WriteShuffle]>; 4852f4a2713aSLionel Sambucdef PEXTRWri : PDIi8<0xC5, MRMSrcReg, 4853f4a2713aSLionel Sambuc (outs GR32orGR64:$dst), (ins VR128:$src1, i32i8imm:$src2), 4854f4a2713aSLionel Sambuc "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4855f4a2713aSLionel Sambuc [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1), 4856f4a2713aSLionel Sambuc imm:$src2))], IIC_SSE_PEXTRW>, 4857f4a2713aSLionel Sambuc Sched<[WriteShuffleLd, ReadAfterLd]>; 4858f4a2713aSLionel 
Sambuc 4859f4a2713aSLionel Sambuc// Insert 4860f4a2713aSLionel Sambuclet Predicates = [HasAVX] in 4861*0a6a1f1dSLionel Sambucdefm VPINSRW : sse2_pinsrw<0>, PD, VEX_4V; 4862f4a2713aSLionel Sambuc 4863f4a2713aSLionel Sambuclet Predicates = [UseSSE2], Constraints = "$src1 = $dst" in 4864*0a6a1f1dSLionel Sambucdefm PINSRW : sse2_pinsrw, PD; 4865f4a2713aSLionel Sambuc 4866f4a2713aSLionel Sambuc} // ExeDomain = SSEPackedInt 4867f4a2713aSLionel Sambuc 4868f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===// 4869f4a2713aSLionel Sambuc// SSE2 - Packed Mask Creation 4870f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===// 4871f4a2713aSLionel Sambuc 4872f4a2713aSLionel Sambuclet ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in { 4873f4a2713aSLionel Sambuc 4874f4a2713aSLionel Sambucdef VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), 4875f4a2713aSLionel Sambuc (ins VR128:$src), 4876f4a2713aSLionel Sambuc "pmovmskb\t{$src, $dst|$dst, $src}", 4877f4a2713aSLionel Sambuc [(set GR32orGR64:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))], 4878f4a2713aSLionel Sambuc IIC_SSE_MOVMSK>, VEX; 4879f4a2713aSLionel Sambuc 4880f4a2713aSLionel Sambuclet Predicates = [HasAVX2] in { 4881f4a2713aSLionel Sambucdef VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), 4882f4a2713aSLionel Sambuc (ins VR256:$src), 4883f4a2713aSLionel Sambuc "pmovmskb\t{$src, $dst|$dst, $src}", 4884f4a2713aSLionel Sambuc [(set GR32orGR64:$dst, (int_x86_avx2_pmovmskb VR256:$src))]>, 4885f4a2713aSLionel Sambuc VEX, VEX_L; 4886f4a2713aSLionel Sambuc} 4887f4a2713aSLionel Sambuc 4888f4a2713aSLionel Sambucdef PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src), 4889f4a2713aSLionel Sambuc "pmovmskb\t{$src, $dst|$dst, $src}", 4890f4a2713aSLionel Sambuc [(set GR32orGR64:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))], 4891f4a2713aSLionel Sambuc IIC_SSE_MOVMSK>; 4892f4a2713aSLionel Sambuc 
4893f4a2713aSLionel Sambuc} // ExeDomain = SSEPackedInt 4894f4a2713aSLionel Sambuc 4895f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===// 4896f4a2713aSLionel Sambuc// SSE2 - Conditional Store 4897f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===// 4898f4a2713aSLionel Sambuc 4899f4a2713aSLionel Sambuclet ExeDomain = SSEPackedInt, SchedRW = [WriteStore] in { 4900f4a2713aSLionel Sambuc 4901*0a6a1f1dSLionel Sambuclet Uses = [EDI], Predicates = [HasAVX,Not64BitMode] in 4902f4a2713aSLionel Sambucdef VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs), 4903f4a2713aSLionel Sambuc (ins VR128:$src, VR128:$mask), 4904f4a2713aSLionel Sambuc "maskmovdqu\t{$mask, $src|$src, $mask}", 4905f4a2713aSLionel Sambuc [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)], 4906f4a2713aSLionel Sambuc IIC_SSE_MASKMOV>, VEX; 4907f4a2713aSLionel Sambuclet Uses = [RDI], Predicates = [HasAVX,In64BitMode] in 4908f4a2713aSLionel Sambucdef VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs), 4909f4a2713aSLionel Sambuc (ins VR128:$src, VR128:$mask), 4910f4a2713aSLionel Sambuc "maskmovdqu\t{$mask, $src|$src, $mask}", 4911f4a2713aSLionel Sambuc [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)], 4912f4a2713aSLionel Sambuc IIC_SSE_MASKMOV>, VEX; 4913f4a2713aSLionel Sambuc 4914*0a6a1f1dSLionel Sambuclet Uses = [EDI], Predicates = [UseSSE2,Not64BitMode] in 4915f4a2713aSLionel Sambucdef MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), 4916f4a2713aSLionel Sambuc "maskmovdqu\t{$mask, $src|$src, $mask}", 4917f4a2713aSLionel Sambuc [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)], 4918f4a2713aSLionel Sambuc IIC_SSE_MASKMOV>; 4919f4a2713aSLionel Sambuclet Uses = [RDI], Predicates = [UseSSE2,In64BitMode] in 4920f4a2713aSLionel Sambucdef MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), 4921f4a2713aSLionel Sambuc "maskmovdqu\t{$mask, $src|$src, $mask}", 
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)],
IIC_SSE_MASKMOV>;

} // ExeDomain = SSEPackedInt

//===---------------------------------------------------------------------===//
// SSE2 - Move Doubleword
//===---------------------------------------------------------------------===//

//===---------------------------------------------------------------------===//
// Move Int Doubleword to Packed Double Int
//
// Opcode 0x6E, source is a GPR or a 32-bit memory operand, destination is an
// XMM register. The "DI2PDI" records build a v4i32 scalar_to_vector from a
// 32-bit source; the "64toPQI" records build a v2i64 from a GR64. The
// "64toSD" records are codegen-only GR64 -> FR64 bitconverts.
//

// VEX-encoded forms.
def VMOVDI2PDIrr
  : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
         "movd\t{$src, $dst|$dst, $src}",
         [(set VR128:$dst, (v4i32 (scalar_to_vector GR32:$src)))],
         IIC_SSE_MOVDQ>,
    VEX, Sched<[WriteMove]>;
def VMOVDI2PDIrm
  : VS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
         "movd\t{$src, $dst|$dst, $src}",
         [(set VR128:$dst,
               (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
         IIC_SSE_MOVDQ>,
    VEX, Sched<[WriteLoad]>;
def VMOV64toPQIrr
  : VRS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
          "movq\t{$src, $dst|$dst, $src}",
          [(set VR128:$dst, (v2i64 (scalar_to_vector GR64:$src)))],
          IIC_SSE_MOVDQ>,
    VEX, Sched<[WriteMove]>;
let isCodeGenOnly = 1 in
def VMOV64toSDrr
  : VRS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
          "movq\t{$src, $dst|$dst, $src}",
          [(set FR64:$dst, (bitconvert GR64:$src))],
          IIC_SSE_MOVDQ>,
    VEX, Sched<[WriteMove]>;

// Legacy SSE2 forms. The 64-bit variants use the "mov{d|q}" asm-variant
// string, i.e. the two assembler syntaxes get different mnemonics.
def MOVDI2PDIrr
  : S2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
        "movd\t{$src, $dst|$dst, $src}",
        [(set VR128:$dst, (v4i32 (scalar_to_vector GR32:$src)))],
        IIC_SSE_MOVDQ>,
    Sched<[WriteMove]>;
def MOVDI2PDIrm
  : S2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
        "movd\t{$src, $dst|$dst, $src}",
        [(set VR128:$dst,
              (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
        IIC_SSE_MOVDQ>,
    Sched<[WriteLoad]>;
def MOV64toPQIrr
  : RS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
         "mov{d|q}\t{$src, $dst|$dst, $src}",
         [(set VR128:$dst, (v2i64 (scalar_to_vector GR64:$src)))],
         IIC_SSE_MOVDQ>,
    Sched<[WriteMove]>;
let isCodeGenOnly = 1 in
def MOV64toSDrr
  : RS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
         "mov{d|q}\t{$src, $dst|$dst, $src}",
         [(set FR64:$dst, (bitconvert GR64:$src))],
         IIC_SSE_MOVDQ>,
    Sched<[WriteMove]>;

//===---------------------------------------------------------------------===//
// Move Int Doubleword to Single Scalar
//
// Codegen-only records: a 32-bit integer (register or loaded from memory) is
// bitconverted into an FR32 register.
//
let isCodeGenOnly = 1 in {
  def VMOVDI2SSrr
    : VS2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
           "movd\t{$src, $dst|$dst, $src}",
           [(set FR32:$dst, (bitconvert GR32:$src))],
           IIC_SSE_MOVDQ>,
      VEX, Sched<[WriteMove]>;

  def VMOVDI2SSrm
    : VS2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
           "movd\t{$src, $dst|$dst, $src}",
           [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
           IIC_SSE_MOVDQ>,
      VEX, Sched<[WriteLoad]>;

  def MOVDI2SSrr
    : S2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
          "movd\t{$src, $dst|$dst, $src}",
          [(set FR32:$dst, (bitconvert GR32:$src))],
          IIC_SSE_MOVDQ>,
      Sched<[WriteMove]>;

  def MOVDI2SSrm
    : S2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
          "movd\t{$src, $dst|$dst, $src}",
          [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
          IIC_SSE_MOVDQ>,
      Sched<[WriteLoad]>;
}

//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int first element to Doubleword Int
//
// Opcode 0x7E: element 0 of a v4i32 XMM value is extracted into a GR32
// register (rr) or stored to a 32-bit memory location (mr).
//
def VMOVPDI2DIrr
  : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
         "movd\t{$src, $dst|$dst, $src}",
         [(set GR32:$dst,
               (vector_extract (v4i32 VR128:$src), (iPTR 0)))],
         IIC_SSE_MOVD_ToGP>,
    VEX, Sched<[WriteMove]>;
def VMOVPDI2DImr
  : VS2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
         "movd\t{$src, $dst|$dst, $src}",
         [(store (i32 (vector_extract (v4i32 VR128:$src), (iPTR 0))),
                 addr:$dst)],
         IIC_SSE_MOVDQ>,
    VEX, Sched<[WriteStore]>;
def MOVPDI2DIrr
  : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
        "movd\t{$src, $dst|$dst, $src}",
        [(set GR32:$dst,
              (vector_extract (v4i32 VR128:$src), (iPTR 0)))],
        IIC_SSE_MOVD_ToGP>,
    Sched<[WriteMove]>;
def MOVPDI2DImr
  : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
        "movd\t{$src, $dst|$dst, $src}",
        [(store (i32 (vector_extract (v4i32 VR128:$src), (iPTR 0))),
                addr:$dst)],
        IIC_SSE_MOVDQ>,
    Sched<[WriteStore]>;

// Inserting a GPR into element 0 of an all-zero or undef 256-bit vector is
// selected as the 128-bit VEX move placed into the sub_xmm subregister.
def : Pat<(v8i32 (X86Vinsert (v8i32 immAllZerosV), GR32:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrr GR32:$src2), sub_xmm)>;

def : Pat<(v4i64 (X86Vinsert (bc_v4i64 (v8i32 immAllZerosV)), GR64:$src2,
                             (iPTR 0))),
          (SUBREG_TO_REG (i32 0), (VMOV64toPQIrr GR64:$src2), sub_xmm)>;

def : Pat<(v8i32 (X86Vinsert undef, GR32:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrr GR32:$src2), sub_xmm)>;

def : Pat<(v4i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
          (SUBREG_TO_REG (i32 0), (VMOV64toPQIrr GR64:$src2), sub_xmm)>;

//===---------------------------------------------------------------------===//
// Move Packed Quadword Int first element to Quadword Int
//
// Opcode 0x7E, MRMDestReg: element 0 of a v2i64 XMM value is extracted into
// a GR64 register.
let SchedRW = [WriteMove] in {
def VMOVPQIto64rr
  : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
          "movq\t{$src, $dst|$dst, $src}",
          [(set GR64:$dst,
                (vector_extract (v2i64 VR128:$src), (iPTR 0)))],
          IIC_SSE_MOVD_ToGP>,
    VEX;

def MOVPQIto64rr
  : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
         "mov{d|q}\t{$src, $dst|$dst, $src}",
         [(set GR64:$dst,
               (vector_extract (v2i64 VR128:$src), (iPTR 0)))],
         IIC_SSE_MOVD_ToGP>;
} //SchedRW

//===---------------------------------------------------------------------===//
// Bitcast FR64 <-> GR64
//
// Codegen-only records that reinterpret 64 bits between an FR64 register and
// either a GR64 register or an i64 memory location.
//
let isCodeGenOnly = 1 in {
  let Predicates = [UseAVX] in
  def VMOV64toSDrm
    : VS2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
            "movq\t{$src, $dst|$dst, $src}",
            [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>,
      VEX, Sched<[WriteLoad]>;
  def VMOVSDto64rr
    : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
            "movq\t{$src, $dst|$dst, $src}",
            [(set GR64:$dst, (bitconvert FR64:$src))],
            IIC_SSE_MOVDQ>,
      VEX, Sched<[WriteMove]>;
  def VMOVSDto64mr
    : VRS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
            "movq\t{$src, $dst|$dst, $src}",
            [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
            IIC_SSE_MOVDQ>,
      VEX, Sched<[WriteStore]>;

  def MOV64toSDrm
    : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
           "movq\t{$src, $dst|$dst, $src}",
           [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))],
           IIC_SSE_MOVDQ>,
      Sched<[WriteLoad]>;
  def MOVSDto64rr
    : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
           "mov{d|q}\t{$src, $dst|$dst, $src}",
           [(set GR64:$dst, (bitconvert FR64:$src))],
           IIC_SSE_MOVD_ToGP>,
      Sched<[WriteMove]>;
  def MOVSDto64mr
    : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
           "movq\t{$src, $dst|$dst, $src}",
           [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
           IIC_SSE_MOVDQ>,
      Sched<[WriteStore]>;
}

//===---------------------------------------------------------------------===//
// Move Scalar Single to Double Int
//
// Codegen-only records: an FR32 value is bitconverted into a GR32 register
// (rr) or stored as an i32 (mr).
//
let isCodeGenOnly = 1 in {
  def VMOVSS2DIrr
    : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
           "movd\t{$src, $dst|$dst, $src}",
           [(set GR32:$dst, (bitconvert FR32:$src))],
           IIC_SSE_MOVD_ToGP>,
      VEX, Sched<[WriteMove]>;
  def VMOVSS2DImr
    : VS2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
           "movd\t{$src, $dst|$dst, $src}",
           [(store (i32 (bitconvert FR32:$src)), addr:$dst)],
           IIC_SSE_MOVDQ>,
      VEX, Sched<[WriteStore]>;
  def MOVSS2DIrr
    : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
          "movd\t{$src, $dst|$dst, $src}",
          [(set GR32:$dst, (bitconvert FR32:$src))],
          IIC_SSE_MOVD_ToGP>,
      Sched<[WriteMove]>;
  def MOVSS2DImr
    : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
          "movd\t{$src, $dst|$dst, $src}",
          [(store (i32 (bitconvert FR32:$src)), addr:$dst)],
          IIC_SSE_MOVDQ>,
      Sched<[WriteStore]>;
}

//===---------------------------------------------------------------------===//
// Patterns and instructions to describe movd/movq to XMM register zero-extends
//
let isCodeGenOnly = 1, SchedRW = [WriteMove], AddedComplexity = 15 in {
def VMOVZQI2PQIrr
  : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
         "movq\t{$src, $dst|$dst, $src}", // X86-64 only
         [(set VR128:$dst,
               (v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))))],
         IIC_SSE_MOVDQ>,
    VEX, VEX_W;
def MOVZQI2PQIrr
  : RS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
         "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
         [(set VR128:$dst,
               (v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))))],
         IIC_SSE_MOVDQ>;
} // isCodeGenOnly, SchedRW, AddedComplexity

let Predicates = [UseAVX] in {
  let AddedComplexity = 15 in
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (VMOVDI2PDIrr GR32:$src)>;

  // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
  let AddedComplexity = 20 in {
    def : Pat<(v4i32 (X86vzmovl
                       (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
              (VMOVDI2PDIrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
              (VMOVDI2PDIrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
              (VMOVDI2PDIrm addr:$src)>;
  }

  // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
  def : Pat<(v8i32 (X86vzmovl
                     (insert_subvector undef,
                                       (v4i32 (scalar_to_vector GR32:$src)),
                                       (iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrr GR32:$src), sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl
                     (insert_subvector undef,
                                       (v2i64 (scalar_to_vector GR64:$src)),
                                       (iPTR 0)))),
            (SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrr GR64:$src), sub_xmm)>;
}

let Predicates = [UseSSE2] in {
  let AddedComplexity = 15 in
  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
            (MOVDI2PDIrr GR32:$src)>;

  let AddedComplexity = 20 in {
    def : Pat<(v4i32 (X86vzmovl
                       (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
              (MOVDI2PDIrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
              (MOVDI2PDIrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
              (MOVDI2PDIrm addr:$src)>;
  }
}

// These are the correct encodings of the instructions so that we know how to
// read correct assembly, even though we continue to emit the wrong ones for
// compatibility with Darwin's buggy assembler.
def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
                (MOV64toPQIrr VR128:$dst, GR64:$src), 0>;
def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
                (MOVPQIto64rr GR64:$dst, VR128:$src), 0>;
// Allow "vmovd" but print "vmovq" since we don't need compatibility for AVX.
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOV64toPQIrr VR128:$dst, GR64:$src), 0>;
def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
                (VMOVPQIto64rr GR64:$dst, VR128:$src), 0>;

//===---------------------------------------------------------------------===//
// SSE2 - Move Quadword
//===---------------------------------------------------------------------===//

//===---------------------------------------------------------------------===//
// Move Quadword Int to Packed Quadword Int
//
// Opcode 0x7E with the XS prefix: a 64-bit memory operand is loaded and
// turned into a v2i64 via scalar_to_vector.
//
let SchedRW = [WriteLoad] in {
def VMOVQI2PQIrm
  : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
      "vmovq\t{$src, $dst|$dst, $src}",
      [(set VR128:$dst,
            (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
    XS, VEX, Requires<[UseAVX]>;
def MOVQI2PQIrm
  : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
      "movq\t{$src, $dst|$dst, $src}",
      [(set VR128:$dst,
            (v2i64 (scalar_to_vector (loadi64 addr:$src))))],
      IIC_SSE_MOVDQ>,
    XS, Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix
} // SchedRW

//===---------------------------------------------------------------------===//
// Move Packed Quadword Int to Quadword Int
//
// Opcode 0xD6, MRMDestMem: element 0 of a v2i64 XMM value is stored to a
// 64-bit memory location.
//
let SchedRW = [WriteStore] in {
def VMOVPQI2QImr
  : VS2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
         "movq\t{$src, $dst|$dst, $src}",
         [(store (i64 (vector_extract (v2i64 VR128:$src), (iPTR 0))),
                 addr:$dst)],
         IIC_SSE_MOVDQ>,
    VEX;
def MOVPQI2QImr
  : S2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
        "movq\t{$src, $dst|$dst, $src}",
        [(store (i64 (vector_extract (v2i64 VR128:$src), (iPTR 0))),
                addr:$dst)],
        IIC_SSE_MOVDQ>;
} // SchedRW

// For disassembler only
// (register-to-register 0xD6 form; no selection pattern is attached).
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
    SchedRW = [WriteVecLogic] in {
def VMOVPQI2QIrr
  : VS2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
         "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>,
    VEX;
def MOVPQI2QIrr
  : S2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
        "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>;
}

//===---------------------------------------------------------------------===//
// Store / copy lower 64-bits of a XMM register.
//
let Predicates = [UseAVX] in
def : Pat<(int_x86_sse2_storel_dq addr:$dst, VR128:$src),
          (VMOVPQI2QImr addr:$dst, VR128:$src)>;
let Predicates = [UseSSE2] in
def : Pat<(int_x86_sse2_storel_dq addr:$dst, VR128:$src),
          (MOVPQI2QImr addr:$dst, VR128:$src)>;

// Codegen-only 64-bit loads matched through X86vzmovl of a scalar_to_vector.
let isCodeGenOnly = 1, AddedComplexity = 20 in {
def VMOVZQI2PQIrm
  : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
      "vmovq\t{$src, $dst|$dst, $src}",
      [(set VR128:$dst,
            (v2i64 (X86vzmovl
                     (v2i64 (scalar_to_vector (loadi64 addr:$src))))))],
      IIC_SSE_MOVDQ>,
    XS, VEX, Requires<[UseAVX]>, Sched<[WriteLoad]>;

def MOVZQI2PQIrm
  : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
      "movq\t{$src, $dst|$dst, $src}",
      [(set VR128:$dst,
            (v2i64 (X86vzmovl
                     (v2i64 (scalar_to_vector (loadi64 addr:$src))))))],
      IIC_SSE_MOVDQ>,
    XS, Requires<[UseSSE2]>, Sched<[WriteLoad]>;
}

// Extra selection patterns for the zero-extending 64-bit load records: a
// vzmovl of a bitcast v4f32 load and a plain X86vzload both select the
// [V]MOVZQI2PQIrm load.
let AddedComplexity = 20 in {
  let Predicates = [UseAVX] in {
    def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
              (VMOVZQI2PQIrm addr:$src)>;
    def : Pat<(v2i64 (X86vzload addr:$src)),
              (VMOVZQI2PQIrm addr:$src)>;
  }

  let Predicates = [UseSSE2] in {
    def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
              (MOVZQI2PQIrm addr:$src)>;
    def : Pat<(v2i64 (X86vzload addr:$src)),
              (MOVZQI2PQIrm addr:$src)>;
  }
}

// 256-bit results: the load is selected as a 128-bit VMOVAPS/VMOVUPS and
// placed into the sub_xmm subregister via SUBREG_TO_REG.
// NOTE(review): this presumes the v4i64 vzload node carries a 128-bit memory
// type (unlike the v2i64 patterns above, which load 64 bits) -- confirm
// against the lowering code that creates it.
let Predicates = [HasAVX] in {
  def : Pat<(v4i64 (alignedX86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVAPSrm addr:$src), sub_xmm)>;
  def : Pat<(v4i64 (X86vzload addr:$src)),
            (SUBREG_TO_REG (i32 0), (VMOVUPSrm addr:$src), sub_xmm)>;
}

//===---------------------------------------------------------------------===//
// Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in
// IA32 document. movq xmm1, xmm2 does clear the high bits.
//
// Register form: X86vzmovl of a v2i64 register, opcode 0x7E with XS prefix.
let SchedRW = [WriteVecLogic], AddedComplexity = 15 in {
def VMOVZPQILo2PQIrr
  : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
      "vmovq\t{$src, $dst|$dst, $src}",
      [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))],
      IIC_SSE_MOVQ_RR>,
    XS, VEX, Requires<[UseAVX]>;
def MOVZPQILo2PQIrr
  : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
      "movq\t{$src, $dst|$dst, $src}",
      [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))],
      IIC_SSE_MOVQ_RR>,
    XS, Requires<[UseSSE2]>;
} // SchedRW, AddedComplexity

// Memory form (codegen-only): X86vzmovl of a full v2i64 load.
let isCodeGenOnly = 1, SchedRW = [WriteVecLogicLd], AddedComplexity = 20 in {
def VMOVZPQILo2PQIrm
  : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
      "vmovq\t{$src, $dst|$dst, $src}",
      [(set VR128:$dst, (v2i64 (X86vzmovl (loadv2i64 addr:$src))))],
      IIC_SSE_MOVDQ>,
    XS, VEX, Requires<[UseAVX]>;
def MOVZPQILo2PQIrm
  : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
      "movq\t{$src, $dst|$dst, $src}",
      [(set VR128:$dst, (v2i64 (X86vzmovl (loadv2i64 addr:$src))))],
      IIC_SSE_MOVDQ>,
    XS, Requires<[UseSSE2]>;
} // isCodeGenOnly, SchedRW, AddedComplexity

// The v2f64 flavour of the same node reuses the integer register move.
let Predicates = [UseAVX], AddedComplexity = 20 in
def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
          (VMOVZPQILo2PQIrr VR128:$src)>;
let Predicates = [UseSSE2], AddedComplexity = 20 in
def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
          (MOVZPQILo2PQIrr VR128:$src)>;

//===---------------------------------------------------------------------===//
// SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//

// Emits an rr and an rm record for one replicate instruction.
//   op        - opcode byte
//   OpNode    - selection DAG node matched by both forms
//   OpcodeStr - mnemonic
//   vt        - result value type (applied to the rr pattern only)
//   RC        - register class of source and destination
//   mem_frag  - load fragment used by the rm pattern
//   x86memop  - memory operand kind of the rm form
multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
                              ValueType vt, RegisterClass RC, PatFrag mem_frag,
                              X86MemOperand x86memop> {
  def rr
    : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
           !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
           [(set RC:$dst, (vt (OpNode RC:$src)))],
           IIC_SSE_MOV_LH>,
      Sched<[WriteFShuffle]>;
  def rm
    : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
           !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
           [(set RC:$dst, (OpNode (mem_frag addr:$src)))],
           IIC_SSE_MOV_LH>,
      Sched<[WriteLoad]>;
}

let Predicates = [HasAVX] in {
  defm VMOVSHDUP  : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
                                       v4f32, VR128, loadv4f32, f128mem>,
                    VEX;
  defm VMOVSLDUP  : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
                                       v4f32, VR128, loadv4f32, f128mem>,
                    VEX;
  defm VMOVSHDUPY : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
                                       v8f32, VR256, loadv8f32, f256mem>,
                    VEX, VEX_L;
  defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
                                       v8f32, VR256, loadv8f32, f256mem>,
                    VEX, VEX_L;
}
defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup",
                                   v4f32, VR128, memopv4f32, f128mem>;
defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup",
                                   v4f32, VR128, memopv4f32, f128mem>;

// Integer-typed uses of the same nodes select the FP instructions.
let Predicates = [HasAVX] in {
  // 128-bit
  def : Pat<(v4i32 (X86Movshdup VR128:$src)),
            (VMOVSHDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (loadv2i64 addr:$src)))),
            (VMOVSHDUPrm addr:$src)>;
  def : Pat<(v4i32 (X86Movsldup VR128:$src)),
            (VMOVSLDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (loadv2i64 addr:$src)))),
            (VMOVSLDUPrm addr:$src)>;
  // 256-bit
  def : Pat<(v8i32 (X86Movshdup VR256:$src)),
            (VMOVSHDUPYrr VR256:$src)>;
  def : Pat<(v8i32 (X86Movshdup (bc_v8i32 (loadv4i64 addr:$src)))),
            (VMOVSHDUPYrm addr:$src)>;
  def : Pat<(v8i32 (X86Movsldup VR256:$src)),
            (VMOVSLDUPYrr VR256:$src)>;
  def : Pat<(v8i32 (X86Movsldup (bc_v8i32 (loadv4i64 addr:$src)))),
            (VMOVSLDUPYrm addr:$src)>;
}

let Predicates = [UseSSE3] in {
  def : Pat<(v4i32 (X86Movshdup VR128:$src)),
            (MOVSHDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))),
            (MOVSHDUPrm addr:$src)>;
  def : Pat<(v4i32 (X86Movsldup VR128:$src)),
            (MOVSLDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))),
            (MOVSLDUPrm addr:$src)>;
}

//===---------------------------------------------------------------------===//
// SSE3 - Replicate Double FP - MOVDDUP
//===---------------------------------------------------------------------===//

// 128-bit MOVDDUP (opcode 0x12). The rr form carries no selection pattern and
// is marked side-effect free; the rm form matches X86Movddup of a
// scalar_to_vector of a 64-bit FP load.
multiclass sse3_replicate_dfp<string OpcodeStr> {
  let hasSideEffects = 0 in
  def rr
    : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
           !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
           [], IIC_SSE_MOV_LH>,
      Sched<[WriteFShuffle]>;
  def rm
    : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
           !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
           [(set VR128:$dst,
                 (v2f64 (X86Movddup
                          (scalar_to_vector (loadf64 addr:$src)))))],
           IIC_SSE_MOV_LH>,
      Sched<[WriteLoad]>;
}

// FIXME: Merge with above class when there are patterns for the ymm version
// 256-bit MOVDDUP (opcode 0x12) on VR256. Unlike the 128-bit multiclass, the
// rr form has a selection pattern and neither form names an itinerary class.
multiclass sse3_replicate_dfp_y<string OpcodeStr> {
  def rr
    : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
           !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
           [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>,
      Sched<[WriteFShuffle]>;
  def rm
    : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
           !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
           [(set VR256:$dst,
                 (v4f64 (X86Movddup
                          (scalar_to_vector (loadf64 addr:$src)))))]>,
      Sched<[WriteLoad]>;
}

let Predicates = [HasAVX] in {
  defm VMOVDDUP  : sse3_replicate_dfp<"vmovddup">, VEX;
  defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX, VEX_L;
}

defm MOVDDUP : sse3_replicate_dfp<"movddup">;

// Additional load shapes that fold into the VEX memory forms. The enclosing
// let already supplies the HasAVX predicate for every pattern in the block.
let Predicates = [HasAVX] in {
  // 128-bit version
  def : Pat<(X86Movddup (loadv2f64 addr:$src)),
            (VMOVDDUPrm addr:$src)>;
  def : Pat<(X86Movddup (bc_v2f64 (loadv4f32 addr:$src))),
            (VMOVDDUPrm addr:$src)>;
  def : Pat<(X86Movddup (bc_v2f64 (loadv2i64 addr:$src))),
            (VMOVDDUPrm addr:$src)>;
  def : Pat<(X86Movddup (bc_v2f64
                          (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
            (VMOVDDUPrm addr:$src)>;

  // 256-bit version
  def : Pat<(X86Movddup (loadv4f64 addr:$src)),
            (VMOVDDUPYrm addr:$src)>;
  def : Pat<(X86Movddup (loadv4i64 addr:$src)),
            (VMOVDDUPYrm addr:$src)>;
  def : Pat<(X86Movddup (v4i64 (scalar_to_vector (loadi64 addr:$src)))),
            (VMOVDDUPYrm addr:$src)>;
  def : Pat<(X86Movddup (v4i64 VR256:$src)),
            (VMOVDDUPYrr VR256:$src)>;
}

// When optimizing for size, a 64-bit broadcast from memory into a 128-bit
// vector is selected as the MOVDDUP load.
let Predicates = [UseAVX, OptForSize] in {
  def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
            (VMOVDDUPrm addr:$src)>;
  def : Pat<(v2i64 (X86VBroadcast (loadi64 addr:$src))),
            (VMOVDDUPrm addr:$src)>;
}

// Non-VEX counterparts of the 128-bit load patterns (memop fragments).
let Predicates = [UseSSE3] in {
  def : Pat<(X86Movddup (memopv2f64 addr:$src)),
            (MOVDDUPrm addr:$src)>;
  def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
            (MOVDDUPrm addr:$src)>;
  def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
            (MOVDDUPrm addr:$src)>;
  def : Pat<(X86Movddup (bc_v2f64
                          (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
            (MOVDDUPrm addr:$src)>;
}

//===---------------------------------------------------------------------===//
// SSE3 - Move Unaligned Integer
//===---------------------------------------------------------------------===//

// LDDQU (opcode 0xF0): memory-only loads selected from the ldu_dq intrinsics.
let SchedRW = [WriteLoad] in {
let Predicates = [HasAVX] in {
  def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                      "vlddqu\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX;
  def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
                       "vlddqu\t{$src, $dst|$dst, $src}",
                       [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>,
                       VEX, VEX_L;
}
def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                   "lddqu\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))],
                   IIC_SSE_LDDQU>;
}

//===---------------------------------------------------------------------===//
// SSE3 - Arithmetic
//===---------------------------------------------------------------------===//

// Emits the rr and rm records of one ADDSUB instruction (opcode 0xD0).
//   Int       - intrinsic matched by both forms
//   OpcodeStr - mnemonic
//   RC        - register class of the destination and register sources
//   x86memop  - memory operand kind of the rm form's second source
//   itins     - itinerary pair (rr / rm) plus the scheduling write class
//   Is2Addr   - 1 selects the two-operand asm string (default),
//               0 the three-operand one
multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC,
                       X86MemOperand x86memop, OpndItins itins,
                       bit Is2Addr = 1> {
  def rr : I<0xD0, MRMSrcReg,
       (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr>,
       Sched<[itins.Sched]>;
  // The memory form uses the reg-mem itinerary (itins.rm), matching its
  // folded-load scheduling class below; it previously reused itins.rr.
  def rm : I<0xD0, MRMSrcMem,
       (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))], itins.rm>,
       Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

// VEX forms: three-operand asm string (Is2Addr = 0), 128- and 256-bit.
let Predicates = [HasAVX] in {
  let ExeDomain = SSEPackedSingle in {
    defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128,
                                 f128mem, SSE_ALU_F32P, 0>, XD, VEX_4V;
    defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256,
                                  f256mem, SSE_ALU_F32P, 0>, XD, VEX_4V, VEX_L;
  }
  let ExeDomain = SSEPackedDouble in {
    defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128,
                                 f128mem, SSE_ALU_F64P, 0>, PD, VEX_4V;
    defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256,
                                  f256mem, SSE_ALU_F64P, 0>, PD, VEX_4V, VEX_L;
  }
}
// Legacy forms: destination tied to the first source.
let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in {
  let ExeDomain = SSEPackedSingle in
  defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128,
                              f128mem, SSE_ALU_F32P>, XD;
  let ExeDomain = SSEPackedDouble in
  defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd", VR128,
                              f128mem, SSE_ALU_F64P>, PD;
}

// Patterns used to select 'addsub' instructions.
let Predicates = [HasAVX] in {
  def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs), (v4f32 VR128:$rhs))),
            (VADDSUBPSrr VR128:$lhs, VR128:$rhs)>;
  def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs), (v4f32 (memop addr:$rhs)))),
            (VADDSUBPSrm VR128:$lhs, f128mem:$rhs)>;
  def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (v2f64 VR128:$rhs))),
            (VADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
  def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (v2f64 (memop addr:$rhs)))),
            (VADDSUBPDrm VR128:$lhs, f128mem:$rhs)>;

  def : Pat<(v8f32 (X86Addsub (v8f32 VR256:$lhs), (v8f32 VR256:$rhs))),
            (VADDSUBPSYrr VR256:$lhs, VR256:$rhs)>;
  def : Pat<(v8f32 (X86Addsub (v8f32 VR256:$lhs), (v8f32 (memop addr:$rhs)))),
            (VADDSUBPSYrm VR256:$lhs, f256mem:$rhs)>;
  def : Pat<(v4f64 (X86Addsub (v4f64 VR256:$lhs), (v4f64 VR256:$rhs))),
            (VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>;
  def : Pat<(v4f64 (X86Addsub (v4f64 VR256:$lhs), (v4f64 (memop addr:$rhs)))),
            (VADDSUBPDYrm VR256:$lhs, f256mem:$rhs)>;
}

let Predicates = [UseSSE3] in {
  def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs), (v4f32 VR128:$rhs))),
            (ADDSUBPSrr VR128:$lhs, VR128:$rhs)>;
  def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs), (v4f32 (memop addr:$rhs)))),
5555*0a6a1f1dSLionel Sambuc (ADDSUBPSrm VR128:$lhs, f128mem:$rhs)>; 5556*0a6a1f1dSLionel Sambuc def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (v2f64 VR128:$rhs))), 5557*0a6a1f1dSLionel Sambuc (ADDSUBPDrr VR128:$lhs, VR128:$rhs)>; 5558*0a6a1f1dSLionel Sambuc def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (v2f64 (memop addr:$rhs)))), 5559*0a6a1f1dSLionel Sambuc (ADDSUBPDrm VR128:$lhs, f128mem:$rhs)>; 5560f4a2713aSLionel Sambuc} 5561f4a2713aSLionel Sambuc 5562f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===// 5563f4a2713aSLionel Sambuc// SSE3 Instructions 5564f4a2713aSLionel Sambuc//===---------------------------------------------------------------------===// 5565f4a2713aSLionel Sambuc 5566f4a2713aSLionel Sambuc// Horizontal ops 5567f4a2713aSLionel Sambucmulticlass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, 5568f4a2713aSLionel Sambuc X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> { 5569f4a2713aSLionel Sambuc def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), 5570f4a2713aSLionel Sambuc !if(Is2Addr, 5571f4a2713aSLionel Sambuc !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 5572f4a2713aSLionel Sambuc !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 5573f4a2713aSLionel Sambuc [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>, 5574f4a2713aSLionel Sambuc Sched<[WriteFAdd]>; 5575f4a2713aSLionel Sambuc 5576f4a2713aSLionel Sambuc def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), 5577f4a2713aSLionel Sambuc !if(Is2Addr, 5578f4a2713aSLionel Sambuc !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 5579f4a2713aSLionel Sambuc !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 5580f4a2713aSLionel Sambuc [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))], 5581f4a2713aSLionel Sambuc IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>; 5582f4a2713aSLionel Sambuc} 
/// S3_Int - rr/rm pair with the same shape as S3D_Int, but built on the S3I
/// instruction format. Used below for the packed-double horizontal ops.
///   o         - opcode byte.
///   OpcodeStr - assembly mnemonic.
///   vt        - value type produced by OpNode.
///   RC        - register class of destination and register sources.
///   x86memop  - memory operand type of the rm form.
///   OpNode    - SDNode matched by both forms.
///   Is2Addr   - 1 prints the two-operand asm string, 0 the three-operand one.
multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
                  X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> {
  // Register-register form.
  def rr : S3I<o, MRMSrcReg,
               (outs RC:$dst),
               (ins RC:$src1, RC:$src2),
               !if(Is2Addr,
                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
               [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))],
               IIC_SSE_HADDSUB_RR>,
           Sched<[WriteFAdd]>;

  // Register-memory form: second source is a folded load.
  def rm : S3I<o, MRMSrcMem,
               (outs RC:$dst),
               (ins RC:$src1, x86memop:$src2),
               !if(Is2Addr,
                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
               [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))],
               IIC_SSE_HADDSUB_RM>,
           Sched<[WriteFAddLd, ReadAfterLd]>;
}

// VEX-encoded horizontal add/sub: three-operand syntax, 128- and 256-bit.
let Predicates = [HasAVX] in {
  let ExeDomain = SSEPackedSingle in {
    defm VHADDPS  : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
                            X86fhadd, 0>, VEX_4V;
    defm VHSUBPS  : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
                            X86fhsub, 0>, VEX_4V;
    defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
                            X86fhadd, 0>, VEX_4V, VEX_L;
    defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
                            X86fhsub, 0>, VEX_4V, VEX_L;
  }
  let ExeDomain = SSEPackedDouble in {
    defm VHADDPD  : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
                            X86fhadd, 0>, VEX_4V;
    defm VHSUBPD  : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem,
                            X86fhsub, 0>, VEX_4V;
    defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem,
                            X86fhadd, 0>, VEX_4V, VEX_L;
    defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem,
                            X86fhsub, 0>, VEX_4V, VEX_L;
  }
}

// Legacy-encoded horizontal add/sub: destination tied to the first source.
let Constraints = "$src1 = $dst" in {
  let ExeDomain = SSEPackedSingle in {
    defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd>;
    defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub>;
  }
  let ExeDomain = SSEPackedDouble in {
    defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd>;
    defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub>;
  }
}

//===---------------------------------------------------------------------===//
// SSSE3 - Packed Absolute Instructions
//===---------------------------------------------------------------------===//


/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
                            Intrinsic IntId128> {
  // 128-bit register source, selected through the intrinsic IntId128.
  def rr128 : SS38I<opc, MRMSrcReg,
                    (outs VR128:$dst),
                    (ins VR128:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (IntId128 VR128:$src))],
                    IIC_SSE_PABS_RR>,
              Sched<[WriteVecALU]>;

  // 128-bit memory source: the load is matched as v2i64 and bitconverted to
  // whatever vector type the intrinsic expects.
  def rm128 : SS38I<opc, MRMSrcMem,
                    (outs VR128:$dst),
                    (ins i128mem:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst,
                      (IntId128 (bitconvert (memopv2i64 addr:$src))))],
                    IIC_SSE_PABS_RM>,
              Sched<[WriteVecALULd]>;
}

/// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr,
                              Intrinsic IntId256> {
  // 256-bit register source, selected through the intrinsic IntId256.
  // No itinerary class is attached to either 256-bit form.
  def rr256 : SS38I<opc, MRMSrcReg,
                    (outs VR256:$dst),
                    (ins VR256:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR256:$dst, (IntId256 VR256:$src))]>,
              Sched<[WriteVecALU]>;

  // 256-bit memory source: the load is matched as v4i64 and bitconverted.
  def rm256 : SS38I<opc, MRMSrcMem,
                    (outs VR256:$dst),
                    (ins i256mem:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR256:$dst,
                      (IntId256 (bitconvert (memopv4i64 addr:$src))))]>,
              Sched<[WriteVecALULd]>;
}

// Helper fragments to match sext vXi1 to vXiY.
// 128-bit fragments: a compare-greater-than of zero against the source (i8),
// or an arithmetic shift right by (element width - 1) (i16 / i32).
def v16i1sextv16i8 : PatLeaf<(v16i8 (X86pcmpgt (bc_v16i8 (v4i32 immAllZerosV)),
                                               VR128:$src))>;
def v8i1sextv8i16  : PatLeaf<(v8i16 (X86vsrai VR128:$src, (i8 15)))>;
def v4i1sextv4i32  : PatLeaf<(v4i32 (X86vsrai VR128:$src, (i8 31)))>;
// 256-bit counterparts of the fragments above.
def v32i1sextv32i8 : PatLeaf<(v32i8 (X86pcmpgt (bc_v32i8 (v8i32 immAllZerosV)),
                                               VR256:$src))>;
def v16i1sextv16i16: PatLeaf<(v16i16 (X86vsrai VR256:$src, (i8 15)))>;
def v8i1sextv8i32  : PatLeaf<(v8i32 (X86vsrai VR256:$src, (i8 31)))>;

// VEX-encoded 128-bit absolute value, plus patterns that recognise the
// xor(sext, add(src, sext)) expansion and select the instruction directly.
let Predicates = [HasAVX] in {
  defm VPABSB  : SS3I_unop_rm_int<0x1C, "vpabsb",
                                  int_x86_ssse3_pabs_b_128>, VEX;
  defm VPABSW  : SS3I_unop_rm_int<0x1D, "vpabsw",
                                  int_x86_ssse3_pabs_w_128>, VEX;
  defm VPABSD  : SS3I_unop_rm_int<0x1E, "vpabsd",
                                  int_x86_ssse3_pabs_d_128>, VEX;

  def : Pat<(xor
            (bc_v2i64 (v16i1sextv16i8)),
            (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))),
            (VPABSBrr128 VR128:$src)>;
  def : Pat<(xor
            (bc_v2i64 (v8i1sextv8i16)),
            (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))),
            (VPABSWrr128 VR128:$src)>;
  def : Pat<(xor
            (bc_v2i64 (v4i1sextv4i32)),
            (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))),
            (VPABSDrr128 VR128:$src)>;
}

// VEX-encoded 256-bit absolute value and the matching expansion patterns.
let Predicates = [HasAVX2] in {
  defm VPABSB  : SS3I_unop_rm_int_y<0x1C, "vpabsb",
                                    int_x86_avx2_pabs_b>, VEX, VEX_L;
  defm VPABSW  : SS3I_unop_rm_int_y<0x1D, "vpabsw",
                                    int_x86_avx2_pabs_w>, VEX, VEX_L;
  defm VPABSD  : SS3I_unop_rm_int_y<0x1E, "vpabsd",
                                    int_x86_avx2_pabs_d>, VEX, VEX_L;

  def : Pat<(xor
            (bc_v4i64 (v32i1sextv32i8)),
            (bc_v4i64 (add (v32i8 VR256:$src), (v32i1sextv32i8)))),
            (VPABSBrr256 VR256:$src)>;
  def : Pat<(xor
            (bc_v4i64 (v16i1sextv16i16)),
            (bc_v4i64 (add (v16i16 VR256:$src), (v16i1sextv16i16)))),
            (VPABSWrr256 VR256:$src)>;
  def : Pat<(xor
            (bc_v4i64 (v8i1sextv8i32)),
            (bc_v4i64 (add (v8i32 VR256:$src), (v8i1sextv8i32)))),
            (VPABSDrr256 VR256:$src)>;
}

// Legacy-encoded 128-bit absolute value.
defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb",
                              int_x86_ssse3_pabs_b_128>;
defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw",
                              int_x86_ssse3_pabs_w_128>;
defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd",
                              int_x86_ssse3_pabs_d_128>;

// These patterns match exactly the same DAGs as the HasAVX patterns above, so
// they are gated on UseSSSE3 (as the legacy PALIGNR patterns in this file are)
// rather than HasSSSE3, to keep the two sets from overlapping.
// NOTE(review): presumably UseSSSE3 excludes AVX-capable subtargets - confirm
// against the predicate definition.
let Predicates = [UseSSSE3] in {
  def : Pat<(xor
            (bc_v2i64 (v16i1sextv16i8)),
            (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))),
            (PABSBrr128 VR128:$src)>;
  def : Pat<(xor
            (bc_v2i64 (v8i1sextv8i16)),
            (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))),
            (PABSWrr128 VR128:$src)>;
  def : Pat<(xor
            (bc_v2i64 (v4i1sextv4i32)),
            (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))),
            (PABSDrr128 VR128:$src)>;
}

//===---------------------------------------------------------------------===//
// SSSE3 - Packed Binary Operator Instructions
//===---------------------------------------------------------------------===//

// Itinerary bundles for the SSSE3 binary ops. The enclosing 'let Sched'
// overrides the scheduling write carried by each OpndItins record.
let Sched = WriteVecALU in {
def SSE_PHADDSUBD : OpndItins<
  IIC_SSE_PHADDSUBD_RR, IIC_SSE_PHADDSUBD_RM
>;
def SSE_PHADDSUBSW : OpndItins<
  IIC_SSE_PHADDSUBSW_RR, IIC_SSE_PHADDSUBSW_RM
>;
def SSE_PHADDSUBW : OpndItins<
  IIC_SSE_PHADDSUBW_RR, IIC_SSE_PHADDSUBW_RM
>;
}
let Sched = WriteShuffle in
def SSE_PSHUFB : OpndItins<
  IIC_SSE_PSHUFB_RR, IIC_SSE_PSHUFB_RM
>;
let Sched = WriteVecALU in
def SSE_PSIGN : OpndItins<
  IIC_SSE_PSIGN_RR, IIC_SSE_PSIGN_RM
>;
// The same itinerary class is used for both the rr and rm slots here.
let Sched = WriteVecIMul in
def SSE_PMULHRSW : OpndItins<
  IIC_SSE_PMULHRSW, IIC_SSE_PMULHRSW
>;

/// SS3I_binop_rm - Simple SSSE3 bin op
multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
                         X86MemOperand x86memop, OpndItins itins,
                         bit Is2Addr = 1> {
  // Register-register form. Marked commutable here; instantiations can
  // override this with an enclosing 'let isCommutable = 0'.
  let isCommutable = 1 in
  def rr : SS38I<opc, MRMSrcReg,
                 (outs RC:$dst),
                 (ins RC:$src1, RC:$src2),
                 !if(Is2Addr,
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     !strconcat(OpcodeStr,
                                "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                 [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))],
                 itins.rr>,
           Sched<[itins.Sched]>;

  // Register-memory form: the second source is loaded through memop_frag and
  // bitconverted to OpVT; uses the memory itinerary and folded sched write.
  def rm : SS38I<opc, MRMSrcMem,
                 (outs RC:$dst),
                 (ins RC:$src1, x86memop:$src2),
                 !if(Is2Addr,
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     !strconcat(OpcodeStr,
                                "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                 [(set RC:$dst,
                   (OpVT (OpNode RC:$src1,
                          (bitconvert (memop_frag addr:$src2)))))],
                 itins.rm>,
           Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
                             Intrinsic IntId128, OpndItins itins,
                             bit Is2Addr = 1> {
  // Both forms now forward the itinerary classes carried by 'itins'
  // (itins.rr / itins.rm), matching SS3I_binop_rm. Previously only
  // itins.Sched was used and the itinerary classes were silently dropped.
  let isCommutable = 1 in
  def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
       (ins VR128:$src1, VR128:$src2),
       !if(Is2Addr,
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))], itins.rr>,
       Sched<[itins.Sched]>;
  def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
       (ins VR128:$src1, i128mem:$src2),
       !if(Is2Addr,
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst,
         (IntId128 VR128:$src1,
          (bitconvert (memopv2i64 addr:$src2))))], itins.rm>,
       Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

/// SS3I_binop_rm_int_y - 256-bit intrinsic-selected SSSE3-style binary op.
/// Always prints the three-operand asm string; takes the scheduling write
/// directly (\p Sched) instead of an OpndItins bundle.
multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
                               Intrinsic IntId256,
                               X86FoldableSchedWrite Sched> {
  let isCommutable = 1 in
  def rr256 : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
       (ins VR256:$src1, VR256:$src2),
       !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>,
       Sched<[Sched]>;
  def rm256 : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
       (ins VR256:$src1, i256mem:$src2),
       !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set VR256:$dst,
         (IntId256 VR256:$src1, (bitconvert (loadv4i64 addr:$src2))))]>,
       Sched<[Sched.Folded, ReadAfterLd]>;
}

// VEX-encoded 128-bit forms (three-operand syntax).
let ImmT = NoImm, Predicates = [HasAVX] in {
let isCommutable = 0 in {
  defm VPHADDW    : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, VR128,
                                  loadv2i64, i128mem,
                                  SSE_PHADDSUBW, 0>, VEX_4V;
  defm VPHADDD    : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, VR128,
                                  loadv2i64, i128mem,
                                  SSE_PHADDSUBD, 0>, VEX_4V;
  defm VPHSUBW    : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, VR128,
                                  loadv2i64, i128mem,
                                  SSE_PHADDSUBW, 0>, VEX_4V;
  defm VPHSUBD    : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, VR128,
                                  loadv2i64, i128mem,
                                  SSE_PHADDSUBD, 0>, VEX_4V;
  defm VPSIGNB    : SS3I_binop_rm<0x08, "vpsignb", X86psign, v16i8, VR128,
                                  loadv2i64, i128mem,
                                  SSE_PSIGN, 0>, VEX_4V;
  defm VPSIGNW    : SS3I_binop_rm<0x09, "vpsignw", X86psign, v8i16, VR128,
                                  loadv2i64, i128mem,
                                  SSE_PSIGN, 0>, VEX_4V;
  defm VPSIGND    : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v4i32, VR128,
                                  loadv2i64, i128mem,
                                  SSE_PSIGN, 0>, VEX_4V;
  defm VPSHUFB    : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, VR128,
                                  loadv2i64, i128mem,
                                  SSE_PSHUFB, 0>, VEX_4V;
  defm VPHADDSW   : SS3I_binop_rm_int<0x03, "vphaddsw",
                                      int_x86_ssse3_phadd_sw_128,
                                      SSE_PHADDSUBSW, 0>, VEX_4V;
  defm VPHSUBSW   : SS3I_binop_rm_int<0x07, "vphsubsw",
                                      int_x86_ssse3_phsub_sw_128,
                                      SSE_PHADDSUBSW, 0>, VEX_4V;
  defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw",
                                      int_x86_ssse3_pmadd_ub_sw_128,
                                      SSE_PMADD, 0>, VEX_4V;
}
defm VPMULHRSW    : SS3I_binop_rm_int<0x0B, "vpmulhrsw",
                                      int_x86_ssse3_pmul_hr_sw_128,
                                      SSE_PMULHRSW, 0>, VEX_4V;
}

// VEX-encoded 256-bit forms. Each instruction uses the same itinerary bundle
// as its 128-bit counterpart above: the dword horizontal ops take
// SSE_PHADDSUBD and the sign ops take SSE_PSIGN (all of these previously
// carried SSE_PHADDSUBW).
let ImmT = NoImm, Predicates = [HasAVX2] in {
let isCommutable = 0 in {
  defm VPHADDWY   : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, VR256,
                                  loadv4i64, i256mem,
                                  SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
  defm VPHADDDY   : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, VR256,
                                  loadv4i64, i256mem,
                                  SSE_PHADDSUBD, 0>, VEX_4V, VEX_L;
  defm VPHSUBWY   : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, VR256,
                                  loadv4i64, i256mem,
                                  SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
  defm VPHSUBDY   : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, VR256,
                                  loadv4i64, i256mem,
                                  SSE_PHADDSUBD, 0>, VEX_4V, VEX_L;
  defm VPSIGNBY   : SS3I_binop_rm<0x08, "vpsignb", X86psign, v32i8, VR256,
                                  loadv4i64, i256mem,
                                  SSE_PSIGN, 0>, VEX_4V, VEX_L;
  defm VPSIGNWY   : SS3I_binop_rm<0x09, "vpsignw", X86psign, v16i16, VR256,
                                  loadv4i64, i256mem,
                                  SSE_PSIGN, 0>, VEX_4V, VEX_L;
  defm VPSIGNDY   : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v8i32, VR256,
                                  loadv4i64, i256mem,
                                  SSE_PSIGN, 0>, VEX_4V, VEX_L;
  defm VPSHUFBY   : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, VR256,
                                  loadv4i64, i256mem,
                                  SSE_PSHUFB, 0>, VEX_4V, VEX_L;
  defm VPHADDSW   : SS3I_binop_rm_int_y<0x03, "vphaddsw",
                                        int_x86_avx2_phadd_sw,
                                        WriteVecALU>, VEX_4V, VEX_L;
  defm VPHSUBSW   : SS3I_binop_rm_int_y<0x07, "vphsubsw",
                                        int_x86_avx2_phsub_sw,
                                        WriteVecALU>, VEX_4V, VEX_L;
  defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw",
                                       int_x86_avx2_pmadd_ub_sw,
                                        WriteVecIMul>, VEX_4V, VEX_L;
}
defm VPMULHRSW    : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw",
                                        int_x86_avx2_pmul_hr_sw,
                                        WriteVecIMul>, VEX_4V, VEX_L;
}

// None of these have i8 immediate fields.
// Legacy-encoded SSSE3 binary ops: destination tied to the first source,
// two-operand asm syntax (Is2Addr left at its default).
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
  defm PHADDW    : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, VR128,
                                 memopv2i64, i128mem, SSE_PHADDSUBW>;
  defm PHADDD    : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, VR128,
                                 memopv2i64, i128mem, SSE_PHADDSUBD>;
  defm PHSUBW    : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, VR128,
                                 memopv2i64, i128mem, SSE_PHADDSUBW>;
  defm PHSUBD    : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, VR128,
                                 memopv2i64, i128mem, SSE_PHADDSUBD>;
  defm PSIGNB    : SS3I_binop_rm<0x08, "psignb", X86psign, v16i8, VR128,
                                 memopv2i64, i128mem, SSE_PSIGN>;
  defm PSIGNW    : SS3I_binop_rm<0x09, "psignw", X86psign, v8i16, VR128,
                                 memopv2i64, i128mem, SSE_PSIGN>;
  defm PSIGND    : SS3I_binop_rm<0x0A, "psignd", X86psign, v4i32, VR128,
                                 memopv2i64, i128mem, SSE_PSIGN>;
  defm PSHUFB    : SS3I_binop_rm<0x00, "pshufb", X86pshufb, v16i8, VR128,
                                 memopv2i64, i128mem, SSE_PSHUFB>;
  defm PHADDSW   : SS3I_binop_rm_int<0x03, "phaddsw",
                                     int_x86_ssse3_phadd_sw_128,
                                     SSE_PHADDSUBSW>;
  defm PHSUBSW   : SS3I_binop_rm_int<0x07, "phsubsw",
                                     int_x86_ssse3_phsub_sw_128,
                                     SSE_PHADDSUBSW>;
  defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw",
                                     int_x86_ssse3_pmadd_ub_sw_128, SSE_PMADD>;
}
// PMULHRSW sits outside the 'isCommutable = 0' scope above.
defm PMULHRSW    : SS3I_binop_rm_int<0x0B, "pmulhrsw",
                                     int_x86_ssse3_pmul_hr_sw_128,
                                     SSE_PMULHRSW>;
}

//===---------------------------------------------------------------------===//
// SSSE3 - Packed Align Instruction Patterns
//===---------------------------------------------------------------------===//

/// ssse3_palignr - 128-bit rr/rm forms of opcode 0x0F in the SS3AI format with
/// an 8-bit immediate. Neither def carries a selection pattern ('[]'), so
/// side-effect and load flags are stated explicitly.
///   asm     - assembly mnemonic.
///   Is2Addr - 1 prints the two-operand asm string, 0 the three-operand one.
multiclass ssse3_palignr<string asm, bit Is2Addr = 1> {
  let hasSideEffects = 0 in {
    def R128rr : SS3AI<0x0F, MRMSrcReg,
                       (outs VR128:$dst),
                       (ins VR128:$src1, VR128:$src2, i8imm:$src3),
                       !if(Is2Addr,
                           !strconcat(asm,
                             "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                           !strconcat(asm,
                             "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
                       [], IIC_SSE_PALIGNRR>,
                 Sched<[WriteShuffle]>;
    let mayLoad = 1 in
    def R128rm : SS3AI<0x0F, MRMSrcMem,
                       (outs VR128:$dst),
                       (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
                       !if(Is2Addr,
                           !strconcat(asm,
                             "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                           !strconcat(asm,
                             "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
                       [], IIC_SSE_PALIGNRM>,
                 Sched<[WriteShuffleLd, ReadAfterLd]>;
  }
}

/// ssse3_palignr_y - 256-bit counterpart of ssse3_palignr. The asm string is
/// always the three-operand form; the Is2Addr parameter is accepted but not
/// referenced in the body. No itinerary class is attached.
multiclass ssse3_palignr_y<string asm, bit Is2Addr = 1> {
  let hasSideEffects = 0 in {
    def R256rr : SS3AI<0x0F, MRMSrcReg,
                       (outs VR256:$dst),
                       (ins VR256:$src1, VR256:$src2, i8imm:$src3),
                       !strconcat(asm,
                         "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                       []>,
                 Sched<[WriteShuffle]>;
    let mayLoad = 1 in
    def R256rm : SS3AI<0x0F, MRMSrcMem,
                       (outs VR256:$dst),
                       (ins VR256:$src1, i256mem:$src2, i8imm:$src3),
                       !strconcat(asm,
                         "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                       []>,
                 Sched<[WriteShuffleLd, ReadAfterLd]>;
  }
}

let Predicates = [HasAVX] in
  defm VPALIGN : ssse3_palignr<"vpalignr", 0>, VEX_4V;
let Predicates = [HasAVX2] in
  defm VPALIGN : ssse3_palignr_y<"vpalignr", 0>, VEX_4V, VEX_L;
let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
  defm PALIGN : ssse3_palignr<"palignr">;

// 256-bit selection patterns. Note that the two vector operands of the
// X86PAlignr node are passed to the instruction in swapped order.
let Predicates = [HasAVX2] in {
def : Pat<(v8i32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
def : Pat<(v8f32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
def : Pat<(v16i16 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
def : Pat<(v32i8 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
}

// 128-bit X86PAlignr selection (VEX form). The node's ($src1, $src2) is
// emitted as ($src2, $src1) on the instruction.
let Predicates = [HasAVX] in {
def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
          (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
          (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
          (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
          (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
}

// Same selection for the legacy (non-VEX) encoding.
let Predicates = [UseSSSE3] in {
def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
}

//===---------------------------------------------------------------------===//
// SSE3 - Thread synchronization
//===---------------------------------------------------------------------===//
// (Every definition in this section is predicated on HasSSE3 and uses the
// int_x86_sse3_* intrinsics.)

let SchedRW = [WriteSystem] in {
// Pseudo that matches the monitor intrinsic with an explicit address operand;
// it is expanded by a custom inserter.
let usesCustomInserter = 1 in {
def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
                [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>,
                Requires<[HasSSE3]>;
}

// Real encodings: all operands are implicit registers.
let Uses = [EAX, ECX, EDX] in
def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", [], IIC_SSE_MONITOR>,
                 TB, Requires<[HasSSE3]>;
let Uses = [ECX, EAX] in
def MWAITrr   : I<0x01, MRM_C9, (outs), (ins), "mwait",
                [(int_x86_sse3_mwait ECX, EAX)], IIC_SSE_MWAIT>,
                TB, Requires<[HasSSE3]>;
} // SchedRW

// Accept the spellings that write the implicit registers explicitly.
def : InstAlias<"mwait\t{%eax, %ecx|ecx, eax}", (MWAITrr)>, Requires<[Not64BitMode]>;
def : InstAlias<"mwait\t{%rax, %rcx|rcx, rax}", (MWAITrr)>, Requires<[In64BitMode]>;

def : InstAlias<"monitor\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITORrrr)>,
      Requires<[Not64BitMode]>;
def : InstAlias<"monitor\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORrrr)>,
      Requires<[In64BitMode]>;

//===----------------------------------------------------------------------===//
// SSE4.1 - Packed Move with Sign/Zero Extend
//===----------------------------------------------------------------------===//

// SS41I_pmovx_rrrm - One register-source (rr) and one memory-source (rm)
// instruction. Neither carries a selection pattern; patterns are attached
// separately by the *_patterns multiclasses. The rm form is scheduled with
// the folded-load variant of itins.Sched.
multiclass SS41I_pmovx_rrrm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp,
                          RegisterClass OutRC, RegisterClass InRC,
                          OpndItins itins> {
  def rr : SS48I<opc, MRMSrcReg, (outs OutRC:$dst), (ins InRC:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                 [], itins.rr>,
                 Sched<[itins.Sched]>;

  def rm : SS48I<opc, MRMSrcMem, (outs OutRC:$dst), (ins MemOp:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                 [],
                 itins.rm>, Sched<[itins.Sched.Folded]>;
}

// SS41I_pmovx_rm_all - Instantiate SS41I_pmovx_rrrm three times:
//   NAME      legacy encoding, VR128 -> VR128, memory operand MemOp
//   V#NAME    "v"-prefixed VEX encoding under HasAVX, same operand types
//   V#NAME#Y  VEX.L encoding under HasAVX2, VR128 -> VR256, memory MemYOp
multiclass SS41I_pmovx_rm_all<bits<8> opc, string OpcodeStr,
                          X86MemOperand MemOp, X86MemOperand MemYOp,
                          OpndItins SSEItins, OpndItins AVXItins,
                          OpndItins AVX2Itins> {
  defm NAME : SS41I_pmovx_rrrm<opc, OpcodeStr, MemOp, VR128, VR128, SSEItins>;
  let Predicates = [HasAVX] in
    defm V#NAME   : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemOp,
                                     VR128, VR128, AVXItins>, VEX;
  let Predicates = [HasAVX2] in
    defm V#NAME#Y : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemYOp,
                                     VR256, VR128, AVX2Itins>, VEX, VEX_L;
}

// SS41I_pmovx_rm - Create the sign-extending (PMOVSX, opcode opc) and
// zero-extending (PMOVZX, opcode opc + 0x10) families for one element-size
// suffix.
multiclass SS41I_pmovx_rm<bits<8> opc, string OpcodeStr,
                                X86MemOperand MemOp, X86MemOperand MemYOp> {
  defm PMOVSX#NAME : SS41I_pmovx_rm_all<opc, !strconcat("pmovsx", OpcodeStr),
                                        MemOp, MemYOp,
                                        SSE_INTALU_ITINS_SHUFF_P,
                                        DEFAULT_ITINS_SHUFFLESCHED,
                                        DEFAULT_ITINS_SHUFFLESCHED>;
  defm PMOVZX#NAME : SS41I_pmovx_rm_all<!add(opc, 0x10),
                                        !strconcat("pmovzx", OpcodeStr),
                                        MemOp, MemYOp,
                                        SSE_INTALU_ITINS_SHUFF_P,
                                        DEFAULT_ITINS_SHUFFLESCHED,
                                        DEFAULT_ITINS_SHUFFLESCHED>;
}

defm BW : SS41I_pmovx_rm<0x20, "bw", i64mem, i128mem>;
defm WD : SS41I_pmovx_rm<0x23, "wd", i64mem, i128mem>;
defm DQ : SS41I_pmovx_rm<0x25, "dq", i64mem, i128mem>;

defm BD : SS41I_pmovx_rm<0x21, "bd", i32mem, i64mem>;
defm WQ : SS41I_pmovx_rm<0x24, "wq", i32mem, i64mem>;

defm BQ : SS41I_pmovx_rm<0x22, "bq", i16mem, i32mem>;

// AVX2 Patterns
// SS41I_pmovx_avx2_patterns - Map ExtOp onto the 256-bit (…Y) instructions
// whose names start with OpcPrefix. Each distinct source pattern is listed
// exactly once.
multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, SDNode ExtOp> {
  // Register-Register patterns
  def : Pat<(v16i16 (ExtOp (v16i8 VR128:$src))),
            (!cast<I>(OpcPrefix#BWYrr) VR128:$src)>;
  def : Pat<(v8i32 (ExtOp (v16i8 VR128:$src))),
            (!cast<I>(OpcPrefix#BDYrr) VR128:$src)>;
  def : Pat<(v4i64 (ExtOp (v16i8 VR128:$src))),
            (!cast<I>(OpcPrefix#BQYrr) VR128:$src)>;

  def : Pat<(v8i32 (ExtOp (v8i16 VR128:$src))),
            (!cast<I>(OpcPrefix#WDYrr) VR128:$src)>;
  def : Pat<(v4i64 (ExtOp (v8i16 VR128:$src))),
            (!cast<I>(OpcPrefix#WQYrr) VR128:$src)>;

  def : Pat<(v4i64 (ExtOp (v4i32 VR128:$src))),
            (!cast<I>(OpcPrefix#DQYrr) VR128:$src)>;

  // On AVX2, we also support 256bit inputs.
  // Only the low xmm half of the source is fed to the instruction.
  // FIXME: remove these patterns when the old shuffle lowering goes away.
  def : Pat<(v16i16 (ExtOp (v32i8 VR256:$src))),
            (!cast<I>(OpcPrefix#BWYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
  def : Pat<(v8i32 (ExtOp (v32i8 VR256:$src))),
            (!cast<I>(OpcPrefix#BDYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
  def : Pat<(v4i64 (ExtOp (v32i8 VR256:$src))),
            (!cast<I>(OpcPrefix#BQYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>;

  def : Pat<(v8i32 (ExtOp (v16i16 VR256:$src))),
            (!cast<I>(OpcPrefix#WDYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
  def : Pat<(v4i64 (ExtOp (v16i16 VR256:$src))),
            (!cast<I>(OpcPrefix#WQYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>;

  def : Pat<(v4i64 (ExtOp (v8i32 VR256:$src))),
            (!cast<I>(OpcPrefix#DQYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>;

  // AVX2 Register-Memory patterns
  // byte -> word
  def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
  def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
  def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;

  // byte -> dword
  def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;

  // byte -> qword
  def : Pat<(v4i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;

  // word -> dword
  def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;

  // word -> qword
  def : Pat<(v4i64 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;

  // dword -> qword
  def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
}

let Predicates = [HasAVX2] in {
  defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", X86vsext>;
  defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", X86vzext>;
}

// SSE4.1/AVX patterns.
// SS41I_pmovx_patterns - Attach ExtOp selection patterns to the 128-bit
// extend instructions whose names start with OpcPrefix (…BW, …BD, …BQ, …WD,
// …WQ, …DQ with an rr or rm suffix). ExtLoad16 is the 16-bit load fragment
// used for the byte->qword memory form.
multiclass SS41I_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
                                PatFrag ExtLoad16> {
  // ---- Register sources ----
  def : Pat<(v8i16 (ExtOp (v16i8 VR128:$src))),
            (!cast<I>(OpcPrefix#BWrr) VR128:$src)>;
  def : Pat<(v4i32 (ExtOp (v16i8 VR128:$src))),
            (!cast<I>(OpcPrefix#BDrr) VR128:$src)>;
  def : Pat<(v2i64 (ExtOp (v16i8 VR128:$src))),
            (!cast<I>(OpcPrefix#BQrr) VR128:$src)>;

  def : Pat<(v4i32 (ExtOp (v8i16 VR128:$src))),
            (!cast<I>(OpcPrefix#WDrr) VR128:$src)>;
  def : Pat<(v2i64 (ExtOp (v8i16 VR128:$src))),
            (!cast<I>(OpcPrefix#WQrr) VR128:$src)>;

  def : Pat<(v2i64 (ExtOp (v4i32 VR128:$src))),
            (!cast<I>(OpcPrefix#DQrr) VR128:$src)>;

  // ---- Memory sources: byte -> word ----
  def : Pat<(v8i16 (ExtOp (bc_v16i8
                            (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
  def : Pat<(v8i16 (ExtOp (bc_v16i8
                            (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
  def : Pat<(v8i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
  def : Pat<(v8i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
  def : Pat<(v8i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;

  // ---- Memory sources: byte -> dword ----
  def : Pat<(v4i32 (ExtOp (bc_v16i8
                            (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
  def : Pat<(v4i32 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
  def : Pat<(v4i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
  def : Pat<(v4i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;

  // ---- Memory sources: byte -> qword ----
  def : Pat<(v2i64 (ExtOp (bc_v16i8
                            (v4i32 (scalar_to_vector (ExtLoad16 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
  def : Pat<(v2i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
            (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
  def : Pat<(v2i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
  def : Pat<(v2i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQrm) addr:$src)>;

  // ---- Memory sources: word -> dword ----
  def : Pat<(v4i32 (ExtOp (bc_v8i16
                            (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
  def : Pat<(v4i32 (ExtOp (bc_v8i16
                            (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
  def : Pat<(v4i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
  def : Pat<(v4i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
  def : Pat<(v4i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;

  // ---- Memory sources: word -> qword ----
  def : Pat<(v2i64 (ExtOp (bc_v8i16
                            (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
  def : Pat<(v2i64 (ExtOp (v8i16 (vzmovl_v4i32 addr:$src)))),
            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
  def : Pat<(v2i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
  def : Pat<(v2i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;

  // ---- Memory sources: dword -> qword ----
  def : Pat<(v2i64 (ExtOp (bc_v4i32
                            (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
  def : Pat<(v2i64 (ExtOp (bc_v4i32
                            (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
  def : Pat<(v2i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
  def : Pat<(v2i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
  def : Pat<(v2i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
}

// VEX-encoded instantiation.
let Predicates = [HasAVX] in {
  defm : SS41I_pmovx_patterns<"VPMOVSX", X86vsext, extloadi32i16>;
  defm : SS41I_pmovx_patterns<"VPMOVZX", X86vzext, loadi16_anyext>;
}

// Legacy-encoded instantiation of the PMOVSX/PMOVZX selection patterns.
let Predicates = [UseSSE41] in {
  defm : SS41I_pmovx_patterns<"PMOVSX", X86vsext, extloadi32i16>;
  defm : SS41I_pmovx_patterns<"PMOVZX", X86vzext, loadi16_anyext>;
}

//===----------------------------------------------------------------------===//
// SSE4.1 - Extract Instructions
//===----------------------------------------------------------------------===//

/// SS41I_extract8 - SSE 4.1 extract 8 bits to 32 bit reg or 8 bit mem
///   rr: X86pextrb result written to a GR32/GR64 register.
///   mr: X86pextrb result truncated to i8 and stored to memory.
multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
                   (ins VR128:$src1, i32i8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set GR32orGR64:$dst,
                         (X86pextrb (v16i8 VR128:$src1), imm:$src2))]>,
           Sched<[WriteShuffle]>;

  let SchedRW = [WriteShuffleLd, WriteRMW],
      mayStore = 1, hasSideEffects = 0 in
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                   (ins i8mem:$dst, VR128:$src1, i32i8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(store (i8 (trunc (assertzext
                                        (X86pextrb (v16i8 VR128:$src1),
                                                   imm:$src2)))),
                           addr:$dst)]>;
}

let Predicates = [HasAVX] in
  defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX;

defm PEXTRB      : SS41I_extract8<0x14, "pextrb">;


/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination
///   rr_REV: register-destination encoding, codegen-only with no pattern,
///           but still forced into the disassembler tables.
///   mr:     X86pextrw result truncated to i16 and stored to memory.
multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
  def rr_REV : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
                       (ins VR128:$src1, i32i8imm:$src2),
                       !strconcat(OpcodeStr,
                                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       []>,
               Sched<[WriteShuffle]>;

  let SchedRW = [WriteShuffleLd, WriteRMW],
      mayStore = 1, hasSideEffects = 0 in
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                   (ins i16mem:$dst, VR128:$src1, i32i8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(store (i16 (trunc (assertzext
                                         (X86pextrw (v8i16 VR128:$src1),
                                                    imm:$src2)))),
                           addr:$dst)]>;
}

let Predicates = [HasAVX] in
  defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX;

defm PEXTRW      : SS41I_extract16<0x15, "pextrw">;


/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination
multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
                   (ins VR128:$src1, i32i8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set GR32:$dst,
                         (extractelt (v4i32 VR128:$src1), imm:$src2))]>,
           Sched<[WriteShuffle]>;

  let SchedRW = [WriteShuffleLd, WriteRMW] in
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                   (ins i32mem:$dst, VR128:$src1, i32i8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(store (extractelt (v4i32 VR128:$src1), imm:$src2),
                           addr:$dst)]>;
}

let Predicates = [HasAVX] in
  defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX;

defm PEXTRD      : SS41I_extract32<0x16, "pextrd">;

/// SS41I_extract64 - SSE 4.1 extract 64 bits to int reg or memory destination
/// Both forms carry REX_W.
multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst),
                   (ins VR128:$src1, i32i8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set GR64:$dst,
                         (extractelt (v2i64 VR128:$src1), imm:$src2))]>,
           Sched<[WriteShuffle]>, REX_W;

  let SchedRW = [WriteShuffleLd, WriteRMW] in
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                   (ins i64mem:$dst, VR128:$src1, i32i8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(store (extractelt (v2i64 VR128:$src1), imm:$src2),
                           addr:$dst)]>, REX_W;
}

let Predicates = [HasAVX] in
  defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W;

defm PEXTRQ      : SS41I_extract64<0x16, "pextrq">;

/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
/// destination
/// The v4f32 source is bit-cast to v4i32, so the extracted lane is treated as
/// an integer in both forms.
multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr,
                            OpndItins itins = DEFAULT_ITINS> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
                   (ins VR128:$src1, i32i8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set GR32orGR64:$dst,
                         (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
                                     imm:$src2))],
                   itins.rr>,
           Sched<[WriteFBlend]>;

  let SchedRW = [WriteFBlendLd, WriteRMW] in
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                   (ins f32mem:$dst, VR128:$src1, i32i8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
                                       imm:$src2),
                           addr:$dst)],
                   itins.rm>;
}

let ExeDomain = SSEPackedSingle in {
  let Predicates = [UseAVX] in
    defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX;
  defm EXTRACTPS   : SS41I_extractf32<0x17, "extractps", SSE_EXTRACT_ITINS>;
}

// Also match an EXTRACTPS store when the store is done as f32 instead of i32.
let Predicates = [HasAVX] in
def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
                                              imm:$src2))),
                 addr:$dst),
          (VEXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>;
let Predicates = [UseSSE41] in
def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
                                              imm:$src2))),
                 addr:$dst),
          (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>;

//===----------------------------------------------------------------------===//
// SSE4.1 - Insert Instructions
//===----------------------------------------------------------------------===//

// SS41I_insert8 - Insert a byte (X86pinsrb) taken from a GR32/GR64 register
// (rr) or from an 8-bit extending load (rm). Is2Addr selects the two-operand
// assembly string.
multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
                   (ins VR128:$src1, GR32orGR64:$src2, i32i8imm:$src3),
                   !if(Is2Addr,
                       !strconcat(asm,
                         "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                       !strconcat(asm,
                         "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
                   [(set VR128:$dst,
                         (X86pinsrb VR128:$src1, GR32orGR64:$src2,
                                    imm:$src3))]>,
           Sched<[WriteShuffle]>;
  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
                   (ins VR128:$src1, i8mem:$src2, i32i8imm:$src3),
                   !if(Is2Addr,
                       !strconcat(asm,
                         "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                       !strconcat(asm,
                         "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
                   [(set VR128:$dst,
                         (X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
                                    imm:$src3))]>,
           Sched<[WriteShuffleLd, ReadAfterLd]>;
}

let Predicates = [HasAVX] in
  defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
  defm PINSRB  : SS41I_insert8<0x20, "pinsrb">;

// SS41I_insert32 - Insert a 32-bit element into a v4i32 from GR32 (rr) or
// from an i32 load (rm).
multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
                   (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
                   !if(Is2Addr,
                       !strconcat(asm,
                         "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                       !strconcat(asm,
                         "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
                   [(set VR128:$dst,
                         (v4i32 (insertelt VR128:$src1, GR32:$src2,
                                           imm:$src3)))]>,
           Sched<[WriteShuffle]>;
  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
                   (ins VR128:$src1, i32mem:$src2, i32i8imm:$src3),
                   !if(Is2Addr,
                       !strconcat(asm,
                         "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                       !strconcat(asm,
                         "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
                   [(set VR128:$dst,
                         (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2),
                                           imm:$src3)))]>,
           Sched<[WriteShuffleLd, ReadAfterLd]>;
}

let Predicates = [HasAVX] in
  defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
  defm PINSRD : SS41I_insert32<0x22, "pinsrd">;

// SS41I_insert64 - Insert a 64-bit element into a v2i64 from GR64 (rr) or
// from an i64 load (rm).
multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
                   (ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
                   !if(Is2Addr,
                       !strconcat(asm,
                         "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                       !strconcat(asm,
                         "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
                   [(set VR128:$dst,
                         (v2i64 (insertelt VR128:$src1, GR64:$src2,
                                           imm:$src3)))]>,
           Sched<[WriteShuffle]>;
  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
                   (ins VR128:$src1, i64mem:$src2, i32i8imm:$src3),
                   !if(Is2Addr,
                       !strconcat(asm,
                         "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                       !strconcat(asm,
                         "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
                   [(set VR128:$dst,
                         (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2),
                                           imm:$src3)))]>,
           Sched<[WriteShuffleLd, ReadAfterLd]>;
}

let Predicates = [HasAVX] in
  defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W;
let Constraints = "$src1 = $dst" in
  defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W;

// insertps has a few different modes. The first two here below are optimized
// inserts that won't zero arbitrary elements in the destination vector. The
// next one matches the intrinsic and could zero arbitrary elements in the
// target vector.
multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1,
                           OpndItins itins = DEFAULT_ITINS> {
  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
                   (ins VR128:$src1, VR128:$src2, i8imm:$src3),
                   !if(Is2Addr,
                       !strconcat(asm,
                         "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                       !strconcat(asm,
                         "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
                   [(set VR128:$dst,
                         (X86insertps VR128:$src1, VR128:$src2, imm:$src3))],
                   itins.rr>,
           Sched<[WriteFShuffle]>;
  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
                   (ins VR128:$src1, f32mem:$src2, i8imm:$src3),
                   !if(Is2Addr,
                       !strconcat(asm,
                         "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
                       !strconcat(asm,
                         "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
                   [(set VR128:$dst,
                         (X86insertps VR128:$src1,
                                      (v4f32 (scalar_to_vector
                                               (loadf32 addr:$src2))),
                                      imm:$src3))],
                   itins.rm>,
           Sched<[WriteFShuffleLd, ReadAfterLd]>;
}

let ExeDomain = SSEPackedSingle in {
  let Predicates = [UseAVX] in
    defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
  let Constraints = "$src1 = $dst" in
    defm INSERTPS : SS41I_insertf32<0x21, "insertps", 1, SSE_INSERT_ITINS>;
}

let Predicates = [UseSSE41] in {
  // If we're inserting an element from a load or a null pshuf of a load,
  // fold the load into the insertps instruction.
  def : Pat<(v4f32 (X86insertps (v4f32 VR128:$src1), (X86PShufd (v4f32
                       (scalar_to_vector (loadf32 addr:$src2))), (i8 0)),
                   imm:$src3)),
            (INSERTPSrm VR128:$src1, addr:$src2, imm:$src3)>;
  def : Pat<(v4f32 (X86insertps (v4f32 VR128:$src1), (X86PShufd
                      (loadv4f32 addr:$src2), (i8 0)), imm:$src3)),
            (INSERTPSrm VR128:$src1, addr:$src2, imm:$src3)>;
}

let Predicates = [UseAVX] in {
  // If we're inserting an element from a vbroadcast of a load, fold the
  // load into the X86insertps instruction.
// Source operand is a broadcast of a scalar f32 load.
  def : Pat<(v4f32 (X86insertps (v4f32 VR128:$src1),
                (X86VBroadcast (loadf32 addr:$src2)), imm:$src3)),
            (VINSERTPSrm VR128:$src1, addr:$src2, imm:$src3)>;
  // Source operand is a broadcast fed by a v4f32 load.
  def : Pat<(v4f32 (X86insertps (v4f32 VR128:$src1),
                (X86VBroadcast (loadv4f32 addr:$src2)), imm:$src3)),
            (VINSERTPSrm VR128:$src1, addr:$src2, imm:$src3)>;
} // Predicates = [UseAVX]

//===----------------------------------------------------------------------===//
// SSE4.1 - Round Instructions
//===----------------------------------------------------------------------===//

/// sse41_fp_unop_rm - Packed round-with-immediate instructions.
///
/// Emits four records per instantiation: PSr/PSm (opcode opcps, "ps" suffix,
/// SSEPackedSingle domain) and PDr/PDm (opcode opcpd, "pd" suffix,
/// SSEPackedDouble domain).
///   x86memop / RC          - memory operand and register class of the vector.
///   mem_frag32 / mem_frag64 - load fragments folded into the PSm / PDm forms.
///   V4F32Int / V2F64Int    - intrinsics selected by the PS / PD forms.
/// Register forms use the _REG itinerary with WriteFAdd; memory forms use the
/// _MEM itinerary with WriteFAddLd.
multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
                            X86MemOperand x86memop, RegisterClass RC,
                            PatFrag mem_frag32, PatFrag mem_frag64,
                            Intrinsic V4F32Int, Intrinsic V2F64Int> {
let ExeDomain = SSEPackedSingle in {
  // Vector intrinsic operation, reg
  def PSr : SS4AIi8<opcps, MRMSrcReg,
                    (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                    !strconcat(OpcodeStr,
                    "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))],
                    IIC_SSE_ROUNDPS_REG>, Sched<[WriteFAdd]>;

  // Vector intrinsic operation, mem
  def PSm : SS4AIi8<opcps, MRMSrcMem,
                    (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
                    !strconcat(OpcodeStr,
                    "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst,
                          (V4F32Int (mem_frag32 addr:$src1), imm:$src2))],
                    IIC_SSE_ROUNDPS_MEM>, Sched<[WriteFAddLd]>;
} // ExeDomain = SSEPackedSingle

let ExeDomain = SSEPackedDouble in {
  // Vector intrinsic operation, reg
  // (the PD forms share the ROUNDPS itinerary classes with the PS forms)
  def PDr : SS4AIi8<opcpd, MRMSrcReg,
                    (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                    !strconcat(OpcodeStr,
                    "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))],
                    IIC_SSE_ROUNDPS_REG>, Sched<[WriteFAdd]>;

  // Vector intrinsic operation, mem
  // Memory form: must use the _MEM itinerary, matching PSm above.
  def PDm : SS4AIi8<opcpd, MRMSrcMem,
                    (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
                    !strconcat(OpcodeStr,
                    "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst,
                          (V2F64Int (mem_frag64 addr:$src1), imm:$src2))],
                    IIC_SSE_ROUNDPS_MEM>, Sched<[WriteFAddLd]>;
} // ExeDomain = SSEPackedDouble
}

/// sse41_fp_binop_rm - Scalar round-with-immediate instructions.
///
/// opcss / opcsd are the opcodes of the "ss" / "sd" forms; F32Int / F64Int are
/// the intrinsics matched by the *_Int and memory forms. Is2Addr selects the
/// assembly string: 1 prints "$src3, $src2, $dst" (destination tied to $src1
/// by the instantiation), 0 prints all of $src1, $src2 and $src3.
multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
                            string OpcodeStr,
                            Intrinsic F32Int,
                            Intrinsic F64Int, bit Is2Addr = 1> {
let ExeDomain = GenericDomain in {
  // Operation, reg.
  // FR32 form with an empty pattern list; it is only selected through
  // explicit Pat<> definitions.
  let hasSideEffects = 0 in
  def SSr : SS4AIi8<opcss, MRMSrcReg,
      (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, i32i8imm:$src3),
      !if(Is2Addr,
          !strconcat(OpcodeStr,
              "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
          !strconcat(OpcodeStr,
              "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      []>, Sched<[WriteFAdd]>;

  // Intrinsic operation, reg.
// SSr_Int reuses SSr's opcode, format and assembly string, but takes VR128
  // operands and carries the F32Int intrinsic pattern.
  let isCodeGenOnly = 1 in
  def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
        (outs VR128:$dst),
        (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set VR128:$dst,
              (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>,
        Sched<[WriteFAdd]>;

  // Intrinsic operation, mem.
  // $src2 is an ssmem operand matched through sse_load_f32.
  def SSm : SS4AIi8<opcss, MRMSrcMem,
        (outs VR128:$dst),
        (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set VR128:$dst,
              (F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
        Sched<[WriteFAddLd, ReadAfterLd]>;

  // Operation, reg.
  // FR64 form with an empty pattern list; it is only selected through
  // explicit Pat<> definitions.
  let hasSideEffects = 0 in
  def SDr : SS4AIi8<opcsd, MRMSrcReg,
        (outs FR64:$dst),
        (ins FR64:$src1, FR64:$src2, i32i8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        []>, Sched<[WriteFAdd]>;

  // Intrinsic operation, reg.
  // SDr_Int reuses SDr's opcode, format and assembly string, but takes VR128
  // operands and carries the F64Int intrinsic pattern.
  let isCodeGenOnly = 1 in
  def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
        (outs VR128:$dst),
        (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set VR128:$dst,
              (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>,
        Sched<[WriteFAdd]>;

  // Intrinsic operation, mem.
// $src2 is an sdmem operand matched through sse_load_f64.
  def SDm : SS4AIi8<opcsd, MRMSrcMem,
        (outs VR128:$dst),
        (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set VR128:$dst,
              (F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
        Sched<[WriteFAddLd, ReadAfterLd]>;
} // ExeDomain = GenericDomain
}

// FP round - roundss, roundps, roundsd, roundpd
let Predicates = [HasAVX] in {
  // Intrinsic form
  // Packed 128-bit and 256-bit (VEX_L) variants share opcodes 0x08/0x09.
  defm VROUND  : sse41_fp_unop_rm<0x08, 0x09, "vround", f128mem, VR128,
                                  loadv4f32, loadv2f64,
                                  int_x86_sse41_round_ps,
                                  int_x86_sse41_round_pd>, VEX;
  defm VROUNDY : sse41_fp_unop_rm<0x08, 0x09, "vround", f256mem, VR256,
                                  loadv8f32, loadv4f64,
                                  int_x86_avx_round_ps_256,
                                  int_x86_avx_round_pd_256>, VEX, VEX_L;
  // Scalar variants; Is2Addr = 0 selects the assembly string that prints
  // $src1 explicitly.
  defm VROUND  : sse41_fp_binop_rm<0x0A, 0x0B, "vround",
                                   int_x86_sse41_round_ss,
                                   int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG;

  // Scalar floor: immediate 0x1. $src1 is left as IMPLICIT_DEF.
  def : Pat<(ffloor FR32:$src),
            (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>;
  def : Pat<(f64 (ffloor FR64:$src)),
            (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>;
// Immediates used by the rounding patterns (per the Intel SDM imm8 layout:
  // bits 1:0 = rounding mode, bit 2 = use MXCSR.RC, bit 3 = suppress the
  // precision exception):
  //   ffloor -> 0x1, fceil -> 0x2, ftrunc -> 0x3, frint -> 0x4,
  //   fnearbyint -> 0xC.
  // Scalar forms pass IMPLICIT_DEF as $src1.
  def : Pat<(f32 (fnearbyint FR32:$src)),
            (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
  def : Pat<(f64 (fnearbyint FR64:$src)),
            (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
  def : Pat<(f32 (fceil FR32:$src)),
            (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>;
  def : Pat<(f64 (fceil FR64:$src)),
            (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>;
  def : Pat<(f32 (frint FR32:$src)),
            (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
  def : Pat<(f64 (frint FR64:$src)),
            (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
  def : Pat<(f32 (ftrunc FR32:$src)),
            (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
  def : Pat<(f64 (ftrunc FR64:$src)),
            (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;

  // Packed v4f32 -> VROUNDPSr.
  def : Pat<(v4f32 (ffloor VR128:$src)),
            (VROUNDPSr VR128:$src, (i32 0x1))>;
  def : Pat<(v4f32 (fnearbyint VR128:$src)),
            (VROUNDPSr VR128:$src, (i32 0xC))>;
  def : Pat<(v4f32 (fceil VR128:$src)),
            (VROUNDPSr VR128:$src, (i32 0x2))>;
  def : Pat<(v4f32 (frint VR128:$src)),
            (VROUNDPSr VR128:$src, (i32 0x4))>;
  def : Pat<(v4f32 (ftrunc VR128:$src)),
            (VROUNDPSr VR128:$src, (i32 0x3))>;

  // Packed v2f64 -> VROUNDPDr.
  def : Pat<(v2f64 (ffloor VR128:$src)),
            (VROUNDPDr VR128:$src, (i32 0x1))>;
// Remaining packed v2f64 roundings -> VROUNDPDr.
  def : Pat<(v2f64 (fnearbyint VR128:$src)),
            (VROUNDPDr VR128:$src, (i32 0xC))>;
  def : Pat<(v2f64 (fceil VR128:$src)),
            (VROUNDPDr VR128:$src, (i32 0x2))>;
  def : Pat<(v2f64 (frint VR128:$src)),
            (VROUNDPDr VR128:$src, (i32 0x4))>;
  def : Pat<(v2f64 (ftrunc VR128:$src)),
            (VROUNDPDr VR128:$src, (i32 0x3))>;

  // Packed v8f32 -> VROUNDYPSr.
  def : Pat<(v8f32 (ffloor VR256:$src)),
            (VROUNDYPSr VR256:$src, (i32 0x1))>;
  def : Pat<(v8f32 (fnearbyint VR256:$src)),
            (VROUNDYPSr VR256:$src, (i32 0xC))>;
  def : Pat<(v8f32 (fceil VR256:$src)),
            (VROUNDYPSr VR256:$src, (i32 0x2))>;
  def : Pat<(v8f32 (frint VR256:$src)),
            (VROUNDYPSr VR256:$src, (i32 0x4))>;
  def : Pat<(v8f32 (ftrunc VR256:$src)),
            (VROUNDYPSr VR256:$src, (i32 0x3))>;

  // Packed v4f64 -> VROUNDYPDr.
  def : Pat<(v4f64 (ffloor VR256:$src)),
            (VROUNDYPDr VR256:$src, (i32 0x1))>;
  def : Pat<(v4f64 (fnearbyint VR256:$src)),
            (VROUNDYPDr VR256:$src, (i32 0xC))>;
  def : Pat<(v4f64 (fceil VR256:$src)),
            (VROUNDYPDr VR256:$src, (i32 0x2))>;
  def : Pat<(v4f64 (frint VR256:$src)),
            (VROUNDYPDr VR256:$src, (i32 0x4))>;
  def : Pat<(v4f64 (ftrunc VR256:$src)),
            (VROUNDYPDr VR256:$src, (i32 0x3))>;
}

// Non-VEX packed forms; the memory variants fold memopv4f32 / memopv2f64.
defm ROUND  : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
                               memopv4f32, memopv2f64,
                               int_x86_sse41_round_ps, int_x86_sse41_round_pd>;
// Non-VEX scalar forms: two-address, so $dst is tied to $src1.
let Constraints = "$src1 = $dst" in
defm ROUND  : sse41_fp_binop_rm<0x0A, 0x0B, "round",
                               int_x86_sse41_round_ss, int_x86_sse41_round_sd>;

// SSE4.1 (non-AVX) selection patterns. The immediates match the AVX patterns:
// ffloor -> 0x1, fceil -> 0x2, ftrunc -> 0x3, frint -> 0x4, fnearbyint -> 0xC.
let Predicates = [UseSSE41] in {
  // Scalar forms pass IMPLICIT_DEF as $src1.
  def : Pat<(ffloor FR32:$src),
            (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>;
  def : Pat<(f64 (ffloor FR64:$src)),
            (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>;
  def : Pat<(f32 (fnearbyint FR32:$src)),
            (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
  def : Pat<(f64 (fnearbyint FR64:$src)),
            (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
  def : Pat<(f32 (fceil FR32:$src)),
            (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>;
  def : Pat<(f64 (fceil FR64:$src)),
            (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>;
  def : Pat<(f32 (frint FR32:$src)),
            (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
  def : Pat<(f64 (frint FR64:$src)),
            (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
  def : Pat<(f32 (ftrunc FR32:$src)),
            (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
  def : Pat<(f64 (ftrunc FR64:$src)),
            (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;

  // Packed v4f32 -> ROUNDPSr.
  def : Pat<(v4f32 (ffloor VR128:$src)),
            (ROUNDPSr VR128:$src, (i32 0x1))>;
  def : Pat<(v4f32 (fnearbyint VR128:$src)),
            (ROUNDPSr VR128:$src, (i32 0xC))>;
  def : Pat<(v4f32 (fceil VR128:$src)),
            (ROUNDPSr VR128:$src, (i32 0x2))>;
  def : Pat<(v4f32 (frint VR128:$src)),
            (ROUNDPSr VR128:$src, (i32 0x4))>;
  def : Pat<(v4f32 (ftrunc VR128:$src)),
            (ROUNDPSr VR128:$src, (i32 0x3))>;

  // Packed v2f64 -> ROUNDPDr.
  def : Pat<(v2f64 (ffloor VR128:$src)),
            (ROUNDPDr VR128:$src, (i32 0x1))>;
  def : Pat<(v2f64 (fnearbyint VR128:$src)),
            (ROUNDPDr VR128:$src, (i32 0xC))>;
  def : Pat<(v2f64 (fceil VR128:$src)),
            (ROUNDPDr VR128:$src, (i32 0x2))>;
  def : Pat<(v2f64 (frint VR128:$src)),
            (ROUNDPDr VR128:$src, (i32 0x4))>;
  def : Pat<(v2f64 (ftrunc VR128:$src)),
            (ROUNDPDr VR128:$src, (i32 0x3))>;
}

//===----------------------------------------------------------------------===//
// SSE4.1 - Packed Bit Test
//===----------------------------------------------------------------------===//

// The ptest instruction: X86ISelLowering lowers to it, primarily from the
// Intel intrinsic that corresponds to it.
// AVX-encoded ptest. No register outputs: every form only sets EFLAGS from
// the X86ptest node. The Y variants take 256-bit operands and add VEX_L.
let Defs = [EFLAGS], Predicates = [HasAVX] in {
def VPTESTrr  : SS48I<0x17, MRMSrcReg, (outs),
                      (ins VR128:$src1, VR128:$src2),
                      "vptest\t{$src2, $src1|$src1, $src2}",
                      [(set EFLAGS,
                            (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
                Sched<[WriteVecLogic]>, VEX;
def VPTESTrm  : SS48I<0x17, MRMSrcMem, (outs),
                      (ins VR128:$src1, f128mem:$src2),
                      "vptest\t{$src2, $src1|$src1, $src2}",
                      [(set EFLAGS,
                            (X86ptest VR128:$src1, (loadv2i64 addr:$src2)))]>,
                Sched<[WriteVecLogicLd, ReadAfterLd]>, VEX;

def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs),
                      (ins VR256:$src1, VR256:$src2),
                      "vptest\t{$src2, $src1|$src1, $src2}",
                      [(set EFLAGS,
                            (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>,
                Sched<[WriteVecLogic]>, VEX, VEX_L;
def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs),
                      (ins VR256:$src1, i256mem:$src2),
                      "vptest\t{$src2, $src1|$src1, $src2}",
                      [(set EFLAGS,
                            (X86ptest VR256:$src1, (loadv4i64 addr:$src2)))]>,
                Sched<[WriteVecLogicLd, ReadAfterLd]>, VEX, VEX_L;
}

// Non-VEX ptest; the memory form folds a memopv2i64 load.
let Defs = [EFLAGS] in {
def PTESTrr : SS48I<0x17, MRMSrcReg, (outs),
                    (ins VR128:$src1, VR128:$src2),
                    "ptest\t{$src2, $src1|$src1, $src2}",
                    [(set EFLAGS,
                          (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
              Sched<[WriteVecLogic]>;
def PTESTrm : SS48I<0x17, MRMSrcMem, (outs),
                    (ins VR128:$src1, f128mem:$src2),
                    "ptest\t{$src2, $src1|$src1, $src2}",
                    [(set EFLAGS,
                          (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>,
              Sched<[WriteVecLogicLd, ReadAfterLd]>;
}

// The bit test instructions below are AVX only
/// avx_bittest - Register (rr) and memory (rm) forms of a VEX-encoded test
/// that only sets EFLAGS from the X86testp node.
///   RC / x86memop - register class and memory operand of the tested vector.
///   mem_frag      - load fragment folded into the rm form.
///   vt            - value type given to $src2 in the rr pattern.
multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
                       X86MemOperand x86memop, PatFrag mem_frag, ValueType vt> {
  def rr : SS48I<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
                 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
                 [(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>,
           Sched<[WriteVecLogic]>, VEX;
  def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
                 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
                 [(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>,
           Sched<[WriteVecLogicLd, ReadAfterLd]>, VEX;
}

// vtestps / vtestpd in 128-bit and 256-bit (VEX_L) widths, each placed in the
// matching packed execution domain.
let Defs = [EFLAGS], Predicates = [HasAVX] in {
let ExeDomain = SSEPackedSingle in {
defm VTESTPS  : avx_bittest<0x0E, "vtestps", VR128, f128mem, loadv4f32, v4f32>;
defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, loadv8f32, v8f32>,
                            VEX_L;
}
let ExeDomain = SSEPackedDouble in {
defm VTESTPD  : avx_bittest<0x0F, "vtestpd", VR128, f128mem, loadv2f64, v2f64>;
defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, loadv4f64, v4f64>,
                            VEX_L;
}
}
//===----------------------------------------------------------------------===//
// SSE4.1 - Misc Instructions
//===----------------------------------------------------------------------===//

// popcnt for 16/32/64-bit GPRs, register and memory sources. Every form
// matches ctpop and also defines EFLAGS. The 16/32-bit forms carry an
// explicit OpSize16/OpSize32; the 64-bit forms are built on RI instead.
// NOTE(review): these integer instructions are scheduled as WriteFAdd /
// WriteFAddLd - confirm that is the intended scheduling class.
let Defs = [EFLAGS], Predicates = [HasPOPCNT] in {
  def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
                     "popcnt{w}\t{$src, $dst|$dst, $src}",
                     [(set GR16:$dst, (ctpop GR16:$src)),
                      (implicit EFLAGS)],
                     IIC_SSE_POPCNT_RR>,
                   Sched<[WriteFAdd]>, OpSize16, XS;
  def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
                     "popcnt{w}\t{$src, $dst|$dst, $src}",
                     [(set GR16:$dst, (ctpop (loadi16 addr:$src))),
                      (implicit EFLAGS)],
                     IIC_SSE_POPCNT_RM>,
                   Sched<[WriteFAddLd]>, OpSize16, XS;

  def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
                     "popcnt{l}\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (ctpop GR32:$src)),
                      (implicit EFLAGS)],
                     IIC_SSE_POPCNT_RR>,
                   Sched<[WriteFAdd]>, OpSize32, XS;

  def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
                     "popcnt{l}\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (ctpop (loadi32 addr:$src))),
                      (implicit EFLAGS)],
                     IIC_SSE_POPCNT_RM>,
                   Sched<[WriteFAddLd]>, OpSize32, XS;

  def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
                      "popcnt{q}\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (ctpop GR64:$src)),
                       (implicit EFLAGS)],
                      IIC_SSE_POPCNT_RR>,
                   Sched<[WriteFAdd]>, XS;
  def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
                      "popcnt{q}\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (ctpop (loadi64 addr:$src))),
                       (implicit EFLAGS)],
                      IIC_SSE_POPCNT_RM>,
                   Sched<[WriteFAddLd]>, XS;
}



// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
// Defines rr128 (register source) and rm128 (i128mem source, loaded as
// memopv2i64 and bitconverted) for the intrinsic IntId128. Sched is the
// scheduling class of the register form; the memory form uses Sched.Folded.
multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
                                 Intrinsic IntId128,
                                 X86FoldableSchedWrite Sched> {
  def rr128 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
                    (ins VR128:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (IntId128 VR128:$src))]>,
              Sched<[Sched]>;
  def rm128 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
                    (ins i128mem:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst,
                          (IntId128 (bitconvert (memopv2i64 addr:$src))))]>,
              Sched<[Sched.Folded]>;
}

// PHMIN has the same profile as PSAD, thus we use the same scheduling
// model, although the naming is misleading.
// The HasAVX predicate applies to the VEX form only; both forms are scheduled
// as WriteVecIMul.
let Predicates = [HasAVX] in
defm VPHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "vphminposuw",
                                          int_x86_sse41_phminposuw,
                                          WriteVecIMul>, VEX;
defm PHMINPOSUW  : SS41I_unop_rm_int_v16 <0x41, "phminposuw",
                                          int_x86_sse41_phminposuw,
                                          WriteVecIMul>;

/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
///
/// 128-bit intrinsic-based binary operator with rr and rm forms. Is2Addr
/// selects the two-operand or three-operand assembly string; itins supplies
/// both the itinerary classes (rr/rm) and the scheduling class.
multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
                              Intrinsic IntId128, bit Is2Addr = 1,
                              OpndItins itins = DEFAULT_ITINS> {
  // Only the register-register form is marked commutable.
  let isCommutable = 1 in
  def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
                 (ins VR128:$src1, VR128:$src2),
                 !if(Is2Addr,
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     !strconcat(OpcodeStr,
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                 [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))],
                 itins.rr>,
           Sched<[itins.Sched]>;
  def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
                 (ins VR128:$src1, i128mem:$src2),
                 !if(Is2Addr,
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     !strconcat(OpcodeStr,
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                 [(set VR128:$dst,
                       (IntId128 VR128:$src1,
                                 (bitconvert (memopv2i64 addr:$src2))))],
                 itins.rm>,
           Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

/// SS41I_binop_rm_int_y - Simple SSE 4.1 binary operator
///
/// 256-bit counterpart: always uses the three-operand assembly string and
/// takes the scheduling class directly instead of an OpndItins.
multiclass SS41I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
                                Intrinsic IntId256,
                                X86FoldableSchedWrite Sched> {
  // Only the register-register form is marked commutable.
  let isCommutable = 1 in
  def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst),
                  (ins VR256:$src1, VR256:$src2),
                  !strconcat(OpcodeStr,
                      "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>,
            Sched<[Sched]>;
  def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst),
                  (ins VR256:$src1, i256mem:$src2),
                  !strconcat(OpcodeStr,
                      "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set VR256:$dst,
                        (IntId256 VR256:$src1,
                                  (bitconvert (loadv4i64 addr:$src2))))]>,
            Sched<[Sched.Folded, ReadAfterLd]>;
}


/// SS48I_binop_rm - Simple SSE41 binary operator.
/// Defines rr and rm forms of an SDNode-based binary operator on RC whose
/// result has type OpVT. The rm form loads $src2 through memop_frag and
/// bitconverts it. Is2Addr selects the two-operand or three-operand assembly
/// string.
/// NOTE(review): only itins.Sched is used here; unlike SS41I_binop_rm_int the
/// itinerary classes itins.rr / itins.rm are not passed to SS48I - confirm
/// that is intentional.
multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
                          X86MemOperand x86memop, bit Is2Addr = 1,
                          OpndItins itins = SSE_INTALU_ITINS_P> {
  // Only the register-register form is marked commutable.
  let isCommutable = 1 in
  def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst),
                 (ins RC:$src1, RC:$src2),
                 !if(Is2Addr,
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     !strconcat(OpcodeStr,
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                 [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
           Sched<[itins.Sched]>;
  def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst),
                 (ins RC:$src1, x86memop:$src2),
                 !if(Is2Addr,
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     !strconcat(OpcodeStr,
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                 [(set RC:$dst,
                       (OpVT (OpNode RC:$src1,
                                     (bitconvert (memop_frag addr:$src2)))))]>,
           Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

/// SS48I_binop_rm2 - Simple SSE41 binary operator with different src and dst
/// types.
// Emits rr and rm forms whose pattern is
// (DstVT (OpNode (SrcVT $src1), $src2)).
//   DstVT / SrcVT - result type and type of $src1, respectively.
//   IsCommutable  - forwarded to isCommutable on the rr form only.
//   Is2Addr       - 1 selects the two-operand assembly string, 0 the
//                   three-operand one.
//   itins         - supplies Sched for rr and Sched.Folded for rm.
multiclass SS48I_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           ValueType DstVT, ValueType SrcVT, RegisterClass RC,
                           PatFrag memop_frag, X86MemOperand x86memop,
                           OpndItins itins,
                           bit IsCommutable = 0, bit Is2Addr = 1> {
  let isCommutable = IsCommutable in
  def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>,
       Sched<[itins.Sched]>;
  def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1),
         (bitconvert (memop_frag addr:$src2)))))]>,
       Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

// 128-bit VEX_4V forms; Is2Addr = 0 selects the three-operand assembly string.
let Predicates = [HasAVX, NoVLX] in {
  // NOTE(review): without braces this `let` scopes over VPMINSB only; the
  // following defms are not covered by it. Confirm that this is intended.
  let isCommutable = 0 in
  defm VPMINSB   : SS48I_binop_rm<0x38, "vpminsb", X86smin, v16i8, VR128,
                                  loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V;
  defm VPMINSD   : SS48I_binop_rm<0x39, "vpminsd", X86smin, v4i32, VR128,
                                  loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V;
  defm VPMINUD   : SS48I_binop_rm<0x3B, "vpminud", X86umin, v4i32, VR128,
                                  loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V;
  defm VPMINUW   : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v8i16, VR128,
                                  loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V;
  defm VPMAXSB   : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v16i8, VR128,
                                  loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V;
  defm VPMAXSD   : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v4i32, VR128,
                                  loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V;
  defm VPMAXUD   : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v4i32, VR128,
                                  loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V;
  defm VPMAXUW   : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v8i16, VR128,
                                  loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V;
  defm VPMULDQ   : SS48I_binop_rm2<0x28, "vpmuldq", X86pmuldq, v2i64, v4i32,
                                   VR128, loadv2i64, i128mem,
                                   SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
}

// 256-bit VEX_4V, VEX_L forms.
let Predicates = [HasAVX2, NoVLX] in {
  // NOTE(review): as above, this brace-less `let` covers VPMINSBY only.
  let isCommutable = 0 in
  defm VPMINSBY  : SS48I_binop_rm<0x38, "vpminsb", X86smin, v32i8, VR256,
                                  loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V, VEX_L;
  defm VPMINSDY  : SS48I_binop_rm<0x39, "vpminsd", X86smin, v8i32, VR256,
                                  loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V, VEX_L;
  defm VPMINUDY  : SS48I_binop_rm<0x3B, "vpminud", X86umin, v8i32, VR256,
                                  loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V, VEX_L;
  defm VPMINUWY  : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v16i16, VR256,
                                  loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V, VEX_L;
  defm VPMAXSBY  : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v32i8, VR256,
                                  loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V, VEX_L;
  defm VPMAXSDY  : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v8i32, VR256,
                                  loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V, VEX_L;
  defm VPMAXUDY  : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v8i32, VR256,
                                  loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V, VEX_L;
  defm VPMAXUWY  : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v16i16, VR256,
                                  loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V, VEX_L;
  defm VPMULDQY  : SS48I_binop_rm2<0x28, "vpmuldq", X86pmuldq, v4i64, v8i32,
                                   VR256, loadv4i64, i256mem,
                                   SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
}

// Non-VEX forms: $src1 is tied to $dst and the two-operand assembly string is
// used (Is2Addr = 1).
let Constraints = "$src1 = $dst" in {
  // NOTE(review): as above, this brace-less `let` covers PMINSB only.
  let isCommutable = 0 in
  defm PMINSB   : SS48I_binop_rm<0x38, "pminsb", X86smin, v16i8, VR128,
                                 memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
  defm PMINSD   : SS48I_binop_rm<0x39, "pminsd", X86smin, v4i32, VR128,
                                 memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
  defm PMINUD   : SS48I_binop_rm<0x3B, "pminud", X86umin, v4i32, VR128,
                                 memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
  defm PMINUW   : SS48I_binop_rm<0x3A, "pminuw", X86umin, v8i16, VR128,
                                 memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
  defm PMAXSB   : SS48I_binop_rm<0x3C, "pmaxsb", X86smax, v16i8, VR128,
                                 memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
  defm PMAXSD   : SS48I_binop_rm<0x3D, "pmaxsd", X86smax, v4i32, VR128,
                                 memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
  defm PMAXUD   : SS48I_binop_rm<0x3F, "pmaxud", X86umax, v4i32, VR128,
                                 memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
  defm PMAXUW   : SS48I_binop_rm<0x3E, "pmaxuw", X86umax, v8i16, VR128,
                                 memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
  defm PMULDQ   : SS48I_binop_rm2<0x28, "pmuldq", X86pmuldq, v2i64, v4i32,
                                  VR128, memopv2i64, i128mem,
                                  SSE_INTMUL_ITINS_P, 1>;
}

let Predicates = [HasAVX, NoVLX] in {
  defm VPMULLD  : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128,
                                 memopv2i64, i128mem, 0, SSE_PMULLD_ITINS>,
                                 VEX_4V;
  defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128,
                                 memopv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
                                 VEX_4V;
}
let Predicates = [HasAVX2] in {
  defm VPMULLDY  : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256,
                                  memopv4i64, i256mem, 0, SSE_PMULLD_ITINS>,
                                  VEX_4V, VEX_L;
  defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256,
                                  memopv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V, VEX_L;
}

let Constraints = "$src1 = $dst" in {
  defm PMULLD  : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128,
                                memopv2i64, i128mem, 1, SSE_PMULLD_ITINS>;
  defm PCMPEQQ : SS48I_binop_rm<0x29, "pcmpeqq", X86pcmpeq, v2i64, VR128,
                                memopv2i64, i128mem, 1, SSE_INTALUQ_ITINS_P>;
}

/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
// Emits rri / rmi forms matched through the intrinsic IntId. itins supplies
// the itinerary classes (rr / rm) as well as the scheduling class.
multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
                 Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
                 X86MemOperand x86memop, bit Is2Addr = 1,
                 OpndItins itins = DEFAULT_ITINS> {
  let isCommutable = 1 in
  def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
        (ins RC:$src1, RC:$src2, i8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))], itins.rr>,
        Sched<[itins.Sched]>;
  def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
        (ins RC:$src1, x86memop:$src2, i8imm:$src3),
        !if(Is2Addr,
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
            !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
        [(set RC:$dst,
          (IntId RC:$src1,
           (bitconvert (memop_frag addr:$src2)), imm:$src3))], itins.rm>,
        Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

let Predicates = [HasAVX] in {
  let isCommutable = 0 in {
    defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
                                        VR128, loadv2i64, i128mem, 0,
                                        DEFAULT_ITINS_MPSADSCHED>, VEX_4V;
  }

  let ExeDomain = SSEPackedSingle in {
  defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
                                      VR128, loadv4f32, f128mem, 0,
                                      DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
  defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
                                       int_x86_avx_blend_ps_256, VR256, loadv8f32,
                                       f256mem, 0, DEFAULT_ITINS_FBLENDSCHED>,
                                       VEX_4V, VEX_L;
  }
  let ExeDomain = SSEPackedDouble in {
  defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
                                      VR128, loadv2f64, f128mem, 0,
                                      DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
  defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
                                       int_x86_avx_blend_pd_256, VR256, loadv4f64,
                                       f256mem, 0, DEFAULT_ITINS_FBLENDSCHED>,
                                       VEX_4V, VEX_L;
  }
  defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
                                      VR128, loadv2i64, i128mem, 0,
                                      DEFAULT_ITINS_BLENDSCHED>, VEX_4V;

  let ExeDomain = SSEPackedSingle in
  defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
                                   VR128, loadv4f32, f128mem, 0,
                                   SSE_DPPS_ITINS>, VEX_4V;
  // Uses the DPPD itineraries, matching the non-VEX DPPD definition below
  // (previously this reused SSE_DPPS_ITINS).
  let ExeDomain = SSEPackedDouble in
  defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
                                   VR128, loadv2f64, f128mem, 0,
                                   SSE_DPPD_ITINS>, VEX_4V;
  // Uses f256mem like the other 256-bit packed-single form above (VBLENDPSY);
  // previously this paired loadv8f32 with the integer operand i256mem.
  let ExeDomain = SSEPackedSingle in
  defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
                                    VR256, loadv8f32, f256mem, 0,
                                    SSE_DPPS_ITINS>, VEX_4V, VEX_L;
}

let Predicates = [HasAVX2] in {
  let isCommutable = 0 in {
  defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw,
                                       VR256, loadv4i64, i256mem, 0,
                                       DEFAULT_ITINS_MPSADSCHED>, VEX_4V, VEX_L;
  }
  defm VPBLENDWY : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_avx2_pblendw,
                                       VR256, loadv4i64, i256mem, 0,
                                       DEFAULT_ITINS_BLENDSCHED>, VEX_4V, VEX_L;
}

let Constraints = "$src1 = $dst" in {
  let isCommutable = 0 in {
  defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
                                     VR128, memopv2i64, i128mem,
                                     1, SSE_MPSADBW_ITINS>;
  }
  let ExeDomain = SSEPackedSingle in
  defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps,
                                     VR128, memopv4f32, f128mem,
                                     1, SSE_INTALU_ITINS_FBLEND_P>;
  let ExeDomain = SSEPackedDouble in
  defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd,
                                     VR128, memopv2f64, f128mem,
                                     1, SSE_INTALU_ITINS_FBLEND_P>;
  defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw,
                                     VR128, memopv2i64, i128mem,
                                     1, SSE_INTALU_ITINS_BLEND_P>;
  let ExeDomain = SSEPackedSingle in
  defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
                                  VR128, memopv4f32, f128mem, 1,
                                  SSE_DPPS_ITINS>;
  let ExeDomain = SSEPackedDouble in
  defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
                                  VR128, memopv2f64, f128mem, 1,
                                  SSE_DPPD_ITINS>;
}

/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators
// Emits rr / rm forms taking three source operands ($src3 is a register in
// both forms); Sched supplies the scheduling class, Sched.Folded the one for
// the load-folding form.
multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
                                    RegisterClass RC, X86MemOperand x86memop,
                                    PatFrag mem_frag, Intrinsic IntId,
                                    X86FoldableSchedWrite Sched> {
  def rr : Ii8<opc, MRMSrcReg, (outs RC:$dst),
                  (ins RC:$src1, RC:$src2, RC:$src3),
                  !strconcat(OpcodeStr,
                    "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                  [(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))],
                  NoItinerary, SSEPackedInt>, TAPD, VEX_4V, VEX_I8IMM,
                Sched<[Sched]>;

  def rm : Ii8<opc, MRMSrcMem, (outs RC:$dst),
                  (ins RC:$src1, x86memop:$src2, RC:$src3),
                  !strconcat(OpcodeStr,
                    "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                  [(set RC:$dst,
                        (IntId RC:$src1, (bitconvert (mem_frag addr:$src2)),
                               RC:$src3))],
                  NoItinerary, SSEPackedInt>, TAPD, VEX_4V, VEX_I8IMM,
                Sched<[Sched.Folded, ReadAfterLd]>;
}

let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedDouble in {
defm VBLENDVPD  : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, f128mem,
                                           loadv2f64, int_x86_sse41_blendvpd,
                                           WriteFVarBlend>;
defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, f256mem,
                                           loadv4f64, int_x86_avx_blendv_pd_256,
                                           WriteFVarBlend>, VEX_L;
} // ExeDomain = SSEPackedDouble
let ExeDomain = SSEPackedSingle in {
defm VBLENDVPS  : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, f128mem,
                                           loadv4f32, int_x86_sse41_blendvps,
                                           WriteFVarBlend>;
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, f256mem,
                                           loadv8f32, int_x86_avx_blendv_ps_256,
                                           WriteFVarBlend>, VEX_L;
} // ExeDomain = SSEPackedSingle
defm VPBLENDVB  : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
                                           loadv2i64, int_x86_sse41_pblendvb,
                                           WriteVarBlend>;
}

let Predicates = [HasAVX2] in {
defm VPBLENDVBY : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR256, i256mem,
                                           loadv4i64, int_x86_avx2_pblendvb,
                                           WriteVarBlend>, VEX_L;
}

// Select vselect / X86Blendi nodes to the VEX blend instructions. Note that
// the vselect patterns pass $src2 before $src1 to the variable-blend
// instruction, with the mask as the last operand.
let Predicates = [HasAVX] in {
  def : Pat<(v16i8 (vselect (v16i8 VR128:$mask), (v16i8 VR128:$src1),
                            (v16i8 VR128:$src2))),
            (VPBLENDVBrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v4i32 (vselect (v4i32 VR128:$mask), (v4i32 VR128:$src1),
                            (v4i32 VR128:$src2))),
            (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v4f32 (vselect (v4i32 VR128:$mask), (v4f32 VR128:$src1),
                            (v4f32 VR128:$src2))),
            (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v2i64 (vselect (v2i64 VR128:$mask), (v2i64 VR128:$src1),
                            (v2i64 VR128:$src2))),
            (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v2f64 (vselect (v2i64 VR128:$mask), (v2f64 VR128:$src1),
                            (v2f64 VR128:$src2))),
            (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v8i32 (vselect (v8i32 VR256:$mask), (v8i32 VR256:$src1),
                            (v8i32 VR256:$src2))),
            (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
  def : Pat<(v8f32 (vselect (v8i32 VR256:$mask), (v8f32 VR256:$src1),
                            (v8f32 VR256:$src2))),
            (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
  def : Pat<(v4i64 (vselect (v4i64 VR256:$mask), (v4i64 VR256:$src1),
                            (v4i64 VR256:$src2))),
            (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
  def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1),
                            (v4f64 VR256:$src2))),
            (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;

  def : Pat<(v8f32 (X86Blendi (v8f32 VR256:$src1), (v8f32 VR256:$src2),
                               (imm:$mask))),
            (VBLENDPSYrri VR256:$src1, VR256:$src2, imm:$mask)>;
  def : Pat<(v4f64 (X86Blendi (v4f64 VR256:$src1), (v4f64 VR256:$src2),
                               (imm:$mask))),
            (VBLENDPDYrri VR256:$src1, VR256:$src2, imm:$mask)>;

  def : Pat<(v8i16 (X86Blendi (v8i16 VR128:$src1), (v8i16 VR128:$src2),
                               (imm:$mask))),
            (VPBLENDWrri VR128:$src1, VR128:$src2, imm:$mask)>;
  def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$src1), (v4f32 VR128:$src2),
                               (imm:$mask))),
            (VBLENDPSrri VR128:$src1, VR128:$src2, imm:$mask)>;
  def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$src1), (v2f64 VR128:$src2),
                               (imm:$mask))),
            (VBLENDPDrri VR128:$src1, VR128:$src2, imm:$mask)>;
}

let Predicates = [HasAVX2] in {
  def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1),
                            (v32i8 VR256:$src2))),
            (VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
  def : Pat<(v16i16 (X86Blendi (v16i16 VR256:$src1), (v16i16 VR256:$src2),
                               (imm:$mask))),
            (VPBLENDWYrri VR256:$src1, VR256:$src2, imm:$mask)>;
}

// Patterns
let Predicates = [UseAVX] in {
  let AddedComplexity = 15 in {
  // Move scalar to XMM zero-extended, zeroing a VR128 then do a
  // MOVS{S,D} to the lower bits.
  def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
            (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
            (VBLENDPSrri (v4f32 (V_SET0)), VR128:$src, (i8 1))>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
            (VPBLENDWrri (v4i32 (V_SET0)), VR128:$src, (i8 3))>;
  def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
            (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>;

  // Move low f32 and clear high bits.
  def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
            (VBLENDPSYrri (v8f32 (AVX_SET0)), VR256:$src, (i8 1))>;
  def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
            (VBLENDPSYrri (v8i32 (AVX_SET0)), VR256:$src, (i8 1))>;
  }

  def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
                   (v4f32 (scalar_to_vector FR32:$src)), (iPTR 0)))),
            (SUBREG_TO_REG (i32 0),
                           (v4f32 (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)),
                           sub_xmm)>;
  def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
                   (v2f64 (scalar_to_vector FR64:$src)), (iPTR 0)))),
            (SUBREG_TO_REG (i64 0),
                           (v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)),
                           sub_xmm)>;

  // Move low f64 and clear high bits.
  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
            (VBLENDPDYrri (v4f64 (AVX_SET0)), VR256:$src, (i8 1))>;

  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
            (VBLENDPDYrri (v4i64 (AVX_SET0)), VR256:$src, (i8 1))>;
}

let Predicates = [UseSSE41] in {
  // With SSE41 we can use blends for these patterns.
  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
            (BLENDPSrri (v4f32 (V_SET0)), VR128:$src, (i8 1))>;
  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
            (PBLENDWrri (v4i32 (V_SET0)), VR128:$src, (i8 3))>;
  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
            (BLENDPDrri (v2f64 (V_SET0)), VR128:$src, (i8 1))>;
}


/// SS41I_ternary_int - SSE 4.1 ternary operator
// The third intrinsic operand is the fixed register XMM0 (declared through
// Uses), and $src1 is tied to $dst.
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
  multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
                               X86MemOperand x86memop, Intrinsic IntId,
                               OpndItins itins = DEFAULT_ITINS> {
    def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
                    (ins VR128:$src1, VR128:$src2),
                    !strconcat(OpcodeStr,
                     "\t{$src2, $dst|$dst, $src2}"),
                    [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))],
                    itins.rr>, Sched<[itins.Sched]>;

    def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
                    (ins VR128:$src1, x86memop:$src2),
                    !strconcat(OpcodeStr,
                     "\t{$src2, $dst|$dst, $src2}"),
                    [(set VR128:$dst,
                      (IntId VR128:$src1,
                       (bitconvert (mem_frag addr:$src2)), XMM0))],
                    itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}

let ExeDomain = SSEPackedDouble in
defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", memopv2f64, f128mem,
                                  int_x86_sse41_blendvpd,
                                  DEFAULT_ITINS_FBLENDSCHED>;
let ExeDomain = SSEPackedSingle in
defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", memopv4f32, f128mem,
                                  int_x86_sse41_blendvps,
                                  DEFAULT_ITINS_FBLENDSCHED>;
defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memopv2i64, i128mem,
                                  int_x86_sse41_pblendvb,
                                  DEFAULT_ITINS_VARBLENDSCHED>;

// Aliases with the implicit xmm0 argument
def : InstAlias<"blendvpd\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
                (BLENDVPDrr0 VR128:$dst, VR128:$src2)>;
def : InstAlias<"blendvpd\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
                (BLENDVPDrm0 VR128:$dst, f128mem:$src2)>;
def : InstAlias<"blendvps\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
                (BLENDVPSrr0 VR128:$dst, VR128:$src2)>;
def : InstAlias<"blendvps\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
                (BLENDVPSrm0 VR128:$dst, f128mem:$src2)>;
def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
                (PBLENDVBrr0 VR128:$dst, VR128:$src2)>;
def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
                (PBLENDVBrm0 VR128:$dst, i128mem:$src2)>;

// Non-VEX selection patterns: the vselect mask is matched in XMM0, and
// $src2 is passed before $src1 to the variable-blend instruction.
let Predicates = [UseSSE41] in {
  def : Pat<(v16i8 (vselect (v16i8 XMM0), (v16i8 VR128:$src1),
                            (v16i8 VR128:$src2))),
            (PBLENDVBrr0 VR128:$src2, VR128:$src1)>;
  def : Pat<(v4i32 (vselect (v4i32 XMM0), (v4i32 VR128:$src1),
                            (v4i32 VR128:$src2))),
            (BLENDVPSrr0 VR128:$src2, VR128:$src1)>;
  def : Pat<(v4f32 (vselect (v4i32 XMM0), (v4f32 VR128:$src1),
                            (v4f32 VR128:$src2))),
            (BLENDVPSrr0 VR128:$src2, VR128:$src1)>;
  def : Pat<(v2i64 (vselect (v2i64 XMM0), (v2i64 VR128:$src1),
                            (v2i64 VR128:$src2))),
            (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
  def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1),
                            (v2f64 VR128:$src2))),
            (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;

  def : Pat<(v8i16 (X86Blendi (v8i16 VR128:$src1), (v8i16 VR128:$src2),
                               (imm:$mask))),
            (PBLENDWrri VR128:$src1, VR128:$src2, imm:$mask)>;
  def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$src1), (v4f32 VR128:$src2),
                               (imm:$mask))),
            (BLENDPSrri VR128:$src1, VR128:$src2, imm:$mask)>;
  def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$src1), (v2f64 VR128:$src2),
                               (imm:$mask))),
            (BLENDPDrri VR128:$src1, VR128:$src2, imm:$mask)>;

}

let SchedRW = [WriteLoad] in {
let Predicates = [HasAVX] in
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                       "vmovntdqa\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
                       VEX;
let Predicates = [HasAVX2] in
def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
                         "vmovntdqa\t{$src, $dst|$dst, $src}",
                         [(set VR256:$dst, (int_x86_avx2_movntdqa addr:$src))]>,
                         VEX, VEX_L;
def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                       "movntdqa\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>;
} // SchedRW

//===----------------------------------------------------------------------===//
// SSE4.2 - Compare Instructions
//===----------------------------------------------------------------------===//

/// SS42I_binop_rm - Simple SSE 4.2 binary operator
// Unlike SS48I_binop_rm, the memory form passes the loaded value to OpNode
// without a bitconvert, and no Sched<> is attached to either form.
multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
                          X86MemOperand x86memop, bit Is2Addr = 1> {
  def rr : SS428I<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>;
  def rm : SS428I<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst,
         (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>;
}

let Predicates = [HasAVX] in
  defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128,
                                 loadv2i64, i128mem, 0>, VEX_4V;

let Predicates = [HasAVX2] in
  defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256,
                                  loadv4i64, i256mem, 0>, VEX_4V, VEX_L;

let Constraints = "$src1 = $dst" in
  defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128,
                                memopv2i64, i128mem>;

//===----------------------------------------------------------------------===//
// SSE4.2 - String/text Processing Instructions
//===----------------------------------------------------------------------===//

// Packed Compare Implicit Length Strings, Return Mask
multiclass pseudo_pcmpistrm<string asm> {
  def REG : PseudoI<(outs VR128:$dst),
                    (ins VR128:$src1, VR128:$src2, i8imm:$src3),
    [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2,
                                                  imm:$src3))]>;
  def MEM : PseudoI<(outs VR128:$dst),
                    (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
    [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1,
                       (bc_v16i8 (memopv2i64 addr:$src2)), imm:$src3))]>;
}

let Defs = [EFLAGS], usesCustomInserter = 1 in {
  defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>;
  defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[UseSSE42]>;
}

// Encodable forms: no explicit outputs and no selection pattern; the
// enclosing `let` below declares XMM0 and EFLAGS as implicit definitions.
multiclass pcmpistrm_SS42AI<string asm> {
  def rr : SS42AI<0x62, MRMSrcReg, (outs),
    (ins VR128:$src1, VR128:$src2, i8imm:$src3),
    !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
    []>, Sched<[WritePCmpIStrM]>;
  let mayLoad = 1 in
  def rm :SS42AI<0x62, MRMSrcMem, (outs),
    (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
    !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
    []>, Sched<[WritePCmpIStrMLd, ReadAfterLd]>;
}

let Defs = [XMM0, EFLAGS], hasSideEffects = 0 in {
  let Predicates = [HasAVX] in
  defm VPCMPISTRM128 : pcmpistrm_SS42AI<"vpcmpistrm">, VEX;
  defm PCMPISTRM128  : pcmpistrm_SS42AI<"pcmpistrm"> ;
}

// Packed Compare Explicit Length Strings, Return Mask
multiclass pseudo_pcmpestrm<string asm> {
  def REG : PseudoI<(outs VR128:$dst),
                    (ins VR128:$src1, VR128:$src3, i8imm:$src5),
    [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
7612f4a2713aSLionel Sambuc VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>; 7613f4a2713aSLionel Sambuc def MEM : PseudoI<(outs VR128:$dst), 7614f4a2713aSLionel Sambuc (ins VR128:$src1, i128mem:$src3, i8imm:$src5), 7615f4a2713aSLionel Sambuc [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 VR128:$src1, EAX, 7616f4a2713aSLionel Sambuc (bc_v16i8 (memopv2i64 addr:$src3)), EDX, imm:$src5))]>; 7617f4a2713aSLionel Sambuc} 7618f4a2713aSLionel Sambuc 7619f4a2713aSLionel Sambuclet Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { 7620f4a2713aSLionel Sambuc defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>; 7621f4a2713aSLionel Sambuc defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[UseSSE42]>; 7622f4a2713aSLionel Sambuc} 7623f4a2713aSLionel Sambuc 7624f4a2713aSLionel Sambucmulticlass SS42AI_pcmpestrm<string asm> { 7625f4a2713aSLionel Sambuc def rr : SS42AI<0x60, MRMSrcReg, (outs), 7626f4a2713aSLionel Sambuc (ins VR128:$src1, VR128:$src3, i8imm:$src5), 7627f4a2713aSLionel Sambuc !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), 7628*0a6a1f1dSLionel Sambuc []>, Sched<[WritePCmpEStrM]>; 7629f4a2713aSLionel Sambuc let mayLoad = 1 in 7630f4a2713aSLionel Sambuc def rm : SS42AI<0x60, MRMSrcMem, (outs), 7631f4a2713aSLionel Sambuc (ins VR128:$src1, i128mem:$src3, i8imm:$src5), 7632f4a2713aSLionel Sambuc !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), 7633*0a6a1f1dSLionel Sambuc []>, Sched<[WritePCmpEStrMLd, ReadAfterLd]>; 7634f4a2713aSLionel Sambuc} 7635f4a2713aSLionel Sambuc 7636*0a6a1f1dSLionel Sambuclet Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in { 7637f4a2713aSLionel Sambuc let Predicates = [HasAVX] in 7638f4a2713aSLionel Sambuc defm VPCMPESTRM128 : SS42AI_pcmpestrm<"vpcmpestrm">, VEX; 7639f4a2713aSLionel Sambuc defm PCMPESTRM128 : SS42AI_pcmpestrm<"pcmpestrm">; 7640f4a2713aSLionel Sambuc} 7641f4a2713aSLionel Sambuc 7642f4a2713aSLionel Sambuc// Packed Compare Implicit Length 
// Strings, Return Index
//
// Selection-time pseudos for the implicit-length "return index" compare.
// Both forms produce GR32:$dst together with EFLAGS from X86pcmpistri.
//   REG - second vector operand in a register.
//   MEM - second vector operand loaded via memopv2i64 and bitcast to v16i8.
// The `asm` parameter is accepted but not referenced by either def.
multiclass pseudo_pcmpistri<string asm> {
  def REG : PseudoI<(outs GR32:$dst),
                    (ins VR128:$src1, VR128:$src2, i8imm:$src3),
                    [(set GR32:$dst, EFLAGS,
                      (X86pcmpistri VR128:$src1, VR128:$src2, imm:$src3))]>;
  def MEM : PseudoI<(outs GR32:$dst),
                    (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
                    [(set GR32:$dst, EFLAGS,
                      (X86pcmpistri VR128:$src1,
                                    (bc_v16i8 (memopv2i64 addr:$src2)),
                                    imm:$src3))]>;
}

// NOTE(review): usesCustomInserter = 1 suggests these pseudos are expanded to
// the rr/rm encodings below by the target's custom inserter - confirm there.
let Defs = [EFLAGS], usesCustomInserter = 1 in {
  defm VPCMPISTRI : pseudo_pcmpistri<"#VPCMPISTRI">, Requires<[HasAVX]>;
  defm PCMPISTRI  : pseudo_pcmpistri<"#PCMPISTRI">, Requires<[UseSSE42]>;
}

// Encodable pcmpistri forms (opcode 0x63). They declare no explicit outputs
// and carry no selection pattern; the registers written are supplied by the
// enclosing `let Defs = [...]`.
multiclass SS42AI_pcmpistri<string asm> {
  def rr : SS42AI<0x63, MRMSrcReg, (outs),
                  (ins VR128:$src1, VR128:$src2, i8imm:$src3),
                  !strconcat(asm,
                             "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
                  []>, Sched<[WritePCmpIStrI]>;
  // No pattern to infer from, so the load is stated explicitly.
  let mayLoad = 1 in
  def rm : SS42AI<0x63, MRMSrcMem, (outs),
                  (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
                  !strconcat(asm,
                             "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
                  []>, Sched<[WritePCmpIStrILd, ReadAfterLd]>;
}

let Defs = [ECX, EFLAGS], hasSideEffects = 0 in {
  let Predicates = [HasAVX] in
  defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX;
  defm PCMPISTRI  : SS42AI_pcmpistri<"pcmpistri">;
}

// Packed Compare Explicit Length Strings, Return Index
//
// Same layout as the implicit-length pseudos, except that EAX and EDX appear
// as fixed operands in the X86pcmpestri pattern (positions 2 and 4), which is
// why the explicit operands are named $src1, $src3 and $src5.
// The `asm` parameter is accepted but not referenced by either def.
multiclass pseudo_pcmpestri<string asm> {
  def REG : PseudoI<(outs GR32:$dst),
                    (ins VR128:$src1, VR128:$src3, i8imm:$src5),
                    [(set GR32:$dst, EFLAGS,
                      (X86pcmpestri VR128:$src1, EAX, VR128:$src3, EDX,
                                    imm:$src5))]>;
  def MEM : PseudoI<(outs GR32:$dst),
                    (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
                    [(set GR32:$dst, EFLAGS,
                      (X86pcmpestri VR128:$src1, EAX,
                                    (bc_v16i8 (memopv2i64 addr:$src3)), EDX,
                                    imm:$src5))]>;
}

let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
  defm VPCMPESTRI : pseudo_pcmpestri<"#VPCMPESTRI">, Requires<[HasAVX]>;
  defm PCMPESTRI  : pseudo_pcmpestri<"#PCMPESTRI">, Requires<[UseSSE42]>;
}

// Encodable pcmpestri forms (opcode 0x61). No explicit outputs and no
// selection pattern; implicit register uses/defs come from the enclosing let.
multiclass SS42AI_pcmpestri<string asm> {
  def rr : SS42AI<0x61, MRMSrcReg, (outs),
                  (ins VR128:$src1, VR128:$src3, i8imm:$src5),
                  !strconcat(asm,
                             "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
                  []>, Sched<[WritePCmpEStrI]>;
  // No pattern to infer from, so the load is stated explicitly.
  let mayLoad = 1 in
  def rm : SS42AI<0x61, MRMSrcMem, (outs),
                  (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
                  !strconcat(asm,
                             "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
                  []>, Sched<[WritePCmpEStrILd, ReadAfterLd]>;
}

let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
  let Predicates = [HasAVX] in
  defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX;
  defm PCMPESTRI  : SS42AI_pcmpestri<"pcmpestri">;
}

//===----------------------------------------------------------------------===//
// SSE4.2 - CRC Instructions
//===----------------------------------------------------------------------===//

// No CRC instructions have AVX equivalents

// crc intrinsic instruction
// These instructions come only in reg/reg and reg/mem forms; the only
// difference between them is the size of r and m.
// crc32 with a register source.
//   opc   - opcode byte.
//   asm   - mnemonic, including the {b}/{w}/{l}/{q} suffix alternative.
//   RCOut - register class of the accumulator ($src1, and $dst).
//   RCIn  - register class of the data operand ($src2).
//   Int   - pattern operator applied as (Int $src1, $src2).
// NOTE(review): scheduled with the FP-add write class (WriteFAdd); this looks
// like a placeholder rather than a deliberate choice - confirm against the
// scheduling models.
class SS42I_crc32r<bits<8> opc, string asm, RegisterClass RCOut,
                   RegisterClass RCIn, SDPatternOperator Int> :
  SS42FI<opc, MRMSrcReg, (outs RCOut:$dst), (ins RCOut:$src1, RCIn:$src2),
         !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
         [(set RCOut:$dst, (Int RCOut:$src1, RCIn:$src2))], IIC_CRC32_REG>,
  Sched<[WriteFAdd]>;

// crc32 with a memory source; identical to SS42I_crc32r except that $src2 is
// an x86memop and the pattern wraps it in (load addr:$src2).
class SS42I_crc32m<bits<8> opc, string asm, RegisterClass RCOut,
                   X86MemOperand x86memop, SDPatternOperator Int> :
  SS42FI<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2),
         !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
         [(set RCOut:$dst, (Int RCOut:$src1, (load addr:$src2)))],
         IIC_CRC32_MEM>,
  Sched<[WriteFAddLd, ReadAfterLd]>;

// All crc32 forms are two-address: the accumulator is tied to the result.
let Constraints = "$src1 = $dst" in {
  def CRC32r32m8  : SS42I_crc32m<0xF0, "crc32{b}", GR32, i8mem,
                                 int_x86_sse42_crc32_32_8>;
  def CRC32r32r8  : SS42I_crc32r<0xF0, "crc32{b}", GR32, GR8,
                                 int_x86_sse42_crc32_32_8>;
  def CRC32r32m16 : SS42I_crc32m<0xF1, "crc32{w}", GR32, i16mem,
                                 int_x86_sse42_crc32_32_16>, OpSize16;
  def CRC32r32r16 : SS42I_crc32r<0xF1, "crc32{w}", GR32, GR16,
                                 int_x86_sse42_crc32_32_16>, OpSize16;
  def CRC32r32m32 : SS42I_crc32m<0xF1, "crc32{l}", GR32, i32mem,
                                 int_x86_sse42_crc32_32_32>, OpSize32;
  def CRC32r32r32 : SS42I_crc32r<0xF1, "crc32{l}", GR32, GR32,
                                 int_x86_sse42_crc32_32_32>, OpSize32;
  def CRC32r64m64 : SS42I_crc32m<0xF1, "crc32{q}", GR64, i64mem,
                                 int_x86_sse42_crc32_64_64>, REX_W;
  def CRC32r64r64 : SS42I_crc32r<0xF1, "crc32{q}", GR64, GR64,
                                 int_x86_sse42_crc32_64_64>, REX_W;
  // The 64-bit-accumulator / 8-bit-data forms pass null_frag as the pattern
  // operator, so hasSideEffects and mayLoad are spelled out here.
  let hasSideEffects = 0 in {
    let mayLoad = 1 in
    def CRC32r64m8 : SS42I_crc32m<0xF0, "crc32{b}", GR64, i8mem,
                                  null_frag>, REX_W;
    def CRC32r64r8 : SS42I_crc32r<0xF0, "crc32{b}", GR64, GR8,
                                  null_frag>, REX_W;
  }
}

//===----------------------------------------------------------------------===//
// SHA-NI Instructions
//===----------------------------------------------------------------------===//

// Two-operand SHA instruction in rr and rm flavours.
//   Opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   IntId     - intrinsic matched by the pattern.
//   UsesXMM0  - when set, XMM0 is passed to the intrinsic as a third operand.
// The rm form loads $src2 via memopv2i64 and bitcasts it to v4i32.
multiclass SHAI_binop<bits<8> Opc, string OpcodeStr, Intrinsic IntId,
                      bit UsesXMM0 = 0> {
  def rr : I<Opc, MRMSrcReg, (outs VR128:$dst),
             (ins VR128:$src1, VR128:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
             [!if(UsesXMM0,
                  (set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0)),
                  (set VR128:$dst, (IntId VR128:$src1, VR128:$src2)))]>, T8;

  def rm : I<Opc, MRMSrcMem, (outs VR128:$dst),
             (ins VR128:$src1, i128mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
             [!if(UsesXMM0,
                  (set VR128:$dst, (IntId VR128:$src1,
                                    (bc_v4i32 (memopv2i64 addr:$src2)), XMM0)),
                  (set VR128:$dst, (IntId VR128:$src1,
                                    (bc_v4i32 (memopv2i64 addr:$src2)))))]>,
           T8;
}

let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
  // sha1rnds4 is the only SHA instruction here with an immediate operand, so
  // it is written out by hand instead of going through SHAI_binop.
  def SHA1RNDS4rri : Ii8<0xCC, MRMSrcReg, (outs VR128:$dst),
                         (ins VR128:$src1, VR128:$src2, i8imm:$src3),
                         "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                         [(set VR128:$dst,
                           (int_x86_sha1rnds4 VR128:$src1, VR128:$src2,
                            (i8 imm:$src3)))]>, TA;
  def SHA1RNDS4rmi : Ii8<0xCC, MRMSrcMem, (outs VR128:$dst),
                         (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
                         "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
                         [(set VR128:$dst,
                           (int_x86_sha1rnds4 VR128:$src1,
                            (bc_v4i32 (memopv2i64 addr:$src2)),
                            (i8 imm:$src3)))]>, TA;

  defm SHA1NEXTE : SHAI_binop<0xC8, "sha1nexte", int_x86_sha1nexte>;
  defm SHA1MSG1  : SHAI_binop<0xC9, "sha1msg1", int_x86_sha1msg1>;
  defm SHA1MSG2  : SHAI_binop<0xCA, "sha1msg2", int_x86_sha1msg2>;

  // sha256rnds2 reads XMM0 implicitly; UsesXMM0 = 1 adds it to the pattern.
  let Uses=[XMM0] in
  defm SHA256RNDS2 : SHAI_binop<0xCB, "sha256rnds2", int_x86_sha256rnds2, 1>;

  defm SHA256MSG1 : SHAI_binop<0xCC, "sha256msg1", int_x86_sha256msg1>;
  defm SHA256MSG2 : SHAI_binop<0xCD, "sha256msg2", int_x86_sha256msg2>;
}

//
// Aliases with explicit %xmm0
// These accept the spelling that names the implicit XMM0 operand and map it
// onto the SHA256RNDS2rr / SHA256RNDS2rm records.
def : InstAlias<"sha256rnds2\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
                (SHA256RNDS2rr VR128:$dst, VR128:$src2)>;
def : InstAlias<"sha256rnds2\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
                (SHA256RNDS2rm VR128:$dst, i128mem:$src2)>;

//===----------------------------------------------------------------------===//
// AES-NI Instructions
//===----------------------------------------------------------------------===//

// Binary AES operation selected through a 128-bit intrinsic, in rr and rm
// flavours.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   IntId128  - intrinsic matched by the pattern.
//   Is2Addr   - 1 selects the two-operand asm string ($src2, $dst);
//               0 selects the three-operand string ($src2, $src1, $dst).
// The rm form loads $src2 via memopv2i64.
multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
                             Intrinsic IntId128, bit Is2Addr = 1> {
  def rr : AES8I<opc, MRMSrcReg, (outs VR128:$dst),
                 (ins VR128:$src1, VR128:$src2),
                 !if(Is2Addr,
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     !strconcat(OpcodeStr,
                                "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                 [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
           Sched<[WriteAESDecEnc]>;
  def rm : AES8I<opc, MRMSrcMem, (outs VR128:$dst),
                 (ins VR128:$src1, i128mem:$src2),
                 !if(Is2Addr,
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     !strconcat(OpcodeStr,
                                "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                 [(set VR128:$dst,
                   (IntId128 VR128:$src1, (memopv2i64 addr:$src2)))]>,
           Sched<[WriteAESDecEncLd, ReadAfterLd]>;
}

// Perform One Round of an AES
// Encryption/Decryption Flow
//
// VEX-encoded forms: Is2Addr = 0 selects the three-operand asm string.
let Predicates = [HasAVX, HasAES] in {
  defm VAESENC     : AESI_binop_rm_int<0xDC, "vaesenc",
                                       int_x86_aesni_aesenc, 0>, VEX_4V;
  defm VAESENCLAST : AESI_binop_rm_int<0xDD, "vaesenclast",
                                       int_x86_aesni_aesenclast, 0>, VEX_4V;
  defm VAESDEC     : AESI_binop_rm_int<0xDE, "vaesdec",
                                       int_x86_aesni_aesdec, 0>, VEX_4V;
  defm VAESDECLAST : AESI_binop_rm_int<0xDF, "vaesdeclast",
                                       int_x86_aesni_aesdeclast, 0>, VEX_4V;
}

// Legacy-encoded forms: two-address, so $src1 is tied to $dst and the default
// Is2Addr = 1 asm string is used.
let Constraints = "$src1 = $dst" in {
  defm AESENC     : AESI_binop_rm_int<0xDC, "aesenc",
                                      int_x86_aesni_aesenc>;
  defm AESENCLAST : AESI_binop_rm_int<0xDD, "aesenclast",
                                      int_x86_aesni_aesenclast>;
  defm AESDEC     : AESI_binop_rm_int<0xDE, "aesdec",
                                      int_x86_aesni_aesdec>;
  defm AESDECLAST : AESI_binop_rm_int<0xDF, "aesdeclast",
                                      int_x86_aesni_aesdeclast>;
}

// Perform the AES InvMixColumn Transformation
//
// Unary operation: one source, one destination. The VEX memory form loads via
// loadv2i64; the legacy memory form further down loads via memopv2i64.
let Predicates = [HasAVX, HasAES] in {
  def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
                        (ins VR128:$src1),
                        "vaesimc\t{$src1, $dst|$dst, $src1}",
                        [(set VR128:$dst,
                          (int_x86_aesni_aesimc VR128:$src1))]>,
                  Sched<[WriteAESIMC]>, VEX;
  def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
                        (ins i128mem:$src1),
                        "vaesimc\t{$src1, $dst|$dst, $src1}",
                        [(set VR128:$dst,
                          (int_x86_aesni_aesimc (loadv2i64 addr:$src1)))]>,
                  Sched<[WriteAESIMCLd]>, VEX;
}
def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
                     (ins VR128:$src1),
                     "aesimc\t{$src1, $dst|$dst, $src1}",
                     [(set VR128:$dst,
                       (int_x86_aesni_aesimc VR128:$src1))]>,
               Sched<[WriteAESIMC]>;
def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
                     (ins i128mem:$src1),
                     "aesimc\t{$src1, $dst|$dst, $src1}",
                     [(set VR128:$dst,
                       (int_x86_aesni_aesimc (memopv2i64 addr:$src1)))]>,
               Sched<[WriteAESIMCLd]>;

// AES Round Key Generation Assist
//
// VEX-encoded forms; $src2 is the 8-bit immediate passed to the intrinsic.
let Predicates = [HasAVX, HasAES] in {
  def VAESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, i8imm:$src2),
      "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set VR128:$dst,
        (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
      Sched<[WriteAESKeyGen]>, VEX;
  def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
      (ins i128mem:$src1, i8imm:$src2),
      "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set VR128:$dst,
        (int_x86_aesni_aeskeygenassist (loadv2i64 addr:$src1), imm:$src2))]>,
      Sched<[WriteAESKeyGenLd]>, VEX;
}
Sambucdef AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst), 7904f4a2713aSLionel Sambuc (ins VR128:$src1, i8imm:$src2), 7905f4a2713aSLionel Sambuc "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", 7906f4a2713aSLionel Sambuc [(set VR128:$dst, 7907f4a2713aSLionel Sambuc (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>, 7908*0a6a1f1dSLionel Sambuc Sched<[WriteAESKeyGen]>; 7909f4a2713aSLionel Sambucdef AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), 7910f4a2713aSLionel Sambuc (ins i128mem:$src1, i8imm:$src2), 7911f4a2713aSLionel Sambuc "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", 7912f4a2713aSLionel Sambuc [(set VR128:$dst, 7913f4a2713aSLionel Sambuc (int_x86_aesni_aeskeygenassist (memopv2i64 addr:$src1), imm:$src2))]>, 7914*0a6a1f1dSLionel Sambuc Sched<[WriteAESKeyGenLd]>; 7915f4a2713aSLionel Sambuc 7916f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===// 7917f4a2713aSLionel Sambuc// PCLMUL Instructions 7918f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===// 7919f4a2713aSLionel Sambuc 7920f4a2713aSLionel Sambuc// AVX carry-less Multiplication instructions 7921f4a2713aSLionel Sambucdef VPCLMULQDQrr : AVXPCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), 7922f4a2713aSLionel Sambuc (ins VR128:$src1, VR128:$src2, i8imm:$src3), 7923f4a2713aSLionel Sambuc "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 7924f4a2713aSLionel Sambuc [(set VR128:$dst, 7925*0a6a1f1dSLionel Sambuc (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))]>, 7926*0a6a1f1dSLionel Sambuc Sched<[WriteCLMul]>; 7927f4a2713aSLionel Sambuc 7928f4a2713aSLionel Sambucdef VPCLMULQDQrm : AVXPCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), 7929f4a2713aSLionel Sambuc (ins VR128:$src1, i128mem:$src2, i8imm:$src3), 7930f4a2713aSLionel Sambuc "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 7931f4a2713aSLionel Sambuc 
[(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1, 7932*0a6a1f1dSLionel Sambuc (loadv2i64 addr:$src2), imm:$src3))]>, 7933*0a6a1f1dSLionel Sambuc Sched<[WriteCLMulLd, ReadAfterLd]>; 7934f4a2713aSLionel Sambuc 7935f4a2713aSLionel Sambuc// Carry-less Multiplication instructions 7936f4a2713aSLionel Sambuclet Constraints = "$src1 = $dst" in { 7937f4a2713aSLionel Sambucdef PCLMULQDQrr : PCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), 7938f4a2713aSLionel Sambuc (ins VR128:$src1, VR128:$src2, i8imm:$src3), 7939f4a2713aSLionel Sambuc "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", 7940f4a2713aSLionel Sambuc [(set VR128:$dst, 7941f4a2713aSLionel Sambuc (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))], 7942*0a6a1f1dSLionel Sambuc IIC_SSE_PCLMULQDQ_RR>, Sched<[WriteCLMul]>; 7943f4a2713aSLionel Sambuc 7944f4a2713aSLionel Sambucdef PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), 7945f4a2713aSLionel Sambuc (ins VR128:$src1, i128mem:$src2, i8imm:$src3), 7946f4a2713aSLionel Sambuc "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", 7947f4a2713aSLionel Sambuc [(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1, 7948f4a2713aSLionel Sambuc (memopv2i64 addr:$src2), imm:$src3))], 7949*0a6a1f1dSLionel Sambuc IIC_SSE_PCLMULQDQ_RM>, 7950*0a6a1f1dSLionel Sambuc Sched<[WriteCLMulLd, ReadAfterLd]>; 7951f4a2713aSLionel Sambuc} // Constraints = "$src1 = $dst" 7952f4a2713aSLionel Sambuc 7953f4a2713aSLionel Sambuc 7954f4a2713aSLionel Sambucmulticlass pclmul_alias<string asm, int immop> { 7955f4a2713aSLionel Sambuc def : InstAlias<!strconcat("pclmul", asm, "dq {$src, $dst|$dst, $src}"), 7956*0a6a1f1dSLionel Sambuc (PCLMULQDQrr VR128:$dst, VR128:$src, immop), 0>; 7957f4a2713aSLionel Sambuc 7958f4a2713aSLionel Sambuc def : InstAlias<!strconcat("pclmul", asm, "dq {$src, $dst|$dst, $src}"), 7959*0a6a1f1dSLionel Sambuc (PCLMULQDQrm VR128:$dst, i128mem:$src, immop), 0>; 7960f4a2713aSLionel Sambuc 7961f4a2713aSLionel Sambuc def : InstAlias<!strconcat("vpclmul", asm, 
7962f4a2713aSLionel Sambuc "dq {$src2, $src1, $dst|$dst, $src1, $src2}"), 7963*0a6a1f1dSLionel Sambuc (VPCLMULQDQrr VR128:$dst, VR128:$src1, VR128:$src2, immop), 7964*0a6a1f1dSLionel Sambuc 0>; 7965f4a2713aSLionel Sambuc 7966f4a2713aSLionel Sambuc def : InstAlias<!strconcat("vpclmul", asm, 7967f4a2713aSLionel Sambuc "dq {$src2, $src1, $dst|$dst, $src1, $src2}"), 7968*0a6a1f1dSLionel Sambuc (VPCLMULQDQrm VR128:$dst, VR128:$src1, i128mem:$src2, immop), 7969*0a6a1f1dSLionel Sambuc 0>; 7970f4a2713aSLionel Sambuc} 7971f4a2713aSLionel Sambucdefm : pclmul_alias<"hqhq", 0x11>; 7972f4a2713aSLionel Sambucdefm : pclmul_alias<"hqlq", 0x01>; 7973f4a2713aSLionel Sambucdefm : pclmul_alias<"lqhq", 0x10>; 7974f4a2713aSLionel Sambucdefm : pclmul_alias<"lqlq", 0x00>; 7975f4a2713aSLionel Sambuc 7976f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===// 7977f4a2713aSLionel Sambuc// SSE4A Instructions 7978f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===// 7979f4a2713aSLionel Sambuc 7980f4a2713aSLionel Sambuclet Predicates = [HasSSE4A] in { 7981f4a2713aSLionel Sambuc 7982f4a2713aSLionel Sambuclet Constraints = "$src = $dst" in { 7983*0a6a1f1dSLionel Sambucdef EXTRQI : Ii8<0x78, MRMXr, (outs VR128:$dst), 7984f4a2713aSLionel Sambuc (ins VR128:$src, i8imm:$len, i8imm:$idx), 7985f4a2713aSLionel Sambuc "extrq\t{$idx, $len, $src|$src, $len, $idx}", 7986f4a2713aSLionel Sambuc [(set VR128:$dst, (int_x86_sse4a_extrqi VR128:$src, imm:$len, 7987*0a6a1f1dSLionel Sambuc imm:$idx))]>, PD; 7988f4a2713aSLionel Sambucdef EXTRQ : I<0x79, MRMSrcReg, (outs VR128:$dst), 7989f4a2713aSLionel Sambuc (ins VR128:$src, VR128:$mask), 7990f4a2713aSLionel Sambuc "extrq\t{$mask, $src|$src, $mask}", 7991f4a2713aSLionel Sambuc [(set VR128:$dst, (int_x86_sse4a_extrq VR128:$src, 7992*0a6a1f1dSLionel Sambuc VR128:$mask))]>, PD; 7993f4a2713aSLionel Sambuc 7994f4a2713aSLionel Sambucdef INSERTQI : Ii8<0x78, MRMSrcReg, (outs 
VR128:$dst), 7995f4a2713aSLionel Sambuc (ins VR128:$src, VR128:$src2, i8imm:$len, i8imm:$idx), 7996f4a2713aSLionel Sambuc "insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}", 7997f4a2713aSLionel Sambuc [(set VR128:$dst, (int_x86_sse4a_insertqi VR128:$src, 7998f4a2713aSLionel Sambuc VR128:$src2, imm:$len, imm:$idx))]>, XD; 7999f4a2713aSLionel Sambucdef INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst), 8000f4a2713aSLionel Sambuc (ins VR128:$src, VR128:$mask), 8001f4a2713aSLionel Sambuc "insertq\t{$mask, $src|$src, $mask}", 8002f4a2713aSLionel Sambuc [(set VR128:$dst, (int_x86_sse4a_insertq VR128:$src, 8003f4a2713aSLionel Sambuc VR128:$mask))]>, XD; 8004f4a2713aSLionel Sambuc} 8005f4a2713aSLionel Sambuc 8006f4a2713aSLionel Sambucdef MOVNTSS : I<0x2B, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src), 8007f4a2713aSLionel Sambuc "movntss\t{$src, $dst|$dst, $src}", 8008f4a2713aSLionel Sambuc [(int_x86_sse4a_movnt_ss addr:$dst, VR128:$src)]>, XS; 8009f4a2713aSLionel Sambuc 8010f4a2713aSLionel Sambucdef MOVNTSD : I<0x2B, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), 8011f4a2713aSLionel Sambuc "movntsd\t{$src, $dst|$dst, $src}", 8012f4a2713aSLionel Sambuc [(int_x86_sse4a_movnt_sd addr:$dst, VR128:$src)]>, XD; 8013f4a2713aSLionel Sambuc} 8014f4a2713aSLionel Sambuc 8015f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===// 8016f4a2713aSLionel Sambuc// AVX Instructions 8017f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===// 8018f4a2713aSLionel Sambuc 8019f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===// 8020f4a2713aSLionel Sambuc// VBROADCAST - Load from memory and broadcast to all elements of the 8021f4a2713aSLionel Sambuc// destination operand 8022f4a2713aSLionel Sambuc// 8023f4a2713aSLionel Sambucclass avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC, 8024*0a6a1f1dSLionel Sambuc X86MemOperand 
x86memop, Intrinsic Int, SchedWrite Sched> : 8025f4a2713aSLionel Sambuc AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), 8026f4a2713aSLionel Sambuc !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 8027*0a6a1f1dSLionel Sambuc [(set RC:$dst, (Int addr:$src))]>, Sched<[Sched]>, VEX; 8028*0a6a1f1dSLionel Sambuc 8029*0a6a1f1dSLionel Sambucclass avx_broadcast_no_int<bits<8> opc, string OpcodeStr, RegisterClass RC, 8030*0a6a1f1dSLionel Sambuc X86MemOperand x86memop, ValueType VT, 8031*0a6a1f1dSLionel Sambuc PatFrag ld_frag, SchedWrite Sched> : 8032*0a6a1f1dSLionel Sambuc AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), 8033*0a6a1f1dSLionel Sambuc !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 8034*0a6a1f1dSLionel Sambuc [(set RC:$dst, (VT (X86VBroadcast (ld_frag addr:$src))))]>, 8035*0a6a1f1dSLionel Sambuc Sched<[Sched]>, VEX { 8036*0a6a1f1dSLionel Sambuc let mayLoad = 1; 8037*0a6a1f1dSLionel Sambuc} 8038f4a2713aSLionel Sambuc 8039f4a2713aSLionel Sambuc// AVX2 adds register forms 8040f4a2713aSLionel Sambucclass avx2_broadcast_reg<bits<8> opc, string OpcodeStr, RegisterClass RC, 8041*0a6a1f1dSLionel Sambuc Intrinsic Int, SchedWrite Sched> : 8042f4a2713aSLionel Sambuc AVX28I<opc, MRMSrcReg, (outs RC:$dst), (ins VR128:$src), 8043f4a2713aSLionel Sambuc !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 8044*0a6a1f1dSLionel Sambuc [(set RC:$dst, (Int VR128:$src))]>, Sched<[Sched]>, VEX; 8045f4a2713aSLionel Sambuc 8046f4a2713aSLionel Sambuclet ExeDomain = SSEPackedSingle in { 8047*0a6a1f1dSLionel Sambuc def VBROADCASTSSrm : avx_broadcast_no_int<0x18, "vbroadcastss", VR128, 8048*0a6a1f1dSLionel Sambuc f32mem, v4f32, loadf32, WriteLoad>; 8049*0a6a1f1dSLionel Sambuc def VBROADCASTSSYrm : avx_broadcast_no_int<0x18, "vbroadcastss", VR256, 8050*0a6a1f1dSLionel Sambuc f32mem, v8f32, loadf32, 8051*0a6a1f1dSLionel Sambuc WriteFShuffleLd>, VEX_L; 8052f4a2713aSLionel Sambuc} 8053f4a2713aSLionel Sambuclet ExeDomain = SSEPackedDouble in 8054*0a6a1f1dSLionel 
Sambucdef VBROADCASTSDYrm : avx_broadcast_no_int<0x19, "vbroadcastsd", VR256, f64mem, 8055*0a6a1f1dSLionel Sambuc v4f64, loadf64, WriteFShuffleLd>, VEX_L; 8056f4a2713aSLionel Sambucdef VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem, 8057*0a6a1f1dSLionel Sambuc int_x86_avx_vbroadcastf128_pd_256, 8058*0a6a1f1dSLionel Sambuc WriteFShuffleLd>, VEX_L; 8059f4a2713aSLionel Sambuc 8060f4a2713aSLionel Sambuclet ExeDomain = SSEPackedSingle in { 8061f4a2713aSLionel Sambuc def VBROADCASTSSrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR128, 8062*0a6a1f1dSLionel Sambuc int_x86_avx2_vbroadcast_ss_ps, 8063*0a6a1f1dSLionel Sambuc WriteFShuffle>; 8064f4a2713aSLionel Sambuc def VBROADCASTSSYrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR256, 8065*0a6a1f1dSLionel Sambuc int_x86_avx2_vbroadcast_ss_ps_256, 8066*0a6a1f1dSLionel Sambuc WriteFShuffle256>, VEX_L; 8067f4a2713aSLionel Sambuc} 8068f4a2713aSLionel Sambuclet ExeDomain = SSEPackedDouble in 8069f4a2713aSLionel Sambucdef VBROADCASTSDYrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256, 8070*0a6a1f1dSLionel Sambuc int_x86_avx2_vbroadcast_sd_pd_256, 8071*0a6a1f1dSLionel Sambuc WriteFShuffle256>, VEX_L; 8072f4a2713aSLionel Sambuc 8073f4a2713aSLionel Sambuclet Predicates = [HasAVX2] in 8074f4a2713aSLionel Sambucdef VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem, 8075*0a6a1f1dSLionel Sambuc int_x86_avx2_vbroadcasti128, WriteLoad>, 8076*0a6a1f1dSLionel Sambuc VEX_L; 8077f4a2713aSLionel Sambuc 8078f4a2713aSLionel Sambuclet Predicates = [HasAVX] in 8079f4a2713aSLionel Sambucdef : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src), 8080f4a2713aSLionel Sambuc (VBROADCASTF128 addr:$src)>; 8081f4a2713aSLionel Sambuc 8082f4a2713aSLionel Sambuc 8083f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===// 8084f4a2713aSLionel Sambuc// VINSERTF128 - Insert packed floating-point values 8085f4a2713aSLionel Sambuc// 8086*0a6a1f1dSLionel Sambuclet 
hasSideEffects = 0, ExeDomain = SSEPackedSingle in { 8087f4a2713aSLionel Sambucdef VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst), 8088f4a2713aSLionel Sambuc (ins VR256:$src1, VR128:$src2, i8imm:$src3), 8089f4a2713aSLionel Sambuc "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 8090*0a6a1f1dSLionel Sambuc []>, Sched<[WriteFShuffle]>, VEX_4V, VEX_L; 8091f4a2713aSLionel Sambuclet mayLoad = 1 in 8092f4a2713aSLionel Sambucdef VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst), 8093f4a2713aSLionel Sambuc (ins VR256:$src1, f128mem:$src2, i8imm:$src3), 8094f4a2713aSLionel Sambuc "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 8095*0a6a1f1dSLionel Sambuc []>, Sched<[WriteFShuffleLd, ReadAfterLd]>, VEX_4V, VEX_L; 8096f4a2713aSLionel Sambuc} 8097f4a2713aSLionel Sambuc 8098f4a2713aSLionel Sambuclet Predicates = [HasAVX] in { 8099f4a2713aSLionel Sambucdef : Pat<(vinsert128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2), 8100f4a2713aSLionel Sambuc (iPTR imm)), 8101f4a2713aSLionel Sambuc (VINSERTF128rr VR256:$src1, VR128:$src2, 8102f4a2713aSLionel Sambuc (INSERT_get_vinsert128_imm VR256:$ins))>; 8103f4a2713aSLionel Sambucdef : Pat<(vinsert128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2), 8104f4a2713aSLionel Sambuc (iPTR imm)), 8105f4a2713aSLionel Sambuc (VINSERTF128rr VR256:$src1, VR128:$src2, 8106f4a2713aSLionel Sambuc (INSERT_get_vinsert128_imm VR256:$ins))>; 8107f4a2713aSLionel Sambuc 8108f4a2713aSLionel Sambucdef : Pat<(vinsert128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2), 8109f4a2713aSLionel Sambuc (iPTR imm)), 8110f4a2713aSLionel Sambuc (VINSERTF128rm VR256:$src1, addr:$src2, 8111f4a2713aSLionel Sambuc (INSERT_get_vinsert128_imm VR256:$ins))>; 8112f4a2713aSLionel Sambucdef : Pat<(vinsert128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2), 8113f4a2713aSLionel Sambuc (iPTR imm)), 8114f4a2713aSLionel Sambuc (VINSERTF128rm VR256:$src1, addr:$src2, 8115f4a2713aSLionel Sambuc 
(INSERT_get_vinsert128_imm VR256:$ins))>; 8116f4a2713aSLionel Sambuc} 8117f4a2713aSLionel Sambuc 8118*0a6a1f1dSLionel Sambuc// Combine two consecutive 16-byte loads with a common destination register into 8119*0a6a1f1dSLionel Sambuc// one 32-byte load to that register. 8120*0a6a1f1dSLionel Sambuclet Predicates = [HasAVX, HasFastMem32] in { 8121*0a6a1f1dSLionel Sambuc def : Pat<(insert_subvector 8122*0a6a1f1dSLionel Sambuc (v8f32 (insert_subvector undef, (loadv4f32 addr:$src), (iPTR 0))), 8123*0a6a1f1dSLionel Sambuc (loadv4f32 (add addr:$src, (iPTR 16))), 8124*0a6a1f1dSLionel Sambuc (iPTR 4)), 8125*0a6a1f1dSLionel Sambuc (VMOVUPSYrm addr:$src)>; 8126*0a6a1f1dSLionel Sambuc 8127*0a6a1f1dSLionel Sambuc def : Pat<(insert_subvector 8128*0a6a1f1dSLionel Sambuc (v4f64 (insert_subvector undef, (loadv2f64 addr:$src), (iPTR 0))), 8129*0a6a1f1dSLionel Sambuc (loadv2f64 (add addr:$src, (iPTR 16))), 8130*0a6a1f1dSLionel Sambuc (iPTR 2)), 8131*0a6a1f1dSLionel Sambuc (VMOVUPDYrm addr:$src)>; 8132*0a6a1f1dSLionel Sambuc 8133*0a6a1f1dSLionel Sambuc def : Pat<(insert_subvector 8134*0a6a1f1dSLionel Sambuc (v32i8 (insert_subvector 8135*0a6a1f1dSLionel Sambuc undef, (bc_v16i8 (loadv2i64 addr:$src)), (iPTR 0))), 8136*0a6a1f1dSLionel Sambuc (bc_v16i8 (loadv2i64 (add addr:$src, (iPTR 16)))), 8137*0a6a1f1dSLionel Sambuc (iPTR 16)), 8138*0a6a1f1dSLionel Sambuc (VMOVDQUYrm addr:$src)>; 8139*0a6a1f1dSLionel Sambuc 8140*0a6a1f1dSLionel Sambuc def : Pat<(insert_subvector 8141*0a6a1f1dSLionel Sambuc (v16i16 (insert_subvector 8142*0a6a1f1dSLionel Sambuc undef, (bc_v8i16 (loadv2i64 addr:$src)), (iPTR 0))), 8143*0a6a1f1dSLionel Sambuc (bc_v8i16 (loadv2i64 (add addr:$src, (iPTR 16)))), 8144*0a6a1f1dSLionel Sambuc (iPTR 8)), 8145*0a6a1f1dSLionel Sambuc (VMOVDQUYrm addr:$src)>; 8146*0a6a1f1dSLionel Sambuc 8147*0a6a1f1dSLionel Sambuc def : Pat<(insert_subvector 8148*0a6a1f1dSLionel Sambuc (v8i32 (insert_subvector 8149*0a6a1f1dSLionel Sambuc undef, (bc_v4i32 (loadv2i64 addr:$src)), (iPTR 0))), 
8150*0a6a1f1dSLionel Sambuc (bc_v4i32 (loadv2i64 (add addr:$src, (iPTR 16)))), 8151*0a6a1f1dSLionel Sambuc (iPTR 4)), 8152*0a6a1f1dSLionel Sambuc (VMOVDQUYrm addr:$src)>; 8153*0a6a1f1dSLionel Sambuc 8154*0a6a1f1dSLionel Sambuc def : Pat<(insert_subvector 8155*0a6a1f1dSLionel Sambuc (v4i64 (insert_subvector undef, (loadv2i64 addr:$src), (iPTR 0))), 8156*0a6a1f1dSLionel Sambuc (loadv2i64 (add addr:$src, (iPTR 16))), 8157*0a6a1f1dSLionel Sambuc (iPTR 2)), 8158*0a6a1f1dSLionel Sambuc (VMOVDQUYrm addr:$src)>; 8159*0a6a1f1dSLionel Sambuc} 8160*0a6a1f1dSLionel Sambuc 8161f4a2713aSLionel Sambuclet Predicates = [HasAVX1Only] in { 8162f4a2713aSLionel Sambucdef : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2), 8163f4a2713aSLionel Sambuc (iPTR imm)), 8164f4a2713aSLionel Sambuc (VINSERTF128rr VR256:$src1, VR128:$src2, 8165f4a2713aSLionel Sambuc (INSERT_get_vinsert128_imm VR256:$ins))>; 8166f4a2713aSLionel Sambucdef : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2), 8167f4a2713aSLionel Sambuc (iPTR imm)), 8168f4a2713aSLionel Sambuc (VINSERTF128rr VR256:$src1, VR128:$src2, 8169f4a2713aSLionel Sambuc (INSERT_get_vinsert128_imm VR256:$ins))>; 8170f4a2713aSLionel Sambucdef : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2), 8171f4a2713aSLionel Sambuc (iPTR imm)), 8172f4a2713aSLionel Sambuc (VINSERTF128rr VR256:$src1, VR128:$src2, 8173f4a2713aSLionel Sambuc (INSERT_get_vinsert128_imm VR256:$ins))>; 8174f4a2713aSLionel Sambucdef : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2), 8175f4a2713aSLionel Sambuc (iPTR imm)), 8176f4a2713aSLionel Sambuc (VINSERTF128rr VR256:$src1, VR128:$src2, 8177f4a2713aSLionel Sambuc (INSERT_get_vinsert128_imm VR256:$ins))>; 8178f4a2713aSLionel Sambuc 8179f4a2713aSLionel Sambucdef : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2), 8180f4a2713aSLionel Sambuc (iPTR imm)), 8181f4a2713aSLionel Sambuc (VINSERTF128rm VR256:$src1, addr:$src2, 
8182f4a2713aSLionel Sambuc (INSERT_get_vinsert128_imm VR256:$ins))>; 8183f4a2713aSLionel Sambucdef : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1), 8184f4a2713aSLionel Sambuc (bc_v4i32 (loadv2i64 addr:$src2)), 8185f4a2713aSLionel Sambuc (iPTR imm)), 8186f4a2713aSLionel Sambuc (VINSERTF128rm VR256:$src1, addr:$src2, 8187f4a2713aSLionel Sambuc (INSERT_get_vinsert128_imm VR256:$ins))>; 8188f4a2713aSLionel Sambucdef : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1), 8189f4a2713aSLionel Sambuc (bc_v16i8 (loadv2i64 addr:$src2)), 8190f4a2713aSLionel Sambuc (iPTR imm)), 8191f4a2713aSLionel Sambuc (VINSERTF128rm VR256:$src1, addr:$src2, 8192f4a2713aSLionel Sambuc (INSERT_get_vinsert128_imm VR256:$ins))>; 8193f4a2713aSLionel Sambucdef : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1), 8194f4a2713aSLionel Sambuc (bc_v8i16 (loadv2i64 addr:$src2)), 8195f4a2713aSLionel Sambuc (iPTR imm)), 8196f4a2713aSLionel Sambuc (VINSERTF128rm VR256:$src1, addr:$src2, 8197f4a2713aSLionel Sambuc (INSERT_get_vinsert128_imm VR256:$ins))>; 8198f4a2713aSLionel Sambuc} 8199f4a2713aSLionel Sambuc 8200f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===// 8201f4a2713aSLionel Sambuc// VEXTRACTF128 - Extract packed floating-point values 8202f4a2713aSLionel Sambuc// 8203*0a6a1f1dSLionel Sambuclet hasSideEffects = 0, ExeDomain = SSEPackedSingle in { 8204f4a2713aSLionel Sambucdef VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst), 8205f4a2713aSLionel Sambuc (ins VR256:$src1, i8imm:$src2), 8206f4a2713aSLionel Sambuc "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}", 8207*0a6a1f1dSLionel Sambuc []>, Sched<[WriteFShuffle]>, VEX, VEX_L; 8208f4a2713aSLionel Sambuclet mayStore = 1 in 8209f4a2713aSLionel Sambucdef VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs), 8210f4a2713aSLionel Sambuc (ins f128mem:$dst, VR256:$src1, i8imm:$src2), 8211f4a2713aSLionel Sambuc "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}", 8212*0a6a1f1dSLionel Sambuc 
[]>, Sched<[WriteStore]>, VEX, VEX_L; 8213f4a2713aSLionel Sambuc} 8214f4a2713aSLionel Sambuc 8215f4a2713aSLionel Sambuc// AVX1 patterns 8216f4a2713aSLionel Sambuclet Predicates = [HasAVX] in { 8217f4a2713aSLionel Sambucdef : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)), 8218f4a2713aSLionel Sambuc (v4f32 (VEXTRACTF128rr 8219f4a2713aSLionel Sambuc (v8f32 VR256:$src1), 8220f4a2713aSLionel Sambuc (EXTRACT_get_vextract128_imm VR128:$ext)))>; 8221f4a2713aSLionel Sambucdef : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)), 8222f4a2713aSLionel Sambuc (v2f64 (VEXTRACTF128rr 8223f4a2713aSLionel Sambuc (v4f64 VR256:$src1), 8224f4a2713aSLionel Sambuc (EXTRACT_get_vextract128_imm VR128:$ext)))>; 8225f4a2713aSLionel Sambuc 8226f4a2713aSLionel Sambucdef : Pat<(store (v4f32 (vextract128_extract:$ext (v8f32 VR256:$src1), 8227f4a2713aSLionel Sambuc (iPTR imm))), addr:$dst), 8228f4a2713aSLionel Sambuc (VEXTRACTF128mr addr:$dst, VR256:$src1, 8229f4a2713aSLionel Sambuc (EXTRACT_get_vextract128_imm VR128:$ext))>; 8230f4a2713aSLionel Sambucdef : Pat<(store (v2f64 (vextract128_extract:$ext (v4f64 VR256:$src1), 8231f4a2713aSLionel Sambuc (iPTR imm))), addr:$dst), 8232f4a2713aSLionel Sambuc (VEXTRACTF128mr addr:$dst, VR256:$src1, 8233f4a2713aSLionel Sambuc (EXTRACT_get_vextract128_imm VR128:$ext))>; 8234f4a2713aSLionel Sambuc} 8235f4a2713aSLionel Sambuc 8236f4a2713aSLionel Sambuclet Predicates = [HasAVX1Only] in { 8237f4a2713aSLionel Sambucdef : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)), 8238f4a2713aSLionel Sambuc (v2i64 (VEXTRACTF128rr 8239f4a2713aSLionel Sambuc (v4i64 VR256:$src1), 8240f4a2713aSLionel Sambuc (EXTRACT_get_vextract128_imm VR128:$ext)))>; 8241f4a2713aSLionel Sambucdef : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)), 8242f4a2713aSLionel Sambuc (v4i32 (VEXTRACTF128rr 8243f4a2713aSLionel Sambuc (v8i32 VR256:$src1), 8244f4a2713aSLionel Sambuc (EXTRACT_get_vextract128_imm VR128:$ext)))>; 8245f4a2713aSLionel Sambucdef : 
Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)), 8246f4a2713aSLionel Sambuc (v8i16 (VEXTRACTF128rr 8247f4a2713aSLionel Sambuc (v16i16 VR256:$src1), 8248f4a2713aSLionel Sambuc (EXTRACT_get_vextract128_imm VR128:$ext)))>; 8249f4a2713aSLionel Sambucdef : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)), 8250f4a2713aSLionel Sambuc (v16i8 (VEXTRACTF128rr 8251f4a2713aSLionel Sambuc (v32i8 VR256:$src1), 8252f4a2713aSLionel Sambuc (EXTRACT_get_vextract128_imm VR128:$ext)))>; 8253f4a2713aSLionel Sambuc 8254f4a2713aSLionel Sambucdef : Pat<(alignedstore (v2i64 (vextract128_extract:$ext (v4i64 VR256:$src1), 8255f4a2713aSLionel Sambuc (iPTR imm))), addr:$dst), 8256f4a2713aSLionel Sambuc (VEXTRACTF128mr addr:$dst, VR256:$src1, 8257f4a2713aSLionel Sambuc (EXTRACT_get_vextract128_imm VR128:$ext))>; 8258f4a2713aSLionel Sambucdef : Pat<(alignedstore (v4i32 (vextract128_extract:$ext (v8i32 VR256:$src1), 8259f4a2713aSLionel Sambuc (iPTR imm))), addr:$dst), 8260f4a2713aSLionel Sambuc (VEXTRACTF128mr addr:$dst, VR256:$src1, 8261f4a2713aSLionel Sambuc (EXTRACT_get_vextract128_imm VR128:$ext))>; 8262f4a2713aSLionel Sambucdef : Pat<(alignedstore (v8i16 (vextract128_extract:$ext (v16i16 VR256:$src1), 8263f4a2713aSLionel Sambuc (iPTR imm))), addr:$dst), 8264f4a2713aSLionel Sambuc (VEXTRACTF128mr addr:$dst, VR256:$src1, 8265f4a2713aSLionel Sambuc (EXTRACT_get_vextract128_imm VR128:$ext))>; 8266f4a2713aSLionel Sambucdef : Pat<(alignedstore (v16i8 (vextract128_extract:$ext (v32i8 VR256:$src1), 8267f4a2713aSLionel Sambuc (iPTR imm))), addr:$dst), 8268f4a2713aSLionel Sambuc (VEXTRACTF128mr addr:$dst, VR256:$src1, 8269f4a2713aSLionel Sambuc (EXTRACT_get_vextract128_imm VR128:$ext))>; 8270f4a2713aSLionel Sambuc} 8271f4a2713aSLionel Sambuc 8272f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===// 8273f4a2713aSLionel Sambuc// VMASKMOV - Conditional SIMD Packed Loads and Stores 8274f4a2713aSLionel Sambuc// 8275f4a2713aSLionel 
/// avx_movmask_rm - Defines the four records of one masked-move instruction.
///   rm / Yrm : MRMSrcMem forms with a VR128 / VR256 result, selected for the
///              IntLd / IntLd256 intrinsic.
///   mr / Ymr : MRMDestMem forms with no register result, selected for the
///              IntSt / IntSt256 intrinsic.
/// opc_rm is the opcode of both load forms and opc_mr the opcode of both store
/// forms.  The 256-bit variants additionally carry VEX_L.
multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
                          Intrinsic IntLd, Intrinsic IntLd256,
                          Intrinsic IntSt, Intrinsic IntSt256> {
  // The load intrinsics are matched as (address, $src1), i.e. in the reverse
  // of the order used by the (ins ...) operand list.
  def rm  : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst),
             (ins VR128:$src1, f128mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>,
             VEX_4V;
  def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst),
             (ins VR256:$src1, f256mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
             VEX_4V, VEX_L;
  // The store intrinsics are matched as (address, $src1, $src2), the same
  // order as the (ins ...) operand list.
  def mr  : AVX8I<opc_mr, MRMDestMem, (outs),
             (ins f128mem:$dst, VR128:$src1, VR128:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>, VEX_4V;
  def Ymr : AVX8I<opc_mr, MRMDestMem, (outs),
             (ins f256mem:$dst, VR256:$src1, VR256:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V, VEX_L;
}

// Single-precision instantiation: load opcode 0x2C, store opcode 0x2E.
let ExeDomain = SSEPackedSingle in
defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps",
                                 int_x86_avx_maskload_ps,
                                 int_x86_avx_maskload_ps_256,
                                 int_x86_avx_maskstore_ps,
                                 int_x86_avx_maskstore_ps_256>;
// Double-precision masked move: load opcode 0x2D, store opcode 0x2F.
let ExeDomain = SSEPackedDouble in
defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
                                 int_x86_avx_maskload_pd,
                                 int_x86_avx_maskload_pd_256,
                                 int_x86_avx_maskstore_pd,
                                 int_x86_avx_maskstore_pd_256>;

//===----------------------------------------------------------------------===//
// VPERMIL - Permute Single and Double Floating-Point Values
//
/// avx_permil - Defines the four forms of one VPERMIL instruction on register
/// class RC.
///   rr / rm : opcode opc_rm, second operand is a register or an x86memop_i
///             memory operand; selected for the IntVar intrinsic.  The rm
///             form loads the second operand through i_frag and bitconverts
///             it.
///   ri / mi : opcode opc_rmi, second operand is an 8-bit immediate; selected
///             for X86VPermilpi with result type vt.  The mi form loads the
///             first operand from an x86memop_f memory operand through memop.
multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
                      RegisterClass RC, X86MemOperand x86memop_f,
                      X86MemOperand x86memop_i, PatFrag i_frag,
                      Intrinsic IntVar, ValueType vt> {
  def rr  : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst),
             (ins RC:$src1, RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst, (IntVar RC:$src1, RC:$src2))]>, VEX_4V,
             Sched<[WriteFShuffle]>;
  def rm  : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
             (ins RC:$src1, x86memop_i:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst, (IntVar RC:$src1,
                             (bitconvert (i_frag addr:$src2))))]>, VEX_4V,
             Sched<[WriteFShuffleLd, ReadAfterLd]>;

  def ri  : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
             (ins RC:$src1, i8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst, (vt (X86VPermilpi RC:$src1, (i8 imm:$src2))))]>, VEX,
             Sched<[WriteFShuffle]>;
  def mi  : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
             (ins x86memop_f:$src1, i8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst,
               (vt (X86VPermilpi (memop addr:$src1), (i8 imm:$src2))))]>, VEX,
             Sched<[WriteFShuffleLd]>;
}

// 128-bit and 256-bit instantiations; the 256-bit ("Y") ones add VEX_L.
let ExeDomain = SSEPackedSingle in {
  defm VPERMILPS  : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
                               loadv2i64, int_x86_avx_vpermilvar_ps, v4f32>;
  defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
                       loadv4i64, int_x86_avx_vpermilvar_ps_256, v8f32>, VEX_L;
}
let ExeDomain = SSEPackedDouble in {
  defm VPERMILPD  : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
                               loadv2i64, int_x86_avx_vpermilvar_pd, v2f64>;
  defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
                       loadv4i64, int_x86_avx_vpermilvar_pd_256, v4f64>, VEX_L;
}

let Predicates = [HasAVX] in {
// 256-bit variable permutes (X86VPermilpv) select the rr/rm forms.
def : Pat<(v8f32 (X86VPermilpv VR256:$src1, (v8i32 VR256:$src2))),
          (VPERMILPSYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v8f32 (X86VPermilpv VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)))),
          (VPERMILPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(v4f64 (X86VPermilpv VR256:$src1, (v4i64 VR256:$src2))),
          (VPERMILPDYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v4f64 (X86VPermilpv VR256:$src1, (loadv4i64 addr:$src2))),
          (VPERMILPDYrm VR256:$src1, addr:$src2)>;

// 256-bit integer-typed immediate permutes are mapped onto the FP
// instructions of matching element width.
def : Pat<(v8i32 (X86VPermilpi VR256:$src1, (i8 imm:$imm))),
          (VPERMILPSYri VR256:$src1, imm:$imm)>;
def : Pat<(v4i64 (X86VPermilpi VR256:$src1, (i8 imm:$imm))),
          (VPERMILPDYri VR256:$src1, imm:$imm)>;
def : Pat<(v8i32 (X86VPermilpi (bc_v8i32 (loadv4i64 addr:$src1)),
                               (i8 imm:$imm))),
          (VPERMILPSYmi addr:$src1, imm:$imm)>;
def : Pat<(v4i64 (X86VPermilpi (loadv4i64 addr:$src1), (i8 imm:$imm))),
          (VPERMILPDYmi addr:$src1, imm:$imm)>;

// 128-bit variable permutes.
def : Pat<(v4f32 (X86VPermilpv VR128:$src1, (v4i32 VR128:$src2))),
          (VPERMILPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4f32 (X86VPermilpv VR128:$src1, (bc_v4i32 (loadv2i64 addr:$src2)))),
          (VPERMILPSrm VR128:$src1, addr:$src2)>;
def : Pat<(v2f64 (X86VPermilpv VR128:$src1, (v2i64 VR128:$src2))),
          (VPERMILPDrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2f64 (X86VPermilpv VR128:$src1, (loadv2i64 addr:$src2))),
          (VPERMILPDrm VR128:$src1, addr:$src2)>;

// 128-bit integer-typed immediate permutes (only v2i64 is handled here).
def : Pat<(v2i64 (X86VPermilpi VR128:$src1, (i8 imm:$imm))),
          (VPERMILPDri VR128:$src1, imm:$imm)>;
def : Pat<(v2i64 (X86VPermilpi (loadv2i64 addr:$src1), (i8 imm:$imm))),
          (VPERMILPDmi addr:$src1, imm:$imm)>;
}

//===----------------------------------------------------------------------===//
// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
//
// The instruction records themselves match the v8f32 form of X86VPerm2x128
// (the rm form through loadv8f32); other element types are covered by the
// standalone patterns that follow.
let ExeDomain = SSEPackedSingle in {
def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
          (ins VR256:$src1, VR256:$src2, i8imm:$src3),
          "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          [(set VR256:$dst, (v8f32 (X86VPerm2x128 VR256:$src1, VR256:$src2,
                              (i8 imm:$src3))))]>, VEX_4V, VEX_L,
          Sched<[WriteFShuffle]>;
def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
          (ins VR256:$src1, f256mem:$src2, i8imm:$src3),
          "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv8f32 addr:$src2),
                             (i8 imm:$src3)))]>, VEX_4V, VEX_L,
          Sched<[WriteFShuffleLd, ReadAfterLd]>;
}

// v4f64 uses the same instruction whenever AVX is available.
let Predicates = [HasAVX] in {
def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1,
                  (loadv4f64 addr:$src2), (i8 imm:$imm))),
          (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
}

// Integer element types use VPERM2F128 only under the HasAVX1Only predicate.
let Predicates = [HasAVX1Only] in {
def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;

def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1,
                  (bc_v8i32 (loadv4i64 addr:$src2)), (i8 imm:$imm))),
          (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1,
                  (loadv4i64 addr:$src2), (i8 imm:$imm))),
          (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1,
                  (bc_v32i8 (loadv4i64 addr:$src2)), (i8 imm:$imm))),
          (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1,
                  (bc_v16i16 (loadv4i64 addr:$src2)), (i8 imm:$imm))),
          (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
}

//===----------------------------------------------------------------------===//
// VZERO - Zero YMM registers
//
// Both records share opcode 0x77 in RawFrm form and are marked as defining
// YMM0-YMM15; VZEROALL carries VEX_L and VZEROUPPER does not.
let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
            YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15] in {
  // Zero All YMM registers
  def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
                  [(int_x86_avx_vzeroall)]>, PS, VEX, VEX_L, Requires<[HasAVX]>;

  // Zero Upper bits of YMM registers
  def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
                     [(int_x86_avx_vzeroupper)]>, PS, VEX, Requires<[HasAVX]>;
}

//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//===----------------------------------------------------------------------===//
/// f16c_ph2ps - vcvtph2ps (opcode 0x13) producing a result in RC.  The rr form
/// takes a VR128 source and is selected for the Int intrinsic.  The rm form
/// reads an x86memop operand and has an empty pattern list, so hasSideEffects
/// and mayLoad are set explicitly on it.
multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
  def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
             "vcvtph2ps\t{$src, $dst|$dst, $src}",
             [(set RC:$dst, (Int VR128:$src))]>,
             T8PD, VEX, Sched<[WriteCvtF2F]>;
  let hasSideEffects = 0, mayLoad = 1 in
  def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
             "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8PD, VEX,
             Sched<[WriteCvtF2FLd]>;
}

/// f16c_ps2ph - vcvtps2ph (opcode 0x1D) taking an RC source and an immediate.
/// The rr form writes a VR128 result and is selected for the Int intrinsic.
/// The mr form writes to an x86memop operand and has an empty pattern list, so
/// hasSideEffects, mayStore and SchedRW are set explicitly on it.
multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
  def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
               (ins RC:$src1, i32i8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set VR128:$dst, (Int RC:$src1, imm:$src2))]>,
               TAPD, VEX, Sched<[WriteCvtF2F]>;
  let hasSideEffects = 0, mayStore = 1,
      SchedRW = [WriteCvtF2FLd, WriteRMW] in
  def mr : Ii8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, RC:$src1, i32i8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               TAPD, VEX;
}

// The 128-bit forms use an f64mem memory operand and the 256-bit forms an
// f128mem one.
let Predicates = [HasF16C] in {
  defm VCVTPH2PS  : f16c_ph2ps<VR128, f64mem, int_x86_vcvtph2ps_128>;
  defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, int_x86_vcvtph2ps_256>, VEX_L;
  defm VCVTPS2PH  : f16c_ps2ph<VR128, f64mem, int_x86_vcvtps2ph_128>;
  defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, int_x86_vcvtps2ph_256>, VEX_L;

  // Pattern match vcvtph2ps of a scalar i64 load.
  def : Pat<(int_x86_vcvtph2ps_128 (vzmovl_v2i64 addr:$src)),
            (VCVTPH2PSrm addr:$src)>;
  def : Pat<(int_x86_vcvtph2ps_128 (vzload_v2i64 addr:$src)),
            (VCVTPH2PSrm addr:$src)>;
}

// Patterns for matching conversions from float to half-float and vice versa.
// F16C scalar conversions. A scalar half<->single conversion is selected
// through the packed VCVTPS2PH / VCVTPH2PS register forms, with the value
// copied into VR128 around the conversion.
let Predicates = [HasF16C] in {
  // f32 -> f16: convert in VR128 (immediate operand 0), then take the
  // sub_16bit sub-register of the VMOVPDI2DIrr result.
  def : Pat<(fp_to_f16 FR32:$src),
            (i16 (EXTRACT_SUBREG (VMOVPDI2DIrr (VCVTPS2PHrr
              (COPY_TO_REGCLASS FR32:$src, VR128), 0)), sub_16bit))>;

  // f16 -> f32: the GR16 input is passed through MOVSX32rr16, copied into
  // VR128, converted, and the result copied to FR32.
  def : Pat<(f16_to_fp GR16:$src),
            (f32 (COPY_TO_REGCLASS (VCVTPH2PSrr
              (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128)), FR32)) >;

  // Round trip f32 -> f16 -> f32: both conversions are chained in VR128
  // with no intermediate GR16 copy.
  def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32:$src))),
            (f32 (COPY_TO_REGCLASS (VCVTPH2PSrr
              (VCVTPS2PHrr (COPY_TO_REGCLASS FR32:$src, VR128), 0)), FR32)) >;
}

//===----------------------------------------------------------------------===//
// AVX2 Instructions
//===----------------------------------------------------------------------===//

/// AVX2_binop_rmi_int - AVX2 binary operator with 8-bit immediate
///
/// Defines two records per instantiation:
///   rri - register/register/imm8 form (marked isCommutable),
///   rmi - register/memory/imm8 form; the loaded value is obtained through
///         memop_frag and bitconverted to the intrinsic's operand type.
/// Both forms are selected from the intrinsic IntId.
multiclass AVX2_binop_rmi_int<bits<8> opc, string OpcodeStr,
                 Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
                 X86MemOperand x86memop> {
  let isCommutable = 1 in
  def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst),
        (ins RC:$src1, RC:$src2, i8imm:$src3),
        !strconcat(OpcodeStr,
            "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
        [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
        Sched<[WriteBlend]>, VEX_4V;
  def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst),
        (ins RC:$src1, x86memop:$src2, i8imm:$src3),
        !strconcat(OpcodeStr,
            "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
        [(set RC:$dst,
          (IntId RC:$src1,
           (bitconvert (memop_frag addr:$src2)), imm:$src3))]>,
        Sched<[WriteBlendLd, ReadAfterLd]>, VEX_4V;
}

// 128-bit and 256-bit VPBLENDD; the 256-bit variant adds VEX_L.
defm VPBLENDD : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128,
                                   VR128, loadv2i64, i128mem>;
defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256,
                                    VR256, loadv4i64, i256mem>, VEX_L;

// Select the X86Blendi node on 32-bit integer elements with VPBLENDD.
// NOTE(review): these two patterns are not wrapped in a Predicates block --
// confirm that is intended.
def : Pat<(v4i32 (X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2),
                  imm:$mask)),
          (VPBLENDDrri VR128:$src1, VR128:$src2, imm:$mask)>;
def : Pat<(v8i32 (X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2),
                  imm:$mask)),
          (VPBLENDDYrri VR256:$src1, VR256:$src2, imm:$mask)>;

//===----------------------------------------------------------------------===//
// VPBROADCAST - Broadcast a scalar element, taken from memory or from a
//               128-bit register, to all elements of the destination operand
//
// The multiclass defines four records per instantiation:
//   rr / rm   - 128-bit destination, source in VR128 / memory,
//   Yrr / Yrm - 256-bit destination (VEX_L), source in VR128 / memory.
// The memory forms wrap the scalar load in scalar_to_vector before handing it
// to the intrinsic.
multiclass avx2_broadcast<bits<8> opc, string OpcodeStr,
                          X86MemOperand x86memop, PatFrag ld_frag,
                          Intrinsic Int128, Intrinsic Int256> {
  def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                  [(set VR128:$dst, (Int128 VR128:$src))]>,
                  Sched<[WriteShuffle]>, VEX;
  def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                  [(set VR128:$dst,
                    (Int128 (scalar_to_vector (ld_frag addr:$src))))]>,
                  Sched<[WriteLoad]>, VEX;
  def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set VR256:$dst, (Int256 VR128:$src))]>,
                   Sched<[WriteShuffle256]>, VEX, VEX_L;
  def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins x86memop:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set VR256:$dst,
                    (Int256 (scalar_to_vector (ld_frag addr:$src))))]>,
                   Sched<[WriteLoad]>, VEX, VEX_L;
}

defm VPBROADCASTB : avx2_broadcast<0x78, "vpbroadcastb", i8mem, loadi8,
                                   int_x86_avx2_pbroadcastb_128,
                                   int_x86_avx2_pbroadcastb_256>;
defm VPBROADCASTW : avx2_broadcast<0x79, "vpbroadcastw", i16mem, loadi16,
                                   int_x86_avx2_pbroadcastw_128,
                                   int_x86_avx2_pbroadcastw_256>;
defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32,
                                   int_x86_avx2_pbroadcastd_128,
                                   int_x86_avx2_pbroadcastd_256>;
defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64,
                                   int_x86_avx2_pbroadcastq_128,
                                   int_x86_avx2_pbroadcastq_256>;

// Selection of the X86VBroadcast node when AVX2 is available.
let Predicates = [HasAVX2] in {
  // Broadcast of a scalar integer load: fold the load into the memory form.
  def : Pat<(v16i8 (X86VBroadcast (loadi8 addr:$src))),
          (VPBROADCASTBrm addr:$src)>;
  def : Pat<(v32i8 (X86VBroadcast (loadi8 addr:$src))),
          (VPBROADCASTBYrm addr:$src)>;
  def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))),
          (VPBROADCASTWrm addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))),
          (VPBROADCASTWYrm addr:$src)>;
  def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
          (VPBROADCASTDrm addr:$src)>;
  def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
          (VPBROADCASTDYrm addr:$src)>;
  def : Pat<(v2i64 (X86VBroadcast (loadi64 addr:$src))),
          (VPBROADCASTQrm addr:$src)>;
  def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
          (VPBROADCASTQYrm addr:$src)>;

  // Broadcast from a 128-bit vector register to a 128-bit or 256-bit result.
  def : Pat<(v16i8 (X86VBroadcast (v16i8 VR128:$src))),
          (VPBROADCASTBrr VR128:$src)>;
  def : Pat<(v32i8 (X86VBroadcast (v16i8 VR128:$src))),
          (VPBROADCASTBYrr VR128:$src)>;
  def : Pat<(v8i16 (X86VBroadcast (v8i16 VR128:$src))),
          (VPBROADCASTWrr VR128:$src)>;
  def : Pat<(v16i16 (X86VBroadcast (v8i16 VR128:$src))),
          (VPBROADCASTWYrr VR128:$src)>;
  def : Pat<(v4i32 (X86VBroadcast (v4i32 VR128:$src))),
          (VPBROADCASTDrr VR128:$src)>;
  def : Pat<(v8i32 (X86VBroadcast (v4i32 VR128:$src))),
          (VPBROADCASTDYrr VR128:$src)>;
  def : Pat<(v2i64 (X86VBroadcast (v2i64 VR128:$src))),
          (VPBROADCASTQrr VR128:$src)>;
  def : Pat<(v4i64 (X86VBroadcast (v2i64 VR128:$src))),
          (VPBROADCASTQYrr VR128:$src)>;
  def : Pat<(v4f32 (X86VBroadcast (v4f32 VR128:$src))),
          (VBROADCASTSSrr VR128:$src)>;
  def : Pat<(v8f32 (X86VBroadcast (v4f32 VR128:$src))),
          (VBROADCASTSSYrr VR128:$src)>;
  // The 128-bit f64 case is selected to the integer VPBROADCASTQrr form.
  def : Pat<(v2f64 (X86VBroadcast (v2f64 VR128:$src))),
          (VPBROADCASTQrr VR128:$src)>;
  def : Pat<(v4f64 (X86VBroadcast (v2f64 VR128:$src))),
          (VBROADCASTSDYrr VR128:$src)>;

  // Provide aliases for broadcast from the same register class that
  // automatically does the extract.
  def : Pat<(v32i8 (X86VBroadcast (v32i8 VR256:$src))),
            (VPBROADCASTBYrr (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src),
                                                    sub_xmm)))>;
  def : Pat<(v16i16 (X86VBroadcast (v16i16 VR256:$src))),
            (VPBROADCASTWYrr (v8i16 (EXTRACT_SUBREG (v16i16 VR256:$src),
                                                    sub_xmm)))>;
  def : Pat<(v8i32 (X86VBroadcast (v8i32 VR256:$src))),
            (VPBROADCASTDYrr (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src),
                                                    sub_xmm)))>;
  def : Pat<(v4i64 (X86VBroadcast (v4i64 VR256:$src))),
            (VPBROADCASTQYrr (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src),
                                                    sub_xmm)))>;
  def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256:$src))),
            (VBROADCASTSSYrr (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src),
                                                    sub_xmm)))>;
  def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256:$src))),
            (VBROADCASTSDYrr (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src),
                                                    sub_xmm)))>;

  // Provide fallback in case the load node that is used in the patterns above
  // is used by additional users, which prevents the pattern selection.
  let AddedComplexity = 20 in {
    // Scalar FP register sources: copy into VR128 and broadcast.
    def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
              (VBROADCASTSSrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
    def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
              (VBROADCASTSSYrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
    def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
              (VBROADCASTSDYrr (COPY_TO_REGCLASS FR64:$src, VR128))>;

    // 32/64-bit GPR sources reuse the VBROADCASTSS/SD register forms.
    def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
              (VBROADCASTSSrr (COPY_TO_REGCLASS GR32:$src, VR128))>;
    def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
              (VBROADCASTSSYrr (COPY_TO_REGCLASS GR32:$src, VR128))>;
    def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
              (VBROADCASTSDYrr (COPY_TO_REGCLASS GR64:$src, VR128))>;

    // 8-bit GPR sources are placed in a 32-bit register via SUBREG_TO_REG
    // before the copy into VR128.
    def : Pat<(v16i8 (X86VBroadcast GR8:$src)),
          (VPBROADCASTBrr (COPY_TO_REGCLASS
                           (i32 (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)),
                           VR128))>;
    def : Pat<(v32i8 (X86VBroadcast GR8:$src)),
          (VPBROADCASTBYrr (COPY_TO_REGCLASS
                            (i32 (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)),
                            VR128))>;

    // 16-bit GPR sources follow the same scheme with sub_16bit.
    def : Pat<(v8i16 (X86VBroadcast GR16:$src)),
          (VPBROADCASTWrr (COPY_TO_REGCLASS
                           (i32 (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit)),
                           VR128))>;
    def : Pat<(v16i16 (X86VBroadcast GR16:$src)),
          (VPBROADCASTWYrr (COPY_TO_REGCLASS
                            (i32 (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit)),
                            VR128))>;

    // The patterns for VPBROADCASTD are not needed because they would match
    // the exact same thing as VBROADCASTSS patterns.

    def : Pat<(v2i64 (X86VBroadcast GR64:$src)),
          (VPBROADCASTQrr (COPY_TO_REGCLASS GR64:$src, VR128))>;
    // The v4i64 pattern is not needed because VBROADCASTSDYrr already matches.
  }
}

// AVX1 broadcast patterns
// Integer broadcasts of a scalar load use the VBROADCASTSS/SD memory forms
// when only AVX1 is available.
let Predicates = [HasAVX1Only] in {
def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
          (VBROADCASTSSYrm addr:$src)>;
def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
          (VBROADCASTSDYrm addr:$src)>;
def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
          (VBROADCASTSSrm addr:$src)>;
}

// NOTE(review): the scalar-register source patterns below repeat source
// patterns that also appear in the HasAVX2 block above with the same
// AddedComplexity; which one is selected when both predicates hold is not
// visible from here -- confirm before reordering or editing either block.
let Predicates = [HasAVX] in {
  // Provide fallback in case the load node that is used in the patterns above
  // is used by additional users, which prevents the pattern selection.
  let AddedComplexity = 20 in {
  // 128bit broadcasts:
  def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
            (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0)>;
  // 256-bit results are built from two copies of the 128-bit shuffle: one
  // inserted as sub_xmm of an IMPLICIT_DEF, the other placed with
  // VINSERTF128rr immediate 1.
  def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
            (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
              (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), sub_xmm),
              (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), 1)>;
  def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
            (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
              (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), sub_xmm),
              (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), 1)>;

  def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
            (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0)>;
  def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
            (VINSERTF128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
              (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0), sub_xmm),
              (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0), 1)>;
  def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
            (VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)),
              (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), sub_xmm),
              (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), 1)>;
  }

  // 128-bit f64 broadcast from a scalar register is selected to VMOVDDUPrr.
  def : Pat<(v2f64 (X86VBroadcast f64:$src)),
            (VMOVDDUPrr (COPY_TO_REGCLASS FR64:$src, VR128))>;
}

//===----------------------------------------------------------------------===//
// VPERM - Permute instructions
//

/// avx2_perm - 256-bit permute taking two vector operands ($src1, $src2),
/// selected from the X86VPermv node with result type OpVT.
///
///   Yrr - both operands in VR256.
///   Yrm - $src2 loaded through mem_frag and bitconverted to OpVT.
///
/// The Sched parameter supplies the scheduling class of the register form;
/// the memory form uses Sched.Folded together with ReadAfterLd.
multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
                     ValueType OpVT, X86FoldableSchedWrite Sched> {
  def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
                   (ins VR256:$src1, VR256:$src2),
                   !strconcat(OpcodeStr,
                       "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set VR256:$dst,
                     (OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>,
                   Sched<[Sched]>, VEX_4V, VEX_L;
  def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
                   (ins VR256:$src1, i256mem:$src2),
                   !strconcat(OpcodeStr,
                       "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set VR256:$dst,
                     (OpVT (X86VPermv VR256:$src1,
                            (bitconvert (mem_frag addr:$src2)))))]>,
                   Sched<[Sched.Folded, ReadAfterLd]>, VEX_4V, VEX_L;
}

defm VPERMD : avx2_perm<0x36, "vpermd", loadv4i64, v8i32, WriteShuffle256>;
let ExeDomain = SSEPackedSingle in
defm VPERMPS : avx2_perm<0x16, "vpermps", loadv8f32, v8f32, WriteFShuffle256>;

/// avx2_perm_imm - 256-bit permute of a single vector operand controlled by
/// an 8-bit immediate, selected from the X86VPermi node with result type OpVT.
///
///   Yri - source in VR256.
///   Ymi - source loaded through mem_frag.
multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
                         ValueType OpVT, X86FoldableSchedWrite Sched> {
  def Yri : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst),
                     (ins VR256:$src1, i8imm:$src2),
                     !strconcat(OpcodeStr,
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     [(set VR256:$dst,
                       (OpVT (X86VPermi VR256:$src1, (i8 imm:$src2))))]>,
                     Sched<[Sched]>, VEX, VEX_L;
  def Ymi : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst),
                     (ins i256mem:$src1, i8imm:$src2),
                     !strconcat(OpcodeStr,
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     [(set VR256:$dst,
                       (OpVT (X86VPermi (mem_frag addr:$src1),
                              (i8 imm:$src2))))]>,
                     Sched<[Sched.Folded, ReadAfterLd]>, VEX, VEX_L;
}

defm VPERMQ : avx2_perm_imm<0x00, "vpermq", loadv4i64, v4i64,
                            WriteShuffle256>, VEX_W;
let ExeDomain = SSEPackedDouble in
defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", loadv4f64, v4f64,
                             WriteFShuffle256>, VEX_W;

//===----------------------------------------------------------------------===//
// VPERM2I128 - Permute Integer Values in 128-bit chunks
//
// Both forms are selected from X86VPerm2x128 with the immediate as an i8.
def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
          (ins VR256:$src1, VR256:$src2, i8imm:$src3),
          "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          [(set VR256:$dst, (v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2,
                            (i8 imm:$src3))))]>, Sched<[WriteShuffle256]>,
          VEX_4V, VEX_L;
// The memory operand is declared i256mem: this is the integer permute and its
// pattern loads through loadv4i64, matching the other integer 256-bit memory
// forms in this file (previously declared as f256mem).
def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
          (ins VR256:$src1, i256mem:$src2, i8imm:$src3),
          "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4i64 addr:$src2),
                             (i8 imm:$src3)))]>,
          Sched<[WriteShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L;

// The instruction definitions above carry the v4i64 patterns; these cover the
// remaining 256-bit integer element types.
let Predicates = [HasAVX2] in {
def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;

// Memory forms: the second operand is a v4i64 load bitcast to the element
// type being permuted.
def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, (bc_v32i8 (loadv4i64 addr:$src2)),
                  (i8 imm:$imm))),
          (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1,
                   (bc_v16i16 (loadv4i64 addr:$src2)), (i8 imm:$imm))),
          (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)),
                  (i8 imm:$imm))),
          (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
}


//===----------------------------------------------------------------------===//
// VINSERTI128 - Insert packed integer values
//
// The instruction records carry no pattern of their own (hence the explicit
// hasSideEffects / mayLoad settings); selection is done by the
// vinsert128_insert patterns that follow.
let hasSideEffects = 0 in {
def VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst),
          (ins VR256:$src1, VR128:$src2, i8imm:$src3),
          "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, Sched<[WriteShuffle256]>, VEX_4V, VEX_L;
let mayLoad = 1 in
def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
          (ins VR256:$src1, i128mem:$src2, i8imm:$src3),
          "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, Sched<[WriteShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L;
}

let Predicates = [HasAVX2] in {
// Register source: one pattern per integer element type. The instruction
// immediate is derived from the matched node by INSERT_get_vinsert128_imm.
def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
                                   (iPTR imm)),
          (VINSERTI128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
                                   (iPTR imm)),
          (VINSERTI128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
                                   (iPTR imm)),
          (VINSERTI128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
                                   (iPTR imm)),
          (VINSERTI128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;

// Memory source: the inserted half is a v2i64 load, bitcast for the narrower
// element types.
def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
                                   (iPTR imm)),
          (VINSERTI128rm VR256:$src1, addr:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1),
                                   (bc_v4i32 (loadv2i64 addr:$src2)),
                                   (iPTR imm)),
          (VINSERTI128rm VR256:$src1, addr:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1),
                                   (bc_v16i8 (loadv2i64 addr:$src2)),
                                   (iPTR imm)),
          (VINSERTI128rm VR256:$src1, addr:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1),
                                   (bc_v8i16 (loadv2i64 addr:$src2)),
                                   (iPTR imm)),
          (VINSERTI128rm VR256:$src1, addr:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
}

//===----------------------------------------------------------------------===//
// VEXTRACTI128 - Extract packed integer values
//
// The register form is selected from the int_x86_avx2_vextracti128 intrinsic;
// the store form has no pattern of its own and is selected by the store
// patterns that follow.
def VEXTRACTI128rr : AVX2AIi8<0x39, MRMDestReg, (outs VR128:$dst),
          (ins VR256:$src1, i8imm:$src2),
          "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          [(set VR128:$dst,
            (int_x86_avx2_vextracti128 VR256:$src1, imm:$src2))]>,
          Sched<[WriteShuffle256]>, VEX, VEX_L;
let hasSideEffects = 0, mayStore = 1 in
def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
          (ins i128mem:$dst, VR256:$src1, i8imm:$src2),
          "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
          Sched<[WriteStore]>, VEX, VEX_L;

let Predicates = [HasAVX2] in {
// Extract to register, one pattern per integer element type. The instruction
// immediate is derived from the matched node by EXTRACT_get_vextract128_imm.
def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
          (v2i64 (VEXTRACTI128rr
                    (v4i64 VR256:$src1),
                    (EXTRACT_get_vextract128_imm VR128:$ext)))>;
def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
          (v4i32 (VEXTRACTI128rr
                    (v8i32 VR256:$src1),
                    (EXTRACT_get_vextract128_imm VR128:$ext)))>;
def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
          (v8i16 (VEXTRACTI128rr
                    (v16i16 VR256:$src1),
                    (EXTRACT_get_vextract128_imm VR128:$ext)))>;
def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
          (v16i8 (VEXTRACTI128rr
                    (v32i8 VR256:$src1),
                    (EXTRACT_get_vextract128_imm VR128:$ext)))>;

// Extract followed by a store folds into the memory-destination form.
def : Pat<(store (v2i64 (vextract128_extract:$ext (v4i64 VR256:$src1),
                         (iPTR imm))), addr:$dst),
          (VEXTRACTI128mr addr:$dst, VR256:$src1,
           (EXTRACT_get_vextract128_imm VR128:$ext))>;
def : Pat<(store (v4i32 (vextract128_extract:$ext (v8i32 VR256:$src1),
                         (iPTR imm))), addr:$dst),
          (VEXTRACTI128mr addr:$dst, VR256:$src1,
           (EXTRACT_get_vextract128_imm VR128:$ext))>;
def : Pat<(store (v8i16 (vextract128_extract:$ext (v16i16 VR256:$src1),
                         (iPTR imm))), addr:$dst),
          (VEXTRACTI128mr addr:$dst, VR256:$src1,
           (EXTRACT_get_vextract128_imm VR128:$ext))>;
def : Pat<(store (v16i8 (vextract128_extract:$ext (v32i8 VR256:$src1),
                         (iPTR imm))), addr:$dst),
          (VEXTRACTI128mr addr:$dst, VR256:$src1,
           (EXTRACT_get_vextract128_imm VR128:$ext))>;
}

//===----------------------------------------------------------------------===//
// VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores
//
// Defines the masked load forms (rm / Yrm, opcode 0x8c) and masked store forms
// (mr / Ymr, opcode 0x8e), each selected from the corresponding intrinsic.
// In every form $src1 is passed to the intrinsic next to the address; in the
// store forms $src2 is the final intrinsic operand.
// NOTE(review): unlike the neighbouring AVX2 definitions these records carry
// no Sched<> annotation -- confirm whether that is intentional.
multiclass avx2_pmovmask<string OpcodeStr,
                         Intrinsic IntLd128, Intrinsic IntLd256,
                         Intrinsic IntSt128, Intrinsic IntSt256> {
  def rm  : AVX28I<0x8c, MRMSrcMem, (outs VR128:$dst),
             (ins VR128:$src1, i128mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))]>, VEX_4V;
  def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst),
             (ins VR256:$src1, i256mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
             VEX_4V, VEX_L;
  def mr  : AVX28I<0x8e, MRMDestMem, (outs),
             (ins i128mem:$dst, VR128:$src1, VR128:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>, VEX_4V;
  def Ymr : AVX28I<0x8e, MRMDestMem, (outs),
             (ins i256mem:$dst, VR256:$src1, VR256:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V, VEX_L;
}

defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd",
                                int_x86_avx2_maskload_d,
                                int_x86_avx2_maskload_d_256,
                                int_x86_avx2_maskstore_d,
                                int_x86_avx2_maskstore_d_256>;
defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
                                int_x86_avx2_maskload_q,
                                int_x86_avx2_maskload_q_256,
                                int_x86_avx2_maskstore_q,
                                int_x86_avx2_maskstore_q_256>, VEX_W;

//===----------------------------------------------------------------------===//
// masked_load / masked_store selection
//
// For each element type there is one store pattern and three load patterns,
// distinguished by the pass-through operand:
//   undef       -> the masked load alone,
//   all zeros   -> the masked load alone,
//   a register  -> the masked load followed by a variable blend of the
//                  pass-through value and the loaded value under the mask.
//
// Every pattern is guarded by the feature required by the instructions it
// emits, so a masked node reaching selection on a subtarget without that
// feature is reported as unselectable instead of being silently selected to
// an unavailable instruction. Patterns for different value types never
// compete, and the relative order of the patterns for each value type is
// unchanged.
//
// NOTE(review): the v4f64 and v2f64 zero pass-through patterns match
// (vNf64 immAllZerosV) directly, whereas the v8f32/v4f32/v4i64/v2i64 forms
// match a bitcast of an integer zero vector -- confirm the direct form is the
// one that actually reaches selection.

// Floating-point element types: VMASKMOVPS/PD with VBLENDVPS/PD.
let Predicates = [HasAVX] in {
// 256-bit, 32-bit elements.
def: Pat<(masked_store addr:$ptr, (v8i32 VR256:$mask), (v8f32 VR256:$src)),
         (VMASKMOVPSYmr addr:$ptr, VR256:$mask, VR256:$src)>;

// 128-bit, 32-bit elements.
def: Pat<(masked_store addr:$ptr, (v4i32 VR128:$mask), (v4f32 VR128:$src)),
         (VMASKMOVPSmr addr:$ptr, VR128:$mask, VR128:$src)>;

def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask), undef)),
         (VMASKMOVPSYrm VR256:$mask, addr:$ptr)>;

def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask),
                             (bc_v8f32 (v8i32 immAllZerosV)))),
         (VMASKMOVPSYrm VR256:$mask, addr:$ptr)>;

def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask), (v8f32 VR256:$src0))),
         (VBLENDVPSYrr VR256:$src0, (VMASKMOVPSYrm VR256:$mask, addr:$ptr),
                       VR256:$mask)>;

def: Pat<(v4f32 (masked_load addr:$ptr, (v4i32 VR128:$mask), undef)),
         (VMASKMOVPSrm VR128:$mask, addr:$ptr)>;

def: Pat<(v4f32 (masked_load addr:$ptr, (v4i32 VR128:$mask),
                             (bc_v4f32 (v4i32 immAllZerosV)))),
         (VMASKMOVPSrm VR128:$mask, addr:$ptr)>;

def: Pat<(v4f32 (masked_load addr:$ptr, (v4i32 VR128:$mask), (v4f32 VR128:$src0))),
         (VBLENDVPSrr VR128:$src0, (VMASKMOVPSrm VR128:$mask, addr:$ptr),
                       VR128:$mask)>;

// 256-bit, 64-bit elements.
def: Pat<(masked_store addr:$ptr, (v4i64 VR256:$mask), (v4f64 VR256:$src)),
         (VMASKMOVPDYmr addr:$ptr, VR256:$mask, VR256:$src)>;

def: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask), undef)),
         (VMASKMOVPDYrm VR256:$mask, addr:$ptr)>;

def: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask),
                             (v4f64 immAllZerosV))),
         (VMASKMOVPDYrm VR256:$mask, addr:$ptr)>;

def: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask), (v4f64 VR256:$src0))),
         (VBLENDVPDYrr VR256:$src0, (VMASKMOVPDYrm VR256:$mask, addr:$ptr),
                       VR256:$mask)>;

// 128-bit, 64-bit elements.
def: Pat<(masked_store addr:$ptr, (v2i64 VR128:$mask), (v2f64 VR128:$src)),
         (VMASKMOVPDmr addr:$ptr, VR128:$mask, VR128:$src)>;

def: Pat<(v2f64 (masked_load addr:$ptr, (v2i64 VR128:$mask), undef)),
         (VMASKMOVPDrm VR128:$mask, addr:$ptr)>;

def: Pat<(v2f64 (masked_load addr:$ptr, (v2i64 VR128:$mask),
                             (v2f64 immAllZerosV))),
         (VMASKMOVPDrm VR128:$mask, addr:$ptr)>;

def: Pat<(v2f64 (masked_load addr:$ptr, (v2i64 VR128:$mask), (v2f64 VR128:$src0))),
         (VBLENDVPDrr VR128:$src0, (VMASKMOVPDrm VR128:$mask, addr:$ptr),
                       VR128:$mask)>;
}

// Integer element types: VPMASKMOVD/Q, blended with VBLENDVPS/PD.
let Predicates = [HasAVX2] in {
// 256-bit, 32-bit elements.
def: Pat<(masked_store addr:$ptr, (v8i32 VR256:$mask), (v8i32 VR256:$src)),
         (VPMASKMOVDYmr addr:$ptr, VR256:$mask, VR256:$src)>;

// 128-bit, 32-bit elements.
def: Pat<(masked_store addr:$ptr, (v4i32 VR128:$mask), (v4i32 VR128:$src)),
         (VPMASKMOVDmr addr:$ptr, VR128:$mask, VR128:$src)>;

def: Pat<(v8i32 (masked_load addr:$ptr, (v8i32 VR256:$mask), undef)),
         (VPMASKMOVDYrm VR256:$mask, addr:$ptr)>;

def: Pat<(v8i32 (masked_load addr:$ptr, (v8i32 VR256:$mask), (v8i32 immAllZerosV))),
         (VPMASKMOVDYrm VR256:$mask, addr:$ptr)>;

def: Pat<(v8i32 (masked_load addr:$ptr, (v8i32 VR256:$mask), (v8i32 VR256:$src0))),
         (VBLENDVPSYrr VR256:$src0, (VPMASKMOVDYrm VR256:$mask, addr:$ptr),
                       VR256:$mask)>;

def: Pat<(v4i32 (masked_load addr:$ptr, (v4i32 VR128:$mask), undef)),
         (VPMASKMOVDrm VR128:$mask, addr:$ptr)>;

def: Pat<(v4i32 (masked_load addr:$ptr, (v4i32 VR128:$mask), (v4i32 immAllZerosV))),
         (VPMASKMOVDrm VR128:$mask, addr:$ptr)>;

def: Pat<(v4i32 (masked_load addr:$ptr, (v4i32 VR128:$mask), (v4i32 VR128:$src0))),
         (VBLENDVPSrr VR128:$src0, (VPMASKMOVDrm VR128:$mask, addr:$ptr),
                       VR128:$mask)>;

// 256-bit, 64-bit elements.
def: Pat<(masked_store addr:$ptr, (v4i64 VR256:$mask), (v4i64 VR256:$src)),
         (VPMASKMOVQYmr addr:$ptr, VR256:$mask, VR256:$src)>;

def: Pat<(v4i64 (masked_load addr:$ptr, (v4i64 VR256:$mask), undef)),
         (VPMASKMOVQYrm VR256:$mask, addr:$ptr)>;

def: Pat<(v4i64 (masked_load addr:$ptr, (v4i64 VR256:$mask),
                             (bc_v4i64 (v8i32 immAllZerosV)))),
         (VPMASKMOVQYrm VR256:$mask, addr:$ptr)>;

def: Pat<(v4i64 (masked_load addr:$ptr, (v4i64 VR256:$mask), (v4i64 VR256:$src0))),
         (VBLENDVPDYrr VR256:$src0, (VPMASKMOVQYrm VR256:$mask, addr:$ptr),
                       VR256:$mask)>;

// 128-bit, 64-bit elements.
def: Pat<(masked_store addr:$ptr, (v2i64 VR128:$mask), (v2i64 VR128:$src)),
         (VPMASKMOVQmr addr:$ptr, VR128:$mask, VR128:$src)>;

def: Pat<(v2i64 (masked_load addr:$ptr, (v2i64 VR128:$mask), undef)),
         (VPMASKMOVQrm VR128:$mask, addr:$ptr)>;

def: Pat<(v2i64 (masked_load addr:$ptr, (v2i64 VR128:$mask),
                             (bc_v2i64 (v4i32 immAllZerosV)))),
         (VPMASKMOVQrm VR128:$mask, addr:$ptr)>;

def: Pat<(v2i64 (masked_load addr:$ptr, (v2i64 VR128:$mask), (v2i64 VR128:$src0))),
         (VBLENDVPDrr VR128:$src0, (VPMASKMOVQrm VR128:$mask, addr:$ptr),
                       VR128:$mask)>;
}
9076f4a2713aSLionel Sambuc 9077f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===// 9078f4a2713aSLionel Sambuc// Variable Bit Shifts 9079f4a2713aSLionel Sambuc// 9080f4a2713aSLionel Sambucmulticlass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode, 9081f4a2713aSLionel Sambuc ValueType vt128, ValueType vt256> { 9082f4a2713aSLionel Sambuc def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), 9083f4a2713aSLionel Sambuc (ins VR128:$src1, VR128:$src2), 9084f4a2713aSLionel Sambuc !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 9085f4a2713aSLionel Sambuc [(set VR128:$dst, 9086f4a2713aSLionel Sambuc (vt128 (OpNode VR128:$src1, (vt128 VR128:$src2))))]>, 9087*0a6a1f1dSLionel Sambuc VEX_4V, Sched<[WriteVarVecShift]>; 9088f4a2713aSLionel Sambuc def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), 9089f4a2713aSLionel Sambuc (ins VR128:$src1, i128mem:$src2), 9090f4a2713aSLionel Sambuc !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 9091f4a2713aSLionel Sambuc [(set VR128:$dst, 9092f4a2713aSLionel Sambuc (vt128 (OpNode VR128:$src1, 9093f4a2713aSLionel Sambuc (vt128 (bitconvert (loadv2i64 addr:$src2))))))]>, 9094*0a6a1f1dSLionel Sambuc VEX_4V, Sched<[WriteVarVecShiftLd, ReadAfterLd]>; 9095f4a2713aSLionel Sambuc def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), 9096f4a2713aSLionel 
Sambuc (ins VR256:$src1, VR256:$src2), 9097f4a2713aSLionel Sambuc !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 9098f4a2713aSLionel Sambuc [(set VR256:$dst, 9099f4a2713aSLionel Sambuc (vt256 (OpNode VR256:$src1, (vt256 VR256:$src2))))]>, 9100*0a6a1f1dSLionel Sambuc VEX_4V, VEX_L, Sched<[WriteVarVecShift]>; 9101f4a2713aSLionel Sambuc def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), 9102f4a2713aSLionel Sambuc (ins VR256:$src1, i256mem:$src2), 9103f4a2713aSLionel Sambuc !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 9104f4a2713aSLionel Sambuc [(set VR256:$dst, 9105f4a2713aSLionel Sambuc (vt256 (OpNode VR256:$src1, 9106f4a2713aSLionel Sambuc (vt256 (bitconvert (loadv4i64 addr:$src2))))))]>, 9107*0a6a1f1dSLionel Sambuc VEX_4V, VEX_L, Sched<[WriteVarVecShiftLd, ReadAfterLd]>; 9108f4a2713aSLionel Sambuc} 9109f4a2713aSLionel Sambuc 9110f4a2713aSLionel Sambucdefm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, v4i32, v8i32>; 9111f4a2713aSLionel Sambucdefm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W; 9112f4a2713aSLionel Sambucdefm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>; 9113f4a2713aSLionel Sambucdefm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W; 9114f4a2713aSLionel Sambucdefm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>; 9115f4a2713aSLionel Sambuc 9116f4a2713aSLionel Sambuc//===----------------------------------------------------------------------===// 9117f4a2713aSLionel Sambuc// VGATHER - GATHER Operations 9118f4a2713aSLionel Sambucmulticlass avx2_gather<bits<8> opc, string OpcodeStr, RegisterClass RC256, 9119f4a2713aSLionel Sambuc X86MemOperand memop128, X86MemOperand memop256> { 9120f4a2713aSLionel Sambuc def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst, VR128:$mask_wb), 9121f4a2713aSLionel Sambuc (ins VR128:$src1, memop128:$src2, VR128:$mask), 9122f4a2713aSLionel Sambuc !strconcat(OpcodeStr, 9123f4a2713aSLionel Sambuc "\t{$mask, $src2, 
$dst|$dst, $src2, $mask}"), 9124f4a2713aSLionel Sambuc []>, VEX_4VOp3; 9125f4a2713aSLionel Sambuc def Yrm : AVX28I<opc, MRMSrcMem, (outs RC256:$dst, RC256:$mask_wb), 9126f4a2713aSLionel Sambuc (ins RC256:$src1, memop256:$src2, RC256:$mask), 9127f4a2713aSLionel Sambuc !strconcat(OpcodeStr, 9128f4a2713aSLionel Sambuc "\t{$mask, $src2, $dst|$dst, $src2, $mask}"), 9129f4a2713aSLionel Sambuc []>, VEX_4VOp3, VEX_L; 9130f4a2713aSLionel Sambuc} 9131f4a2713aSLionel Sambuc 9132f4a2713aSLionel Sambuclet mayLoad = 1, Constraints 9133f4a2713aSLionel Sambuc = "@earlyclobber $dst,@earlyclobber $mask_wb, $src1 = $dst, $mask = $mask_wb" 9134f4a2713aSLionel Sambuc in { 9135f4a2713aSLionel Sambuc defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq", VR256, vx64mem, vx64mem>, VEX_W; 9136f4a2713aSLionel Sambuc defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq", VR256, vx64mem, vy64mem>, VEX_W; 9137f4a2713aSLionel Sambuc defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd", VR256, vx32mem, vy32mem>; 9138f4a2713aSLionel Sambuc defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd", VR128, vx32mem, vy32mem>; 9139*0a6a1f1dSLionel Sambuc 9140*0a6a1f1dSLionel Sambuc let ExeDomain = SSEPackedDouble in { 9141*0a6a1f1dSLionel Sambuc defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", VR256, vx64mem, vx64mem>, VEX_W; 9142*0a6a1f1dSLionel Sambuc defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", VR256, vx64mem, vy64mem>, VEX_W; 9143*0a6a1f1dSLionel Sambuc } 9144*0a6a1f1dSLionel Sambuc 9145*0a6a1f1dSLionel Sambuc let ExeDomain = SSEPackedSingle in { 9146*0a6a1f1dSLionel Sambuc defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", VR256, vx32mem, vy32mem>; 9147*0a6a1f1dSLionel Sambuc defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", VR128, vx32mem, vy32mem>; 9148*0a6a1f1dSLionel Sambuc } 9149f4a2713aSLionel Sambuc} 9150