; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=SSE,SSE-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=SSE,SSE-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 -O3 | FileCheck %s --check-prefixes=SSE41,SSE41-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 -O3 | FileCheck %s --check-prefixes=SSE41,SSE41-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=AVX,AVX1,AVX-32,AVX1-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=AVX,AVX1,AVX-64,AVX1-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=AVX,AVX512F,AVX-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=AVX,AVX512F,AVX-64,AVX512F-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=AVX,AVX512VL,AVX-32,AVX512VL-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=AVX,AVX512VL,AVX-64,AVX512VL-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f,+avx512dq -O3 | FileCheck %s --check-prefixes=AVX,AVX512DQ,AVX512DQ-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512dq -O3 | FileCheck %s --check-prefixes=AVX,AVX512DQ,AVX512DQ-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl -O3 | FileCheck %s --check-prefixes=AVX,AVX512DQVL,AVX512DQVL-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl -O3 | FileCheck %s --check-prefixes=AVX,AVX512DQVL,AVX512DQVL-64
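
; Explanatory note (not autogenerated): these functions exercise the strict
; (constrained) integer-to-FP conversion intrinsics on 128-bit vectors. Under
; !"fpexcept.strict" the converts may not be speculated or folded in ways
; that could add or drop FP exceptions, so the expected codegen is often more
; conservative than for the plain sitofp/uitofp IR instructions.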

declare <2 x float> @llvm.experimental.constrained.sitofp.v2f32.v2i32(<2 x i32>, metadata, metadata)
declare <2 x float> @llvm.experimental.constrained.uitofp.v2f32.v2i32(<2 x i32>, metadata, metadata)
declare <2 x float> @llvm.experimental.constrained.sitofp.v2f32.v2i64(<2 x i64>, metadata, metadata)
declare <2 x float> @llvm.experimental.constrained.uitofp.v2f32.v2i64(<2 x i64>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i1(<4 x i1>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i1(<4 x i1>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i8(<4 x i8>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i8(<4 x i8>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i16(<4 x i16>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i16(<4 x i16>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i32(<4 x i32>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i32(<4 x i32>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i1(<2 x i1>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i1(<2 x i1>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i8(<2 x i8>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i8(<2 x i8>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i16(<2 x i16>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i16(<2 x i16>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i32(<2 x i32>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i32(<2 x i32>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64>, metadata, metadata)

define <2 x float> @sitofp_v2i32_v2f32(<2 x i32> %x) #0 {
; SSE-LABEL: sitofp_v2i32_v2f32:
; SSE: # %bb.0:
; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: sitofp_v2i32_v2f32:
; SSE41: # %bb.0:
; SSE41-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: sitofp_v2i32_v2f32:
; AVX: # %bb.0:
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %result = call <2 x float> @llvm.experimental.constrained.sitofp.v2f32.v2i32(<2 x i32> %x,
                 metadata !"round.dynamic",
                 metadata !"fpexcept.strict") #0
  ret <2 x float> %result
}

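; Explanatory note (not autogenerated): without a native unsigned convert
; (pre-AVX512), u32 -> f32 is lowered through f64 using the classic 2^52
; bias trick visible below: each u32 is zero-extended into the mantissa of a
; double whose magic constant 4.503599627370496E+15 is exactly 2^52, the bias
; is subtracted away again, and the exact f64 result is rounded to f32. Only
; the final cvtpd2ps can raise an inexact exception, which keeps the sequence
; legal under fpexcept.strict.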
define <2 x float> @uitofp_v2i32_v2f32(<2 x i32> %x) #0 {
; SSE-LABEL: uitofp_v2i32_v2f32:
; SSE: # %bb.0:
; SSE-NEXT: xorpd %xmm1, %xmm1
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; SSE-NEXT: orpd %xmm1, %xmm0
; SSE-NEXT: subpd %xmm1, %xmm0
; SSE-NEXT: cvtpd2ps %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: uitofp_v2i32_v2f32:
; SSE41: # %bb.0:
; SSE41-NEXT: xorpd %xmm1, %xmm1
; SSE41-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE41-NEXT: movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; SSE41-NEXT: orpd %xmm1, %xmm0
; SSE41-NEXT: subpd %xmm1, %xmm0
; SSE41-NEXT: cvtpd2ps %xmm0, %xmm0
; SSE41-NEXT: ret{{[l|q]}}
;
; AVX1-LABEL: uitofp_v2i32_v2f32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; AVX1-NEXT: # xmm1 = mem[0,0]
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vcvtpd2ps %xmm0, %xmm0
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: uitofp_v2i32_v2f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: uitofp_v2i32_v2f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: vcvtudq2ps %xmm0, %xmm0
; AVX512VL-NEXT: ret{{[l|q]}}
;
; AVX512DQ-LABEL: uitofp_v2i32_v2f32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: uitofp_v2i32_v2f32:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512DQVL-NEXT: vcvtudq2ps %xmm0, %xmm0
; AVX512DQVL-NEXT: ret{{[l|q]}}
  %result = call <2 x float> @llvm.experimental.constrained.uitofp.v2f32.v2i32(<2 x i32> %x,
                 metadata !"round.dynamic",
                 metadata !"fpexcept.strict") #0
  ret <2 x float> %result
}

define <2 x float> @sitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; SSE-32-LABEL: sitofp_v2i64_v2f32:
; SSE-32: # %bb.0:
; SSE-32-NEXT: pushl %ebp
; SSE-32-NEXT: .cfi_def_cfa_offset 8
; SSE-32-NEXT: .cfi_offset %ebp, -8
; SSE-32-NEXT: movl %esp, %ebp
; SSE-32-NEXT: .cfi_def_cfa_register %ebp
; SSE-32-NEXT: andl $-8, %esp
; SSE-32-NEXT: subl $24, %esp
; SSE-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT: fildll {{[0-9]+}}(%esp)
; SSE-32-NEXT: fstps (%esp)
; SSE-32-NEXT: fildll {{[0-9]+}}(%esp)
; SSE-32-NEXT: fstps {{[0-9]+}}(%esp)
; SSE-32-NEXT: wait
; SSE-32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-32-NEXT: movl %ebp, %esp
; SSE-32-NEXT: popl %ebp
; SSE-32-NEXT: .cfi_def_cfa %esp, 4
; SSE-32-NEXT: retl
;
; SSE-64-LABEL: sitofp_v2i64_v2f32:
; SSE-64: # %bb.0:
; SSE-64-NEXT: movq %xmm0, %rax
; SSE-64-NEXT: cvtsi2ss %rax, %xmm1
; SSE-64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-64-NEXT: movq %xmm0, %rax
; SSE-64-NEXT: xorps %xmm0, %xmm0
; SSE-64-NEXT: cvtsi2ss %rax, %xmm0
; SSE-64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-64-NEXT: movaps %xmm1, %xmm0
; SSE-64-NEXT: retq
;
; SSE41-32-LABEL: sitofp_v2i64_v2f32:
; SSE41-32: # %bb.0:
; SSE41-32-NEXT: pushl %ebp
; SSE41-32-NEXT: .cfi_def_cfa_offset 8
; SSE41-32-NEXT: .cfi_offset %ebp, -8
; SSE41-32-NEXT: movl %esp, %ebp
; SSE41-32-NEXT: .cfi_def_cfa_register %ebp
; SSE41-32-NEXT: andl $-8, %esp
; SSE41-32-NEXT: subl $24, %esp
; SSE41-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
; SSE41-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE41-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
; SSE41-32-NEXT: fildll {{[0-9]+}}(%esp)
; SSE41-32-NEXT: fstps (%esp)
; SSE41-32-NEXT: fildll {{[0-9]+}}(%esp)
; SSE41-32-NEXT: fstps {{[0-9]+}}(%esp)
; SSE41-32-NEXT: wait
; SSE41-32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE41-32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE41-32-NEXT: movl %ebp, %esp
; SSE41-32-NEXT: popl %ebp
; SSE41-32-NEXT: .cfi_def_cfa %esp, 4
; SSE41-32-NEXT: retl
;
; SSE41-64-LABEL: sitofp_v2i64_v2f32:
; SSE41-64: # %bb.0:
; SSE41-64-NEXT: movq %xmm0, %rax
; SSE41-64-NEXT: cvtsi2ss %rax, %xmm1
; SSE41-64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE41-64-NEXT: movq %xmm0, %rax
; SSE41-64-NEXT: xorps %xmm0, %xmm0
; SSE41-64-NEXT: cvtsi2ss %rax, %xmm0
; SSE41-64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE41-64-NEXT: movaps %xmm1, %xmm0
; SSE41-64-NEXT: retq
;
; AVX-32-LABEL: sitofp_v2i64_v2f32:
; AVX-32: # %bb.0:
; AVX-32-NEXT: pushl %ebp
; AVX-32-NEXT: .cfi_def_cfa_offset 8
; AVX-32-NEXT: .cfi_offset %ebp, -8
; AVX-32-NEXT: movl %esp, %ebp
; AVX-32-NEXT: .cfi_def_cfa_register %ebp
; AVX-32-NEXT: andl $-8, %esp
; AVX-32-NEXT: subl $24, %esp
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fstps (%esp)
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fstps {{[0-9]+}}(%esp)
; AVX-32-NEXT: wait
; AVX-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; AVX-32-NEXT: movl %ebp, %esp
; AVX-32-NEXT: popl %ebp
; AVX-32-NEXT: .cfi_def_cfa %esp, 4
; AVX-32-NEXT: retl
;
; AVX-64-LABEL: sitofp_v2i64_v2f32:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX-64-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX-64-NEXT: vmovq %xmm0, %rax
; AVX-64-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; AVX-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX-64-NEXT: retq
;
; AVX512DQ-32-LABEL: sitofp_v2i64_v2f32:
; AVX512DQ-32: # %bb.0:
; AVX512DQ-32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-32-NEXT: vcvtqq2ps %zmm0, %ymm1
; AVX512DQ-32-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX512DQ-32-NEXT: vcvtqq2ps %zmm0, %ymm0
; AVX512DQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
; AVX512DQ-32-NEXT: vzeroupper
; AVX512DQ-32-NEXT: retl
;
; AVX512DQ-64-LABEL: sitofp_v2i64_v2f32:
; AVX512DQ-64: # %bb.0:
; AVX512DQ-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX512DQ-64-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512DQ-64-NEXT: vmovq %xmm0, %rax
; AVX512DQ-64-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; AVX512DQ-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512DQ-64-NEXT: retq
;
; AVX512DQVL-LABEL: sitofp_v2i64_v2f32:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvtqq2ps %xmm0, %xmm0
; AVX512DQVL-NEXT: ret{{[l|q]}}
  %result = call <2 x float> @llvm.experimental.constrained.sitofp.v2f32.v2i64(<2 x i64> %x,
                 metadata !"round.dynamic",
                 metadata !"fpexcept.strict") #0
  ret <2 x float> %result
}

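; Explanatory note (not autogenerated): there is no SSE/AVX instruction for
; u64 -> f32, so on 64-bit targets each element appears to take the usual
; shift-and-round sequence: values with the sign bit set are halved, with the
; shifted-out bit ORed back in as a sticky bit, converted with the signed
; cvtsi2ss, and doubled afterwards. 32-bit targets go through x87 fildll and
; add a bias constant selected by the sign bit instead.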
define <2 x float> @uitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; SSE-32-LABEL: uitofp_v2i64_v2f32:
; SSE-32: # %bb.0:
; SSE-32-NEXT: pushl %ebp
; SSE-32-NEXT: .cfi_def_cfa_offset 8
; SSE-32-NEXT: .cfi_offset %ebp, -8
; SSE-32-NEXT: movl %esp, %ebp
; SSE-32-NEXT: .cfi_def_cfa_register %ebp
; SSE-32-NEXT: andl $-8, %esp
; SSE-32-NEXT: subl $24, %esp
; SSE-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-32-NEXT: movq %xmm1, {{[0-9]+}}(%esp)
; SSE-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3]
; SSE-32-NEXT: movd %xmm1, %eax
; SSE-32-NEXT: shrl $31, %eax
; SSE-32-NEXT: fildll {{[0-9]+}}(%esp)
; SSE-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; SSE-32-NEXT: fstps (%esp)
; SSE-32-NEXT: wait
; SSE-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE-32-NEXT: movd %xmm0, %eax
; SSE-32-NEXT: shrl $31, %eax
; SSE-32-NEXT: fildll {{[0-9]+}}(%esp)
; SSE-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; SSE-32-NEXT: fstps {{[0-9]+}}(%esp)
; SSE-32-NEXT: wait
; SSE-32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-32-NEXT: movl %ebp, %esp
; SSE-32-NEXT: popl %ebp
; SSE-32-NEXT: .cfi_def_cfa %esp, 4
; SSE-32-NEXT: retl
;
; SSE-64-LABEL: uitofp_v2i64_v2f32:
; SSE-64: # %bb.0:
; SSE-64-NEXT: movdqa %xmm0, %xmm1
; SSE-64-NEXT: movq %xmm0, %rax
; SSE-64-NEXT: movq %rax, %rcx
; SSE-64-NEXT: shrq %rcx
; SSE-64-NEXT: movl %eax, %edx
; SSE-64-NEXT: andl $1, %edx
; SSE-64-NEXT: orq %rcx, %rdx
; SSE-64-NEXT: testq %rax, %rax
; SSE-64-NEXT: cmovnsq %rax, %rdx
; SSE-64-NEXT: xorps %xmm0, %xmm0
; SSE-64-NEXT: cvtsi2ss %rdx, %xmm0
; SSE-64-NEXT: jns .LBB3_2
; SSE-64-NEXT: # %bb.1:
; SSE-64-NEXT: addss %xmm0, %xmm0
; SSE-64-NEXT: .LBB3_2:
; SSE-64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE-64-NEXT: movq %xmm1, %rax
; SSE-64-NEXT: movq %rax, %rcx
; SSE-64-NEXT: shrq %rcx
; SSE-64-NEXT: movl %eax, %edx
; SSE-64-NEXT: andl $1, %edx
; SSE-64-NEXT: orq %rcx, %rdx
; SSE-64-NEXT: testq %rax, %rax
; SSE-64-NEXT: cmovnsq %rax, %rdx
; SSE-64-NEXT: xorps %xmm1, %xmm1
; SSE-64-NEXT: cvtsi2ss %rdx, %xmm1
; SSE-64-NEXT: jns .LBB3_4
; SSE-64-NEXT: # %bb.3:
; SSE-64-NEXT: addss %xmm1, %xmm1
; SSE-64-NEXT: .LBB3_4:
; SSE-64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-64-NEXT: retq
;
; SSE41-32-LABEL: uitofp_v2i64_v2f32:
; SSE41-32: # %bb.0:
; SSE41-32-NEXT: pushl %ebp
; SSE41-32-NEXT: .cfi_def_cfa_offset 8
; SSE41-32-NEXT: .cfi_offset %ebp, -8
; SSE41-32-NEXT: movl %esp, %ebp
; SSE41-32-NEXT: .cfi_def_cfa_register %ebp
; SSE41-32-NEXT: andl $-8, %esp
; SSE41-32-NEXT: subl $24, %esp
; SSE41-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE41-32-NEXT: movq %xmm1, {{[0-9]+}}(%esp)
; SSE41-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
; SSE41-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3]
; SSE41-32-NEXT: movd %xmm1, %eax
; SSE41-32-NEXT: shrl $31, %eax
; SSE41-32-NEXT: fildll {{[0-9]+}}(%esp)
; SSE41-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; SSE41-32-NEXT: fstps (%esp)
; SSE41-32-NEXT: wait
; SSE41-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE41-32-NEXT: movd %xmm0, %eax
; SSE41-32-NEXT: shrl $31, %eax
; SSE41-32-NEXT: fildll {{[0-9]+}}(%esp)
; SSE41-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; SSE41-32-NEXT: fstps {{[0-9]+}}(%esp)
; SSE41-32-NEXT: wait
; SSE41-32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE41-32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE41-32-NEXT: movl %ebp, %esp
; SSE41-32-NEXT: popl %ebp
; SSE41-32-NEXT: .cfi_def_cfa %esp, 4
; SSE41-32-NEXT: retl
;
; SSE41-64-LABEL: uitofp_v2i64_v2f32:
; SSE41-64: # %bb.0:
; SSE41-64-NEXT: movdqa %xmm0, %xmm1
; SSE41-64-NEXT: movq %xmm0, %rax
; SSE41-64-NEXT: movq %rax, %rcx
; SSE41-64-NEXT: shrq %rcx
; SSE41-64-NEXT: movl %eax, %edx
; SSE41-64-NEXT: andl $1, %edx
; SSE41-64-NEXT: orq %rcx, %rdx
; SSE41-64-NEXT: testq %rax, %rax
; SSE41-64-NEXT: cmovnsq %rax, %rdx
; SSE41-64-NEXT: xorps %xmm0, %xmm0
; SSE41-64-NEXT: cvtsi2ss %rdx, %xmm0
; SSE41-64-NEXT: jns .LBB3_2
; SSE41-64-NEXT: # %bb.1:
; SSE41-64-NEXT: addss %xmm0, %xmm0
; SSE41-64-NEXT: .LBB3_2:
; SSE41-64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE41-64-NEXT: movq %xmm1, %rax
; SSE41-64-NEXT: movq %rax, %rcx
; SSE41-64-NEXT: shrq %rcx
; SSE41-64-NEXT: movl %eax, %edx
; SSE41-64-NEXT: andl $1, %edx
; SSE41-64-NEXT: orq %rcx, %rdx
; SSE41-64-NEXT: testq %rax, %rax
; SSE41-64-NEXT: cmovnsq %rax, %rdx
; SSE41-64-NEXT: xorps %xmm1, %xmm1
; SSE41-64-NEXT: cvtsi2ss %rdx, %xmm1
; SSE41-64-NEXT: jns .LBB3_4
; SSE41-64-NEXT: # %bb.3:
; SSE41-64-NEXT: addss %xmm1, %xmm1
; SSE41-64-NEXT: .LBB3_4:
; SSE41-64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE41-64-NEXT: retq
;
; AVX-32-LABEL: uitofp_v2i64_v2f32:
; AVX-32: # %bb.0:
; AVX-32-NEXT: pushl %ebp
; AVX-32-NEXT: .cfi_def_cfa_offset 8
; AVX-32-NEXT: .cfi_offset %ebp, -8
; AVX-32-NEXT: movl %esp, %ebp
; AVX-32-NEXT: .cfi_def_cfa_register %ebp
; AVX-32-NEXT: andl $-8, %esp
; AVX-32-NEXT: subl $24, %esp
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vextractps $1, %xmm0, %eax
; AVX-32-NEXT: shrl $31, %eax
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; AVX-32-NEXT: fstps {{[0-9]+}}(%esp)
; AVX-32-NEXT: wait
; AVX-32-NEXT: vextractps $3, %xmm0, %eax
; AVX-32-NEXT: shrl $31, %eax
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; AVX-32-NEXT: fstps (%esp)
; AVX-32-NEXT: wait
; AVX-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; AVX-32-NEXT: movl %ebp, %esp
; AVX-32-NEXT: popl %ebp
; AVX-32-NEXT: .cfi_def_cfa %esp, 4
; AVX-32-NEXT: retl
;
; AVX1-64-LABEL: uitofp_v2i64_v2f32:
; AVX1-64: # %bb.0:
; AVX1-64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX1-64-NEXT: vpsrlq $1, %xmm0, %xmm2
; AVX1-64-NEXT: vpor %xmm1, %xmm2, %xmm1
; AVX1-64-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm1
; AVX1-64-NEXT: vpextrq $1, %xmm1, %rax
; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
; AVX1-64-NEXT: vmovq %xmm1, %rax
; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1
; AVX1-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero
; AVX1-64-NEXT: vaddps %xmm1, %xmm1, %xmm2
; AVX1-64-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-64-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
; AVX1-64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; AVX1-64-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX1-64-NEXT: retq
;
; AVX512F-64-LABEL: uitofp_v2i64_v2f32:
; AVX512F-64: # %bb.0:
; AVX512F-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-64-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512F-64-NEXT: vmovq %xmm0, %rax
; AVX512F-64-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
; AVX512F-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512F-64-NEXT: retq
;
; AVX512VL-64-LABEL: uitofp_v2i64_v2f32:
; AVX512VL-64: # %bb.0:
; AVX512VL-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-64-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512VL-64-NEXT: vmovq %xmm0, %rax
; AVX512VL-64-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
; AVX512VL-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512VL-64-NEXT: retq
;
; AVX512DQ-32-LABEL: uitofp_v2i64_v2f32:
; AVX512DQ-32: # %bb.0:
; AVX512DQ-32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-32-NEXT: vcvtuqq2ps %zmm0, %ymm1
; AVX512DQ-32-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX512DQ-32-NEXT: vcvtuqq2ps %zmm0, %ymm0
; AVX512DQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
; AVX512DQ-32-NEXT: vzeroupper
; AVX512DQ-32-NEXT: retl
;
; AVX512DQ-64-LABEL: uitofp_v2i64_v2f32:
; AVX512DQ-64: # %bb.0:
; AVX512DQ-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX512DQ-64-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512DQ-64-NEXT: vmovq %xmm0, %rax
; AVX512DQ-64-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
; AVX512DQ-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512DQ-64-NEXT: retq
;
; AVX512DQVL-LABEL: uitofp_v2i64_v2f32:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvtuqq2ps %xmm0, %xmm0
; AVX512DQVL-NEXT: ret{{[l|q]}}
  %result = call <2 x float> @llvm.experimental.constrained.uitofp.v2f32.v2i64(<2 x i64> %x,
                 metadata !"round.dynamic",
                 metadata !"fpexcept.strict") #0
  ret <2 x float> %result
}

define <4 x float> @sitofp_v4i1_v4f32(<4 x i1> %x) #0 {
; SSE-LABEL: sitofp_v4i1_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: pslld $31, %xmm0
; SSE-NEXT: psrad $31, %xmm0
; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: sitofp_v4i1_v4f32:
; SSE41: # %bb.0:
; SSE41-NEXT: pslld $31, %xmm0
; SSE41-NEXT: psrad $31, %xmm0
; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: sitofp_v4i1_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vpslld $31, %xmm0, %xmm0
; AVX-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %result = call <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i1(<4 x i1> %x,
                 metadata !"round.dynamic",
                 metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}

define <4 x float> @uitofp_v4i1_v4f32(<4 x i1> %x) #0 {
; SSE-32-LABEL: uitofp_v4i1_v4f32:
; SSE-32: # %bb.0:
; SSE-32-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; SSE-32-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE-32-NEXT: retl
;
; SSE-64-LABEL: uitofp_v4i1_v4f32:
; SSE-64: # %bb.0:
; SSE-64-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-64-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE-64-NEXT: retq
;
; SSE41-32-LABEL: uitofp_v4i1_v4f32:
; SSE41-32: # %bb.0:
; SSE41-32-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; SSE41-32-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE41-32-NEXT: retl
;
; SSE41-64-LABEL: uitofp_v4i1_v4f32:
; SSE41-64: # %bb.0:
; SSE41-64-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-64-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE41-64-NEXT: retq
;
; AVX1-32-LABEL: uitofp_v4i1_v4f32:
; AVX1-32: # %bb.0:
; AVX1-32-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX1-32-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX1-32-NEXT: retl
;
; AVX1-64-LABEL: uitofp_v4i1_v4f32:
; AVX1-64: # %bb.0:
; AVX1-64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-64-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX1-64-NEXT: retq
;
; AVX512F-LABEL: uitofp_v4i1_v4f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
; AVX512F-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-32-LABEL: uitofp_v4i1_v4f32:
; AVX512VL-32: # %bb.0:
; AVX512VL-32-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; AVX512VL-32-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX512VL-32-NEXT: retl
;
; AVX512VL-64-LABEL: uitofp_v4i1_v4f32:
; AVX512VL-64: # %bb.0:
; AVX512VL-64-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VL-64-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX512VL-64-NEXT: retq
;
; AVX512DQ-LABEL: uitofp_v4i1_v4f32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
; AVX512DQ-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX512DQ-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-32-LABEL: uitofp_v4i1_v4f32:
; AVX512DQVL-32: # %bb.0:
; AVX512DQVL-32-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; AVX512DQVL-32-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX512DQVL-32-NEXT: retl
;
; AVX512DQVL-64-LABEL: uitofp_v4i1_v4f32:
; AVX512DQVL-64: # %bb.0:
; AVX512DQVL-64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512DQVL-64-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX512DQVL-64-NEXT: retq
  %result = call <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i1(<4 x i1> %x,
                 metadata !"round.dynamic",
                 metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}

define <4 x float> @sitofp_v4i8_v4f32(<4 x i8> %x) #0 {
; SSE-LABEL: sitofp_v4i8_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE-NEXT: psrad $24, %xmm0
; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: sitofp_v4i8_v4f32:
; SSE41: # %bb.0:
; SSE41-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE41-NEXT: psrad $24, %xmm0
; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: sitofp_v4i8_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vpmovsxbd %xmm0, %xmm0
; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %result = call <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i8(<4 x i8> %x,
                 metadata !"round.dynamic",
                 metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}

define <4 x float> @uitofp_v4i8_v4f32(<4 x i8> %x) #0 {
; SSE-LABEL: uitofp_v4i8_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: uitofp_v4i8_v4f32:
; SSE41: # %bb.0:
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: uitofp_v4i8_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %result = call <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i8(<4 x i8> %x,
                 metadata !"round.dynamic",
                 metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}

define <4 x float> @sitofp_v4i16_v4f32(<4 x i16> %x) #0 {
; SSE-LABEL: sitofp_v4i16_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE-NEXT: psrad $16, %xmm0
; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: sitofp_v4i16_v4f32:
; SSE41: # %bb.0:
; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE41-NEXT: psrad $16, %xmm0
; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: sitofp_v4i16_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %result = call <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i16(<4 x i16> %x,
                 metadata !"round.dynamic",
                 metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}

define <4 x float> @uitofp_v4i16_v4f32(<4 x i16> %x) #0 {
; SSE-LABEL: uitofp_v4i16_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: uitofp_v4i16_v4f32:
; SSE41: # %bb.0:
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: uitofp_v4i16_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %result = call <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i16(<4 x i16> %x,
                 metadata !"round.dynamic",
                 metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}

define <4 x float> @sitofp_v4i32_v4f32(<4 x i32> %x) #0 {
; SSE-LABEL: sitofp_v4i32_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: sitofp_v4i32_v4f32:
; SSE41: # %bb.0:
; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: sitofp_v4i32_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %result = call <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i32(<4 x i32> %x,
                 metadata !"round.dynamic",
                 metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}

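; Explanatory note (not autogenerated): on targets without vcvtudq2ps,
; u32 -> f32 is split into 16-bit halves: the low and (shifted) high halves
; are each merged into floats carrying power-of-two exponent biases via
; POR/VPBLENDW, the bias is removed with subps, and the partial results are
; added. AVX512 targets convert directly instead.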
define <4 x float> @uitofp_v4i32_v4f32(<4 x i32> %x) #0 {
; SSE-32-LABEL: uitofp_v4i32_v4f32:
; SSE-32: # %bb.0:
; SSE-32-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
; SSE-32-NEXT: pand %xmm0, %xmm1
; SSE-32-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; SSE-32-NEXT: psrld $16, %xmm0
; SSE-32-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; SSE-32-NEXT: subps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; SSE-32-NEXT: addps %xmm1, %xmm0
; SSE-32-NEXT: retl
;
; SSE-64-LABEL: uitofp_v4i32_v4f32:
; SSE-64: # %bb.0:
; SSE-64-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
; SSE-64-NEXT: pand %xmm0, %xmm1
; SSE-64-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-64-NEXT: psrld $16, %xmm0
; SSE-64-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-64-NEXT: subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-64-NEXT: addps %xmm1, %xmm0
; SSE-64-NEXT: retq
;
; SSE41-32-LABEL: uitofp_v4i32_v4f32:
; SSE41-32: # %bb.0:
; SSE41-32-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
; SSE41-32-NEXT: pand %xmm0, %xmm1
; SSE41-32-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; SSE41-32-NEXT: psrld $16, %xmm0
; SSE41-32-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; SSE41-32-NEXT: subps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; SSE41-32-NEXT: addps %xmm1, %xmm0
; SSE41-32-NEXT: retl
;
; SSE41-64-LABEL: uitofp_v4i32_v4f32:
; SSE41-64: # %bb.0:
; SSE41-64-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
; SSE41-64-NEXT: pand %xmm0, %xmm1
; SSE41-64-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-64-NEXT: psrld $16, %xmm0
; SSE41-64-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-64-NEXT: subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-64-NEXT: addps %xmm1, %xmm0
; SSE41-64-NEXT: retq
;
; AVX1-32-LABEL: uitofp_v4i32_v4f32:
; AVX1-32: # %bb.0:
; AVX1-32-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; AVX1-32-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX1-32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; AVX1-32-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX1-32-NEXT: vaddps %xmm0, %xmm1, %xmm0
; AVX1-32-NEXT: retl
;
; AVX1-64-LABEL: uitofp_v4i32_v4f32:
; AVX1-64: # %bb.0:
; AVX1-64-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; AVX1-64-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX1-64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
; AVX1-64-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-64-NEXT: vaddps %xmm0, %xmm1, %xmm0
; AVX1-64-NEXT: retq
;
; AVX512F-LABEL: uitofp_v4i32_v4f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovaps %xmm0, %xmm0
; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: uitofp_v4i32_v4f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvtudq2ps %xmm0, %xmm0
; AVX512VL-NEXT: ret{{[l|q]}}
;
; AVX512DQ-LABEL: uitofp_v4i32_v4f32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0
; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: uitofp_v4i32_v4f32:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvtudq2ps %xmm0, %xmm0
; AVX512DQVL-NEXT: ret{{[l|q]}}
  %result = call <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i32(<4 x i32> %x,
                 metadata !"round.dynamic",
                 metadata !"fpexcept.strict") #0
  ret <4 x float> %result
}

define <2 x double> @sitofp_v2i1_v2f64(<2 x i1> %x) #0 {
; SSE-LABEL: sitofp_v2i1_v2f64:
; SSE: # %bb.0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT: pslld $31, %xmm0
; SSE-NEXT: psrad $31, %xmm0
; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: sitofp_v2i1_v2f64:
; SSE41: # %bb.0:
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE41-NEXT: pslld $31, %xmm0
; SSE41-NEXT: psrad $31, %xmm0
; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE41-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: sitofp_v2i1_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT: vpslld $31, %xmm0, %xmm0
; AVX-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %result = call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i1(<2 x i1> %x,
                 metadata !"round.dynamic",
                 metadata !"fpexcept.strict") #0
  ret <2 x double> %result
}

define <2 x double> @uitofp_v2i1_v2f64(<2 x i1> %x) #0 {
; SSE-32-LABEL: uitofp_v2i1_v2f64:
; SSE-32: # %bb.0:
; SSE-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-32-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; SSE-32-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE-32-NEXT: retl
;
; SSE-64-LABEL: uitofp_v2i1_v2f64:
; SSE-64: # %bb.0:
; SSE-64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-64-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE-64-NEXT: retq
;
; SSE41-32-LABEL: uitofp_v2i1_v2f64:
; SSE41-32: # %bb.0:
; SSE41-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE41-32-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; SSE41-32-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE41-32-NEXT: retl
;
; SSE41-64-LABEL: uitofp_v2i1_v2f64:
; SSE41-64: # %bb.0:
; SSE41-64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE41-64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-64-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE41-64-NEXT: retq
;
; AVX1-32-LABEL: uitofp_v2i1_v2f64:
; AVX1-32: # %bb.0:
; AVX1-32-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-32-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX1-32-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX1-32-NEXT: retl
;
; AVX1-64-LABEL: uitofp_v2i1_v2f64:
; AVX1-64: # %bb.0:
; AVX1-64-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-64-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX1-64-NEXT: retq
;
; AVX512F-LABEL: uitofp_v2i1_v2f64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512F-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
; AVX512F-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-32-LABEL: uitofp_v2i1_v2f64:
; AVX512VL-32: # %bb.0:
; AVX512VL-32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512VL-32-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; AVX512VL-32-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX512VL-32-NEXT: retl
;
; AVX512VL-64-LABEL: uitofp_v2i1_v2f64:
; AVX512VL-64: # %bb.0:
; AVX512VL-64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512VL-64-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VL-64-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX512VL-64-NEXT: retq
;
; AVX512DQ-LABEL: uitofp_v2i1_v2f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512DQ-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
; AVX512DQ-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX512DQ-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-32-LABEL: uitofp_v2i1_v2f64:
; AVX512DQVL-32: # %bb.0:
; AVX512DQVL-32-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512DQVL-32-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; AVX512DQVL-32-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX512DQVL-32-NEXT: retl
;
; AVX512DQVL-64-LABEL: uitofp_v2i1_v2f64:
; AVX512DQVL-64: # %bb.0:
; AVX512DQVL-64-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512DQVL-64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512DQVL-64-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX512DQVL-64-NEXT: retq
  %result = call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i1(<2 x i1> %x,
                 metadata !"round.dynamic",
                 metadata !"fpexcept.strict") #0
  ret <2 x double> %result
}

define <2 x double> @sitofp_v2i8_v2f64(<2 x i8> %x) #0 {
; SSE-LABEL: sitofp_v2i8_v2f64:
; SSE: # %bb.0:
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE-NEXT: psrad $24, %xmm0
; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: sitofp_v2i8_v2f64:
; SSE41: # %bb.0:
; SSE41-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE41-NEXT: psrad $24, %xmm0
; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE41-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: sitofp_v2i8_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vpmovsxbd %xmm0, %xmm0
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %result = call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i8(<2 x i8> %x,
                 metadata !"round.dynamic",
                 metadata !"fpexcept.strict") #0
  ret <2 x double> %result
}

define <2 x double> @uitofp_v2i8_v2f64(<2 x i8> %x) #0 {
; SSE-LABEL: uitofp_v2i8_v2f64:
; SSE: # %bb.0:
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: uitofp_v2i8_v2f64:
; SSE41: # %bb.0:
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE41-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: uitofp_v2i8_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %result = call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i8(<2 x i8> %x,
                 metadata !"round.dynamic",
                 metadata !"fpexcept.strict") #0
  ret <2 x double> %result
}

define <2 x double> @sitofp_v2i16_v2f64(<2 x i16> %x) #0 {
; SSE-LABEL: sitofp_v2i16_v2f64:
; SSE: # %bb.0:
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE-NEXT: psrad $16, %xmm0
; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: sitofp_v2i16_v2f64:
; SSE41: # %bb.0:
; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE41-NEXT: psrad $16, %xmm0
; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE41-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: sitofp_v2i16_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %result = call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i16(<2 x i16> %x,
                 metadata !"round.dynamic",
                 metadata !"fpexcept.strict") #0
  ret <2 x double> %result
}

define <2 x double> @uitofp_v2i16_v2f64(<2 x i16> %x) #0 {
; SSE-LABEL: uitofp_v2i16_v2f64:
; SSE: # %bb.0:
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: uitofp_v2i16_v2f64:
; SSE41: # %bb.0:
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE41-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: uitofp_v2i16_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %result = call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i16(<2 x i16> %x,
                 metadata !"round.dynamic",
                 metadata !"fpexcept.strict") #0
  ret <2 x double> %result
}

define <2 x double> @sitofp_v2i32_v2f64(<2 x i32> %x) #0 {
; SSE-LABEL: sitofp_v2i32_v2f64:
; SSE: # %bb.0:
; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: sitofp_v2i32_v2f64:
; SSE41: # %bb.0:
; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE41-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: sitofp_v2i32_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
  %result = call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i32(<2 x i32> %x,
                 metadata !"round.dynamic",
                 metadata !"fpexcept.strict") #0
  ret <2 x double> %result
}

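; Explanatory note (not autogenerated): u32 -> f64 reuses the 2^52 bias trick
; from uitofp_v2i32_v2f32 above, but since every u32 is exactly representable
; in f64, the or/sub pair already produces the final result and no conversion
; instruction is needed at all on SSE/AVX1.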
define <2 x double> @uitofp_v2i32_v2f64(<2 x i32> %x) #0 {
; SSE-LABEL: uitofp_v2i32_v2f64:
; SSE: # %bb.0:
; SSE-NEXT: xorpd %xmm1, %xmm1
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; SSE-NEXT: orpd %xmm1, %xmm0
; SSE-NEXT: subpd %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; SSE41-LABEL: uitofp_v2i32_v2f64:
; SSE41: # %bb.0:
; SSE41-NEXT: xorpd %xmm1, %xmm1
; SSE41-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE41-NEXT: movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; SSE41-NEXT: orpd %xmm1, %xmm0
; SSE41-NEXT: subpd %xmm1, %xmm0
; SSE41-NEXT: ret{{[l|q]}}
;
; AVX1-LABEL: uitofp_v2i32_v2f64:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15]
; AVX1-NEXT: # xmm1 = mem[0,0]
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: uitofp_v2i32_v2f64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: uitofp_v2i32_v2f64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvtudq2pd %xmm0, %xmm0
; AVX512VL-NEXT: ret{{[l|q]}}
;
; AVX512DQ-LABEL: uitofp_v2i32_v2f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: uitofp_v2i32_v2f64:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvtudq2pd %xmm0, %xmm0
; AVX512DQVL-NEXT: ret{{[l|q]}}
  %result = call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i32(<2 x i32> %x,
                 metadata !"round.dynamic",
                 metadata !"fpexcept.strict") #0
  ret <2 x double> %result
}

define <2 x double> @sitofp_v2i64_v2f64(<2 x i64> %x) #0 {
; SSE-32-LABEL: sitofp_v2i64_v2f64:
; SSE-32: # %bb.0:
; SSE-32-NEXT: pushl %ebp
; SSE-32-NEXT: .cfi_def_cfa_offset 8
; SSE-32-NEXT: .cfi_offset %ebp, -8
; SSE-32-NEXT: movl %esp, %ebp
; SSE-32-NEXT: .cfi_def_cfa_register %ebp
; SSE-32-NEXT: andl $-8, %esp
; SSE-32-NEXT: subl $32, %esp
; SSE-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT: fildll {{[0-9]+}}(%esp)
; SSE-32-NEXT: fstpl {{[0-9]+}}(%esp)
; SSE-32-NEXT: fildll {{[0-9]+}}(%esp)
; SSE-32-NEXT: fstpl (%esp)
; SSE-32-NEXT: wait
; SSE-32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-32-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; SSE-32-NEXT: movl %ebp, %esp
; SSE-32-NEXT: popl %ebp
; SSE-32-NEXT: .cfi_def_cfa %esp, 4
; SSE-32-NEXT: retl
;
; SSE-64-LABEL: sitofp_v2i64_v2f64:
; SSE-64: # %bb.0:
; SSE-64-NEXT: movq %xmm0, %rax
; SSE-64-NEXT: cvtsi2sd %rax, %xmm1
; SSE-64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-64-NEXT: movq %xmm0, %rax
; SSE-64-NEXT: xorps %xmm0, %xmm0
; SSE-64-NEXT: cvtsi2sd %rax, %xmm0
; SSE-64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-64-NEXT: movapd %xmm1, %xmm0
; SSE-64-NEXT: retq
;
; SSE41-32-LABEL: sitofp_v2i64_v2f64:
; SSE41-32: # %bb.0:
; SSE41-32-NEXT: pushl %ebp
; SSE41-32-NEXT: .cfi_def_cfa_offset 8
; SSE41-32-NEXT: .cfi_offset %ebp, -8
; SSE41-32-NEXT: movl %esp, %ebp
; SSE41-32-NEXT: .cfi_def_cfa_register %ebp
; SSE41-32-NEXT: andl $-8, %esp
; SSE41-32-NEXT: subl $32, %esp
; SSE41-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
; SSE41-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE41-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
; SSE41-32-NEXT: fildll {{[0-9]+}}(%esp)
; SSE41-32-NEXT: fstpl {{[0-9]+}}(%esp)
; SSE41-32-NEXT: fildll {{[0-9]+}}(%esp)
; SSE41-32-NEXT: fstpl (%esp)
; SSE41-32-NEXT: wait
; SSE41-32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-32-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; SSE41-32-NEXT: movl %ebp, %esp
; SSE41-32-NEXT: popl %ebp
; SSE41-32-NEXT: .cfi_def_cfa %esp, 4
; SSE41-32-NEXT: retl
;
; SSE41-64-LABEL: sitofp_v2i64_v2f64:
; SSE41-64: # %bb.0:
; SSE41-64-NEXT: movq %xmm0, %rax
; SSE41-64-NEXT: cvtsi2sd %rax, %xmm1
; SSE41-64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE41-64-NEXT: movq %xmm0, %rax
; SSE41-64-NEXT: xorps %xmm0, %xmm0
; SSE41-64-NEXT: cvtsi2sd %rax, %xmm0
; SSE41-64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE41-64-NEXT: movapd %xmm1, %xmm0
; SSE41-64-NEXT: retq
;
; AVX-32-LABEL: sitofp_v2i64_v2f64:
; AVX-32: # %bb.0:
; AVX-32-NEXT: pushl %ebp
; AVX-32-NEXT: .cfi_def_cfa_offset 8
; AVX-32-NEXT: .cfi_offset %ebp, -8
; AVX-32-NEXT: movl %esp, %ebp
; AVX-32-NEXT: .cfi_def_cfa_register %ebp
; AVX-32-NEXT: andl $-8, %esp
; AVX-32-NEXT: subl $32, %esp
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp)
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fstpl (%esp)
; AVX-32-NEXT: wait
; AVX-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-32-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; AVX-32-NEXT: movl %ebp, %esp
; AVX-32-NEXT: popl %ebp
; AVX-32-NEXT: .cfi_def_cfa %esp, 4
; AVX-32-NEXT: retl
;
; AVX-64-LABEL: sitofp_v2i64_v2f64:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX-64-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
; AVX-64-NEXT: vmovq %xmm0, %rax
; AVX-64-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
; AVX-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-64-NEXT: retq
;
; AVX512DQ-LABEL: sitofp_v2i64_v2f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0
; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: sitofp_v2i64_v2f64:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvtqq2pd %xmm0, %xmm0
; AVX512DQVL-NEXT: ret{{[l|q]}}
  %result = call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64> %x,
                 metadata !"round.dynamic",
                 metadata !"fpexcept.strict") #0
  ret <2 x double> %result
}

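; Explanatory note (not autogenerated): u64 -> f64 mirrors the u64 -> f32
; lowering above: shift-and-round plus post-doubling on plain x86-64, x87
; fildll with a sign-selected bias on 32-bit, and a single vcvtusi2sd or
; vcvtuqq2pd once AVX512F/DQ is available.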
define <2 x double> @uitofp_v2i64_v2f64(<2 x i64> %x) #0 {
; SSE-32-LABEL: uitofp_v2i64_v2f64:
; SSE-32: # %bb.0:
; SSE-32-NEXT: pushl %ebp
; SSE-32-NEXT: .cfi_def_cfa_offset 8
; SSE-32-NEXT: .cfi_offset %ebp, -8
; SSE-32-NEXT: movl %esp, %ebp
; SSE-32-NEXT: .cfi_def_cfa_register %ebp
; SSE-32-NEXT: andl $-8, %esp
; SSE-32-NEXT: subl $32, %esp
; SSE-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE-32-NEXT: movq %xmm1, {{[0-9]+}}(%esp)
; SSE-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; SSE-32-NEXT: movd %xmm1, %eax
; SSE-32-NEXT: shrl $31, %eax
; SSE-32-NEXT: fildll {{[0-9]+}}(%esp)
; SSE-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; SSE-32-NEXT: fstpl {{[0-9]+}}(%esp)
; SSE-32-NEXT: wait
; SSE-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE-32-NEXT: movd %xmm0, %eax
; SSE-32-NEXT: shrl $31, %eax
; SSE-32-NEXT: fildll {{[0-9]+}}(%esp)
; SSE-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; SSE-32-NEXT: fstpl (%esp)
; SSE-32-NEXT: wait
; SSE-32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-32-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; SSE-32-NEXT: movl %ebp, %esp
; SSE-32-NEXT: popl %ebp
; SSE-32-NEXT: .cfi_def_cfa %esp, 4
; SSE-32-NEXT: retl
;
; SSE-64-LABEL: uitofp_v2i64_v2f64:
; SSE-64: # %bb.0:
; SSE-64-NEXT: movdqa %xmm0, %xmm1
; SSE-64-NEXT: movq %xmm0, %rax
; SSE-64-NEXT: movq %rax, %rcx
; SSE-64-NEXT: shrq %rcx
; SSE-64-NEXT: movl %eax, %edx
; SSE-64-NEXT: andl $1, %edx
; SSE-64-NEXT: orq %rcx, %rdx
; SSE-64-NEXT: testq %rax, %rax
; SSE-64-NEXT: cmovnsq %rax, %rdx
; SSE-64-NEXT: xorps %xmm0, %xmm0
; SSE-64-NEXT: cvtsi2sd %rdx, %xmm0
; SSE-64-NEXT: jns .LBB21_2
; SSE-64-NEXT: # %bb.1:
; SSE-64-NEXT: addsd %xmm0, %xmm0
; SSE-64-NEXT: .LBB21_2:
; SSE-64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE-64-NEXT: movq %xmm1, %rax
; SSE-64-NEXT: movq %rax, %rcx
; SSE-64-NEXT: shrq %rcx
; SSE-64-NEXT: movl %eax, %edx
; SSE-64-NEXT: andl $1, %edx
; SSE-64-NEXT: orq %rcx, %rdx
; SSE-64-NEXT: testq %rax, %rax
; SSE-64-NEXT: cmovnsq %rax, %rdx
; SSE-64-NEXT: xorps %xmm1, %xmm1
; SSE-64-NEXT: cvtsi2sd %rdx, %xmm1
; SSE-64-NEXT: jns .LBB21_4
; SSE-64-NEXT: # %bb.3:
; SSE-64-NEXT: addsd %xmm1, %xmm1
; SSE-64-NEXT: .LBB21_4:
; SSE-64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-64-NEXT: retq
;
; SSE41-32-LABEL: uitofp_v2i64_v2f64:
; SSE41-32: # %bb.0:
; SSE41-32-NEXT: pushl %ebp
; SSE41-32-NEXT: .cfi_def_cfa_offset 8
; SSE41-32-NEXT: .cfi_offset %ebp, -8
; SSE41-32-NEXT: movl %esp, %ebp
; SSE41-32-NEXT: .cfi_def_cfa_register %ebp
; SSE41-32-NEXT: andl $-8, %esp
; SSE41-32-NEXT: subl $32, %esp
; SSE41-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
; SSE41-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; SSE41-32-NEXT: movq %xmm1, {{[0-9]+}}(%esp)
; SSE41-32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; SSE41-32-NEXT: movd %xmm1, %eax
; SSE41-32-NEXT: shrl $31, %eax
; SSE41-32-NEXT: fildll {{[0-9]+}}(%esp)
; SSE41-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; SSE41-32-NEXT: fstpl {{[0-9]+}}(%esp)
; SSE41-32-NEXT: wait
; SSE41-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE41-32-NEXT: movd %xmm0, %eax
; SSE41-32-NEXT: shrl $31, %eax
; SSE41-32-NEXT: fildll {{[0-9]+}}(%esp)
; SSE41-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; SSE41-32-NEXT: fstpl (%esp)
; SSE41-32-NEXT: wait
; SSE41-32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-32-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; SSE41-32-NEXT: movl %ebp, %esp
; SSE41-32-NEXT: popl %ebp
; SSE41-32-NEXT: .cfi_def_cfa %esp, 4
; SSE41-32-NEXT: retl
;
; SSE41-64-LABEL: uitofp_v2i64_v2f64:
; SSE41-64: # %bb.0:
; SSE41-64-NEXT: movdqa %xmm0, %xmm1
; SSE41-64-NEXT: movq %xmm0, %rax
; SSE41-64-NEXT: movq %rax, %rcx
; SSE41-64-NEXT: shrq %rcx
; SSE41-64-NEXT: movl %eax, %edx
; SSE41-64-NEXT: andl $1, %edx
; SSE41-64-NEXT: orq %rcx, %rdx
; SSE41-64-NEXT: testq %rax, %rax
; SSE41-64-NEXT: cmovnsq %rax, %rdx
; SSE41-64-NEXT: xorps %xmm0, %xmm0
; SSE41-64-NEXT: cvtsi2sd %rdx, %xmm0
; SSE41-64-NEXT: jns .LBB21_2
; SSE41-64-NEXT: # %bb.1:
; SSE41-64-NEXT: addsd %xmm0, %xmm0
; SSE41-64-NEXT: .LBB21_2:
; SSE41-64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE41-64-NEXT: movq %xmm1, %rax
; SSE41-64-NEXT: movq %rax, %rcx
; SSE41-64-NEXT: shrq %rcx
; SSE41-64-NEXT: movl %eax, %edx
; SSE41-64-NEXT: andl $1, %edx
; SSE41-64-NEXT: orq %rcx, %rdx
; SSE41-64-NEXT: testq %rax, %rax
; SSE41-64-NEXT: cmovnsq %rax, %rdx
; SSE41-64-NEXT: xorps %xmm1, %xmm1
; SSE41-64-NEXT: cvtsi2sd %rdx, %xmm1
; SSE41-64-NEXT: jns .LBB21_4
; SSE41-64-NEXT: # %bb.3:
; SSE41-64-NEXT: addsd %xmm1, %xmm1
; SSE41-64-NEXT: .LBB21_4:
; SSE41-64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-64-NEXT: retq
;
; AVX-32-LABEL: uitofp_v2i64_v2f64:
; AVX-32: # %bb.0:
; AVX-32-NEXT: pushl %ebp
; AVX-32-NEXT: .cfi_def_cfa_offset 8
; AVX-32-NEXT: .cfi_offset %ebp, -8
; AVX-32-NEXT: movl %esp, %ebp
; AVX-32-NEXT: .cfi_def_cfa_register %ebp
; AVX-32-NEXT: andl $-8, %esp
; AVX-32-NEXT: subl $32, %esp
; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; AVX-32-NEXT: vextractps $1, %xmm0, %eax
; AVX-32-NEXT: shrl $31, %eax
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp)
; AVX-32-NEXT: wait
; AVX-32-NEXT: vextractps $3, %xmm0, %eax
; AVX-32-NEXT: shrl $31, %eax
; AVX-32-NEXT: fildll {{[0-9]+}}(%esp)
; AVX-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; AVX-32-NEXT: fstpl (%esp)
; AVX-32-NEXT: wait
; AVX-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-32-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; AVX-32-NEXT: movl %ebp, %esp
; AVX-32-NEXT: popl %ebp
; AVX-32-NEXT: .cfi_def_cfa %esp, 4
; AVX-32-NEXT: retl
;
; AVX1-64-LABEL: uitofp_v2i64_v2f64:
; AVX1-64: # %bb.0:
; AVX1-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX1-64-NEXT: movq %rax, %rcx
; AVX1-64-NEXT: shrq %rcx
; AVX1-64-NEXT: movl %eax, %edx
; AVX1-64-NEXT: andl $1, %edx
; AVX1-64-NEXT: orq %rcx, %rdx
; AVX1-64-NEXT: testq %rax, %rax
; AVX1-64-NEXT: cmovnsq %rax, %rdx
; AVX1-64-NEXT: vcvtsi2sd %rdx, %xmm1, %xmm1
; AVX1-64-NEXT: jns .LBB21_2
; AVX1-64-NEXT: # %bb.1:
; AVX1-64-NEXT: vaddsd %xmm1, %xmm1, %xmm1
; AVX1-64-NEXT: .LBB21_2:
; AVX1-64-NEXT: vmovq %xmm0, %rax
; AVX1-64-NEXT: movq %rax, %rcx
; AVX1-64-NEXT: shrq %rcx
; AVX1-64-NEXT: movl %eax, %edx
; AVX1-64-NEXT: andl $1, %edx
; AVX1-64-NEXT: orq %rcx, %rdx
; AVX1-64-NEXT: testq %rax, %rax
; AVX1-64-NEXT: cmovnsq %rax, %rdx
; AVX1-64-NEXT: vcvtsi2sd %rdx, %xmm2, %xmm0
; AVX1-64-NEXT: jns .LBB21_4
; AVX1-64-NEXT: # %bb.3:
; AVX1-64-NEXT: vaddsd %xmm0, %xmm0, %xmm0
; AVX1-64-NEXT: .LBB21_4:
; AVX1-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-64-NEXT: retq
;
; AVX512F-64-LABEL: uitofp_v2i64_v2f64:
; AVX512F-64: # %bb.0:
; AVX512F-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-64-NEXT: vcvtusi2sd %rax, %xmm1, %xmm1
; AVX512F-64-NEXT: vmovq %xmm0, %rax
; AVX512F-64-NEXT: vcvtusi2sd %rax, %xmm2, %xmm0
; AVX512F-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512F-64-NEXT: retq
;
; AVX512VL-64-LABEL: uitofp_v2i64_v2f64:
; AVX512VL-64: # %bb.0:
; AVX512VL-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-64-NEXT: vcvtusi2sd %rax, %xmm1, %xmm1
; AVX512VL-64-NEXT: vmovq %xmm0, %rax
; AVX512VL-64-NEXT: vcvtusi2sd %rax, %xmm2, %xmm0
; AVX512VL-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-64-NEXT: retq
;
; AVX512DQ-LABEL: uitofp_v2i64_v2f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0
; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512DQVL-LABEL: uitofp_v2i64_v2f64:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vcvtuqq2pd %xmm0, %xmm0
; AVX512DQVL-NEXT: ret{{[l|q]}}
  %result = call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64> %x,
                 metadata !"round.dynamic",
                 metadata !"fpexcept.strict") #0
  ret <2 x double> %result
}

attributes #0 = { strictfp }