1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2 3; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41 4; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,VEX,AVX1 5; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,VEX,AVX2 6; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F 7; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VL 8; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX,AVX512,AVX512DQ 9; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VLDQ 10; 11; 32-bit tests to make sure we're not doing anything stupid. 
12; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown 13; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+sse 14; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+sse2 15; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+sse4.1 16 17; 18; Signed Integer to Double 19; 20 21define <2 x float> @sitofp_2i32_to_2f32(<2 x i32> %a) { 22; SSE-LABEL: sitofp_2i32_to_2f32: 23; SSE: # %bb.0: 24; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 25; SSE-NEXT: retq 26; 27; AVX-LABEL: sitofp_2i32_to_2f32: 28; AVX: # %bb.0: 29; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 30; AVX-NEXT: retq 31 %cvt = sitofp <2 x i32> %a to <2 x float> 32 ret <2 x float> %cvt 33} 34 35define <2 x float> @uitofp_2i32_to_2f32(<2 x i32> %a) { 36; SSE2-LABEL: uitofp_2i32_to_2f32: 37; SSE2: # %bb.0: 38; SSE2-NEXT: xorpd %xmm1, %xmm1 39; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 40; SSE2-NEXT: movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 41; SSE2-NEXT: orpd %xmm1, %xmm0 42; SSE2-NEXT: subpd %xmm1, %xmm0 43; SSE2-NEXT: cvtpd2ps %xmm0, %xmm0 44; SSE2-NEXT: retq 45; 46; SSE41-LABEL: uitofp_2i32_to_2f32: 47; SSE41: # %bb.0: 48; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 49; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 50; SSE41-NEXT: por %xmm1, %xmm0 51; SSE41-NEXT: subpd %xmm1, %xmm0 52; SSE41-NEXT: cvtpd2ps %xmm0, %xmm0 53; SSE41-NEXT: retq 54; 55; AVX1-LABEL: uitofp_2i32_to_2f32: 56; AVX1: # %bb.0: 57; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 58; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 59; AVX1-NEXT: # xmm1 = mem[0,0] 60; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 61; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0 62; AVX1-NEXT: vcvtpd2ps %xmm0, %xmm0 63; AVX1-NEXT: retq 64; 65; AVX2-LABEL: uitofp_2i32_to_2f32: 66; AVX2: # %bb.0: 67; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 68; 
AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 69; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 70; AVX2-NEXT: vsubpd %xmm1, %xmm0, %xmm0 71; AVX2-NEXT: vcvtpd2ps %xmm0, %xmm0 72; AVX2-NEXT: retq 73; 74; AVX512F-LABEL: uitofp_2i32_to_2f32: 75; AVX512F: # %bb.0: 76; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 77; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0 78; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 79; AVX512F-NEXT: vzeroupper 80; AVX512F-NEXT: retq 81; 82; AVX512VL-LABEL: uitofp_2i32_to_2f32: 83; AVX512VL: # %bb.0: 84; AVX512VL-NEXT: vcvtudq2ps %xmm0, %xmm0 85; AVX512VL-NEXT: retq 86; 87; AVX512DQ-LABEL: uitofp_2i32_to_2f32: 88; AVX512DQ: # %bb.0: 89; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 90; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0 91; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 92; AVX512DQ-NEXT: vzeroupper 93; AVX512DQ-NEXT: retq 94; 95; AVX512VLDQ-LABEL: uitofp_2i32_to_2f32: 96; AVX512VLDQ: # %bb.0: 97; AVX512VLDQ-NEXT: vcvtudq2ps %xmm0, %xmm0 98; AVX512VLDQ-NEXT: retq 99 %cvt = uitofp <2 x i32> %a to <2 x float> 100 ret <2 x float> %cvt 101} 102 103define <2 x double> @sitofp_2i64_to_2f64(<2 x i64> %a) { 104; SSE2-LABEL: sitofp_2i64_to_2f64: 105; SSE2: # %bb.0: 106; SSE2-NEXT: movq %xmm0, %rax 107; SSE2-NEXT: cvtsi2sd %rax, %xmm1 108; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 109; SSE2-NEXT: movq %xmm0, %rax 110; SSE2-NEXT: xorps %xmm0, %xmm0 111; SSE2-NEXT: cvtsi2sd %rax, %xmm0 112; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] 113; SSE2-NEXT: movapd %xmm1, %xmm0 114; SSE2-NEXT: retq 115; 116; SSE41-LABEL: sitofp_2i64_to_2f64: 117; SSE41: # %bb.0: 118; SSE41-NEXT: pextrq $1, %xmm0, %rax 119; SSE41-NEXT: cvtsi2sd %rax, %xmm1 120; SSE41-NEXT: movq %xmm0, %rax 121; SSE41-NEXT: xorps %xmm0, %xmm0 122; SSE41-NEXT: cvtsi2sd %rax, %xmm0 123; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 124; SSE41-NEXT: retq 125; 126; VEX-LABEL: sitofp_2i64_to_2f64: 127; VEX: # 
%bb.0: 128; VEX-NEXT: vpextrq $1, %xmm0, %rax 129; VEX-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1 130; VEX-NEXT: vmovq %xmm0, %rax 131; VEX-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0 132; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 133; VEX-NEXT: retq 134; 135; AVX512F-LABEL: sitofp_2i64_to_2f64: 136; AVX512F: # %bb.0: 137; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 138; AVX512F-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1 139; AVX512F-NEXT: vmovq %xmm0, %rax 140; AVX512F-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0 141; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 142; AVX512F-NEXT: retq 143; 144; AVX512VL-LABEL: sitofp_2i64_to_2f64: 145; AVX512VL: # %bb.0: 146; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax 147; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1 148; AVX512VL-NEXT: vmovq %xmm0, %rax 149; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0 150; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 151; AVX512VL-NEXT: retq 152; 153; AVX512DQ-LABEL: sitofp_2i64_to_2f64: 154; AVX512DQ: # %bb.0: 155; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 156; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0 157; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 158; AVX512DQ-NEXT: vzeroupper 159; AVX512DQ-NEXT: retq 160; 161; AVX512VLDQ-LABEL: sitofp_2i64_to_2f64: 162; AVX512VLDQ: # %bb.0: 163; AVX512VLDQ-NEXT: vcvtqq2pd %xmm0, %xmm0 164; AVX512VLDQ-NEXT: retq 165 %cvt = sitofp <2 x i64> %a to <2 x double> 166 ret <2 x double> %cvt 167} 168 169define <2 x double> @sitofp_2i32_to_2f64(<4 x i32> %a) { 170; SSE-LABEL: sitofp_2i32_to_2f64: 171; SSE: # %bb.0: 172; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 173; SSE-NEXT: retq 174; 175; AVX-LABEL: sitofp_2i32_to_2f64: 176; AVX: # %bb.0: 177; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 178; AVX-NEXT: retq 179 %shuf = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1> 180 %cvt = sitofp <2 x i32> %shuf to <2 x double> 181 ret <2 x double> %cvt 182} 183 184define <2 x double> @sitofp_4i32_to_2f64(<4 x i32> %a) { 185; SSE-LABEL: sitofp_4i32_to_2f64: 186; 
SSE: # %bb.0: 187; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 188; SSE-NEXT: retq 189; 190; AVX-LABEL: sitofp_4i32_to_2f64: 191; AVX: # %bb.0: 192; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 193; AVX-NEXT: retq 194 %cvt = sitofp <4 x i32> %a to <4 x double> 195 %shuf = shufflevector <4 x double> %cvt, <4 x double> undef, <2 x i32> <i32 0, i32 1> 196 ret <2 x double> %shuf 197} 198 199define <2 x double> @sitofp_2i16_to_2f64(<8 x i16> %a) { 200; SSE2-LABEL: sitofp_2i16_to_2f64: 201; SSE2: # %bb.0: 202; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7] 203; SSE2-NEXT: psrad $16, %xmm0 204; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0 205; SSE2-NEXT: retq 206; 207; SSE41-LABEL: sitofp_2i16_to_2f64: 208; SSE41: # %bb.0: 209; SSE41-NEXT: pmovsxwd %xmm0, %xmm0 210; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0 211; SSE41-NEXT: retq 212; 213; AVX-LABEL: sitofp_2i16_to_2f64: 214; AVX: # %bb.0: 215; AVX-NEXT: vpmovsxwd %xmm0, %xmm0 216; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 217; AVX-NEXT: retq 218 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <2 x i32> <i32 0, i32 1> 219 %cvt = sitofp <2 x i16> %shuf to <2 x double> 220 ret <2 x double> %cvt 221} 222 223define <2 x double> @sitofp_8i16_to_2f64(<8 x i16> %a) { 224; SSE2-LABEL: sitofp_8i16_to_2f64: 225; SSE2: # %bb.0: 226; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7] 227; SSE2-NEXT: psrad $16, %xmm0 228; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0 229; SSE2-NEXT: retq 230; 231; SSE41-LABEL: sitofp_8i16_to_2f64: 232; SSE41: # %bb.0: 233; SSE41-NEXT: pmovsxwd %xmm0, %xmm0 234; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0 235; SSE41-NEXT: retq 236; 237; VEX-LABEL: sitofp_8i16_to_2f64: 238; VEX: # %bb.0: 239; VEX-NEXT: vpmovsxwd %xmm0, %xmm0 240; VEX-NEXT: vcvtdq2pd %xmm0, %xmm0 241; VEX-NEXT: retq 242; 243; AVX512-LABEL: sitofp_8i16_to_2f64: 244; AVX512: # %bb.0: 245; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0 246; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0 247; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 248; AVX512-NEXT: vzeroupper 249; AVX512-NEXT: retq 250 %cvt = sitofp 
<8 x i16> %a to <8 x double> 251 %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <2 x i32> <i32 0, i32 1> 252 ret <2 x double> %shuf 253} 254 255define <2 x double> @sitofp_2i8_to_2f64(<16 x i8> %a) { 256; SSE2-LABEL: sitofp_2i8_to_2f64: 257; SSE2: # %bb.0: 258; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 259; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7] 260; SSE2-NEXT: psrad $24, %xmm0 261; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0 262; SSE2-NEXT: retq 263; 264; SSE41-LABEL: sitofp_2i8_to_2f64: 265; SSE41: # %bb.0: 266; SSE41-NEXT: pmovsxbd %xmm0, %xmm0 267; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0 268; SSE41-NEXT: retq 269; 270; AVX-LABEL: sitofp_2i8_to_2f64: 271; AVX: # %bb.0: 272; AVX-NEXT: vpmovsxbd %xmm0, %xmm0 273; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 274; AVX-NEXT: retq 275 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <2 x i32> <i32 0, i32 1> 276 %cvt = sitofp <2 x i8> %shuf to <2 x double> 277 ret <2 x double> %cvt 278} 279 280define <2 x double> @sitofp_16i8_to_2f64(<16 x i8> %a) { 281; SSE2-LABEL: sitofp_16i8_to_2f64: 282; SSE2: # %bb.0: 283; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 284; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7] 285; SSE2-NEXT: psrad $24, %xmm0 286; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0 287; SSE2-NEXT: retq 288; 289; SSE41-LABEL: sitofp_16i8_to_2f64: 290; SSE41: # %bb.0: 291; SSE41-NEXT: pmovsxbd %xmm0, %xmm0 292; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0 293; SSE41-NEXT: retq 294; 295; VEX-LABEL: sitofp_16i8_to_2f64: 296; VEX: # %bb.0: 297; VEX-NEXT: vpmovsxbd %xmm0, %xmm0 298; VEX-NEXT: vcvtdq2pd %xmm0, %xmm0 299; VEX-NEXT: retq 300; 301; AVX512-LABEL: sitofp_16i8_to_2f64: 302; AVX512: # %bb.0: 303; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0 304; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0 305; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 306; AVX512-NEXT: vzeroupper 307; AVX512-NEXT: retq 308 %cvt = sitofp <16 x i8> %a to <16 x double> 309 %shuf = 
shufflevector <16 x double> %cvt, <16 x double> undef, <2 x i32> <i32 0, i32 1> 310 ret <2 x double> %shuf 311} 312 313define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) { 314; SSE2-LABEL: sitofp_4i64_to_4f64: 315; SSE2: # %bb.0: 316; SSE2-NEXT: movq %xmm0, %rax 317; SSE2-NEXT: cvtsi2sd %rax, %xmm2 318; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 319; SSE2-NEXT: movq %xmm0, %rax 320; SSE2-NEXT: xorps %xmm0, %xmm0 321; SSE2-NEXT: cvtsi2sd %rax, %xmm0 322; SSE2-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0] 323; SSE2-NEXT: movq %xmm1, %rax 324; SSE2-NEXT: cvtsi2sd %rax, %xmm3 325; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 326; SSE2-NEXT: movq %xmm0, %rax 327; SSE2-NEXT: xorps %xmm0, %xmm0 328; SSE2-NEXT: cvtsi2sd %rax, %xmm0 329; SSE2-NEXT: unpcklpd {{.*#+}} xmm3 = xmm3[0],xmm0[0] 330; SSE2-NEXT: movapd %xmm2, %xmm0 331; SSE2-NEXT: movapd %xmm3, %xmm1 332; SSE2-NEXT: retq 333; 334; SSE41-LABEL: sitofp_4i64_to_4f64: 335; SSE41: # %bb.0: 336; SSE41-NEXT: pextrq $1, %xmm0, %rax 337; SSE41-NEXT: cvtsi2sd %rax, %xmm2 338; SSE41-NEXT: movq %xmm0, %rax 339; SSE41-NEXT: xorps %xmm0, %xmm0 340; SSE41-NEXT: cvtsi2sd %rax, %xmm0 341; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] 342; SSE41-NEXT: pextrq $1, %xmm1, %rax 343; SSE41-NEXT: xorps %xmm2, %xmm2 344; SSE41-NEXT: cvtsi2sd %rax, %xmm2 345; SSE41-NEXT: movq %xmm1, %rax 346; SSE41-NEXT: xorps %xmm1, %xmm1 347; SSE41-NEXT: cvtsi2sd %rax, %xmm1 348; SSE41-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] 349; SSE41-NEXT: retq 350; 351; AVX1-LABEL: sitofp_4i64_to_4f64: 352; AVX1: # %bb.0: 353; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 354; AVX1-NEXT: vpextrq $1, %xmm1, %rax 355; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 356; AVX1-NEXT: vmovq %xmm1, %rax 357; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1 358; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] 359; AVX1-NEXT: vpextrq $1, %xmm0, %rax 360; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2 361; AVX1-NEXT: vmovq %xmm0, %rax 362; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, 
%xmm0 363; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] 364; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 365; AVX1-NEXT: retq 366; 367; AVX2-LABEL: sitofp_4i64_to_4f64: 368; AVX2: # %bb.0: 369; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 370; AVX2-NEXT: vpextrq $1, %xmm1, %rax 371; AVX2-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 372; AVX2-NEXT: vmovq %xmm1, %rax 373; AVX2-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1 374; AVX2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] 375; AVX2-NEXT: vpextrq $1, %xmm0, %rax 376; AVX2-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2 377; AVX2-NEXT: vmovq %xmm0, %rax 378; AVX2-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0 379; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] 380; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 381; AVX2-NEXT: retq 382; 383; AVX512F-LABEL: sitofp_4i64_to_4f64: 384; AVX512F: # %bb.0: 385; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 386; AVX512F-NEXT: vpextrq $1, %xmm1, %rax 387; AVX512F-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 388; AVX512F-NEXT: vmovq %xmm1, %rax 389; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1 390; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] 391; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 392; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2 393; AVX512F-NEXT: vmovq %xmm0, %rax 394; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0 395; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] 396; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 397; AVX512F-NEXT: retq 398; 399; AVX512VL-LABEL: sitofp_4i64_to_4f64: 400; AVX512VL: # %bb.0: 401; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1 402; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax 403; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 404; AVX512VL-NEXT: vmovq %xmm1, %rax 405; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1 406; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] 407; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax 408; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2 409; AVX512VL-NEXT: vmovq %xmm0, %rax 410; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0 411; AVX512VL-NEXT: 
vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] 412; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 413; AVX512VL-NEXT: retq 414; 415; AVX512DQ-LABEL: sitofp_4i64_to_4f64: 416; AVX512DQ: # %bb.0: 417; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 418; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0 419; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 420; AVX512DQ-NEXT: retq 421; 422; AVX512VLDQ-LABEL: sitofp_4i64_to_4f64: 423; AVX512VLDQ: # %bb.0: 424; AVX512VLDQ-NEXT: vcvtqq2pd %ymm0, %ymm0 425; AVX512VLDQ-NEXT: retq 426 %cvt = sitofp <4 x i64> %a to <4 x double> 427 ret <4 x double> %cvt 428} 429 430define <4 x double> @sitofp_4i32_to_4f64(<4 x i32> %a) { 431; SSE-LABEL: sitofp_4i32_to_4f64: 432; SSE: # %bb.0: 433; SSE-NEXT: cvtdq2pd %xmm0, %xmm2 434; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 435; SSE-NEXT: cvtdq2pd %xmm0, %xmm1 436; SSE-NEXT: movaps %xmm2, %xmm0 437; SSE-NEXT: retq 438; 439; AVX-LABEL: sitofp_4i32_to_4f64: 440; AVX: # %bb.0: 441; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0 442; AVX-NEXT: retq 443 %cvt = sitofp <4 x i32> %a to <4 x double> 444 ret <4 x double> %cvt 445} 446 447define <4 x double> @sitofp_4i16_to_4f64(<8 x i16> %a) { 448; SSE2-LABEL: sitofp_4i16_to_4f64: 449; SSE2: # %bb.0: 450; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 451; SSE2-NEXT: psrad $16, %xmm1 452; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0 453; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 454; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1 455; SSE2-NEXT: retq 456; 457; SSE41-LABEL: sitofp_4i16_to_4f64: 458; SSE41: # %bb.0: 459; SSE41-NEXT: pmovsxwd %xmm0, %xmm1 460; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0 461; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 462; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1 463; SSE41-NEXT: retq 464; 465; AVX-LABEL: sitofp_4i16_to_4f64: 466; AVX: # %bb.0: 467; AVX-NEXT: vpmovsxwd %xmm0, %xmm0 468; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0 469; AVX-NEXT: retq 470 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x 
i32> <i32 0, i32 1, i32 2, i32 3> 471 %cvt = sitofp <4 x i16> %shuf to <4 x double> 472 ret <4 x double> %cvt 473} 474 475define <4 x double> @sitofp_8i16_to_4f64(<8 x i16> %a) { 476; SSE2-LABEL: sitofp_8i16_to_4f64: 477; SSE2: # %bb.0: 478; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 479; SSE2-NEXT: psrad $16, %xmm1 480; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0 481; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 482; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1 483; SSE2-NEXT: retq 484; 485; SSE41-LABEL: sitofp_8i16_to_4f64: 486; SSE41: # %bb.0: 487; SSE41-NEXT: pmovsxwd %xmm0, %xmm1 488; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0 489; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 490; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1 491; SSE41-NEXT: retq 492; 493; VEX-LABEL: sitofp_8i16_to_4f64: 494; VEX: # %bb.0: 495; VEX-NEXT: vpmovsxwd %xmm0, %xmm0 496; VEX-NEXT: vcvtdq2pd %xmm0, %ymm0 497; VEX-NEXT: retq 498; 499; AVX512-LABEL: sitofp_8i16_to_4f64: 500; AVX512: # %bb.0: 501; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0 502; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0 503; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 504; AVX512-NEXT: retq 505 %cvt = sitofp <8 x i16> %a to <8 x double> 506 %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 507 ret <4 x double> %shuf 508} 509 510define <4 x double> @sitofp_4i8_to_4f64(<16 x i8> %a) { 511; SSE2-LABEL: sitofp_4i8_to_4f64: 512; SSE2: # %bb.0: 513; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 514; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 515; SSE2-NEXT: psrad $24, %xmm1 516; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0 517; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 518; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1 519; SSE2-NEXT: retq 520; 521; SSE41-LABEL: sitofp_4i8_to_4f64: 522; SSE41: # %bb.0: 523; SSE41-NEXT: pmovsxbd %xmm0, %xmm1 524; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0 525; 
SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 526; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1 527; SSE41-NEXT: retq 528; 529; AVX-LABEL: sitofp_4i8_to_4f64: 530; AVX: # %bb.0: 531; AVX-NEXT: vpmovsxbd %xmm0, %xmm0 532; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0 533; AVX-NEXT: retq 534 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 535 %cvt = sitofp <4 x i8> %shuf to <4 x double> 536 ret <4 x double> %cvt 537} 538 539define <4 x double> @sitofp_16i8_to_4f64(<16 x i8> %a) { 540; SSE2-LABEL: sitofp_16i8_to_4f64: 541; SSE2: # %bb.0: 542; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 543; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 544; SSE2-NEXT: psrad $24, %xmm1 545; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0 546; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 547; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1 548; SSE2-NEXT: retq 549; 550; SSE41-LABEL: sitofp_16i8_to_4f64: 551; SSE41: # %bb.0: 552; SSE41-NEXT: pmovsxbd %xmm0, %xmm1 553; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0 554; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 555; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1 556; SSE41-NEXT: retq 557; 558; VEX-LABEL: sitofp_16i8_to_4f64: 559; VEX: # %bb.0: 560; VEX-NEXT: vpmovsxbd %xmm0, %xmm0 561; VEX-NEXT: vcvtdq2pd %xmm0, %ymm0 562; VEX-NEXT: retq 563; 564; AVX512-LABEL: sitofp_16i8_to_4f64: 565; AVX512: # %bb.0: 566; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0 567; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0 568; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 569; AVX512-NEXT: retq 570 %cvt = sitofp <16 x i8> %a to <16 x double> 571 %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 572 ret <4 x double> %shuf 573} 574 575; 576; Unsigned Integer to Double 577; 578 579define <2 x double> @uitofp_2i64_to_2f64(<2 x i64> %a) { 580; SSE2-LABEL: uitofp_2i64_to_2f64: 581; SSE2: # %bb.0: 582; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [4294967295,4294967295] 
583; SSE2-NEXT: pand %xmm0, %xmm1 584; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 585; SSE2-NEXT: psrlq $32, %xmm0 586; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 587; SSE2-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 588; SSE2-NEXT: addpd %xmm1, %xmm0 589; SSE2-NEXT: retq 590; 591; SSE41-LABEL: uitofp_2i64_to_2f64: 592; SSE41: # %bb.0: 593; SSE41-NEXT: pxor %xmm1, %xmm1 594; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 595; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 596; SSE41-NEXT: psrlq $32, %xmm0 597; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 598; SSE41-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 599; SSE41-NEXT: addpd %xmm1, %xmm0 600; SSE41-NEXT: retq 601; 602; AVX1-LABEL: uitofp_2i64_to_2f64: 603; AVX1: # %bb.0: 604; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 605; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 606; AVX1-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 607; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0 608; AVX1-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 609; AVX1-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 610; AVX1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 611; AVX1-NEXT: retq 612; 613; AVX2-LABEL: uitofp_2i64_to_2f64: 614; AVX2: # %bb.0: 615; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 616; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] 617; AVX2-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 618; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm0 619; AVX2-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 620; AVX2-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 621; AVX2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 622; AVX2-NEXT: retq 623; 624; AVX512F-LABEL: uitofp_2i64_to_2f64: 625; AVX512F: # %bb.0: 626; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 627; AVX512F-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] 628; AVX512F-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 629; 
AVX512F-NEXT: vpsrlq $32, %xmm0, %xmm0 630; AVX512F-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 631; AVX512F-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 632; AVX512F-NEXT: vaddpd %xmm0, %xmm1, %xmm0 633; AVX512F-NEXT: retq 634; 635; AVX512VL-LABEL: uitofp_2i64_to_2f64: 636; AVX512VL: # %bb.0: 637; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 638; AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] 639; AVX512VL-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1 640; AVX512VL-NEXT: vpsrlq $32, %xmm0, %xmm0 641; AVX512VL-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 642; AVX512VL-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 643; AVX512VL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 644; AVX512VL-NEXT: retq 645; 646; AVX512DQ-LABEL: uitofp_2i64_to_2f64: 647; AVX512DQ: # %bb.0: 648; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 649; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0 650; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 651; AVX512DQ-NEXT: vzeroupper 652; AVX512DQ-NEXT: retq 653; 654; AVX512VLDQ-LABEL: uitofp_2i64_to_2f64: 655; AVX512VLDQ: # %bb.0: 656; AVX512VLDQ-NEXT: vcvtuqq2pd %xmm0, %xmm0 657; AVX512VLDQ-NEXT: retq 658 %cvt = uitofp <2 x i64> %a to <2 x double> 659 ret <2 x double> %cvt 660} 661 662define <2 x double> @uitofp_2i32_to_2f64(<4 x i32> %a) { 663; SSE2-LABEL: uitofp_2i32_to_2f64: 664; SSE2: # %bb.0: 665; SSE2-NEXT: xorpd %xmm1, %xmm1 666; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 667; SSE2-NEXT: movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 668; SSE2-NEXT: orpd %xmm1, %xmm0 669; SSE2-NEXT: subpd %xmm1, %xmm0 670; SSE2-NEXT: retq 671; 672; SSE41-LABEL: uitofp_2i32_to_2f64: 673; SSE41: # %bb.0: 674; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 675; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 676; SSE41-NEXT: por %xmm1, %xmm0 677; SSE41-NEXT: subpd 
%xmm1, %xmm0 678; SSE41-NEXT: retq 679; 680; AVX1-LABEL: uitofp_2i32_to_2f64: 681; AVX1: # %bb.0: 682; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 683; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 684; AVX1-NEXT: # xmm1 = mem[0,0] 685; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 686; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0 687; AVX1-NEXT: retq 688; 689; AVX2-LABEL: uitofp_2i32_to_2f64: 690; AVX2: # %bb.0: 691; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 692; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 693; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 694; AVX2-NEXT: vsubpd %xmm1, %xmm0, %xmm0 695; AVX2-NEXT: retq 696; 697; AVX512F-LABEL: uitofp_2i32_to_2f64: 698; AVX512F: # %bb.0: 699; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 700; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0 701; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 702; AVX512F-NEXT: vzeroupper 703; AVX512F-NEXT: retq 704; 705; AVX512VL-LABEL: uitofp_2i32_to_2f64: 706; AVX512VL: # %bb.0: 707; AVX512VL-NEXT: vcvtudq2pd %xmm0, %xmm0 708; AVX512VL-NEXT: retq 709; 710; AVX512DQ-LABEL: uitofp_2i32_to_2f64: 711; AVX512DQ: # %bb.0: 712; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 713; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0 714; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 715; AVX512DQ-NEXT: vzeroupper 716; AVX512DQ-NEXT: retq 717; 718; AVX512VLDQ-LABEL: uitofp_2i32_to_2f64: 719; AVX512VLDQ: # %bb.0: 720; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %xmm0 721; AVX512VLDQ-NEXT: retq 722 %shuf = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1> 723 %cvt = uitofp <2 x i32> %shuf to <2 x double> 724 ret <2 x double> %cvt 725} 726 727define <2 x double> @uitofp_4i32_to_2f64(<4 x i32> %a) { 728; SSE2-LABEL: uitofp_4i32_to_2f64: 729; SSE2: # %bb.0: 730; SSE2-NEXT: xorpd %xmm1, %xmm1 731; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 732; 
SSE2-NEXT: movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 733; SSE2-NEXT: orpd %xmm1, %xmm0 734; SSE2-NEXT: subpd %xmm1, %xmm0 735; SSE2-NEXT: retq 736; 737; SSE41-LABEL: uitofp_4i32_to_2f64: 738; SSE41: # %bb.0: 739; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 740; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 741; SSE41-NEXT: por %xmm1, %xmm0 742; SSE41-NEXT: subpd %xmm1, %xmm0 743; SSE41-NEXT: retq 744; 745; AVX1-LABEL: uitofp_4i32_to_2f64: 746; AVX1: # %bb.0: 747; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 748; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 749; AVX1-NEXT: # xmm1 = mem[0,0] 750; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 751; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0 752; AVX1-NEXT: retq 753; 754; AVX2-LABEL: uitofp_4i32_to_2f64: 755; AVX2: # %bb.0: 756; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 757; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 758; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 759; AVX2-NEXT: vsubpd %xmm1, %xmm0, %xmm0 760; AVX2-NEXT: retq 761; 762; AVX512F-LABEL: uitofp_4i32_to_2f64: 763; AVX512F: # %bb.0: 764; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 765; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0 766; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 767; AVX512F-NEXT: vzeroupper 768; AVX512F-NEXT: retq 769; 770; AVX512VL-LABEL: uitofp_4i32_to_2f64: 771; AVX512VL: # %bb.0: 772; AVX512VL-NEXT: vcvtudq2pd %xmm0, %xmm0 773; AVX512VL-NEXT: retq 774; 775; AVX512DQ-LABEL: uitofp_4i32_to_2f64: 776; AVX512DQ: # %bb.0: 777; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 778; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0 779; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 780; AVX512DQ-NEXT: vzeroupper 781; AVX512DQ-NEXT: retq 782; 783; AVX512VLDQ-LABEL: uitofp_4i32_to_2f64: 784; AVX512VLDQ: # %bb.0: 785; AVX512VLDQ-NEXT: vcvtudq2pd 
%xmm0, %xmm0 786; AVX512VLDQ-NEXT: retq 787 %cvt = uitofp <4 x i32> %a to <4 x double> 788 %shuf = shufflevector <4 x double> %cvt, <4 x double> undef, <2 x i32> <i32 0, i32 1> 789 ret <2 x double> %shuf 790} 791 792define <2 x double> @uitofp_2i16_to_2f64(<8 x i16> %a) { 793; SSE2-LABEL: uitofp_2i16_to_2f64: 794; SSE2: # %bb.0: 795; SSE2-NEXT: pxor %xmm1, %xmm1 796; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 797; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0 798; SSE2-NEXT: retq 799; 800; SSE41-LABEL: uitofp_2i16_to_2f64: 801; SSE41: # %bb.0: 802; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 803; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0 804; SSE41-NEXT: retq 805; 806; AVX-LABEL: uitofp_2i16_to_2f64: 807; AVX: # %bb.0: 808; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 809; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 810; AVX-NEXT: retq 811 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <2 x i32> <i32 0, i32 1> 812 %cvt = uitofp <2 x i16> %shuf to <2 x double> 813 ret <2 x double> %cvt 814} 815 816define <2 x double> @uitofp_8i16_to_2f64(<8 x i16> %a) { 817; SSE2-LABEL: uitofp_8i16_to_2f64: 818; SSE2: # %bb.0: 819; SSE2-NEXT: pxor %xmm1, %xmm1 820; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 821; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0 822; SSE2-NEXT: retq 823; 824; SSE41-LABEL: uitofp_8i16_to_2f64: 825; SSE41: # %bb.0: 826; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 827; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0 828; SSE41-NEXT: retq 829; 830; VEX-LABEL: uitofp_8i16_to_2f64: 831; VEX: # %bb.0: 832; VEX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 833; VEX-NEXT: vcvtdq2pd %xmm0, %xmm0 834; VEX-NEXT: retq 835; 836; AVX512-LABEL: uitofp_8i16_to_2f64: 837; AVX512: # %bb.0: 838; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 839; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0 840; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 841; AVX512-NEXT: vzeroupper 842; AVX512-NEXT: retq 843 %cvt = uitofp <8 x i16> %a to <8 x double> 844 %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <2 x i32> <i32 0, i32 1> 845 ret <2 x double> %shuf 846} 847 848define <2 x double> @uitofp_2i8_to_2f64(<16 x i8> %a) { 849; SSE2-LABEL: uitofp_2i8_to_2f64: 850; SSE2: # %bb.0: 851; SSE2-NEXT: pxor %xmm1, %xmm1 852; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 853; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 854; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0 855; SSE2-NEXT: retq 856; 857; SSE41-LABEL: uitofp_2i8_to_2f64: 858; SSE41: # %bb.0: 859; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 860; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0 861; SSE41-NEXT: retq 862; 863; AVX-LABEL: uitofp_2i8_to_2f64: 864; AVX: # %bb.0: 865; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 866; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 867; AVX-NEXT: retq 868 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <2 x i32> <i32 0, i32 1> 869 %cvt = uitofp <2 x i8> %shuf to <2 x double> 870 ret <2 x double> %cvt 871} 872 873define <2 x double> @uitofp_16i8_to_2f64(<16 x i8> %a) { 874; SSE2-LABEL: uitofp_16i8_to_2f64: 875; SSE2: # %bb.0: 876; SSE2-NEXT: pxor %xmm1, %xmm1 877; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 878; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 879; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0 880; SSE2-NEXT: retq 881; 882; SSE41-LABEL: uitofp_16i8_to_2f64: 883; SSE41: # %bb.0: 884; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 885; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0 886; SSE41-NEXT: retq 887; 888; VEX-LABEL: uitofp_16i8_to_2f64: 889; VEX: # %bb.0: 890; VEX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 891; VEX-NEXT: vcvtdq2pd %xmm0, %xmm0 892; VEX-NEXT: retq 893; 894; AVX512-LABEL: uitofp_16i8_to_2f64: 895; AVX512: # %bb.0: 896; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 897; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0 898; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 899; AVX512-NEXT: vzeroupper 900; AVX512-NEXT: retq 901 %cvt = uitofp <16 x i8> %a to <16 x double> 902 %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <2 x i32> <i32 0, i32 1> 903 ret <2 x double> %shuf 904} 905 906define <4 x double> @uitofp_4i64_to_4f64(<4 x i64> %a) { 907; SSE2-LABEL: uitofp_4i64_to_4f64: 908; SSE2: # %bb.0: 909; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295] 910; SSE2-NEXT: movdqa %xmm0, %xmm3 911; SSE2-NEXT: pand %xmm2, %xmm3 912; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200] 913; SSE2-NEXT: por %xmm4, %xmm3 914; SSE2-NEXT: psrlq $32, %xmm0 915; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072] 916; SSE2-NEXT: por %xmm5, %xmm0 917; SSE2-NEXT: movapd {{.*#+}} xmm6 
= [1.9342813118337666E+25,1.9342813118337666E+25] 918; SSE2-NEXT: subpd %xmm6, %xmm0 919; SSE2-NEXT: addpd %xmm3, %xmm0 920; SSE2-NEXT: pand %xmm1, %xmm2 921; SSE2-NEXT: por %xmm4, %xmm2 922; SSE2-NEXT: psrlq $32, %xmm1 923; SSE2-NEXT: por %xmm5, %xmm1 924; SSE2-NEXT: subpd %xmm6, %xmm1 925; SSE2-NEXT: addpd %xmm2, %xmm1 926; SSE2-NEXT: retq 927; 928; SSE41-LABEL: uitofp_4i64_to_4f64: 929; SSE41: # %bb.0: 930; SSE41-NEXT: pxor %xmm2, %xmm2 931; SSE41-NEXT: movdqa %xmm0, %xmm3 932; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7] 933; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200] 934; SSE41-NEXT: por %xmm4, %xmm3 935; SSE41-NEXT: psrlq $32, %xmm0 936; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072] 937; SSE41-NEXT: por %xmm5, %xmm0 938; SSE41-NEXT: movapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25] 939; SSE41-NEXT: subpd %xmm6, %xmm0 940; SSE41-NEXT: addpd %xmm3, %xmm0 941; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] 942; SSE41-NEXT: por %xmm4, %xmm2 943; SSE41-NEXT: psrlq $32, %xmm1 944; SSE41-NEXT: por %xmm5, %xmm1 945; SSE41-NEXT: subpd %xmm6, %xmm1 946; SSE41-NEXT: addpd %xmm2, %xmm1 947; SSE41-NEXT: retq 948; 949; AVX1-LABEL: uitofp_4i64_to_4f64: 950; AVX1: # %bb.0: 951; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 952; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 953; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 954; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7] 955; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7] 956; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 957; AVX1-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 958; AVX1-NEXT: vaddpd %ymm0, %ymm2, %ymm0 959; AVX1-NEXT: retq 960; 961; AVX2-LABEL: uitofp_4i64_to_4f64: 962; AVX2: # %bb.0: 963; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 964; 
AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 965; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200] 966; AVX2-NEXT: vpor %ymm2, %ymm1, %ymm1 967; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0 968; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072] 969; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0 970; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25] 971; AVX2-NEXT: vsubpd %ymm2, %ymm0, %ymm0 972; AVX2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 973; AVX2-NEXT: retq 974; 975; AVX512F-LABEL: uitofp_4i64_to_4f64: 976; AVX512F: # %bb.0: 977; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 978; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 979; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200] 980; AVX512F-NEXT: vpor %ymm2, %ymm1, %ymm1 981; AVX512F-NEXT: vpsrlq $32, %ymm0, %ymm0 982; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072] 983; AVX512F-NEXT: vpor %ymm2, %ymm0, %ymm0 984; AVX512F-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25] 985; AVX512F-NEXT: vsubpd %ymm2, %ymm0, %ymm0 986; AVX512F-NEXT: vaddpd %ymm0, %ymm1, %ymm0 987; AVX512F-NEXT: retq 988; 989; AVX512VL-LABEL: uitofp_4i64_to_4f64: 990; AVX512VL: # %bb.0: 991; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 992; AVX512VL-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 993; AVX512VL-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm1 994; AVX512VL-NEXT: vpsrlq $32, %ymm0, %ymm0 995; AVX512VL-NEXT: vporq 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0 996; AVX512VL-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0 997; AVX512VL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 998; AVX512VL-NEXT: retq 999; 1000; AVX512DQ-LABEL: uitofp_4i64_to_4f64: 1001; AVX512DQ: # %bb.0: 1002; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1003; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0 1004; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1005; AVX512DQ-NEXT: retq 1006; 1007; AVX512VLDQ-LABEL: uitofp_4i64_to_4f64: 1008; AVX512VLDQ: # %bb.0: 1009; AVX512VLDQ-NEXT: vcvtuqq2pd %ymm0, %ymm0 1010; AVX512VLDQ-NEXT: retq 1011 %cvt = uitofp <4 x i64> %a to <4 x double> 1012 ret <4 x double> %cvt 1013} 1014 1015define <4 x double> @uitofp_4i32_to_4f64(<4 x i32> %a) { 1016; SSE2-LABEL: uitofp_4i32_to_4f64: 1017; SSE2: # %bb.0: 1018; SSE2-NEXT: movapd %xmm0, %xmm1 1019; SSE2-NEXT: xorpd %xmm2, %xmm2 1020; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1021; SSE2-NEXT: movapd {{.*#+}} xmm3 = [4.503599627370496E+15,4.503599627370496E+15] 1022; SSE2-NEXT: orpd %xmm3, %xmm0 1023; SSE2-NEXT: subpd %xmm3, %xmm0 1024; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1025; SSE2-NEXT: orpd %xmm3, %xmm1 1026; SSE2-NEXT: subpd %xmm3, %xmm1 1027; SSE2-NEXT: retq 1028; 1029; SSE41-LABEL: uitofp_4i32_to_4f64: 1030; SSE41: # %bb.0: 1031; SSE41-NEXT: movdqa %xmm0, %xmm1 1032; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 1033; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15] 1034; SSE41-NEXT: por %xmm2, %xmm0 1035; SSE41-NEXT: subpd %xmm2, %xmm0 1036; SSE41-NEXT: pxor %xmm3, %xmm3 1037; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3] 1038; SSE41-NEXT: por %xmm2, %xmm1 1039; SSE41-NEXT: subpd %xmm2, %xmm1 1040; SSE41-NEXT: retq 1041; 1042; AVX1-LABEL: uitofp_4i32_to_4f64: 1043; AVX1: # %bb.0: 1044; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1045; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = 
xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1046; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 1047; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1048; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15] 1049; AVX1-NEXT: vorpd %ymm1, %ymm0, %ymm0 1050; AVX1-NEXT: vsubpd %ymm1, %ymm0, %ymm0 1051; AVX1-NEXT: retq 1052; 1053; AVX2-LABEL: uitofp_4i32_to_4f64: 1054; AVX2: # %bb.0: 1055; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1056; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15] 1057; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 1058; AVX2-NEXT: vsubpd %ymm1, %ymm0, %ymm0 1059; AVX2-NEXT: retq 1060; 1061; AVX512F-LABEL: uitofp_4i32_to_4f64: 1062; AVX512F: # %bb.0: 1063; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 1064; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0 1065; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1066; AVX512F-NEXT: retq 1067; 1068; AVX512VL-LABEL: uitofp_4i32_to_4f64: 1069; AVX512VL: # %bb.0: 1070; AVX512VL-NEXT: vcvtudq2pd %xmm0, %ymm0 1071; AVX512VL-NEXT: retq 1072; 1073; AVX512DQ-LABEL: uitofp_4i32_to_4f64: 1074; AVX512DQ: # %bb.0: 1075; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 1076; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0 1077; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1078; AVX512DQ-NEXT: retq 1079; 1080; AVX512VLDQ-LABEL: uitofp_4i32_to_4f64: 1081; AVX512VLDQ: # %bb.0: 1082; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %ymm0 1083; AVX512VLDQ-NEXT: retq 1084 %cvt = uitofp <4 x i32> %a to <4 x double> 1085 ret <4 x double> %cvt 1086} 1087 1088define <4 x double> @uitofp_4i16_to_4f64(<8 x i16> %a) { 1089; SSE2-LABEL: uitofp_4i16_to_4f64: 1090; SSE2: # %bb.0: 1091; SSE2-NEXT: pxor %xmm1, %xmm1 1092; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1093; 
SSE2-NEXT: cvtdq2pd %xmm0, %xmm2 1094; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1095; SSE2-NEXT: cvtdq2pd %xmm0, %xmm1 1096; SSE2-NEXT: movaps %xmm2, %xmm0 1097; SSE2-NEXT: retq 1098; 1099; SSE41-LABEL: uitofp_4i16_to_4f64: 1100; SSE41: # %bb.0: 1101; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1102; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0 1103; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 1104; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1 1105; SSE41-NEXT: retq 1106; 1107; AVX-LABEL: uitofp_4i16_to_4f64: 1108; AVX: # %bb.0: 1109; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1110; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0 1111; AVX-NEXT: retq 1112 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1113 %cvt = uitofp <4 x i16> %shuf to <4 x double> 1114 ret <4 x double> %cvt 1115} 1116 1117define <4 x double> @uitofp_8i16_to_4f64(<8 x i16> %a) { 1118; SSE2-LABEL: uitofp_8i16_to_4f64: 1119; SSE2: # %bb.0: 1120; SSE2-NEXT: pxor %xmm1, %xmm1 1121; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1122; SSE2-NEXT: cvtdq2pd %xmm0, %xmm2 1123; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1124; SSE2-NEXT: cvtdq2pd %xmm0, %xmm1 1125; SSE2-NEXT: movaps %xmm2, %xmm0 1126; SSE2-NEXT: retq 1127; 1128; SSE41-LABEL: uitofp_8i16_to_4f64: 1129; SSE41: # %bb.0: 1130; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1131; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0 1132; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 1133; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1 1134; SSE41-NEXT: retq 1135; 1136; VEX-LABEL: uitofp_8i16_to_4f64: 1137; VEX: # %bb.0: 1138; VEX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1139; VEX-NEXT: vcvtdq2pd %xmm0, %ymm0 1140; VEX-NEXT: retq 1141; 1142; AVX512-LABEL: uitofp_8i16_to_4f64: 1143; AVX512: # %bb.0: 1144; AVX512-NEXT: 
vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1145; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0 1146; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1147; AVX512-NEXT: retq 1148 %cvt = uitofp <8 x i16> %a to <8 x double> 1149 %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1150 ret <4 x double> %shuf 1151} 1152 1153define <4 x double> @uitofp_4i8_to_4f64(<16 x i8> %a) { 1154; SSE2-LABEL: uitofp_4i8_to_4f64: 1155; SSE2: # %bb.0: 1156; SSE2-NEXT: pxor %xmm1, %xmm1 1157; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1158; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1159; SSE2-NEXT: cvtdq2pd %xmm0, %xmm2 1160; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1161; SSE2-NEXT: cvtdq2pd %xmm0, %xmm1 1162; SSE2-NEXT: movaps %xmm2, %xmm0 1163; SSE2-NEXT: retq 1164; 1165; SSE41-LABEL: uitofp_4i8_to_4f64: 1166; SSE41: # %bb.0: 1167; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1168; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0 1169; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 1170; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1 1171; SSE41-NEXT: retq 1172; 1173; AVX-LABEL: uitofp_4i8_to_4f64: 1174; AVX: # %bb.0: 1175; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1176; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0 1177; AVX-NEXT: retq 1178 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1179 %cvt = uitofp <4 x i8> %shuf to <4 x double> 1180 ret <4 x double> %cvt 1181} 1182 1183define <4 x double> @uitofp_16i8_to_4f64(<16 x i8> %a) { 1184; SSE2-LABEL: uitofp_16i8_to_4f64: 1185; SSE2: # %bb.0: 
1186; SSE2-NEXT: pxor %xmm1, %xmm1 1187; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1188; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1189; SSE2-NEXT: cvtdq2pd %xmm0, %xmm2 1190; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1191; SSE2-NEXT: cvtdq2pd %xmm0, %xmm1 1192; SSE2-NEXT: movaps %xmm2, %xmm0 1193; SSE2-NEXT: retq 1194; 1195; SSE41-LABEL: uitofp_16i8_to_4f64: 1196; SSE41: # %bb.0: 1197; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1198; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0 1199; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 1200; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1 1201; SSE41-NEXT: retq 1202; 1203; VEX-LABEL: uitofp_16i8_to_4f64: 1204; VEX: # %bb.0: 1205; VEX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1206; VEX-NEXT: vcvtdq2pd %xmm0, %ymm0 1207; VEX-NEXT: retq 1208; 1209; AVX512-LABEL: uitofp_16i8_to_4f64: 1210; AVX512: # %bb.0: 1211; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 1212; AVX512-NEXT: vcvtdq2pd %ymm0, %zmm0 1213; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1214; AVX512-NEXT: retq 1215 %cvt = uitofp <16 x i8> %a to <16 x double> 1216 %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1217 ret <4 x double> %shuf 1218} 1219 1220; 1221; Signed Integer to Float 1222; 1223 1224define <4 x 
float> @sitofp_2i64_to_4f32(<2 x i64> %a) { 1225; SSE2-LABEL: sitofp_2i64_to_4f32: 1226; SSE2: # %bb.0: 1227; SSE2-NEXT: movq %xmm0, %rax 1228; SSE2-NEXT: cvtsi2ss %rax, %xmm1 1229; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1230; SSE2-NEXT: movq %xmm0, %rax 1231; SSE2-NEXT: xorps %xmm0, %xmm0 1232; SSE2-NEXT: cvtsi2ss %rax, %xmm0 1233; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1234; SSE2-NEXT: movaps %xmm1, %xmm0 1235; SSE2-NEXT: retq 1236; 1237; SSE41-LABEL: sitofp_2i64_to_4f32: 1238; SSE41: # %bb.0: 1239; SSE41-NEXT: pextrq $1, %xmm0, %rax 1240; SSE41-NEXT: cvtsi2ss %rax, %xmm1 1241; SSE41-NEXT: movq %xmm0, %rax 1242; SSE41-NEXT: xorps %xmm0, %xmm0 1243; SSE41-NEXT: cvtsi2ss %rax, %xmm0 1244; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] 1245; SSE41-NEXT: retq 1246; 1247; VEX-LABEL: sitofp_2i64_to_4f32: 1248; VEX: # %bb.0: 1249; VEX-NEXT: vpextrq $1, %xmm0, %rax 1250; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 1251; VEX-NEXT: vmovq %xmm0, %rax 1252; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 1253; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero 1254; VEX-NEXT: retq 1255; 1256; AVX512F-LABEL: sitofp_2i64_to_4f32: 1257; AVX512F: # %bb.0: 1258; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 1259; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 1260; AVX512F-NEXT: vmovq %xmm0, %rax 1261; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 1262; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero 1263; AVX512F-NEXT: retq 1264; 1265; AVX512VL-LABEL: sitofp_2i64_to_4f32: 1266; AVX512VL: # %bb.0: 1267; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax 1268; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 1269; AVX512VL-NEXT: vmovq %xmm0, %rax 1270; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 1271; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero 1272; AVX512VL-NEXT: retq 1273; 1274; AVX512DQ-LABEL: sitofp_2i64_to_4f32: 1275; AVX512DQ: # %bb.0: 1276; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1277; 
AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0 1278; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1279; AVX512DQ-NEXT: vzeroupper 1280; AVX512DQ-NEXT: retq 1281; 1282; AVX512VLDQ-LABEL: sitofp_2i64_to_4f32: 1283; AVX512VLDQ: # %bb.0: 1284; AVX512VLDQ-NEXT: vcvtqq2ps %xmm0, %xmm0 1285; AVX512VLDQ-NEXT: retq 1286 %cvt = sitofp <2 x i64> %a to <2 x float> 1287 %ext = shufflevector <2 x float> %cvt, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 1288 ret <4 x float> %ext 1289} 1290 1291define <4 x float> @sitofp_2i64_to_4f32_zero(<2 x i64> %a) { 1292; SSE2-LABEL: sitofp_2i64_to_4f32_zero: 1293; SSE2: # %bb.0: 1294; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1295; SSE2-NEXT: movq %xmm1, %rax 1296; SSE2-NEXT: xorps %xmm1, %xmm1 1297; SSE2-NEXT: cvtsi2ss %rax, %xmm1 1298; SSE2-NEXT: movq %xmm0, %rax 1299; SSE2-NEXT: xorps %xmm0, %xmm0 1300; SSE2-NEXT: cvtsi2ss %rax, %xmm0 1301; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1302; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero 1303; SSE2-NEXT: retq 1304; 1305; SSE41-LABEL: sitofp_2i64_to_4f32_zero: 1306; SSE41: # %bb.0: 1307; SSE41-NEXT: movq %xmm0, %rax 1308; SSE41-NEXT: cvtsi2ss %rax, %xmm1 1309; SSE41-NEXT: pextrq $1, %xmm0, %rax 1310; SSE41-NEXT: xorps %xmm0, %xmm0 1311; SSE41-NEXT: cvtsi2ss %rax, %xmm0 1312; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],xmm0[0],zero,zero 1313; SSE41-NEXT: movaps %xmm1, %xmm0 1314; SSE41-NEXT: retq 1315; 1316; VEX-LABEL: sitofp_2i64_to_4f32_zero: 1317; VEX: # %bb.0: 1318; VEX-NEXT: vmovq %xmm0, %rax 1319; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 1320; VEX-NEXT: vpextrq $1, %xmm0, %rax 1321; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 1322; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero 1323; VEX-NEXT: retq 1324; 1325; AVX512F-LABEL: sitofp_2i64_to_4f32_zero: 1326; AVX512F: # %bb.0: 1327; AVX512F-NEXT: vmovq %xmm0, %rax 1328; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 1329; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 1330; AVX512F-NEXT: 
vcvtsi2ss %rax, %xmm2, %xmm0 1331; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero 1332; AVX512F-NEXT: retq 1333; 1334; AVX512VL-LABEL: sitofp_2i64_to_4f32_zero: 1335; AVX512VL: # %bb.0: 1336; AVX512VL-NEXT: vmovq %xmm0, %rax 1337; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 1338; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax 1339; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 1340; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero 1341; AVX512VL-NEXT: retq 1342; 1343; AVX512DQ-LABEL: sitofp_2i64_to_4f32_zero: 1344; AVX512DQ: # %bb.0: 1345; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1346; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0 1347; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 1348; AVX512DQ-NEXT: vzeroupper 1349; AVX512DQ-NEXT: retq 1350; 1351; AVX512VLDQ-LABEL: sitofp_2i64_to_4f32_zero: 1352; AVX512VLDQ: # %bb.0: 1353; AVX512VLDQ-NEXT: vcvtqq2ps %xmm0, %xmm0 1354; AVX512VLDQ-NEXT: retq 1355 %cvt = sitofp <2 x i64> %a to <2 x float> 1356 %ext = shufflevector <2 x float> %cvt, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1357 ret <4 x float> %ext 1358} 1359 1360define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) { 1361; SSE2-LABEL: sitofp_4i64_to_4f32_undef: 1362; SSE2: # %bb.0: 1363; SSE2-NEXT: movq %xmm0, %rax 1364; SSE2-NEXT: cvtsi2ss %rax, %xmm1 1365; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1366; SSE2-NEXT: movq %xmm0, %rax 1367; SSE2-NEXT: xorps %xmm0, %xmm0 1368; SSE2-NEXT: cvtsi2ss %rax, %xmm0 1369; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1370; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero 1371; SSE2-NEXT: retq 1372; 1373; SSE41-LABEL: sitofp_4i64_to_4f32_undef: 1374; SSE41: # %bb.0: 1375; SSE41-NEXT: pextrq $1, %xmm0, %rax 1376; SSE41-NEXT: cvtsi2ss %rax, %xmm1 1377; SSE41-NEXT: movq %xmm0, %rax 1378; SSE41-NEXT: xorps %xmm0, %xmm0 1379; SSE41-NEXT: cvtsi2ss %rax, %xmm0 1380; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero 1381; 
SSE41-NEXT: retq 1382; 1383; VEX-LABEL: sitofp_4i64_to_4f32_undef: 1384; VEX: # %bb.0: 1385; VEX-NEXT: vpextrq $1, %xmm0, %rax 1386; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 1387; VEX-NEXT: vmovq %xmm0, %rax 1388; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 1389; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero 1390; VEX-NEXT: retq 1391; 1392; AVX512F-LABEL: sitofp_4i64_to_4f32_undef: 1393; AVX512F: # %bb.0: 1394; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 1395; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 1396; AVX512F-NEXT: vmovq %xmm0, %rax 1397; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 1398; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero 1399; AVX512F-NEXT: retq 1400; 1401; AVX512VL-LABEL: sitofp_4i64_to_4f32_undef: 1402; AVX512VL: # %bb.0: 1403; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax 1404; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 1405; AVX512VL-NEXT: vmovq %xmm0, %rax 1406; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 1407; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero 1408; AVX512VL-NEXT: retq 1409; 1410; AVX512DQ-LABEL: sitofp_4i64_to_4f32_undef: 1411; AVX512DQ: # %bb.0: 1412; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1413; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0 1414; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1415; AVX512DQ-NEXT: vzeroupper 1416; AVX512DQ-NEXT: retq 1417; 1418; AVX512VLDQ-LABEL: sitofp_4i64_to_4f32_undef: 1419; AVX512VLDQ: # %bb.0: 1420; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 1421; AVX512VLDQ-NEXT: vcvtqq2ps %ymm0, %xmm0 1422; AVX512VLDQ-NEXT: vzeroupper 1423; AVX512VLDQ-NEXT: retq 1424 %ext = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 1425 %cvt = sitofp <4 x i64> %ext to <4 x float> 1426 ret <4 x float> %cvt 1427} 1428 1429define <4 x float> @sitofp_4i32_to_4f32(<4 x i32> %a) { 1430; SSE-LABEL: sitofp_4i32_to_4f32: 1431; SSE: # %bb.0: 1432; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 1433; SSE-NEXT: retq 1434; 
1435; AVX-LABEL: sitofp_4i32_to_4f32: 1436; AVX: # %bb.0: 1437; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 1438; AVX-NEXT: retq 1439 %cvt = sitofp <4 x i32> %a to <4 x float> 1440 ret <4 x float> %cvt 1441} 1442 1443define <4 x float> @sitofp_4i16_to_4f32(<8 x i16> %a) { 1444; SSE2-LABEL: sitofp_4i16_to_4f32: 1445; SSE2: # %bb.0: 1446; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1447; SSE2-NEXT: psrad $16, %xmm0 1448; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 1449; SSE2-NEXT: retq 1450; 1451; SSE41-LABEL: sitofp_4i16_to_4f32: 1452; SSE41: # %bb.0: 1453; SSE41-NEXT: pmovsxwd %xmm0, %xmm0 1454; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0 1455; SSE41-NEXT: retq 1456; 1457; AVX-LABEL: sitofp_4i16_to_4f32: 1458; AVX: # %bb.0: 1459; AVX-NEXT: vpmovsxwd %xmm0, %xmm0 1460; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 1461; AVX-NEXT: retq 1462 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1463 %cvt = sitofp <4 x i16> %shuf to <4 x float> 1464 ret <4 x float> %cvt 1465} 1466 1467define <4 x float> @sitofp_8i16_to_4f32(<8 x i16> %a) { 1468; SSE2-LABEL: sitofp_8i16_to_4f32: 1469; SSE2: # %bb.0: 1470; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1471; SSE2-NEXT: psrad $16, %xmm0 1472; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 1473; SSE2-NEXT: retq 1474; 1475; SSE41-LABEL: sitofp_8i16_to_4f32: 1476; SSE41: # %bb.0: 1477; SSE41-NEXT: pmovsxwd %xmm0, %xmm0 1478; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0 1479; SSE41-NEXT: retq 1480; 1481; AVX-LABEL: sitofp_8i16_to_4f32: 1482; AVX: # %bb.0: 1483; AVX-NEXT: vpmovsxwd %xmm0, %xmm0 1484; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 1485; AVX-NEXT: retq 1486 %cvt = sitofp <8 x i16> %a to <8 x float> 1487 %shuf = shufflevector <8 x float> %cvt, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1488 ret <4 x float> %shuf 1489} 1490 1491define <4 x float> @sitofp_4i8_to_4f32(<16 x i8> %a) { 1492; SSE2-LABEL: sitofp_4i8_to_4f32: 1493; SSE2: # %bb.0: 1494; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = 
xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1495; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1496; SSE2-NEXT: psrad $24, %xmm0 1497; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 1498; SSE2-NEXT: retq 1499; 1500; SSE41-LABEL: sitofp_4i8_to_4f32: 1501; SSE41: # %bb.0: 1502; SSE41-NEXT: pmovsxbd %xmm0, %xmm0 1503; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0 1504; SSE41-NEXT: retq 1505; 1506; AVX-LABEL: sitofp_4i8_to_4f32: 1507; AVX: # %bb.0: 1508; AVX-NEXT: vpmovsxbd %xmm0, %xmm0 1509; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 1510; AVX-NEXT: retq 1511 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1512 %cvt = sitofp <4 x i8> %shuf to <4 x float> 1513 ret <4 x float> %cvt 1514} 1515 1516define <4 x float> @sitofp_16i8_to_4f32(<16 x i8> %a) { 1517; SSE2-LABEL: sitofp_16i8_to_4f32: 1518; SSE2: # %bb.0: 1519; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1520; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1521; SSE2-NEXT: psrad $24, %xmm0 1522; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 1523; SSE2-NEXT: retq 1524; 1525; SSE41-LABEL: sitofp_16i8_to_4f32: 1526; SSE41: # %bb.0: 1527; SSE41-NEXT: pmovsxbd %xmm0, %xmm0 1528; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0 1529; SSE41-NEXT: retq 1530; 1531; AVX-LABEL: sitofp_16i8_to_4f32: 1532; AVX: # %bb.0: 1533; AVX-NEXT: vpmovsxbd %xmm0, %xmm0 1534; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 1535; AVX-NEXT: retq 1536 %cvt = sitofp <16 x i8> %a to <16 x float> 1537 %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1538 ret <4 x float> %shuf 1539} 1540 1541define <4 x float> @sitofp_4i64_to_4f32(<4 x i64> %a) { 1542; SSE2-LABEL: sitofp_4i64_to_4f32: 1543; SSE2: # %bb.0: 1544; SSE2-NEXT: movq %xmm1, %rax 1545; SSE2-NEXT: cvtsi2ss %rax, %xmm2 1546; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 1547; SSE2-NEXT: movq %xmm1, %rax 1548; SSE2-NEXT: xorps %xmm1, %xmm1 1549; SSE2-NEXT: cvtsi2ss %rax, %xmm1 1550; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = 
xmm2[0],xmm1[0],xmm2[1],xmm1[1] 1551; SSE2-NEXT: movq %xmm0, %rax 1552; SSE2-NEXT: xorps %xmm1, %xmm1 1553; SSE2-NEXT: cvtsi2ss %rax, %xmm1 1554; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1555; SSE2-NEXT: movq %xmm0, %rax 1556; SSE2-NEXT: xorps %xmm0, %xmm0 1557; SSE2-NEXT: cvtsi2ss %rax, %xmm0 1558; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1559; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] 1560; SSE2-NEXT: movaps %xmm1, %xmm0 1561; SSE2-NEXT: retq 1562; 1563; SSE41-LABEL: sitofp_4i64_to_4f32: 1564; SSE41: # %bb.0: 1565; SSE41-NEXT: pextrq $1, %xmm0, %rax 1566; SSE41-NEXT: cvtsi2ss %rax, %xmm2 1567; SSE41-NEXT: movq %xmm0, %rax 1568; SSE41-NEXT: xorps %xmm0, %xmm0 1569; SSE41-NEXT: cvtsi2ss %rax, %xmm0 1570; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3] 1571; SSE41-NEXT: movq %xmm1, %rax 1572; SSE41-NEXT: xorps %xmm2, %xmm2 1573; SSE41-NEXT: cvtsi2ss %rax, %xmm2 1574; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3] 1575; SSE41-NEXT: pextrq $1, %xmm1, %rax 1576; SSE41-NEXT: xorps %xmm1, %xmm1 1577; SSE41-NEXT: cvtsi2ss %rax, %xmm1 1578; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] 1579; SSE41-NEXT: retq 1580; 1581; AVX1-LABEL: sitofp_4i64_to_4f32: 1582; AVX1: # %bb.0: 1583; AVX1-NEXT: vpextrq $1, %xmm0, %rax 1584; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 1585; AVX1-NEXT: vmovq %xmm0, %rax 1586; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2 1587; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 1588; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1589; AVX1-NEXT: vmovq %xmm0, %rax 1590; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 1591; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 1592; AVX1-NEXT: vpextrq $1, %xmm0, %rax 1593; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0 1594; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 1595; AVX1-NEXT: vzeroupper 1596; AVX1-NEXT: retq 1597; 1598; AVX2-LABEL: sitofp_4i64_to_4f32: 1599; AVX2: # %bb.0: 1600; AVX2-NEXT: vpextrq $1, 
%xmm0, %rax 1601; AVX2-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 1602; AVX2-NEXT: vmovq %xmm0, %rax 1603; AVX2-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2 1604; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 1605; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 1606; AVX2-NEXT: vmovq %xmm0, %rax 1607; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 1608; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 1609; AVX2-NEXT: vpextrq $1, %xmm0, %rax 1610; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0 1611; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 1612; AVX2-NEXT: vzeroupper 1613; AVX2-NEXT: retq 1614; 1615; AVX512F-LABEL: sitofp_4i64_to_4f32: 1616; AVX512F: # %bb.0: 1617; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 1618; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 1619; AVX512F-NEXT: vmovq %xmm0, %rax 1620; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2 1621; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 1622; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 1623; AVX512F-NEXT: vmovq %xmm0, %rax 1624; AVX512F-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 1625; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 1626; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 1627; AVX512F-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0 1628; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 1629; AVX512F-NEXT: vzeroupper 1630; AVX512F-NEXT: retq 1631; 1632; AVX512VL-LABEL: sitofp_4i64_to_4f32: 1633; AVX512VL: # %bb.0: 1634; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax 1635; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 1636; AVX512VL-NEXT: vmovq %xmm0, %rax 1637; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2 1638; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 1639; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0 1640; AVX512VL-NEXT: vmovq %xmm0, %rax 1641; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 1642; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 1643; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax 1644; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0 1645; 
AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 1646; AVX512VL-NEXT: vzeroupper 1647; AVX512VL-NEXT: retq 1648; 1649; AVX512DQ-LABEL: sitofp_4i64_to_4f32: 1650; AVX512DQ: # %bb.0: 1651; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1652; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0 1653; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1654; AVX512DQ-NEXT: vzeroupper 1655; AVX512DQ-NEXT: retq 1656; 1657; AVX512VLDQ-LABEL: sitofp_4i64_to_4f32: 1658; AVX512VLDQ: # %bb.0: 1659; AVX512VLDQ-NEXT: vcvtqq2ps %ymm0, %xmm0 1660; AVX512VLDQ-NEXT: vzeroupper 1661; AVX512VLDQ-NEXT: retq 1662 %cvt = sitofp <4 x i64> %a to <4 x float> 1663 ret <4 x float> %cvt 1664} 1665 1666define <8 x float> @sitofp_8i32_to_8f32(<8 x i32> %a) { 1667; SSE-LABEL: sitofp_8i32_to_8f32: 1668; SSE: # %bb.0: 1669; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 1670; SSE-NEXT: cvtdq2ps %xmm1, %xmm1 1671; SSE-NEXT: retq 1672; 1673; AVX-LABEL: sitofp_8i32_to_8f32: 1674; AVX: # %bb.0: 1675; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0 1676; AVX-NEXT: retq 1677 %cvt = sitofp <8 x i32> %a to <8 x float> 1678 ret <8 x float> %cvt 1679} 1680 1681define <8 x float> @sitofp_8i16_to_8f32(<8 x i16> %a) { 1682; SSE2-LABEL: sitofp_8i16_to_8f32: 1683; SSE2: # %bb.0: 1684; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1685; SSE2-NEXT: psrad $16, %xmm1 1686; SSE2-NEXT: cvtdq2ps %xmm1, %xmm2 1687; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] 1688; SSE2-NEXT: psrad $16, %xmm0 1689; SSE2-NEXT: cvtdq2ps %xmm0, %xmm1 1690; SSE2-NEXT: movaps %xmm2, %xmm0 1691; SSE2-NEXT: retq 1692; 1693; SSE41-LABEL: sitofp_8i16_to_8f32: 1694; SSE41: # %bb.0: 1695; SSE41-NEXT: pmovsxwd %xmm0, %xmm1 1696; SSE41-NEXT: cvtdq2ps %xmm1, %xmm2 1697; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1698; SSE41-NEXT: pmovsxwd %xmm0, %xmm0 1699; SSE41-NEXT: cvtdq2ps %xmm0, %xmm1 1700; SSE41-NEXT: movaps %xmm2, %xmm0 1701; SSE41-NEXT: retq 1702; 1703; AVX1-LABEL: 
sitofp_8i16_to_8f32: 1704; AVX1: # %bb.0: 1705; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1 1706; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1707; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0 1708; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1709; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 1710; AVX1-NEXT: retq 1711; 1712; AVX2-LABEL: sitofp_8i16_to_8f32: 1713; AVX2: # %bb.0: 1714; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 1715; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 1716; AVX2-NEXT: retq 1717; 1718; AVX512-LABEL: sitofp_8i16_to_8f32: 1719; AVX512: # %bb.0: 1720; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0 1721; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0 1722; AVX512-NEXT: retq 1723 %cvt = sitofp <8 x i16> %a to <8 x float> 1724 ret <8 x float> %cvt 1725} 1726 1727define <8 x float> @sitofp_8i8_to_8f32(<16 x i8> %a) { 1728; SSE2-LABEL: sitofp_8i8_to_8f32: 1729; SSE2: # %bb.0: 1730; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 1731; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1732; SSE2-NEXT: psrad $24, %xmm0 1733; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 1734; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 1735; SSE2-NEXT: psrad $24, %xmm1 1736; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1 1737; SSE2-NEXT: retq 1738; 1739; SSE41-LABEL: sitofp_8i8_to_8f32: 1740; SSE41: # %bb.0: 1741; SSE41-NEXT: pmovsxbd %xmm0, %xmm1 1742; SSE41-NEXT: cvtdq2ps %xmm1, %xmm2 1743; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 1744; SSE41-NEXT: pmovsxbd %xmm0, %xmm0 1745; SSE41-NEXT: cvtdq2ps %xmm0, %xmm1 1746; SSE41-NEXT: movaps %xmm2, %xmm0 1747; SSE41-NEXT: retq 1748; 1749; AVX1-LABEL: sitofp_8i8_to_8f32: 1750; AVX1: # %bb.0: 1751; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1 1752; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 1753; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0 1754; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1755; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 1756; 
AVX1-NEXT: retq 1757; 1758; AVX2-LABEL: sitofp_8i8_to_8f32: 1759; AVX2: # %bb.0: 1760; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0 1761; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 1762; AVX2-NEXT: retq 1763; 1764; AVX512-LABEL: sitofp_8i8_to_8f32: 1765; AVX512: # %bb.0: 1766; AVX512-NEXT: vpmovsxbd %xmm0, %ymm0 1767; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0 1768; AVX512-NEXT: retq 1769 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1770 %cvt = sitofp <8 x i8> %shuf to <8 x float> 1771 ret <8 x float> %cvt 1772} 1773 1774define <8 x float> @sitofp_16i8_to_8f32(<16 x i8> %a) { 1775; SSE2-LABEL: sitofp_16i8_to_8f32: 1776; SSE2: # %bb.0: 1777; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 1778; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1779; SSE2-NEXT: psrad $24, %xmm0 1780; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 1781; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 1782; SSE2-NEXT: psrad $24, %xmm1 1783; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1 1784; SSE2-NEXT: retq 1785; 1786; SSE41-LABEL: sitofp_16i8_to_8f32: 1787; SSE41: # %bb.0: 1788; SSE41-NEXT: pmovsxbd %xmm0, %xmm1 1789; SSE41-NEXT: cvtdq2ps %xmm1, %xmm2 1790; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 1791; SSE41-NEXT: pmovsxbd %xmm0, %xmm0 1792; SSE41-NEXT: cvtdq2ps %xmm0, %xmm1 1793; SSE41-NEXT: movaps %xmm2, %xmm0 1794; SSE41-NEXT: retq 1795; 1796; AVX1-LABEL: sitofp_16i8_to_8f32: 1797; AVX1: # %bb.0: 1798; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1 1799; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 1800; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0 1801; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1802; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 1803; AVX1-NEXT: retq 1804; 1805; AVX2-LABEL: sitofp_16i8_to_8f32: 1806; AVX2: # %bb.0: 1807; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0 1808; AVX2-NEXT: vcvtdq2ps 
%ymm0, %ymm0 1809; AVX2-NEXT: retq 1810; 1811; AVX512-LABEL: sitofp_16i8_to_8f32: 1812; AVX512: # %bb.0: 1813; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0 1814; AVX512-NEXT: vcvtdq2ps %zmm0, %zmm0 1815; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1816; AVX512-NEXT: retq 1817 %cvt = sitofp <16 x i8> %a to <16 x float> 1818 %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1819 ret <8 x float> %shuf 1820} 1821 1822; 1823; Unsigned Integer to Float 1824; 1825 1826define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) { 1827; SSE2-LABEL: uitofp_2i64_to_4f32: 1828; SSE2: # %bb.0: 1829; SSE2-NEXT: movdqa %xmm0, %xmm1 1830; SSE2-NEXT: movq %xmm0, %rax 1831; SSE2-NEXT: testq %rax, %rax 1832; SSE2-NEXT: js .LBB41_1 1833; SSE2-NEXT: # %bb.2: 1834; SSE2-NEXT: xorps %xmm0, %xmm0 1835; SSE2-NEXT: cvtsi2ss %rax, %xmm0 1836; SSE2-NEXT: jmp .LBB41_3 1837; SSE2-NEXT: .LBB41_1: 1838; SSE2-NEXT: movq %rax, %rcx 1839; SSE2-NEXT: shrq %rcx 1840; SSE2-NEXT: andl $1, %eax 1841; SSE2-NEXT: orq %rcx, %rax 1842; SSE2-NEXT: xorps %xmm0, %xmm0 1843; SSE2-NEXT: cvtsi2ss %rax, %xmm0 1844; SSE2-NEXT: addss %xmm0, %xmm0 1845; SSE2-NEXT: .LBB41_3: 1846; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 1847; SSE2-NEXT: movq %xmm1, %rax 1848; SSE2-NEXT: testq %rax, %rax 1849; SSE2-NEXT: js .LBB41_4 1850; SSE2-NEXT: # %bb.5: 1851; SSE2-NEXT: xorps %xmm1, %xmm1 1852; SSE2-NEXT: cvtsi2ss %rax, %xmm1 1853; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1854; SSE2-NEXT: retq 1855; SSE2-NEXT: .LBB41_4: 1856; SSE2-NEXT: movq %rax, %rcx 1857; SSE2-NEXT: shrq %rcx 1858; SSE2-NEXT: andl $1, %eax 1859; SSE2-NEXT: orq %rcx, %rax 1860; SSE2-NEXT: xorps %xmm1, %xmm1 1861; SSE2-NEXT: cvtsi2ss %rax, %xmm1 1862; SSE2-NEXT: addss %xmm1, %xmm1 1863; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1864; SSE2-NEXT: retq 1865; 1866; SSE41-LABEL: uitofp_2i64_to_4f32: 1867; SSE41: # %bb.0: 1868; 
SSE41-NEXT: pmovsxbq {{.*#+}} xmm1 = [1,1] 1869; SSE41-NEXT: pand %xmm0, %xmm1 1870; SSE41-NEXT: movdqa %xmm0, %xmm2 1871; SSE41-NEXT: psrlq $1, %xmm2 1872; SSE41-NEXT: por %xmm1, %xmm2 1873; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3] 1874; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm0 1875; SSE41-NEXT: pextrq $1, %xmm0, %rax 1876; SSE41-NEXT: cvtsi2ss %rax, %xmm3 1877; SSE41-NEXT: movq %xmm0, %rax 1878; SSE41-NEXT: xorps %xmm2, %xmm2 1879; SSE41-NEXT: cvtsi2ss %rax, %xmm2 1880; SSE41-NEXT: insertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],zero,zero 1881; SSE41-NEXT: movaps %xmm2, %xmm3 1882; SSE41-NEXT: addps %xmm2, %xmm3 1883; SSE41-NEXT: movdqa %xmm1, %xmm0 1884; SSE41-NEXT: blendvps %xmm0, %xmm3, %xmm2 1885; SSE41-NEXT: movaps %xmm2, %xmm0 1886; SSE41-NEXT: retq 1887; 1888; VEX-LABEL: uitofp_2i64_to_4f32: 1889; VEX: # %bb.0: 1890; VEX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 1891; VEX-NEXT: vpsrlq $1, %xmm0, %xmm2 1892; VEX-NEXT: vpor %xmm1, %xmm2, %xmm1 1893; VEX-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm1 1894; VEX-NEXT: vpextrq $1, %xmm1, %rax 1895; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 1896; VEX-NEXT: vmovq %xmm1, %rax 1897; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1 1898; VEX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero 1899; VEX-NEXT: vaddps %xmm1, %xmm1, %xmm2 1900; VEX-NEXT: vpxor %xmm3, %xmm3, %xmm3 1901; VEX-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 1902; VEX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] 1903; VEX-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 1904; VEX-NEXT: retq 1905; 1906; AVX512F-LABEL: uitofp_2i64_to_4f32: 1907; AVX512F: # %bb.0: 1908; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 1909; AVX512F-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 1910; AVX512F-NEXT: vmovq %xmm0, %rax 1911; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0 1912; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero 1913; AVX512F-NEXT: retq 1914; 1915; AVX512VL-LABEL: uitofp_2i64_to_4f32: 1916; AVX512VL: # %bb.0: 1917; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax 1918; 
AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 1919; AVX512VL-NEXT: vmovq %xmm0, %rax 1920; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0 1921; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero 1922; AVX512VL-NEXT: retq 1923; 1924; AVX512DQ-LABEL: uitofp_2i64_to_4f32: 1925; AVX512DQ: # %bb.0: 1926; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1927; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0 1928; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1929; AVX512DQ-NEXT: vzeroupper 1930; AVX512DQ-NEXT: retq 1931; 1932; AVX512VLDQ-LABEL: uitofp_2i64_to_4f32: 1933; AVX512VLDQ: # %bb.0: 1934; AVX512VLDQ-NEXT: vcvtuqq2ps %xmm0, %xmm0 1935; AVX512VLDQ-NEXT: retq 1936 %cvt = uitofp <2 x i64> %a to <2 x float> 1937 %ext = shufflevector <2 x float> %cvt, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 1938 ret <4 x float> %ext 1939} 1940 1941define <4 x float> @uitofp_2i64_to_2f32(<2 x i64> %a) { 1942; SSE2-LABEL: uitofp_2i64_to_2f32: 1943; SSE2: # %bb.0: 1944; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1945; SSE2-NEXT: movq %xmm1, %rax 1946; SSE2-NEXT: testq %rax, %rax 1947; SSE2-NEXT: js .LBB42_1 1948; SSE2-NEXT: # %bb.2: 1949; SSE2-NEXT: xorps %xmm1, %xmm1 1950; SSE2-NEXT: cvtsi2ss %rax, %xmm1 1951; SSE2-NEXT: jmp .LBB42_3 1952; SSE2-NEXT: .LBB42_1: 1953; SSE2-NEXT: movq %rax, %rcx 1954; SSE2-NEXT: shrq %rcx 1955; SSE2-NEXT: andl $1, %eax 1956; SSE2-NEXT: orq %rcx, %rax 1957; SSE2-NEXT: xorps %xmm1, %xmm1 1958; SSE2-NEXT: cvtsi2ss %rax, %xmm1 1959; SSE2-NEXT: addss %xmm1, %xmm1 1960; SSE2-NEXT: .LBB42_3: 1961; SSE2-NEXT: movq %xmm0, %rax 1962; SSE2-NEXT: testq %rax, %rax 1963; SSE2-NEXT: js .LBB42_4 1964; SSE2-NEXT: # %bb.5: 1965; SSE2-NEXT: xorps %xmm0, %xmm0 1966; SSE2-NEXT: cvtsi2ss %rax, %xmm0 1967; SSE2-NEXT: jmp .LBB42_6 1968; SSE2-NEXT: .LBB42_4: 1969; SSE2-NEXT: movq %rax, %rcx 1970; SSE2-NEXT: shrq %rcx 1971; SSE2-NEXT: andl $1, %eax 1972; SSE2-NEXT: orq %rcx, %rax 1973; SSE2-NEXT: xorps %xmm0, %xmm0 1974; 
SSE2-NEXT: cvtsi2ss %rax, %xmm0 1975; SSE2-NEXT: addss %xmm0, %xmm0 1976; SSE2-NEXT: .LBB42_6: 1977; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1978; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero 1979; SSE2-NEXT: retq 1980; 1981; SSE41-LABEL: uitofp_2i64_to_2f32: 1982; SSE41: # %bb.0: 1983; SSE41-NEXT: pmovsxbq {{.*#+}} xmm1 = [1,1] 1984; SSE41-NEXT: pand %xmm0, %xmm1 1985; SSE41-NEXT: movdqa %xmm0, %xmm2 1986; SSE41-NEXT: psrlq $1, %xmm2 1987; SSE41-NEXT: por %xmm1, %xmm2 1988; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3] 1989; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm0 1990; SSE41-NEXT: pextrq $1, %xmm0, %rax 1991; SSE41-NEXT: xorps %xmm2, %xmm2 1992; SSE41-NEXT: cvtsi2ss %rax, %xmm2 1993; SSE41-NEXT: movq %xmm0, %rax 1994; SSE41-NEXT: cvtsi2ss %rax, %xmm3 1995; SSE41-NEXT: insertps {{.*#+}} xmm3 = xmm3[0],xmm2[0],zero,zero 1996; SSE41-NEXT: movaps %xmm3, %xmm2 1997; SSE41-NEXT: addps %xmm3, %xmm2 1998; SSE41-NEXT: movdqa %xmm1, %xmm0 1999; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm3 2000; SSE41-NEXT: movq {{.*#+}} xmm0 = xmm3[0],zero 2001; SSE41-NEXT: retq 2002; 2003; VEX-LABEL: uitofp_2i64_to_2f32: 2004; VEX: # %bb.0: 2005; VEX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 2006; VEX-NEXT: vpsrlq $1, %xmm0, %xmm2 2007; VEX-NEXT: vpor %xmm1, %xmm2, %xmm1 2008; VEX-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm1 2009; VEX-NEXT: vpextrq $1, %xmm1, %rax 2010; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 2011; VEX-NEXT: vmovq %xmm1, %rax 2012; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1 2013; VEX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero 2014; VEX-NEXT: vaddps %xmm1, %xmm1, %xmm2 2015; VEX-NEXT: vpxor %xmm3, %xmm3, %xmm3 2016; VEX-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 2017; VEX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] 2018; VEX-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 2019; VEX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 2020; VEX-NEXT: retq 2021; 2022; AVX512F-LABEL: uitofp_2i64_to_2f32: 2023; AVX512F: # %bb.0: 2024; AVX512F-NEXT: 
vmovq %xmm0, %rax 2025; AVX512F-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 2026; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 2027; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0 2028; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero 2029; AVX512F-NEXT: retq 2030; 2031; AVX512VL-LABEL: uitofp_2i64_to_2f32: 2032; AVX512VL: # %bb.0: 2033; AVX512VL-NEXT: vmovq %xmm0, %rax 2034; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 2035; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax 2036; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0 2037; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero 2038; AVX512VL-NEXT: retq 2039; 2040; AVX512DQ-LABEL: uitofp_2i64_to_2f32: 2041; AVX512DQ: # %bb.0: 2042; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2043; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0 2044; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 2045; AVX512DQ-NEXT: vzeroupper 2046; AVX512DQ-NEXT: retq 2047; 2048; AVX512VLDQ-LABEL: uitofp_2i64_to_2f32: 2049; AVX512VLDQ: # %bb.0: 2050; AVX512VLDQ-NEXT: vcvtuqq2ps %xmm0, %xmm0 2051; AVX512VLDQ-NEXT: retq 2052 %cvt = uitofp <2 x i64> %a to <2 x float> 2053 %ext = shufflevector <2 x float> %cvt, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2054 ret <4 x float> %ext 2055} 2056 2057define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) { 2058; SSE2-LABEL: uitofp_4i64_to_4f32_undef: 2059; SSE2: # %bb.0: 2060; SSE2-NEXT: movq %xmm0, %rax 2061; SSE2-NEXT: testq %rax, %rax 2062; SSE2-NEXT: js .LBB43_1 2063; SSE2-NEXT: # %bb.2: 2064; SSE2-NEXT: cvtsi2ss %rax, %xmm1 2065; SSE2-NEXT: jmp .LBB43_3 2066; SSE2-NEXT: .LBB43_1: 2067; SSE2-NEXT: movq %rax, %rcx 2068; SSE2-NEXT: shrq %rcx 2069; SSE2-NEXT: andl $1, %eax 2070; SSE2-NEXT: orq %rcx, %rax 2071; SSE2-NEXT: cvtsi2ss %rax, %xmm1 2072; SSE2-NEXT: addss %xmm1, %xmm1 2073; SSE2-NEXT: .LBB43_3: 2074; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 2075; SSE2-NEXT: movq %xmm0, %rax 2076; SSE2-NEXT: testq %rax, %rax 2077; SSE2-NEXT: js .LBB43_4 2078; 
SSE2-NEXT: # %bb.5: 2079; SSE2-NEXT: xorps %xmm0, %xmm0 2080; SSE2-NEXT: cvtsi2ss %rax, %xmm0 2081; SSE2-NEXT: jmp .LBB43_6 2082; SSE2-NEXT: .LBB43_4: 2083; SSE2-NEXT: movq %rax, %rcx 2084; SSE2-NEXT: shrq %rcx 2085; SSE2-NEXT: andl $1, %eax 2086; SSE2-NEXT: orq %rcx, %rax 2087; SSE2-NEXT: xorps %xmm0, %xmm0 2088; SSE2-NEXT: cvtsi2ss %rax, %xmm0 2089; SSE2-NEXT: addss %xmm0, %xmm0 2090; SSE2-NEXT: .LBB43_6: 2091; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 2092; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero 2093; SSE2-NEXT: retq 2094; 2095; SSE41-LABEL: uitofp_4i64_to_4f32_undef: 2096; SSE41: # %bb.0: 2097; SSE41-NEXT: pmovsxbq {{.*#+}} xmm1 = [1,1] 2098; SSE41-NEXT: pand %xmm0, %xmm1 2099; SSE41-NEXT: movdqa %xmm0, %xmm2 2100; SSE41-NEXT: psrlq $1, %xmm2 2101; SSE41-NEXT: por %xmm1, %xmm2 2102; SSE41-NEXT: movdqa %xmm0, %xmm1 2103; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1 2104; SSE41-NEXT: pextrq $1, %xmm1, %rax 2105; SSE41-NEXT: xorps %xmm2, %xmm2 2106; SSE41-NEXT: cvtsi2ss %rax, %xmm2 2107; SSE41-NEXT: movq %xmm1, %rax 2108; SSE41-NEXT: xorps %xmm1, %xmm1 2109; SSE41-NEXT: cvtsi2ss %rax, %xmm1 2110; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero 2111; SSE41-NEXT: movaps %xmm1, %xmm2 2112; SSE41-NEXT: addps %xmm1, %xmm2 2113; SSE41-NEXT: xorps %xmm3, %xmm3 2114; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm3[2,3] 2115; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm1 2116; SSE41-NEXT: movaps %xmm1, %xmm0 2117; SSE41-NEXT: retq 2118; 2119; AVX1-LABEL: uitofp_4i64_to_4f32_undef: 2120; AVX1: # %bb.0: 2121; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 2122; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 2123; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm2 2124; AVX1-NEXT: vorps %ymm1, %ymm2, %ymm1 2125; AVX1-NEXT: vmovaps %xmm0, %xmm2 2126; AVX1-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm1 2127; AVX1-NEXT: vpextrq $1, %xmm1, %rax 2128; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 2129; AVX1-NEXT: vmovq %xmm1, %rax 2130; 
AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1 2131; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3] 2132; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2 2133; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 2134; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0] 2135; AVX1-NEXT: vaddps %xmm1, %xmm1, %xmm2 2136; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 2137; AVX1-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 2138; AVX1-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 2139; AVX1-NEXT: vzeroupper 2140; AVX1-NEXT: retq 2141; 2142; AVX2-LABEL: uitofp_4i64_to_4f32_undef: 2143; AVX2: # %bb.0: 2144; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 2145; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1] 2146; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm1 2147; AVX2-NEXT: vpsrlq $1, %ymm0, %ymm2 2148; AVX2-NEXT: vpor %ymm1, %ymm2, %ymm1 2149; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm1 2150; AVX2-NEXT: vpextrq $1, %xmm1, %rax 2151; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 2152; AVX2-NEXT: vmovq %xmm1, %rax 2153; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm3 2154; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 2155; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1 2156; AVX2-NEXT: vmovq %xmm1, %rax 2157; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3 2158; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3] 2159; AVX2-NEXT: vpextrq $1, %xmm1, %rax 2160; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm1 2161; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[0] 2162; AVX2-NEXT: vaddps %xmm1, %xmm1, %xmm2 2163; AVX2-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 2164; AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 2165; AVX2-NEXT: vzeroupper 2166; AVX2-NEXT: retq 2167; 2168; AVX512F-LABEL: uitofp_4i64_to_4f32_undef: 2169; AVX512F: # %bb.0: 2170; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 2171; AVX512F-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 2172; AVX512F-NEXT: vmovq %xmm0, %rax 2173; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0 2174; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero 
2175; AVX512F-NEXT: retq 2176; 2177; AVX512VL-LABEL: uitofp_4i64_to_4f32_undef: 2178; AVX512VL: # %bb.0: 2179; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax 2180; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 2181; AVX512VL-NEXT: vmovq %xmm0, %rax 2182; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0 2183; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero 2184; AVX512VL-NEXT: retq 2185; 2186; AVX512DQ-LABEL: uitofp_4i64_to_4f32_undef: 2187; AVX512DQ: # %bb.0: 2188; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2189; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0 2190; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 2191; AVX512DQ-NEXT: vzeroupper 2192; AVX512DQ-NEXT: retq 2193; 2194; AVX512VLDQ-LABEL: uitofp_4i64_to_4f32_undef: 2195; AVX512VLDQ: # %bb.0: 2196; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 2197; AVX512VLDQ-NEXT: vcvtuqq2ps %ymm0, %xmm0 2198; AVX512VLDQ-NEXT: vzeroupper 2199; AVX512VLDQ-NEXT: retq 2200 %ext = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 2201 %cvt = uitofp <4 x i64> %ext to <4 x float> 2202 ret <4 x float> %cvt 2203} 2204 2205define <4 x float> @uitofp_4i32_to_4f32(<4 x i32> %a) { 2206; SSE2-LABEL: uitofp_4i32_to_4f32: 2207; SSE2: # %bb.0: 2208; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535] 2209; SSE2-NEXT: pand %xmm0, %xmm1 2210; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 2211; SSE2-NEXT: psrld $16, %xmm0 2212; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2213; SSE2-NEXT: subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2214; SSE2-NEXT: addps %xmm1, %xmm0 2215; SSE2-NEXT: retq 2216; 2217; SSE41-LABEL: uitofp_4i32_to_4f32: 2218; SSE41: # %bb.0: 2219; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1258291200,1258291200,1258291200,1258291200] 2220; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] 2221; SSE41-NEXT: psrld $16, %xmm0 2222; SSE41-NEXT: pblendw {{.*#+}} xmm0 = 
xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] 2223; SSE41-NEXT: subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 2224; SSE41-NEXT: addps %xmm1, %xmm0 2225; SSE41-NEXT: retq 2226; 2227; AVX1-LABEL: uitofp_4i32_to_4f32: 2228; AVX1: # %bb.0: 2229; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] 2230; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 2231; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] 2232; AVX1-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2233; AVX1-NEXT: vaddps %xmm0, %xmm1, %xmm0 2234; AVX1-NEXT: retq 2235; 2236; AVX2-LABEL: uitofp_4i32_to_4f32: 2237; AVX2: # %bb.0: 2238; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1258291200,1258291200,1258291200,1258291200] 2239; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] 2240; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0 2241; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1392508928,1392508928,1392508928,1392508928] 2242; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] 2243; AVX2-NEXT: vbroadcastss {{.*#+}} xmm2 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11] 2244; AVX2-NEXT: vsubps %xmm2, %xmm0, %xmm0 2245; AVX2-NEXT: vaddps %xmm0, %xmm1, %xmm0 2246; AVX2-NEXT: retq 2247; 2248; AVX512F-LABEL: uitofp_4i32_to_4f32: 2249; AVX512F: # %bb.0: 2250; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2251; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0 2252; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 2253; AVX512F-NEXT: vzeroupper 2254; AVX512F-NEXT: retq 2255; 2256; AVX512VL-LABEL: uitofp_4i32_to_4f32: 2257; AVX512VL: # %bb.0: 2258; AVX512VL-NEXT: vcvtudq2ps %xmm0, %xmm0 2259; AVX512VL-NEXT: retq 2260; 2261; AVX512DQ-LABEL: uitofp_4i32_to_4f32: 2262; AVX512DQ: # %bb.0: 2263; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2264; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0 2265; AVX512DQ-NEXT: # 
kill: def $xmm0 killed $xmm0 killed $zmm0 2266; AVX512DQ-NEXT: vzeroupper 2267; AVX512DQ-NEXT: retq 2268; 2269; AVX512VLDQ-LABEL: uitofp_4i32_to_4f32: 2270; AVX512VLDQ: # %bb.0: 2271; AVX512VLDQ-NEXT: vcvtudq2ps %xmm0, %xmm0 2272; AVX512VLDQ-NEXT: retq 2273 %cvt = uitofp <4 x i32> %a to <4 x float> 2274 ret <4 x float> %cvt 2275} 2276 2277define <4 x float> @uitofp_4i16_to_4f32(<8 x i16> %a) { 2278; SSE2-LABEL: uitofp_4i16_to_4f32: 2279; SSE2: # %bb.0: 2280; SSE2-NEXT: pxor %xmm1, %xmm1 2281; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2282; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 2283; SSE2-NEXT: retq 2284; 2285; SSE41-LABEL: uitofp_4i16_to_4f32: 2286; SSE41: # %bb.0: 2287; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 2288; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0 2289; SSE41-NEXT: retq 2290; 2291; AVX-LABEL: uitofp_4i16_to_4f32: 2292; AVX: # %bb.0: 2293; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 2294; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 2295; AVX-NEXT: retq 2296 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2297 %cvt = uitofp <4 x i16> %shuf to <4 x float> 2298 ret <4 x float> %cvt 2299} 2300 2301define <4 x float> @uitofp_8i16_to_4f32(<8 x i16> %a) { 2302; SSE2-LABEL: uitofp_8i16_to_4f32: 2303; SSE2: # %bb.0: 2304; SSE2-NEXT: pxor %xmm1, %xmm1 2305; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2306; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 2307; SSE2-NEXT: retq 2308; 2309; SSE41-LABEL: uitofp_8i16_to_4f32: 2310; SSE41: # %bb.0: 2311; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 2312; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0 2313; SSE41-NEXT: retq 2314; 2315; AVX-LABEL: uitofp_8i16_to_4f32: 2316; AVX: # %bb.0: 2317; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 
2318; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 2319; AVX-NEXT: retq 2320 %cvt = uitofp <8 x i16> %a to <8 x float> 2321 %shuf = shufflevector <8 x float> %cvt, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2322 ret <4 x float> %shuf 2323} 2324 2325define <4 x float> @uitofp_4i8_to_4f32(<16 x i8> %a) { 2326; SSE2-LABEL: uitofp_4i8_to_4f32: 2327; SSE2: # %bb.0: 2328; SSE2-NEXT: pxor %xmm1, %xmm1 2329; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2330; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2331; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 2332; SSE2-NEXT: retq 2333; 2334; SSE41-LABEL: uitofp_4i8_to_4f32: 2335; SSE41: # %bb.0: 2336; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2337; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0 2338; SSE41-NEXT: retq 2339; 2340; AVX-LABEL: uitofp_4i8_to_4f32: 2341; AVX: # %bb.0: 2342; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2343; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 2344; AVX-NEXT: retq 2345 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2346 %cvt = uitofp <4 x i8> %shuf to <4 x float> 2347 ret <4 x float> %cvt 2348} 2349 2350define <4 x float> @uitofp_16i8_to_4f32(<16 x i8> %a) { 2351; SSE2-LABEL: uitofp_16i8_to_4f32: 2352; SSE2: # %bb.0: 2353; SSE2-NEXT: pxor %xmm1, %xmm1 2354; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2355; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2356; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 2357; SSE2-NEXT: retq 2358; 2359; SSE41-LABEL: uitofp_16i8_to_4f32: 2360; SSE41: # 
%bb.0: 2361; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2362; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0 2363; SSE41-NEXT: retq 2364; 2365; AVX-LABEL: uitofp_16i8_to_4f32: 2366; AVX: # %bb.0: 2367; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2368; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 2369; AVX-NEXT: retq 2370 %cvt = uitofp <16 x i8> %a to <16 x float> 2371 %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2372 ret <4 x float> %shuf 2373} 2374 2375define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) { 2376; SSE2-LABEL: uitofp_4i64_to_4f32: 2377; SSE2: # %bb.0: 2378; SSE2-NEXT: movq %xmm1, %rax 2379; SSE2-NEXT: testq %rax, %rax 2380; SSE2-NEXT: js .LBB49_1 2381; SSE2-NEXT: # %bb.2: 2382; SSE2-NEXT: cvtsi2ss %rax, %xmm2 2383; SSE2-NEXT: jmp .LBB49_3 2384; SSE2-NEXT: .LBB49_1: 2385; SSE2-NEXT: movq %rax, %rcx 2386; SSE2-NEXT: shrq %rcx 2387; SSE2-NEXT: andl $1, %eax 2388; SSE2-NEXT: orq %rcx, %rax 2389; SSE2-NEXT: cvtsi2ss %rax, %xmm2 2390; SSE2-NEXT: addss %xmm2, %xmm2 2391; SSE2-NEXT: .LBB49_3: 2392; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 2393; SSE2-NEXT: movq %xmm1, %rax 2394; SSE2-NEXT: testq %rax, %rax 2395; SSE2-NEXT: js .LBB49_4 2396; SSE2-NEXT: # %bb.5: 2397; SSE2-NEXT: cvtsi2ss %rax, %xmm3 2398; SSE2-NEXT: jmp .LBB49_6 2399; SSE2-NEXT: .LBB49_4: 2400; SSE2-NEXT: movq %rax, %rcx 2401; SSE2-NEXT: shrq %rcx 2402; SSE2-NEXT: andl $1, %eax 2403; SSE2-NEXT: orq %rcx, %rax 2404; SSE2-NEXT: cvtsi2ss %rax, %xmm3 2405; SSE2-NEXT: addss %xmm3, %xmm3 2406; SSE2-NEXT: .LBB49_6: 2407; SSE2-NEXT: movq %xmm0, %rax 2408; SSE2-NEXT: testq %rax, %rax 2409; SSE2-NEXT: js .LBB49_7 2410; SSE2-NEXT: # %bb.8: 2411; SSE2-NEXT: xorps %xmm1, %xmm1 2412; SSE2-NEXT: cvtsi2ss %rax, %xmm1 2413; SSE2-NEXT: jmp .LBB49_9 2414; SSE2-NEXT: .LBB49_7: 2415; SSE2-NEXT: movq %rax, %rcx 
2416; SSE2-NEXT: shrq %rcx 2417; SSE2-NEXT: andl $1, %eax 2418; SSE2-NEXT: orq %rcx, %rax 2419; SSE2-NEXT: xorps %xmm1, %xmm1 2420; SSE2-NEXT: cvtsi2ss %rax, %xmm1 2421; SSE2-NEXT: addss %xmm1, %xmm1 2422; SSE2-NEXT: .LBB49_9: 2423; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 2424; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 2425; SSE2-NEXT: movq %xmm0, %rax 2426; SSE2-NEXT: testq %rax, %rax 2427; SSE2-NEXT: js .LBB49_10 2428; SSE2-NEXT: # %bb.11: 2429; SSE2-NEXT: xorps %xmm0, %xmm0 2430; SSE2-NEXT: cvtsi2ss %rax, %xmm0 2431; SSE2-NEXT: jmp .LBB49_12 2432; SSE2-NEXT: .LBB49_10: 2433; SSE2-NEXT: movq %rax, %rcx 2434; SSE2-NEXT: shrq %rcx 2435; SSE2-NEXT: andl $1, %eax 2436; SSE2-NEXT: orq %rcx, %rax 2437; SSE2-NEXT: xorps %xmm0, %xmm0 2438; SSE2-NEXT: cvtsi2ss %rax, %xmm0 2439; SSE2-NEXT: addss %xmm0, %xmm0 2440; SSE2-NEXT: .LBB49_12: 2441; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 2442; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] 2443; SSE2-NEXT: movaps %xmm1, %xmm0 2444; SSE2-NEXT: retq 2445; 2446; SSE41-LABEL: uitofp_4i64_to_4f32: 2447; SSE41: # %bb.0: 2448; SSE41-NEXT: movdqa %xmm1, %xmm2 2449; SSE41-NEXT: movdqa %xmm0, %xmm1 2450; SSE41-NEXT: pmovsxbq {{.*#+}} xmm4 = [1,1] 2451; SSE41-NEXT: pand %xmm4, %xmm0 2452; SSE41-NEXT: movdqa %xmm1, %xmm3 2453; SSE41-NEXT: psrlq $1, %xmm3 2454; SSE41-NEXT: por %xmm0, %xmm3 2455; SSE41-NEXT: movdqa %xmm1, %xmm5 2456; SSE41-NEXT: movdqa %xmm1, %xmm0 2457; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm5 2458; SSE41-NEXT: pextrq $1, %xmm5, %rax 2459; SSE41-NEXT: xorps %xmm0, %xmm0 2460; SSE41-NEXT: cvtsi2ss %rax, %xmm0 2461; SSE41-NEXT: movq %xmm5, %rax 2462; SSE41-NEXT: xorps %xmm3, %xmm3 2463; SSE41-NEXT: cvtsi2ss %rax, %xmm3 2464; SSE41-NEXT: insertps {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[2,3] 2465; SSE41-NEXT: pand %xmm2, %xmm4 2466; SSE41-NEXT: movdqa %xmm2, %xmm5 2467; SSE41-NEXT: psrlq $1, %xmm5 2468; SSE41-NEXT: por %xmm4, %xmm5 2469; SSE41-NEXT: shufps {{.*#+}} 
xmm1 = xmm1[1,3],xmm2[1,3] 2470; SSE41-NEXT: movaps %xmm2, %xmm0 2471; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm2 2472; SSE41-NEXT: movq %xmm2, %rax 2473; SSE41-NEXT: xorps %xmm0, %xmm0 2474; SSE41-NEXT: cvtsi2ss %rax, %xmm0 2475; SSE41-NEXT: insertps {{.*#+}} xmm3 = xmm3[0,1],xmm0[0],xmm3[3] 2476; SSE41-NEXT: pextrq $1, %xmm2, %rax 2477; SSE41-NEXT: xorps %xmm0, %xmm0 2478; SSE41-NEXT: cvtsi2ss %rax, %xmm0 2479; SSE41-NEXT: insertps {{.*#+}} xmm3 = xmm3[0,1,2],xmm0[0] 2480; SSE41-NEXT: movaps %xmm3, %xmm2 2481; SSE41-NEXT: addps %xmm3, %xmm2 2482; SSE41-NEXT: movaps %xmm1, %xmm0 2483; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm3 2484; SSE41-NEXT: movaps %xmm3, %xmm0 2485; SSE41-NEXT: retq 2486; 2487; AVX1-LABEL: uitofp_4i64_to_4f32: 2488; AVX1: # %bb.0: 2489; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm1 2490; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 2491; AVX1-NEXT: vpsrlq $1, %xmm2, %xmm3 2492; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 2493; AVX1-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm3 2494; AVX1-NEXT: vorpd %ymm3, %ymm1, %ymm1 2495; AVX1-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm1 2496; AVX1-NEXT: vpextrq $1, %xmm1, %rax 2497; AVX1-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3 2498; AVX1-NEXT: vmovq %xmm1, %rax 2499; AVX1-NEXT: vcvtsi2ss %rax, %xmm4, %xmm4 2500; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3] 2501; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 2502; AVX1-NEXT: vmovq %xmm1, %rax 2503; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4 2504; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3] 2505; AVX1-NEXT: vpextrq $1, %xmm1, %rax 2506; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm1 2507; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0] 2508; AVX1-NEXT: vaddps %xmm1, %xmm1, %xmm3 2509; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 2510; AVX1-NEXT: vblendvps %xmm0, %xmm3, %xmm1, %xmm0 2511; AVX1-NEXT: vzeroupper 2512; AVX1-NEXT: retq 2513; 2514; AVX2-LABEL: uitofp_4i64_to_4f32: 2515; AVX2: # %bb.0: 2516; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = 
[1,1,1,1] 2517; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm1 2518; AVX2-NEXT: vpsrlq $1, %ymm0, %ymm2 2519; AVX2-NEXT: vpor %ymm1, %ymm2, %ymm1 2520; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm1 2521; AVX2-NEXT: vpextrq $1, %xmm1, %rax 2522; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 2523; AVX2-NEXT: vmovq %xmm1, %rax 2524; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm3 2525; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 2526; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1 2527; AVX2-NEXT: vmovq %xmm1, %rax 2528; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3 2529; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3] 2530; AVX2-NEXT: vpextrq $1, %xmm1, %rax 2531; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm1 2532; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[0] 2533; AVX2-NEXT: vaddps %xmm1, %xmm1, %xmm2 2534; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3 2535; AVX2-NEXT: vpackssdw %xmm3, %xmm0, %xmm0 2536; AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 2537; AVX2-NEXT: vzeroupper 2538; AVX2-NEXT: retq 2539; 2540; AVX512F-LABEL: uitofp_4i64_to_4f32: 2541; AVX512F: # %bb.0: 2542; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 2543; AVX512F-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 2544; AVX512F-NEXT: vmovq %xmm0, %rax 2545; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm2 2546; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 2547; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 2548; AVX512F-NEXT: vmovq %xmm0, %rax 2549; AVX512F-NEXT: vcvtusi2ss %rax, %xmm3, %xmm2 2550; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 2551; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 2552; AVX512F-NEXT: vcvtusi2ss %rax, %xmm3, %xmm0 2553; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 2554; AVX512F-NEXT: vzeroupper 2555; AVX512F-NEXT: retq 2556; 2557; AVX512VL-LABEL: uitofp_4i64_to_4f32: 2558; AVX512VL: # %bb.0: 2559; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax 2560; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 2561; AVX512VL-NEXT: vmovq %xmm0, %rax 2562; AVX512VL-NEXT: 
vcvtusi2ss %rax, %xmm2, %xmm2 2563; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 2564; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0 2565; AVX512VL-NEXT: vmovq %xmm0, %rax 2566; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm3, %xmm2 2567; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 2568; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax 2569; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm3, %xmm0 2570; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 2571; AVX512VL-NEXT: vzeroupper 2572; AVX512VL-NEXT: retq 2573; 2574; AVX512DQ-LABEL: uitofp_4i64_to_4f32: 2575; AVX512DQ: # %bb.0: 2576; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2577; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0 2578; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 2579; AVX512DQ-NEXT: vzeroupper 2580; AVX512DQ-NEXT: retq 2581; 2582; AVX512VLDQ-LABEL: uitofp_4i64_to_4f32: 2583; AVX512VLDQ: # %bb.0: 2584; AVX512VLDQ-NEXT: vcvtuqq2ps %ymm0, %xmm0 2585; AVX512VLDQ-NEXT: vzeroupper 2586; AVX512VLDQ-NEXT: retq 2587 %cvt = uitofp <4 x i64> %a to <4 x float> 2588 ret <4 x float> %cvt 2589} 2590 2591define <8 x float> @uitofp_8i32_to_8f32(<8 x i32> %a) { 2592; SSE2-LABEL: uitofp_8i32_to_8f32: 2593; SSE2: # %bb.0: 2594; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535] 2595; SSE2-NEXT: movdqa %xmm0, %xmm3 2596; SSE2-NEXT: pand %xmm2, %xmm3 2597; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1258291200,1258291200,1258291200,1258291200] 2598; SSE2-NEXT: por %xmm4, %xmm3 2599; SSE2-NEXT: psrld $16, %xmm0 2600; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [1392508928,1392508928,1392508928,1392508928] 2601; SSE2-NEXT: por %xmm5, %xmm0 2602; SSE2-NEXT: movaps {{.*#+}} xmm6 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11] 2603; SSE2-NEXT: subps %xmm6, %xmm0 2604; SSE2-NEXT: addps %xmm3, %xmm0 2605; SSE2-NEXT: pand %xmm1, %xmm2 2606; SSE2-NEXT: por %xmm4, %xmm2 2607; SSE2-NEXT: psrld $16, %xmm1 2608; SSE2-NEXT: por %xmm5, %xmm1 2609; SSE2-NEXT: subps %xmm6, %xmm1 2610; 
SSE2-NEXT: addps %xmm2, %xmm1 2611; SSE2-NEXT: retq 2612; 2613; SSE41-LABEL: uitofp_8i32_to_8f32: 2614; SSE41: # %bb.0: 2615; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [1258291200,1258291200,1258291200,1258291200] 2616; SSE41-NEXT: movdqa %xmm0, %xmm3 2617; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7] 2618; SSE41-NEXT: psrld $16, %xmm0 2619; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [1392508928,1392508928,1392508928,1392508928] 2620; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1],xmm0[2],xmm4[3],xmm0[4],xmm4[5],xmm0[6],xmm4[7] 2621; SSE41-NEXT: movaps {{.*#+}} xmm5 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11] 2622; SSE41-NEXT: subps %xmm5, %xmm0 2623; SSE41-NEXT: addps %xmm3, %xmm0 2624; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7] 2625; SSE41-NEXT: psrld $16, %xmm1 2626; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1],xmm1[2],xmm4[3],xmm1[4],xmm4[5],xmm1[6],xmm4[7] 2627; SSE41-NEXT: subps %xmm5, %xmm1 2628; SSE41-NEXT: addps %xmm2, %xmm1 2629; SSE41-NEXT: retq 2630; 2631; AVX1-LABEL: uitofp_8i32_to_8f32: 2632; AVX1: # %bb.0: 2633; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 2634; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 2635; AVX1-NEXT: vpsrld $16, %xmm2, %xmm2 2636; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 2637; AVX1-NEXT: vcvtdq2ps %ymm1, %ymm1 2638; AVX1-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 2639; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 2640; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 2641; AVX1-NEXT: vaddps %ymm0, %ymm1, %ymm0 2642; AVX1-NEXT: retq 2643; 2644; AVX2-LABEL: uitofp_8i32_to_8f32: 2645; AVX2: # %bb.0: 2646; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200] 2647; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = 
ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 2648; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0 2649; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928] 2650; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15] 2651; AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11] 2652; AVX2-NEXT: vsubps %ymm2, %ymm0, %ymm0 2653; AVX2-NEXT: vaddps %ymm0, %ymm1, %ymm0 2654; AVX2-NEXT: retq 2655; 2656; AVX512F-LABEL: uitofp_8i32_to_8f32: 2657; AVX512F: # %bb.0: 2658; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2659; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0 2660; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 2661; AVX512F-NEXT: retq 2662; 2663; AVX512VL-LABEL: uitofp_8i32_to_8f32: 2664; AVX512VL: # %bb.0: 2665; AVX512VL-NEXT: vcvtudq2ps %ymm0, %ymm0 2666; AVX512VL-NEXT: retq 2667; 2668; AVX512DQ-LABEL: uitofp_8i32_to_8f32: 2669; AVX512DQ: # %bb.0: 2670; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2671; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0 2672; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 2673; AVX512DQ-NEXT: retq 2674; 2675; AVX512VLDQ-LABEL: uitofp_8i32_to_8f32: 2676; AVX512VLDQ: # %bb.0: 2677; AVX512VLDQ-NEXT: vcvtudq2ps %ymm0, %ymm0 2678; AVX512VLDQ-NEXT: retq 2679 %cvt = uitofp <8 x i32> %a to <8 x float> 2680 ret <8 x float> %cvt 2681} 2682 2683define <8 x float> @uitofp_8i16_to_8f32(<8 x i16> %a) { 2684; SSE2-LABEL: uitofp_8i16_to_8f32: 2685; SSE2: # %bb.0: 2686; SSE2-NEXT: pxor %xmm1, %xmm1 2687; SSE2-NEXT: movdqa %xmm0, %xmm2 2688; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 2689; 
SSE2-NEXT: cvtdq2ps %xmm2, %xmm2 2690; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2691; SSE2-NEXT: cvtdq2ps %xmm0, %xmm1 2692; SSE2-NEXT: movaps %xmm2, %xmm0 2693; SSE2-NEXT: retq 2694; 2695; SSE41-LABEL: uitofp_8i16_to_8f32: 2696; SSE41: # %bb.0: 2697; SSE41-NEXT: pxor %xmm1, %xmm1 2698; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 2699; SSE41-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2700; SSE41-NEXT: cvtdq2ps %xmm0, %xmm1 2701; SSE41-NEXT: cvtdq2ps %xmm2, %xmm0 2702; SSE41-NEXT: retq 2703; 2704; AVX1-LABEL: uitofp_8i16_to_8f32: 2705; AVX1: # %bb.0: 2706; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 2707; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2708; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 2709; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2710; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 2711; AVX1-NEXT: retq 2712; 2713; AVX2-LABEL: uitofp_8i16_to_8f32: 2714; AVX2: # %bb.0: 2715; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2716; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 2717; AVX2-NEXT: retq 2718; 2719; AVX512-LABEL: uitofp_8i16_to_8f32: 2720; AVX512: # %bb.0: 2721; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2722; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0 2723; AVX512-NEXT: retq 2724 %cvt = uitofp <8 x i16> %a to <8 x float> 2725 ret <8 x float> %cvt 2726} 2727 2728define <8 x float> @uitofp_8i8_to_8f32(<16 x i8> %a) { 2729; SSE2-LABEL: uitofp_8i8_to_8f32: 2730; SSE2: # %bb.0: 2731; SSE2-NEXT: pxor %xmm1, %xmm1 2732; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2733; SSE2-NEXT: movdqa %xmm0, %xmm2 2734; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 2735; SSE2-NEXT: cvtdq2ps %xmm2, %xmm2 2736; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2737; SSE2-NEXT: cvtdq2ps %xmm0, %xmm1 2738; SSE2-NEXT: movaps %xmm2, %xmm0 2739; SSE2-NEXT: retq 2740; 2741; SSE41-LABEL: uitofp_8i8_to_8f32: 2742; SSE41: # %bb.0: 2743; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2744; SSE41-NEXT: cvtdq2ps %xmm1, %xmm2 2745; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 2746; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2747; SSE41-NEXT: cvtdq2ps %xmm0, %xmm1 2748; SSE41-NEXT: movaps %xmm2, %xmm0 2749; SSE41-NEXT: retq 2750; 2751; AVX1-LABEL: uitofp_8i8_to_8f32: 2752; AVX1: # %bb.0: 2753; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2754; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 2755; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2756; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2757; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 2758; AVX1-NEXT: retq 2759; 2760; AVX2-LABEL: uitofp_8i8_to_8f32: 2761; AVX2: # %bb.0: 2762; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 2763; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 2764; AVX2-NEXT: retq 2765; 2766; AVX512-LABEL: uitofp_8i8_to_8f32: 2767; AVX512: # %bb.0: 2768; AVX512-NEXT: 
vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 2769; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0 2770; AVX512-NEXT: retq 2771 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 2772 %cvt = uitofp <8 x i8> %shuf to <8 x float> 2773 ret <8 x float> %cvt 2774} 2775 2776define <8 x float> @uitofp_16i8_to_8f32(<16 x i8> %a) { 2777; SSE2-LABEL: uitofp_16i8_to_8f32: 2778; SSE2: # %bb.0: 2779; SSE2-NEXT: pxor %xmm1, %xmm1 2780; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2781; SSE2-NEXT: movdqa %xmm0, %xmm2 2782; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 2783; SSE2-NEXT: cvtdq2ps %xmm2, %xmm2 2784; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2785; SSE2-NEXT: cvtdq2ps %xmm0, %xmm1 2786; SSE2-NEXT: movaps %xmm2, %xmm0 2787; SSE2-NEXT: retq 2788; 2789; SSE41-LABEL: uitofp_16i8_to_8f32: 2790; SSE41: # %bb.0: 2791; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2792; SSE41-NEXT: cvtdq2ps %xmm1, %xmm2 2793; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 2794; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2795; SSE41-NEXT: cvtdq2ps %xmm0, %xmm1 2796; SSE41-NEXT: movaps %xmm2, %xmm0 2797; SSE41-NEXT: retq 2798; 2799; AVX1-LABEL: uitofp_16i8_to_8f32: 2800; AVX1: # %bb.0: 2801; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2802; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 2803; 
AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 2804; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2805; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 2806; AVX1-NEXT: retq 2807; 2808; AVX2-LABEL: uitofp_16i8_to_8f32: 2809; AVX2: # %bb.0: 2810; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 2811; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 2812; AVX2-NEXT: retq 2813; 2814; AVX512-LABEL: uitofp_16i8_to_8f32: 2815; AVX512: # %bb.0: 2816; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 2817; AVX512-NEXT: vcvtdq2ps %zmm0, %zmm0 2818; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 2819; AVX512-NEXT: retq 2820 %cvt = uitofp <16 x i8> %a to <16 x float> 2821 %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 2822 ret <8 x float> %shuf 2823} 2824 2825; 2826; Load Signed Integer to Double 2827; 2828 2829define <2 x double> @sitofp_load_2i64_to_2f64(ptr%a) { 2830; SSE-LABEL: sitofp_load_2i64_to_2f64: 2831; SSE: # %bb.0: 2832; SSE-NEXT: cvtsi2sdq 8(%rdi), %xmm1 2833; SSE-NEXT: cvtsi2sdq (%rdi), %xmm0 2834; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2835; SSE-NEXT: retq 2836; 2837; VEX-LABEL: sitofp_load_2i64_to_2f64: 2838; VEX: # %bb.0: 2839; VEX-NEXT: vcvtsi2sdq 8(%rdi), %xmm0, %xmm0 2840; VEX-NEXT: vcvtsi2sdq (%rdi), %xmm1, %xmm1 2841; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 2842; 
VEX-NEXT: retq 2843; 2844; AVX512F-LABEL: sitofp_load_2i64_to_2f64: 2845; AVX512F: # %bb.0: 2846; AVX512F-NEXT: vcvtsi2sdq 8(%rdi), %xmm0, %xmm0 2847; AVX512F-NEXT: vcvtsi2sdq (%rdi), %xmm1, %xmm1 2848; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 2849; AVX512F-NEXT: retq 2850; 2851; AVX512VL-LABEL: sitofp_load_2i64_to_2f64: 2852; AVX512VL: # %bb.0: 2853; AVX512VL-NEXT: vcvtsi2sdq 8(%rdi), %xmm0, %xmm0 2854; AVX512VL-NEXT: vcvtsi2sdq (%rdi), %xmm1, %xmm1 2855; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 2856; AVX512VL-NEXT: retq 2857; 2858; AVX512DQ-LABEL: sitofp_load_2i64_to_2f64: 2859; AVX512DQ: # %bb.0: 2860; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0 2861; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0 2862; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 2863; AVX512DQ-NEXT: vzeroupper 2864; AVX512DQ-NEXT: retq 2865; 2866; AVX512VLDQ-LABEL: sitofp_load_2i64_to_2f64: 2867; AVX512VLDQ: # %bb.0: 2868; AVX512VLDQ-NEXT: vcvtqq2pd (%rdi), %xmm0 2869; AVX512VLDQ-NEXT: retq 2870 %ld = load <2 x i64>, ptr%a 2871 %cvt = sitofp <2 x i64> %ld to <2 x double> 2872 ret <2 x double> %cvt 2873} 2874 2875define <2 x double> @sitofp_load_2i32_to_2f64(ptr%a) { 2876; SSE-LABEL: sitofp_load_2i32_to_2f64: 2877; SSE: # %bb.0: 2878; SSE-NEXT: cvtdq2pd (%rdi), %xmm0 2879; SSE-NEXT: retq 2880; 2881; AVX-LABEL: sitofp_load_2i32_to_2f64: 2882; AVX: # %bb.0: 2883; AVX-NEXT: vcvtdq2pd (%rdi), %xmm0 2884; AVX-NEXT: retq 2885 %ld = load <2 x i32>, ptr%a 2886 %cvt = sitofp <2 x i32> %ld to <2 x double> 2887 ret <2 x double> %cvt 2888} 2889 2890define <2 x double> @sitofp_volatile_load_4i32_to_2f64(ptr%a) { 2891; SSE-LABEL: sitofp_volatile_load_4i32_to_2f64: 2892; SSE: # %bb.0: 2893; SSE-NEXT: movaps (%rdi), %xmm0 2894; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 2895; SSE-NEXT: retq 2896; 2897; AVX-LABEL: sitofp_volatile_load_4i32_to_2f64: 2898; AVX: # %bb.0: 2899; AVX-NEXT: vmovaps (%rdi), %xmm0 2900; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 2901; AVX-NEXT: retq 2902 %ld = load volatile <4 x 
i32>, ptr%a 2903 %b = shufflevector <4 x i32> %ld, <4 x i32> undef, <2 x i32> <i32 0, i32 1> 2904 %cvt = sitofp <2 x i32> %b to <2 x double> 2905 ret <2 x double> %cvt 2906} 2907 2908define <2 x double> @sitofp_load_4i32_to_2f64_2(ptr %x) { 2909; SSE-LABEL: sitofp_load_4i32_to_2f64_2: 2910; SSE: # %bb.0: 2911; SSE-NEXT: cvtdq2pd (%rdi), %xmm0 2912; SSE-NEXT: retq 2913; 2914; AVX-LABEL: sitofp_load_4i32_to_2f64_2: 2915; AVX: # %bb.0: 2916; AVX-NEXT: vcvtdq2pd (%rdi), %xmm0 2917; AVX-NEXT: retq 2918 %a = load <4 x i32>, ptr %x 2919 %b = sitofp <4 x i32> %a to <4 x double> 2920 %c = shufflevector <4 x double> %b, <4 x double> undef, <2 x i32> <i32 0, i32 1> 2921 ret <2 x double> %c 2922} 2923 2924define <2 x double> @sitofp_volatile_load_4i32_to_2f64_2(ptr %x) { 2925; SSE-LABEL: sitofp_volatile_load_4i32_to_2f64_2: 2926; SSE: # %bb.0: 2927; SSE-NEXT: movaps (%rdi), %xmm0 2928; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 2929; SSE-NEXT: retq 2930; 2931; AVX-LABEL: sitofp_volatile_load_4i32_to_2f64_2: 2932; AVX: # %bb.0: 2933; AVX-NEXT: vmovaps (%rdi), %xmm0 2934; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 2935; AVX-NEXT: retq 2936 %a = load volatile <4 x i32>, ptr %x 2937 %b = sitofp <4 x i32> %a to <4 x double> 2938 %c = shufflevector <4 x double> %b, <4 x double> undef, <2 x i32> <i32 0, i32 1> 2939 ret <2 x double> %c 2940} 2941 2942define <2 x double> @sitofp_load_2i16_to_2f64(ptr%a) { 2943; SSE2-LABEL: sitofp_load_2i16_to_2f64: 2944; SSE2: # %bb.0: 2945; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2946; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7] 2947; SSE2-NEXT: psrad $16, %xmm0 2948; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0 2949; SSE2-NEXT: retq 2950; 2951; SSE41-LABEL: sitofp_load_2i16_to_2f64: 2952; SSE41: # %bb.0: 2953; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2954; SSE41-NEXT: pmovsxwd %xmm0, %xmm0 2955; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0 2956; SSE41-NEXT: retq 2957; 2958; AVX-LABEL: sitofp_load_2i16_to_2f64: 2959; AVX: # %bb.0: 2960; AVX-NEXT: vmovd 
{{.*#+}} xmm0 = mem[0],zero,zero,zero 2961; AVX-NEXT: vpmovsxwd %xmm0, %xmm0 2962; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 2963; AVX-NEXT: retq 2964 %ld = load <2 x i16>, ptr%a 2965 %cvt = sitofp <2 x i16> %ld to <2 x double> 2966 ret <2 x double> %cvt 2967} 2968 2969define <2 x double> @sitofp_load_2i8_to_2f64(ptr%a) { 2970; SSE2-LABEL: sitofp_load_2i8_to_2f64: 2971; SSE2: # %bb.0: 2972; SSE2-NEXT: movzwl (%rdi), %eax 2973; SSE2-NEXT: movd %eax, %xmm0 2974; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2975; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7] 2976; SSE2-NEXT: psrad $24, %xmm0 2977; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0 2978; SSE2-NEXT: retq 2979; 2980; SSE41-LABEL: sitofp_load_2i8_to_2f64: 2981; SSE41: # %bb.0: 2982; SSE41-NEXT: movzwl (%rdi), %eax 2983; SSE41-NEXT: movd %eax, %xmm0 2984; SSE41-NEXT: pmovsxbd %xmm0, %xmm0 2985; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0 2986; SSE41-NEXT: retq 2987; 2988; AVX-LABEL: sitofp_load_2i8_to_2f64: 2989; AVX: # %bb.0: 2990; AVX-NEXT: movzwl (%rdi), %eax 2991; AVX-NEXT: vmovd %eax, %xmm0 2992; AVX-NEXT: vpmovsxbd %xmm0, %xmm0 2993; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 2994; AVX-NEXT: retq 2995 %ld = load <2 x i8>, ptr%a 2996 %cvt = sitofp <2 x i8> %ld to <2 x double> 2997 ret <2 x double> %cvt 2998} 2999 3000define <4 x double> @sitofp_load_4i64_to_4f64(ptr%a) { 3001; SSE-LABEL: sitofp_load_4i64_to_4f64: 3002; SSE: # %bb.0: 3003; SSE-NEXT: cvtsi2sdq 8(%rdi), %xmm1 3004; SSE-NEXT: cvtsi2sdq (%rdi), %xmm0 3005; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 3006; SSE-NEXT: cvtsi2sdq 24(%rdi), %xmm2 3007; SSE-NEXT: xorps %xmm1, %xmm1 3008; SSE-NEXT: cvtsi2sdq 16(%rdi), %xmm1 3009; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] 3010; SSE-NEXT: retq 3011; 3012; VEX-LABEL: sitofp_load_4i64_to_4f64: 3013; VEX: # %bb.0: 3014; VEX-NEXT: vcvtsi2sdq 24(%rdi), %xmm0, %xmm0 3015; VEX-NEXT: vcvtsi2sdq 16(%rdi), %xmm1, %xmm1 3016; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 3017; VEX-NEXT: 
vcvtsi2sdq 8(%rdi), %xmm2, %xmm1 3018; VEX-NEXT: vcvtsi2sdq (%rdi), %xmm2, %xmm2 3019; VEX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] 3020; VEX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3021; VEX-NEXT: retq 3022; 3023; AVX512F-LABEL: sitofp_load_4i64_to_4f64: 3024; AVX512F: # %bb.0: 3025; AVX512F-NEXT: vcvtsi2sdq 24(%rdi), %xmm0, %xmm0 3026; AVX512F-NEXT: vcvtsi2sdq 16(%rdi), %xmm1, %xmm1 3027; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 3028; AVX512F-NEXT: vcvtsi2sdq 8(%rdi), %xmm2, %xmm1 3029; AVX512F-NEXT: vcvtsi2sdq (%rdi), %xmm2, %xmm2 3030; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] 3031; AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3032; AVX512F-NEXT: retq 3033; 3034; AVX512VL-LABEL: sitofp_load_4i64_to_4f64: 3035; AVX512VL: # %bb.0: 3036; AVX512VL-NEXT: vcvtsi2sdq 24(%rdi), %xmm0, %xmm0 3037; AVX512VL-NEXT: vcvtsi2sdq 16(%rdi), %xmm1, %xmm1 3038; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 3039; AVX512VL-NEXT: vcvtsi2sdq 8(%rdi), %xmm2, %xmm1 3040; AVX512VL-NEXT: vcvtsi2sdq (%rdi), %xmm2, %xmm2 3041; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] 3042; AVX512VL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3043; AVX512VL-NEXT: retq 3044; 3045; AVX512DQ-LABEL: sitofp_load_4i64_to_4f64: 3046; AVX512DQ: # %bb.0: 3047; AVX512DQ-NEXT: vmovaps (%rdi), %ymm0 3048; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0 3049; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 3050; AVX512DQ-NEXT: retq 3051; 3052; AVX512VLDQ-LABEL: sitofp_load_4i64_to_4f64: 3053; AVX512VLDQ: # %bb.0: 3054; AVX512VLDQ-NEXT: vcvtqq2pd (%rdi), %ymm0 3055; AVX512VLDQ-NEXT: retq 3056 %ld = load <4 x i64>, ptr%a 3057 %cvt = sitofp <4 x i64> %ld to <4 x double> 3058 ret <4 x double> %cvt 3059} 3060 3061define <4 x double> @sitofp_load_4i32_to_4f64(ptr%a) { 3062; SSE-LABEL: sitofp_load_4i32_to_4f64: 3063; SSE: # %bb.0: 3064; SSE-NEXT: movdqa (%rdi), %xmm1 3065; SSE-NEXT: cvtdq2pd %xmm1, %xmm0 3066; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 
3067; SSE-NEXT: cvtdq2pd %xmm1, %xmm1 3068; SSE-NEXT: retq 3069; 3070; AVX-LABEL: sitofp_load_4i32_to_4f64: 3071; AVX: # %bb.0: 3072; AVX-NEXT: vcvtdq2pd (%rdi), %ymm0 3073; AVX-NEXT: retq 3074 %ld = load <4 x i32>, ptr%a 3075 %cvt = sitofp <4 x i32> %ld to <4 x double> 3076 ret <4 x double> %cvt 3077} 3078 3079define <4 x double> @sitofp_load_4i16_to_4f64(ptr%a) { 3080; SSE2-LABEL: sitofp_load_4i16_to_4f64: 3081; SSE2: # %bb.0: 3082; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 3083; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 3084; SSE2-NEXT: psrad $16, %xmm1 3085; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0 3086; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 3087; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1 3088; SSE2-NEXT: retq 3089; 3090; SSE41-LABEL: sitofp_load_4i16_to_4f64: 3091; SSE41: # %bb.0: 3092; SSE41-NEXT: pmovsxwd (%rdi), %xmm1 3093; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0 3094; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 3095; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1 3096; SSE41-NEXT: retq 3097; 3098; AVX-LABEL: sitofp_load_4i16_to_4f64: 3099; AVX: # %bb.0: 3100; AVX-NEXT: vpmovsxwd (%rdi), %xmm0 3101; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0 3102; AVX-NEXT: retq 3103 %ld = load <4 x i16>, ptr%a 3104 %cvt = sitofp <4 x i16> %ld to <4 x double> 3105 ret <4 x double> %cvt 3106} 3107 3108define <4 x double> @sitofp_load_4i8_to_4f64(ptr%a) { 3109; SSE2-LABEL: sitofp_load_4i8_to_4f64: 3110; SSE2: # %bb.0: 3111; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 3112; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3113; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 3114; SSE2-NEXT: psrad $24, %xmm1 3115; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0 3116; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 3117; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1 3118; SSE2-NEXT: retq 3119; 3120; SSE41-LABEL: sitofp_load_4i8_to_4f64: 3121; SSE41: # %bb.0: 3122; SSE41-NEXT: pmovsxbd 
(%rdi), %xmm1 3123; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0 3124; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 3125; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1 3126; SSE41-NEXT: retq 3127; 3128; AVX-LABEL: sitofp_load_4i8_to_4f64: 3129; AVX: # %bb.0: 3130; AVX-NEXT: vpmovsxbd (%rdi), %xmm0 3131; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0 3132; AVX-NEXT: retq 3133 %ld = load <4 x i8>, ptr%a 3134 %cvt = sitofp <4 x i8> %ld to <4 x double> 3135 ret <4 x double> %cvt 3136} 3137 3138; 3139; Load Unsigned Integer to Double 3140; 3141 3142define <2 x double> @uitofp_load_2i64_to_2f64(ptr%a) { 3143; SSE2-LABEL: uitofp_load_2i64_to_2f64: 3144; SSE2: # %bb.0: 3145; SSE2-NEXT: movdqa (%rdi), %xmm0 3146; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [4294967295,4294967295] 3147; SSE2-NEXT: pand %xmm0, %xmm1 3148; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 3149; SSE2-NEXT: psrlq $32, %xmm0 3150; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 3151; SSE2-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 3152; SSE2-NEXT: addpd %xmm1, %xmm0 3153; SSE2-NEXT: retq 3154; 3155; SSE41-LABEL: uitofp_load_2i64_to_2f64: 3156; SSE41: # %bb.0: 3157; SSE41-NEXT: movdqa (%rdi), %xmm0 3158; SSE41-NEXT: pxor %xmm1, %xmm1 3159; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 3160; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 3161; SSE41-NEXT: psrlq $32, %xmm0 3162; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 3163; SSE41-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 3164; SSE41-NEXT: addpd %xmm1, %xmm0 3165; SSE41-NEXT: retq 3166; 3167; AVX1-LABEL: uitofp_load_2i64_to_2f64: 3168; AVX1: # %bb.0: 3169; AVX1-NEXT: vmovdqa (%rdi), %xmm0 3170; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 3171; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 3172; AVX1-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 3173; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0 3174; AVX1-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 3175; AVX1-NEXT: vsubpd 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 3176; AVX1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 3177; AVX1-NEXT: retq 3178; 3179; AVX2-LABEL: uitofp_load_2i64_to_2f64: 3180; AVX2: # %bb.0: 3181; AVX2-NEXT: vmovdqa (%rdi), %xmm0 3182; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 3183; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] 3184; AVX2-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 3185; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm0 3186; AVX2-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 3187; AVX2-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 3188; AVX2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 3189; AVX2-NEXT: retq 3190; 3191; AVX512F-LABEL: uitofp_load_2i64_to_2f64: 3192; AVX512F: # %bb.0: 3193; AVX512F-NEXT: vmovdqa (%rdi), %xmm0 3194; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 3195; AVX512F-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] 3196; AVX512F-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 3197; AVX512F-NEXT: vpsrlq $32, %xmm0, %xmm0 3198; AVX512F-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 3199; AVX512F-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 3200; AVX512F-NEXT: vaddpd %xmm0, %xmm1, %xmm0 3201; AVX512F-NEXT: retq 3202; 3203; AVX512VL-LABEL: uitofp_load_2i64_to_2f64: 3204; AVX512VL: # %bb.0: 3205; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0 3206; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 3207; AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] 3208; AVX512VL-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1 3209; AVX512VL-NEXT: vpsrlq $32, %xmm0, %xmm0 3210; AVX512VL-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 3211; AVX512VL-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 3212; AVX512VL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 3213; AVX512VL-NEXT: retq 3214; 3215; AVX512DQ-LABEL: uitofp_load_2i64_to_2f64: 3216; AVX512DQ: # %bb.0: 3217; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0 3218; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0 3219; 
AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 3220; AVX512DQ-NEXT: vzeroupper 3221; AVX512DQ-NEXT: retq 3222; 3223; AVX512VLDQ-LABEL: uitofp_load_2i64_to_2f64: 3224; AVX512VLDQ: # %bb.0: 3225; AVX512VLDQ-NEXT: vcvtuqq2pd (%rdi), %xmm0 3226; AVX512VLDQ-NEXT: retq 3227 %ld = load <2 x i64>, ptr%a 3228 %cvt = uitofp <2 x i64> %ld to <2 x double> 3229 ret <2 x double> %cvt 3230} 3231 3232define <2 x double> @uitofp_load_2i32_to_2f64(ptr%a) { 3233; SSE2-LABEL: uitofp_load_2i32_to_2f64: 3234; SSE2: # %bb.0: 3235; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 3236; SSE2-NEXT: xorpd %xmm1, %xmm1 3237; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3238; SSE2-NEXT: movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 3239; SSE2-NEXT: orpd %xmm1, %xmm0 3240; SSE2-NEXT: subpd %xmm1, %xmm0 3241; SSE2-NEXT: retq 3242; 3243; SSE41-LABEL: uitofp_load_2i32_to_2f64: 3244; SSE41: # %bb.0: 3245; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero 3246; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 3247; SSE41-NEXT: por %xmm1, %xmm0 3248; SSE41-NEXT: subpd %xmm1, %xmm0 3249; SSE41-NEXT: retq 3250; 3251; AVX1-LABEL: uitofp_load_2i32_to_2f64: 3252; AVX1: # %bb.0: 3253; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero 3254; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 3255; AVX1-NEXT: # xmm1 = mem[0,0] 3256; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 3257; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0 3258; AVX1-NEXT: retq 3259; 3260; AVX2-LABEL: uitofp_load_2i32_to_2f64: 3261; AVX2: # %bb.0: 3262; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero 3263; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 3264; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 3265; AVX2-NEXT: vsubpd %xmm1, %xmm0, %xmm0 3266; AVX2-NEXT: retq 3267; 3268; AVX512F-LABEL: uitofp_load_2i32_to_2f64: 3269; AVX512F: # %bb.0: 3270; AVX512F-NEXT: vmovsd 
{{.*#+}} xmm0 = mem[0],zero 3271; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0 3272; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 3273; AVX512F-NEXT: vzeroupper 3274; AVX512F-NEXT: retq 3275; 3276; AVX512VL-LABEL: uitofp_load_2i32_to_2f64: 3277; AVX512VL: # %bb.0: 3278; AVX512VL-NEXT: vcvtudq2pd (%rdi), %xmm0 3279; AVX512VL-NEXT: retq 3280; 3281; AVX512DQ-LABEL: uitofp_load_2i32_to_2f64: 3282; AVX512DQ: # %bb.0: 3283; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 3284; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0 3285; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 3286; AVX512DQ-NEXT: vzeroupper 3287; AVX512DQ-NEXT: retq 3288; 3289; AVX512VLDQ-LABEL: uitofp_load_2i32_to_2f64: 3290; AVX512VLDQ: # %bb.0: 3291; AVX512VLDQ-NEXT: vcvtudq2pd (%rdi), %xmm0 3292; AVX512VLDQ-NEXT: retq 3293 %ld = load <2 x i32>, ptr%a 3294 %cvt = uitofp <2 x i32> %ld to <2 x double> 3295 ret <2 x double> %cvt 3296} 3297 3298define <2 x double> @uitofp_load_4i32_to_2f64_2(ptr %x) { 3299; SSE2-LABEL: uitofp_load_4i32_to_2f64_2: 3300; SSE2: # %bb.0: 3301; SSE2-NEXT: movapd (%rdi), %xmm0 3302; SSE2-NEXT: xorpd %xmm1, %xmm1 3303; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3304; SSE2-NEXT: movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 3305; SSE2-NEXT: orpd %xmm1, %xmm0 3306; SSE2-NEXT: subpd %xmm1, %xmm0 3307; SSE2-NEXT: retq 3308; 3309; SSE41-LABEL: uitofp_load_4i32_to_2f64_2: 3310; SSE41: # %bb.0: 3311; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero 3312; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 3313; SSE41-NEXT: por %xmm1, %xmm0 3314; SSE41-NEXT: subpd %xmm1, %xmm0 3315; SSE41-NEXT: retq 3316; 3317; AVX1-LABEL: uitofp_load_4i32_to_2f64_2: 3318; AVX1: # %bb.0: 3319; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero 3320; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 3321; AVX1-NEXT: # xmm1 = mem[0,0] 3322; AVX1-NEXT: vpor %xmm1, 
%xmm0, %xmm0 3323; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0 3324; AVX1-NEXT: retq 3325; 3326; AVX2-LABEL: uitofp_load_4i32_to_2f64_2: 3327; AVX2: # %bb.0: 3328; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 3329; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 3330; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 3331; AVX2-NEXT: vsubpd %xmm1, %xmm0, %xmm0 3332; AVX2-NEXT: vzeroupper 3333; AVX2-NEXT: retq 3334; 3335; AVX512F-LABEL: uitofp_load_4i32_to_2f64_2: 3336; AVX512F: # %bb.0: 3337; AVX512F-NEXT: vmovaps (%rdi), %xmm0 3338; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0 3339; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 3340; AVX512F-NEXT: vzeroupper 3341; AVX512F-NEXT: retq 3342; 3343; AVX512VL-LABEL: uitofp_load_4i32_to_2f64_2: 3344; AVX512VL: # %bb.0: 3345; AVX512VL-NEXT: vcvtudq2pd (%rdi), %xmm0 3346; AVX512VL-NEXT: retq 3347; 3348; AVX512DQ-LABEL: uitofp_load_4i32_to_2f64_2: 3349; AVX512DQ: # %bb.0: 3350; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0 3351; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0 3352; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 3353; AVX512DQ-NEXT: vzeroupper 3354; AVX512DQ-NEXT: retq 3355; 3356; AVX512VLDQ-LABEL: uitofp_load_4i32_to_2f64_2: 3357; AVX512VLDQ: # %bb.0: 3358; AVX512VLDQ-NEXT: vcvtudq2pd (%rdi), %xmm0 3359; AVX512VLDQ-NEXT: retq 3360 %a = load <4 x i32>, ptr %x 3361 %b = uitofp <4 x i32> %a to <4 x double> 3362 %c = shufflevector <4 x double> %b, <4 x double> undef, <2 x i32> <i32 0, i32 1> 3363 ret <2 x double> %c 3364} 3365 3366define <2 x double> @uitofp_volatile_load_4i32_to_2f64_2(ptr %x) { 3367; SSE2-LABEL: uitofp_volatile_load_4i32_to_2f64_2: 3368; SSE2: # %bb.0: 3369; SSE2-NEXT: movapd (%rdi), %xmm0 3370; SSE2-NEXT: xorpd %xmm1, %xmm1 3371; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3372; SSE2-NEXT: movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 3373; SSE2-NEXT: orpd %xmm1, %xmm0 3374; SSE2-NEXT: 
subpd %xmm1, %xmm0 3375; SSE2-NEXT: retq 3376; 3377; SSE41-LABEL: uitofp_volatile_load_4i32_to_2f64_2: 3378; SSE41: # %bb.0: 3379; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero 3380; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 3381; SSE41-NEXT: por %xmm1, %xmm0 3382; SSE41-NEXT: subpd %xmm1, %xmm0 3383; SSE41-NEXT: retq 3384; 3385; AVX1-LABEL: uitofp_volatile_load_4i32_to_2f64_2: 3386; AVX1: # %bb.0: 3387; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero 3388; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 3389; AVX1-NEXT: # xmm1 = mem[0,0] 3390; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 3391; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0 3392; AVX1-NEXT: retq 3393; 3394; AVX2-LABEL: uitofp_volatile_load_4i32_to_2f64_2: 3395; AVX2: # %bb.0: 3396; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 3397; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 3398; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 3399; AVX2-NEXT: vsubpd %xmm1, %xmm0, %xmm0 3400; AVX2-NEXT: vzeroupper 3401; AVX2-NEXT: retq 3402; 3403; AVX512F-LABEL: uitofp_volatile_load_4i32_to_2f64_2: 3404; AVX512F: # %bb.0: 3405; AVX512F-NEXT: vmovaps (%rdi), %xmm0 3406; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0 3407; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 3408; AVX512F-NEXT: vzeroupper 3409; AVX512F-NEXT: retq 3410; 3411; AVX512VL-LABEL: uitofp_volatile_load_4i32_to_2f64_2: 3412; AVX512VL: # %bb.0: 3413; AVX512VL-NEXT: vmovaps (%rdi), %xmm0 3414; AVX512VL-NEXT: vcvtudq2pd %xmm0, %xmm0 3415; AVX512VL-NEXT: retq 3416; 3417; AVX512DQ-LABEL: uitofp_volatile_load_4i32_to_2f64_2: 3418; AVX512DQ: # %bb.0: 3419; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0 3420; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0 3421; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 3422; AVX512DQ-NEXT: vzeroupper 3423; AVX512DQ-NEXT: retq 3424; 3425; AVX512VLDQ-LABEL: 
uitofp_volatile_load_4i32_to_2f64_2: 3426; AVX512VLDQ: # %bb.0: 3427; AVX512VLDQ-NEXT: vmovaps (%rdi), %xmm0 3428; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %xmm0 3429; AVX512VLDQ-NEXT: retq 3430 %a = load volatile <4 x i32>, ptr %x 3431 %b = uitofp <4 x i32> %a to <4 x double> 3432 %c = shufflevector <4 x double> %b, <4 x double> undef, <2 x i32> <i32 0, i32 1> 3433 ret <2 x double> %c 3434} 3435 3436define <2 x double> @uitofp_load_2i16_to_2f64(ptr%a) { 3437; SSE2-LABEL: uitofp_load_2i16_to_2f64: 3438; SSE2: # %bb.0: 3439; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 3440; SSE2-NEXT: pxor %xmm1, %xmm1 3441; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3442; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0 3443; SSE2-NEXT: retq 3444; 3445; SSE41-LABEL: uitofp_load_2i16_to_2f64: 3446; SSE41: # %bb.0: 3447; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 3448; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 3449; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0 3450; SSE41-NEXT: retq 3451; 3452; AVX-LABEL: uitofp_load_2i16_to_2f64: 3453; AVX: # %bb.0: 3454; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 3455; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 3456; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 3457; AVX-NEXT: retq 3458 %ld = load <2 x i16>, ptr%a 3459 %cvt = uitofp <2 x i16> %ld to <2 x double> 3460 ret <2 x double> %cvt 3461} 3462 3463define <2 x double> @uitofp_load_2i8_to_2f64(ptr%a) { 3464; SSE2-LABEL: uitofp_load_2i8_to_2f64: 3465; SSE2: # %bb.0: 3466; SSE2-NEXT: movzwl (%rdi), %eax 3467; SSE2-NEXT: movd %eax, %xmm0 3468; SSE2-NEXT: pxor %xmm1, %xmm1 3469; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 3470; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3471; SSE2-NEXT: 
cvtdq2pd %xmm0, %xmm0 3472; SSE2-NEXT: retq 3473; 3474; SSE41-LABEL: uitofp_load_2i8_to_2f64: 3475; SSE41: # %bb.0: 3476; SSE41-NEXT: movzwl (%rdi), %eax 3477; SSE41-NEXT: movd %eax, %xmm0 3478; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 3479; SSE41-NEXT: cvtdq2pd %xmm0, %xmm0 3480; SSE41-NEXT: retq 3481; 3482; AVX-LABEL: uitofp_load_2i8_to_2f64: 3483; AVX: # %bb.0: 3484; AVX-NEXT: movzwl (%rdi), %eax 3485; AVX-NEXT: vmovd %eax, %xmm0 3486; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 3487; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 3488; AVX-NEXT: retq 3489 %ld = load <2 x i8>, ptr%a 3490 %cvt = uitofp <2 x i8> %ld to <2 x double> 3491 ret <2 x double> %cvt 3492} 3493 3494define <4 x double> @uitofp_load_4i64_to_4f64(ptr%a) { 3495; SSE2-LABEL: uitofp_load_4i64_to_4f64: 3496; SSE2: # %bb.0: 3497; SSE2-NEXT: movdqa (%rdi), %xmm0 3498; SSE2-NEXT: movdqa 16(%rdi), %xmm1 3499; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295] 3500; SSE2-NEXT: movdqa %xmm0, %xmm3 3501; SSE2-NEXT: pand %xmm2, %xmm3 3502; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200] 3503; SSE2-NEXT: por %xmm4, %xmm3 3504; SSE2-NEXT: psrlq $32, %xmm0 3505; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072] 3506; SSE2-NEXT: por %xmm5, %xmm0 3507; SSE2-NEXT: movapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25] 3508; SSE2-NEXT: subpd %xmm6, %xmm0 3509; SSE2-NEXT: addpd %xmm3, %xmm0 3510; SSE2-NEXT: pand %xmm1, %xmm2 3511; SSE2-NEXT: por %xmm4, %xmm2 3512; SSE2-NEXT: psrlq $32, %xmm1 3513; SSE2-NEXT: por %xmm5, %xmm1 3514; SSE2-NEXT: subpd %xmm6, %xmm1 3515; SSE2-NEXT: addpd %xmm2, %xmm1 3516; SSE2-NEXT: retq 3517; 3518; SSE41-LABEL: uitofp_load_4i64_to_4f64: 3519; SSE41: # %bb.0: 3520; SSE41-NEXT: movdqa (%rdi), %xmm0 3521; SSE41-NEXT: movdqa 16(%rdi), %xmm1 3522; 
SSE41-NEXT: pxor %xmm2, %xmm2 3523; SSE41-NEXT: movdqa %xmm0, %xmm3 3524; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7] 3525; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200] 3526; SSE41-NEXT: por %xmm4, %xmm3 3527; SSE41-NEXT: psrlq $32, %xmm0 3528; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072] 3529; SSE41-NEXT: por %xmm5, %xmm0 3530; SSE41-NEXT: movapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25] 3531; SSE41-NEXT: subpd %xmm6, %xmm0 3532; SSE41-NEXT: addpd %xmm3, %xmm0 3533; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] 3534; SSE41-NEXT: por %xmm4, %xmm2 3535; SSE41-NEXT: psrlq $32, %xmm1 3536; SSE41-NEXT: por %xmm5, %xmm1 3537; SSE41-NEXT: subpd %xmm6, %xmm1 3538; SSE41-NEXT: addpd %xmm2, %xmm1 3539; SSE41-NEXT: retq 3540; 3541; AVX1-LABEL: uitofp_load_4i64_to_4f64: 3542; AVX1: # %bb.0: 3543; AVX1-NEXT: vmovaps (%rdi), %ymm0 3544; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 3545; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 3546; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 3547; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,3],ymm0[1,3],ymm1[5,7],ymm0[5,7] 3548; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5] 3549; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 3550; AVX1-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 3551; AVX1-NEXT: vaddpd %ymm0, %ymm2, %ymm0 3552; AVX1-NEXT: retq 3553; 3554; AVX2-LABEL: uitofp_load_4i64_to_4f64: 3555; AVX2: # %bb.0: 3556; AVX2-NEXT: vmovdqa (%rdi), %ymm0 3557; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 3558; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 3559; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200] 3560; AVX2-NEXT: vpor %ymm2, %ymm1, %ymm1 3561; AVX2-NEXT: vpsrlq 
$32, %ymm0, %ymm0 3562; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072] 3563; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0 3564; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25] 3565; AVX2-NEXT: vsubpd %ymm2, %ymm0, %ymm0 3566; AVX2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 3567; AVX2-NEXT: retq 3568; 3569; AVX512F-LABEL: uitofp_load_4i64_to_4f64: 3570; AVX512F: # %bb.0: 3571; AVX512F-NEXT: vmovdqa (%rdi), %ymm0 3572; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 3573; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 3574; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200] 3575; AVX512F-NEXT: vpor %ymm2, %ymm1, %ymm1 3576; AVX512F-NEXT: vpsrlq $32, %ymm0, %ymm0 3577; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072] 3578; AVX512F-NEXT: vpor %ymm2, %ymm0, %ymm0 3579; AVX512F-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25] 3580; AVX512F-NEXT: vsubpd %ymm2, %ymm0, %ymm0 3581; AVX512F-NEXT: vaddpd %ymm0, %ymm1, %ymm0 3582; AVX512F-NEXT: retq 3583; 3584; AVX512VL-LABEL: uitofp_load_4i64_to_4f64: 3585; AVX512VL: # %bb.0: 3586; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0 3587; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 3588; AVX512VL-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 3589; AVX512VL-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm1 3590; AVX512VL-NEXT: vpsrlq $32, %ymm0, %ymm0 3591; AVX512VL-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0 3592; AVX512VL-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0 3593; AVX512VL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 3594; AVX512VL-NEXT: 
retq 3595; 3596; AVX512DQ-LABEL: uitofp_load_4i64_to_4f64: 3597; AVX512DQ: # %bb.0: 3598; AVX512DQ-NEXT: vmovaps (%rdi), %ymm0 3599; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0 3600; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 3601; AVX512DQ-NEXT: retq 3602; 3603; AVX512VLDQ-LABEL: uitofp_load_4i64_to_4f64: 3604; AVX512VLDQ: # %bb.0: 3605; AVX512VLDQ-NEXT: vcvtuqq2pd (%rdi), %ymm0 3606; AVX512VLDQ-NEXT: retq 3607 %ld = load <4 x i64>, ptr%a 3608 %cvt = uitofp <4 x i64> %ld to <4 x double> 3609 ret <4 x double> %cvt 3610} 3611 3612define <4 x double> @uitofp_load_4i32_to_4f64(ptr%a) { 3613; SSE2-LABEL: uitofp_load_4i32_to_4f64: 3614; SSE2: # %bb.0: 3615; SSE2-NEXT: movapd (%rdi), %xmm1 3616; SSE2-NEXT: xorpd %xmm2, %xmm2 3617; SSE2-NEXT: movapd %xmm1, %xmm0 3618; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 3619; SSE2-NEXT: movapd {{.*#+}} xmm3 = [4.503599627370496E+15,4.503599627370496E+15] 3620; SSE2-NEXT: orpd %xmm3, %xmm0 3621; SSE2-NEXT: subpd %xmm3, %xmm0 3622; SSE2-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 3623; SSE2-NEXT: orpd %xmm3, %xmm1 3624; SSE2-NEXT: subpd %xmm3, %xmm1 3625; SSE2-NEXT: retq 3626; 3627; SSE41-LABEL: uitofp_load_4i32_to_4f64: 3628; SSE41: # %bb.0: 3629; SSE41-NEXT: movdqa (%rdi), %xmm1 3630; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero 3631; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [4.503599627370496E+15,4.503599627370496E+15] 3632; SSE41-NEXT: por %xmm2, %xmm0 3633; SSE41-NEXT: subpd %xmm2, %xmm0 3634; SSE41-NEXT: pxor %xmm3, %xmm3 3635; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3] 3636; SSE41-NEXT: por %xmm2, %xmm1 3637; SSE41-NEXT: subpd %xmm2, %xmm1 3638; SSE41-NEXT: retq 3639; 3640; AVX1-LABEL: uitofp_load_4i32_to_4f64: 3641; AVX1: # %bb.0: 3642; AVX1-NEXT: vmovdqa (%rdi), %xmm0 3643; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 3644; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3645; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = 
xmm0[0],zero,xmm0[1],zero 3646; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 3647; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15] 3648; AVX1-NEXT: vorpd %ymm1, %ymm0, %ymm0 3649; AVX1-NEXT: vsubpd %ymm1, %ymm0, %ymm0 3650; AVX1-NEXT: retq 3651; 3652; AVX2-LABEL: uitofp_load_4i32_to_4f64: 3653; AVX2: # %bb.0: 3654; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 3655; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15] 3656; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 3657; AVX2-NEXT: vsubpd %ymm1, %ymm0, %ymm0 3658; AVX2-NEXT: retq 3659; 3660; AVX512F-LABEL: uitofp_load_4i32_to_4f64: 3661; AVX512F: # %bb.0: 3662; AVX512F-NEXT: vmovaps (%rdi), %xmm0 3663; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0 3664; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 3665; AVX512F-NEXT: retq 3666; 3667; AVX512VL-LABEL: uitofp_load_4i32_to_4f64: 3668; AVX512VL: # %bb.0: 3669; AVX512VL-NEXT: vcvtudq2pd (%rdi), %ymm0 3670; AVX512VL-NEXT: retq 3671; 3672; AVX512DQ-LABEL: uitofp_load_4i32_to_4f64: 3673; AVX512DQ: # %bb.0: 3674; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0 3675; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0 3676; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 3677; AVX512DQ-NEXT: retq 3678; 3679; AVX512VLDQ-LABEL: uitofp_load_4i32_to_4f64: 3680; AVX512VLDQ: # %bb.0: 3681; AVX512VLDQ-NEXT: vcvtudq2pd (%rdi), %ymm0 3682; AVX512VLDQ-NEXT: retq 3683 %ld = load <4 x i32>, ptr%a 3684 %cvt = uitofp <4 x i32> %ld to <4 x double> 3685 ret <4 x double> %cvt 3686} 3687 3688define <4 x double> @uitofp_load_4i16_to_4f64(ptr%a) { 3689; SSE2-LABEL: uitofp_load_4i16_to_4f64: 3690; SSE2: # %bb.0: 3691; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 3692; SSE2-NEXT: pxor %xmm0, %xmm0 3693; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 3694; 
SSE2-NEXT: cvtdq2pd %xmm1, %xmm0 3695; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 3696; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1 3697; SSE2-NEXT: retq 3698; 3699; SSE41-LABEL: uitofp_load_4i16_to_4f64: 3700; SSE41: # %bb.0: 3701; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 3702; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0 3703; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 3704; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1 3705; SSE41-NEXT: retq 3706; 3707; AVX-LABEL: uitofp_load_4i16_to_4f64: 3708; AVX: # %bb.0: 3709; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 3710; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0 3711; AVX-NEXT: retq 3712 %ld = load <4 x i16>, ptr%a 3713 %cvt = uitofp <4 x i16> %ld to <4 x double> 3714 ret <4 x double> %cvt 3715} 3716 3717define <4 x double> @uitofp_load_4i8_to_4f64(ptr%a) { 3718; SSE2-LABEL: uitofp_load_4i8_to_4f64: 3719; SSE2: # %bb.0: 3720; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 3721; SSE2-NEXT: pxor %xmm0, %xmm0 3722; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 3723; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 3724; SSE2-NEXT: cvtdq2pd %xmm1, %xmm0 3725; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 3726; SSE2-NEXT: cvtdq2pd %xmm1, %xmm1 3727; SSE2-NEXT: retq 3728; 3729; SSE41-LABEL: uitofp_load_4i8_to_4f64: 3730; SSE41: # %bb.0: 3731; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 3732; SSE41-NEXT: cvtdq2pd %xmm1, %xmm0 3733; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 3734; SSE41-NEXT: cvtdq2pd %xmm1, %xmm1 3735; SSE41-NEXT: retq 3736; 3737; AVX-LABEL: uitofp_load_4i8_to_4f64: 3738; AVX: # %bb.0: 3739; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = 
mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 3740; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0 3741; AVX-NEXT: retq 3742 %ld = load <4 x i8>, ptr%a 3743 %cvt = uitofp <4 x i8> %ld to <4 x double> 3744 ret <4 x double> %cvt 3745} 3746 3747; 3748; Load Signed Integer to Float 3749; 3750 3751define <4 x float> @sitofp_load_4i64_to_4f32(ptr%a) { 3752; SSE2-LABEL: sitofp_load_4i64_to_4f32: 3753; SSE2: # %bb.0: 3754; SSE2-NEXT: cvtsi2ssq 24(%rdi), %xmm0 3755; SSE2-NEXT: cvtsi2ssq 16(%rdi), %xmm1 3756; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 3757; SSE2-NEXT: cvtsi2ssq 8(%rdi), %xmm2 3758; SSE2-NEXT: xorps %xmm0, %xmm0 3759; SSE2-NEXT: cvtsi2ssq (%rdi), %xmm0 3760; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 3761; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] 3762; SSE2-NEXT: retq 3763; 3764; SSE41-LABEL: sitofp_load_4i64_to_4f32: 3765; SSE41: # %bb.0: 3766; SSE41-NEXT: cvtsi2ssq 8(%rdi), %xmm1 3767; SSE41-NEXT: cvtsi2ssq (%rdi), %xmm0 3768; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] 3769; SSE41-NEXT: xorps %xmm1, %xmm1 3770; SSE41-NEXT: cvtsi2ssq 16(%rdi), %xmm1 3771; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] 3772; SSE41-NEXT: xorps %xmm1, %xmm1 3773; SSE41-NEXT: cvtsi2ssq 24(%rdi), %xmm1 3774; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] 3775; SSE41-NEXT: retq 3776; 3777; VEX-LABEL: sitofp_load_4i64_to_4f32: 3778; VEX: # %bb.0: 3779; VEX-NEXT: vcvtsi2ssq 8(%rdi), %xmm0, %xmm0 3780; VEX-NEXT: vcvtsi2ssq (%rdi), %xmm1, %xmm1 3781; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 3782; VEX-NEXT: vcvtsi2ssq 16(%rdi), %xmm2, %xmm1 3783; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] 3784; VEX-NEXT: vcvtsi2ssq 24(%rdi), %xmm2, %xmm1 3785; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] 3786; VEX-NEXT: retq 3787; 3788; AVX512F-LABEL: sitofp_load_4i64_to_4f32: 3789; AVX512F: # %bb.0: 3790; AVX512F-NEXT: 
vcvtsi2ssq 8(%rdi), %xmm0, %xmm0 3791; AVX512F-NEXT: vcvtsi2ssq (%rdi), %xmm1, %xmm1 3792; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 3793; AVX512F-NEXT: vcvtsi2ssq 16(%rdi), %xmm2, %xmm1 3794; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] 3795; AVX512F-NEXT: vcvtsi2ssq 24(%rdi), %xmm2, %xmm1 3796; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] 3797; AVX512F-NEXT: retq 3798; 3799; AVX512VL-LABEL: sitofp_load_4i64_to_4f32: 3800; AVX512VL: # %bb.0: 3801; AVX512VL-NEXT: vcvtsi2ssq 8(%rdi), %xmm0, %xmm0 3802; AVX512VL-NEXT: vcvtsi2ssq (%rdi), %xmm1, %xmm1 3803; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 3804; AVX512VL-NEXT: vcvtsi2ssq 16(%rdi), %xmm2, %xmm1 3805; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] 3806; AVX512VL-NEXT: vcvtsi2ssq 24(%rdi), %xmm2, %xmm1 3807; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] 3808; AVX512VL-NEXT: retq 3809; 3810; AVX512DQ-LABEL: sitofp_load_4i64_to_4f32: 3811; AVX512DQ: # %bb.0: 3812; AVX512DQ-NEXT: vmovaps (%rdi), %ymm0 3813; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0 3814; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 3815; AVX512DQ-NEXT: vzeroupper 3816; AVX512DQ-NEXT: retq 3817; 3818; AVX512VLDQ-LABEL: sitofp_load_4i64_to_4f32: 3819; AVX512VLDQ: # %bb.0: 3820; AVX512VLDQ-NEXT: vcvtqq2psy (%rdi), %xmm0 3821; AVX512VLDQ-NEXT: retq 3822 %ld = load <4 x i64>, ptr%a 3823 %cvt = sitofp <4 x i64> %ld to <4 x float> 3824 ret <4 x float> %cvt 3825} 3826 3827define <4 x float> @sitofp_load_4i32_to_4f32(ptr%a) { 3828; SSE-LABEL: sitofp_load_4i32_to_4f32: 3829; SSE: # %bb.0: 3830; SSE-NEXT: cvtdq2ps (%rdi), %xmm0 3831; SSE-NEXT: retq 3832; 3833; AVX-LABEL: sitofp_load_4i32_to_4f32: 3834; AVX: # %bb.0: 3835; AVX-NEXT: vcvtdq2ps (%rdi), %xmm0 3836; AVX-NEXT: retq 3837 %ld = load <4 x i32>, ptr%a 3838 %cvt = sitofp <4 x i32> %ld to <4 x float> 3839 ret <4 x float> %cvt 3840} 3841 3842define <4 x float> 
@sitofp_load_4i16_to_4f32(ptr%a) { 3843; SSE2-LABEL: sitofp_load_4i16_to_4f32: 3844; SSE2: # %bb.0: 3845; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 3846; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 3847; SSE2-NEXT: psrad $16, %xmm0 3848; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 3849; SSE2-NEXT: retq 3850; 3851; SSE41-LABEL: sitofp_load_4i16_to_4f32: 3852; SSE41: # %bb.0: 3853; SSE41-NEXT: pmovsxwd (%rdi), %xmm0 3854; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0 3855; SSE41-NEXT: retq 3856; 3857; AVX-LABEL: sitofp_load_4i16_to_4f32: 3858; AVX: # %bb.0: 3859; AVX-NEXT: vpmovsxwd (%rdi), %xmm0 3860; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 3861; AVX-NEXT: retq 3862 %ld = load <4 x i16>, ptr%a 3863 %cvt = sitofp <4 x i16> %ld to <4 x float> 3864 ret <4 x float> %cvt 3865} 3866 3867define <4 x float> @sitofp_load_4i8_to_4f32(ptr%a) { 3868; SSE2-LABEL: sitofp_load_4i8_to_4f32: 3869; SSE2: # %bb.0: 3870; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 3871; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3872; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 3873; SSE2-NEXT: psrad $24, %xmm0 3874; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 3875; SSE2-NEXT: retq 3876; 3877; SSE41-LABEL: sitofp_load_4i8_to_4f32: 3878; SSE41: # %bb.0: 3879; SSE41-NEXT: pmovsxbd (%rdi), %xmm0 3880; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0 3881; SSE41-NEXT: retq 3882; 3883; AVX-LABEL: sitofp_load_4i8_to_4f32: 3884; AVX: # %bb.0: 3885; AVX-NEXT: vpmovsxbd (%rdi), %xmm0 3886; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 3887; AVX-NEXT: retq 3888 %ld = load <4 x i8>, ptr%a 3889 %cvt = sitofp <4 x i8> %ld to <4 x float> 3890 ret <4 x float> %cvt 3891} 3892 3893define <8 x float> @sitofp_load_8i64_to_8f32(ptr%a) { 3894; SSE2-LABEL: sitofp_load_8i64_to_8f32: 3895; SSE2: # %bb.0: 3896; SSE2-NEXT: cvtsi2ssq 24(%rdi), %xmm0 3897; SSE2-NEXT: cvtsi2ssq 16(%rdi), %xmm1 3898; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 3899; SSE2-NEXT: cvtsi2ssq 8(%rdi), %xmm2 3900; SSE2-NEXT: 
xorps %xmm0, %xmm0 3901; SSE2-NEXT: cvtsi2ssq (%rdi), %xmm0 3902; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 3903; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] 3904; SSE2-NEXT: xorps %xmm1, %xmm1 3905; SSE2-NEXT: cvtsi2ssq 56(%rdi), %xmm1 3906; SSE2-NEXT: xorps %xmm2, %xmm2 3907; SSE2-NEXT: cvtsi2ssq 48(%rdi), %xmm2 3908; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 3909; SSE2-NEXT: cvtsi2ssq 40(%rdi), %xmm3 3910; SSE2-NEXT: xorps %xmm1, %xmm1 3911; SSE2-NEXT: cvtsi2ssq 32(%rdi), %xmm1 3912; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] 3913; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] 3914; SSE2-NEXT: retq 3915; 3916; SSE41-LABEL: sitofp_load_8i64_to_8f32: 3917; SSE41: # %bb.0: 3918; SSE41-NEXT: cvtsi2ssq 8(%rdi), %xmm1 3919; SSE41-NEXT: cvtsi2ssq (%rdi), %xmm0 3920; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] 3921; SSE41-NEXT: xorps %xmm1, %xmm1 3922; SSE41-NEXT: cvtsi2ssq 16(%rdi), %xmm1 3923; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] 3924; SSE41-NEXT: xorps %xmm1, %xmm1 3925; SSE41-NEXT: cvtsi2ssq 24(%rdi), %xmm1 3926; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] 3927; SSE41-NEXT: cvtsi2ssq 40(%rdi), %xmm2 3928; SSE41-NEXT: xorps %xmm1, %xmm1 3929; SSE41-NEXT: cvtsi2ssq 32(%rdi), %xmm1 3930; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3] 3931; SSE41-NEXT: xorps %xmm2, %xmm2 3932; SSE41-NEXT: cvtsi2ssq 48(%rdi), %xmm2 3933; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 3934; SSE41-NEXT: xorps %xmm2, %xmm2 3935; SSE41-NEXT: cvtsi2ssq 56(%rdi), %xmm2 3936; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0] 3937; SSE41-NEXT: retq 3938; 3939; VEX-LABEL: sitofp_load_8i64_to_8f32: 3940; VEX: # %bb.0: 3941; VEX-NEXT: vcvtsi2ssq 40(%rdi), %xmm0, %xmm0 3942; VEX-NEXT: vcvtsi2ssq 32(%rdi), %xmm1, %xmm1 3943; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 3944; VEX-NEXT: vcvtsi2ssq 
48(%rdi), %xmm2, %xmm1 3945; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] 3946; VEX-NEXT: vcvtsi2ssq 56(%rdi), %xmm2, %xmm1 3947; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] 3948; VEX-NEXT: vcvtsi2ssq 8(%rdi), %xmm2, %xmm1 3949; VEX-NEXT: vcvtsi2ssq (%rdi), %xmm2, %xmm2 3950; VEX-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 3951; VEX-NEXT: vcvtsi2ssq 16(%rdi), %xmm3, %xmm2 3952; VEX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 3953; VEX-NEXT: vcvtsi2ssq 24(%rdi), %xmm3, %xmm2 3954; VEX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0] 3955; VEX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3956; VEX-NEXT: retq 3957; 3958; AVX512F-LABEL: sitofp_load_8i64_to_8f32: 3959; AVX512F: # %bb.0: 3960; AVX512F-NEXT: vcvtsi2ssq 40(%rdi), %xmm0, %xmm0 3961; AVX512F-NEXT: vcvtsi2ssq 32(%rdi), %xmm1, %xmm1 3962; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 3963; AVX512F-NEXT: vcvtsi2ssq 48(%rdi), %xmm2, %xmm1 3964; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] 3965; AVX512F-NEXT: vcvtsi2ssq 56(%rdi), %xmm2, %xmm1 3966; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] 3967; AVX512F-NEXT: vcvtsi2ssq 8(%rdi), %xmm2, %xmm1 3968; AVX512F-NEXT: vcvtsi2ssq (%rdi), %xmm2, %xmm2 3969; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 3970; AVX512F-NEXT: vcvtsi2ssq 16(%rdi), %xmm3, %xmm2 3971; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 3972; AVX512F-NEXT: vcvtsi2ssq 24(%rdi), %xmm3, %xmm2 3973; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0] 3974; AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3975; AVX512F-NEXT: retq 3976; 3977; AVX512VL-LABEL: sitofp_load_8i64_to_8f32: 3978; AVX512VL: # %bb.0: 3979; AVX512VL-NEXT: vcvtsi2ssq 40(%rdi), %xmm0, %xmm0 3980; AVX512VL-NEXT: vcvtsi2ssq 32(%rdi), %xmm1, %xmm1 3981; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 3982; AVX512VL-NEXT: vcvtsi2ssq 48(%rdi), %xmm2, %xmm1 
3983; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] 3984; AVX512VL-NEXT: vcvtsi2ssq 56(%rdi), %xmm2, %xmm1 3985; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] 3986; AVX512VL-NEXT: vcvtsi2ssq 8(%rdi), %xmm2, %xmm1 3987; AVX512VL-NEXT: vcvtsi2ssq (%rdi), %xmm2, %xmm2 3988; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 3989; AVX512VL-NEXT: vcvtsi2ssq 16(%rdi), %xmm3, %xmm2 3990; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 3991; AVX512VL-NEXT: vcvtsi2ssq 24(%rdi), %xmm3, %xmm2 3992; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0] 3993; AVX512VL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3994; AVX512VL-NEXT: retq 3995; 3996; AVX512DQ-LABEL: sitofp_load_8i64_to_8f32: 3997; AVX512DQ: # %bb.0: 3998; AVX512DQ-NEXT: vcvtqq2ps (%rdi), %ymm0 3999; AVX512DQ-NEXT: retq 4000; 4001; AVX512VLDQ-LABEL: sitofp_load_8i64_to_8f32: 4002; AVX512VLDQ: # %bb.0: 4003; AVX512VLDQ-NEXT: vcvtqq2ps (%rdi), %ymm0 4004; AVX512VLDQ-NEXT: retq 4005 %ld = load <8 x i64>, ptr%a 4006 %cvt = sitofp <8 x i64> %ld to <8 x float> 4007 ret <8 x float> %cvt 4008} 4009 4010define <8 x float> @sitofp_load_8i32_to_8f32(ptr%a) { 4011; SSE-LABEL: sitofp_load_8i32_to_8f32: 4012; SSE: # %bb.0: 4013; SSE-NEXT: cvtdq2ps (%rdi), %xmm0 4014; SSE-NEXT: cvtdq2ps 16(%rdi), %xmm1 4015; SSE-NEXT: retq 4016; 4017; AVX-LABEL: sitofp_load_8i32_to_8f32: 4018; AVX: # %bb.0: 4019; AVX-NEXT: vcvtdq2ps (%rdi), %ymm0 4020; AVX-NEXT: retq 4021 %ld = load <8 x i32>, ptr%a 4022 %cvt = sitofp <8 x i32> %ld to <8 x float> 4023 ret <8 x float> %cvt 4024} 4025 4026define <8 x float> @sitofp_load_8i16_to_8f32(ptr%a) { 4027; SSE2-LABEL: sitofp_load_8i16_to_8f32: 4028; SSE2: # %bb.0: 4029; SSE2-NEXT: movdqa (%rdi), %xmm1 4030; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 4031; SSE2-NEXT: psrad $16, %xmm0 4032; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 4033; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = 
xmm1[4,4,5,5,6,6,7,7] 4034; SSE2-NEXT: psrad $16, %xmm1 4035; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1 4036; SSE2-NEXT: retq 4037; 4038; SSE41-LABEL: sitofp_load_8i16_to_8f32: 4039; SSE41: # %bb.0: 4040; SSE41-NEXT: pmovsxwd 8(%rdi), %xmm1 4041; SSE41-NEXT: pmovsxwd (%rdi), %xmm0 4042; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0 4043; SSE41-NEXT: cvtdq2ps %xmm1, %xmm1 4044; SSE41-NEXT: retq 4045; 4046; AVX1-LABEL: sitofp_load_8i16_to_8f32: 4047; AVX1: # %bb.0: 4048; AVX1-NEXT: vpmovsxwd 8(%rdi), %xmm0 4049; AVX1-NEXT: vpmovsxwd (%rdi), %xmm1 4050; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4051; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 4052; AVX1-NEXT: retq 4053; 4054; AVX2-LABEL: sitofp_load_8i16_to_8f32: 4055; AVX2: # %bb.0: 4056; AVX2-NEXT: vpmovsxwd (%rdi), %ymm0 4057; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 4058; AVX2-NEXT: retq 4059; 4060; AVX512-LABEL: sitofp_load_8i16_to_8f32: 4061; AVX512: # %bb.0: 4062; AVX512-NEXT: vpmovsxwd (%rdi), %ymm0 4063; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0 4064; AVX512-NEXT: retq 4065 %ld = load <8 x i16>, ptr%a 4066 %cvt = sitofp <8 x i16> %ld to <8 x float> 4067 ret <8 x float> %cvt 4068} 4069 4070define <8 x float> @sitofp_load_8i8_to_8f32(ptr%a) { 4071; SSE2-LABEL: sitofp_load_8i8_to_8f32: 4072; SSE2: # %bb.0: 4073; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 4074; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 4075; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 4076; SSE2-NEXT: psrad $24, %xmm0 4077; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 4078; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 4079; SSE2-NEXT: psrad $24, %xmm1 4080; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1 4081; SSE2-NEXT: retq 4082; 4083; SSE41-LABEL: sitofp_load_8i8_to_8f32: 4084; SSE41: # %bb.0: 4085; SSE41-NEXT: pmovsxbd 4(%rdi), %xmm1 4086; SSE41-NEXT: pmovsxbd (%rdi), %xmm0 4087; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0 
4088; SSE41-NEXT: cvtdq2ps %xmm1, %xmm1 4089; SSE41-NEXT: retq 4090; 4091; AVX1-LABEL: sitofp_load_8i8_to_8f32: 4092; AVX1: # %bb.0: 4093; AVX1-NEXT: vpmovsxbd 4(%rdi), %xmm0 4094; AVX1-NEXT: vpmovsxbd (%rdi), %xmm1 4095; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4096; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 4097; AVX1-NEXT: retq 4098; 4099; AVX2-LABEL: sitofp_load_8i8_to_8f32: 4100; AVX2: # %bb.0: 4101; AVX2-NEXT: vpmovsxbd (%rdi), %ymm0 4102; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 4103; AVX2-NEXT: retq 4104; 4105; AVX512-LABEL: sitofp_load_8i8_to_8f32: 4106; AVX512: # %bb.0: 4107; AVX512-NEXT: vpmovsxbd (%rdi), %ymm0 4108; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0 4109; AVX512-NEXT: retq 4110 %ld = load <8 x i8>, ptr%a 4111 %cvt = sitofp <8 x i8> %ld to <8 x float> 4112 ret <8 x float> %cvt 4113} 4114 4115; 4116; Load Unsigned Integer to Float 4117; 4118 4119define <4 x float> @uitofp_load_4i64_to_4f32(ptr%a) { 4120; SSE2-LABEL: uitofp_load_4i64_to_4f32: 4121; SSE2: # %bb.0: 4122; SSE2-NEXT: movq 24(%rdi), %rax 4123; SSE2-NEXT: testq %rax, %rax 4124; SSE2-NEXT: js .LBB83_1 4125; SSE2-NEXT: # %bb.2: 4126; SSE2-NEXT: cvtsi2ss %rax, %xmm0 4127; SSE2-NEXT: jmp .LBB83_3 4128; SSE2-NEXT: .LBB83_1: 4129; SSE2-NEXT: movq %rax, %rcx 4130; SSE2-NEXT: shrq %rcx 4131; SSE2-NEXT: andl $1, %eax 4132; SSE2-NEXT: orq %rcx, %rax 4133; SSE2-NEXT: cvtsi2ss %rax, %xmm0 4134; SSE2-NEXT: addss %xmm0, %xmm0 4135; SSE2-NEXT: .LBB83_3: 4136; SSE2-NEXT: movq 16(%rdi), %rax 4137; SSE2-NEXT: testq %rax, %rax 4138; SSE2-NEXT: js .LBB83_4 4139; SSE2-NEXT: # %bb.5: 4140; SSE2-NEXT: cvtsi2ss %rax, %xmm1 4141; SSE2-NEXT: jmp .LBB83_6 4142; SSE2-NEXT: .LBB83_4: 4143; SSE2-NEXT: movq %rax, %rcx 4144; SSE2-NEXT: shrq %rcx 4145; SSE2-NEXT: andl $1, %eax 4146; SSE2-NEXT: orq %rcx, %rax 4147; SSE2-NEXT: cvtsi2ss %rax, %xmm1 4148; SSE2-NEXT: addss %xmm1, %xmm1 4149; SSE2-NEXT: .LBB83_6: 4150; SSE2-NEXT: movq (%rdi), %rax 4151; SSE2-NEXT: movq 8(%rdi), %rcx 4152; SSE2-NEXT: testq %rcx, %rcx 4153; SSE2-NEXT: js 
.LBB83_7 4154; SSE2-NEXT: # %bb.8: 4155; SSE2-NEXT: cvtsi2ss %rcx, %xmm2 4156; SSE2-NEXT: jmp .LBB83_9 4157; SSE2-NEXT: .LBB83_7: 4158; SSE2-NEXT: movq %rcx, %rdx 4159; SSE2-NEXT: shrq %rdx 4160; SSE2-NEXT: andl $1, %ecx 4161; SSE2-NEXT: orq %rdx, %rcx 4162; SSE2-NEXT: cvtsi2ss %rcx, %xmm2 4163; SSE2-NEXT: addss %xmm2, %xmm2 4164; SSE2-NEXT: .LBB83_9: 4165; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 4166; SSE2-NEXT: testq %rax, %rax 4167; SSE2-NEXT: js .LBB83_10 4168; SSE2-NEXT: # %bb.11: 4169; SSE2-NEXT: xorps %xmm0, %xmm0 4170; SSE2-NEXT: cvtsi2ss %rax, %xmm0 4171; SSE2-NEXT: jmp .LBB83_12 4172; SSE2-NEXT: .LBB83_10: 4173; SSE2-NEXT: movq %rax, %rcx 4174; SSE2-NEXT: shrq %rcx 4175; SSE2-NEXT: andl $1, %eax 4176; SSE2-NEXT: orq %rcx, %rax 4177; SSE2-NEXT: xorps %xmm0, %xmm0 4178; SSE2-NEXT: cvtsi2ss %rax, %xmm0 4179; SSE2-NEXT: addss %xmm0, %xmm0 4180; SSE2-NEXT: .LBB83_12: 4181; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 4182; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] 4183; SSE2-NEXT: retq 4184; 4185; SSE41-LABEL: uitofp_load_4i64_to_4f32: 4186; SSE41: # %bb.0: 4187; SSE41-NEXT: movdqa (%rdi), %xmm1 4188; SSE41-NEXT: movdqa 16(%rdi), %xmm2 4189; SSE41-NEXT: pmovsxbq {{.*#+}} xmm4 = [1,1] 4190; SSE41-NEXT: movdqa %xmm1, %xmm0 4191; SSE41-NEXT: pand %xmm4, %xmm0 4192; SSE41-NEXT: movdqa %xmm1, %xmm3 4193; SSE41-NEXT: psrlq $1, %xmm3 4194; SSE41-NEXT: por %xmm0, %xmm3 4195; SSE41-NEXT: movdqa %xmm1, %xmm5 4196; SSE41-NEXT: movdqa %xmm1, %xmm0 4197; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm5 4198; SSE41-NEXT: pextrq $1, %xmm5, %rax 4199; SSE41-NEXT: xorps %xmm0, %xmm0 4200; SSE41-NEXT: cvtsi2ss %rax, %xmm0 4201; SSE41-NEXT: movq %xmm5, %rax 4202; SSE41-NEXT: xorps %xmm3, %xmm3 4203; SSE41-NEXT: cvtsi2ss %rax, %xmm3 4204; SSE41-NEXT: insertps {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[2,3] 4205; SSE41-NEXT: pand %xmm2, %xmm4 4206; SSE41-NEXT: movdqa %xmm2, %xmm5 4207; SSE41-NEXT: psrlq $1, %xmm5 4208; SSE41-NEXT: por 
%xmm4, %xmm5 4209; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3] 4210; SSE41-NEXT: movaps %xmm2, %xmm0 4211; SSE41-NEXT: blendvpd %xmm0, %xmm5, %xmm2 4212; SSE41-NEXT: movq %xmm2, %rax 4213; SSE41-NEXT: xorps %xmm0, %xmm0 4214; SSE41-NEXT: cvtsi2ss %rax, %xmm0 4215; SSE41-NEXT: insertps {{.*#+}} xmm3 = xmm3[0,1],xmm0[0],xmm3[3] 4216; SSE41-NEXT: pextrq $1, %xmm2, %rax 4217; SSE41-NEXT: xorps %xmm0, %xmm0 4218; SSE41-NEXT: cvtsi2ss %rax, %xmm0 4219; SSE41-NEXT: insertps {{.*#+}} xmm3 = xmm3[0,1,2],xmm0[0] 4220; SSE41-NEXT: movaps %xmm3, %xmm2 4221; SSE41-NEXT: addps %xmm3, %xmm2 4222; SSE41-NEXT: movaps %xmm1, %xmm0 4223; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm3 4224; SSE41-NEXT: movaps %xmm3, %xmm0 4225; SSE41-NEXT: retq 4226; 4227; AVX1-LABEL: uitofp_load_4i64_to_4f32: 4228; AVX1: # %bb.0: 4229; AVX1-NEXT: vmovdqa (%rdi), %ymm0 4230; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm1 4231; AVX1-NEXT: vmovdqa 16(%rdi), %xmm2 4232; AVX1-NEXT: vpsrlq $1, %xmm2, %xmm3 4233; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 4234; AVX1-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm3 4235; AVX1-NEXT: vorpd %ymm3, %ymm1, %ymm1 4236; AVX1-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm1 4237; AVX1-NEXT: vpextrq $1, %xmm1, %rax 4238; AVX1-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3 4239; AVX1-NEXT: vmovq %xmm1, %rax 4240; AVX1-NEXT: vcvtsi2ss %rax, %xmm4, %xmm4 4241; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3] 4242; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 4243; AVX1-NEXT: vmovq %xmm1, %rax 4244; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4 4245; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3] 4246; AVX1-NEXT: vpextrq $1, %xmm1, %rax 4247; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm1 4248; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0] 4249; AVX1-NEXT: vaddps %xmm1, %xmm1, %xmm3 4250; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 4251; AVX1-NEXT: vblendvps %xmm0, %xmm3, %xmm1, %xmm0 4252; AVX1-NEXT: vzeroupper 4253; AVX1-NEXT: retq 4254; 4255; AVX2-LABEL: 
uitofp_load_4i64_to_4f32: 4256; AVX2: # %bb.0: 4257; AVX2-NEXT: vmovdqa (%rdi), %ymm0 4258; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,1,1,1] 4259; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm1 4260; AVX2-NEXT: vpsrlq $1, %ymm0, %ymm2 4261; AVX2-NEXT: vpor %ymm1, %ymm2, %ymm1 4262; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm1 4263; AVX2-NEXT: vpextrq $1, %xmm1, %rax 4264; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 4265; AVX2-NEXT: vmovq %xmm1, %rax 4266; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm3 4267; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 4268; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1 4269; AVX2-NEXT: vmovq %xmm1, %rax 4270; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3 4271; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3] 4272; AVX2-NEXT: vpextrq $1, %xmm1, %rax 4273; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm1 4274; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[0] 4275; AVX2-NEXT: vaddps %xmm1, %xmm1, %xmm2 4276; AVX2-NEXT: vpackssdw 16(%rdi), %xmm0, %xmm0 4277; AVX2-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 4278; AVX2-NEXT: vzeroupper 4279; AVX2-NEXT: retq 4280; 4281; AVX512F-LABEL: uitofp_load_4i64_to_4f32: 4282; AVX512F: # %bb.0: 4283; AVX512F-NEXT: vcvtusi2ssq 8(%rdi), %xmm0, %xmm0 4284; AVX512F-NEXT: vcvtusi2ssq (%rdi), %xmm1, %xmm1 4285; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 4286; AVX512F-NEXT: vcvtusi2ssq 16(%rdi), %xmm2, %xmm1 4287; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] 4288; AVX512F-NEXT: vcvtusi2ssq 24(%rdi), %xmm2, %xmm1 4289; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] 4290; AVX512F-NEXT: retq 4291; 4292; AVX512VL-LABEL: uitofp_load_4i64_to_4f32: 4293; AVX512VL: # %bb.0: 4294; AVX512VL-NEXT: vcvtusi2ssq 8(%rdi), %xmm0, %xmm0 4295; AVX512VL-NEXT: vcvtusi2ssq (%rdi), %xmm1, %xmm1 4296; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 4297; AVX512VL-NEXT: vcvtusi2ssq 16(%rdi), %xmm2, %xmm1 4298; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = 
xmm0[0,1],xmm1[0],xmm0[3] 4299; AVX512VL-NEXT: vcvtusi2ssq 24(%rdi), %xmm2, %xmm1 4300; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] 4301; AVX512VL-NEXT: retq 4302; 4303; AVX512DQ-LABEL: uitofp_load_4i64_to_4f32: 4304; AVX512DQ: # %bb.0: 4305; AVX512DQ-NEXT: vmovaps (%rdi), %ymm0 4306; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0 4307; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 4308; AVX512DQ-NEXT: vzeroupper 4309; AVX512DQ-NEXT: retq 4310; 4311; AVX512VLDQ-LABEL: uitofp_load_4i64_to_4f32: 4312; AVX512VLDQ: # %bb.0: 4313; AVX512VLDQ-NEXT: vcvtuqq2psy (%rdi), %xmm0 4314; AVX512VLDQ-NEXT: retq 4315 %ld = load <4 x i64>, ptr%a 4316 %cvt = uitofp <4 x i64> %ld to <4 x float> 4317 ret <4 x float> %cvt 4318} 4319 4320define <4 x float> @uitofp_load_4i32_to_4f32(ptr%a) { 4321; SSE2-LABEL: uitofp_load_4i32_to_4f32: 4322; SSE2: # %bb.0: 4323; SSE2-NEXT: movdqa (%rdi), %xmm0 4324; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535] 4325; SSE2-NEXT: pand %xmm0, %xmm1 4326; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 4327; SSE2-NEXT: psrld $16, %xmm0 4328; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 4329; SSE2-NEXT: subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 4330; SSE2-NEXT: addps %xmm1, %xmm0 4331; SSE2-NEXT: retq 4332; 4333; SSE41-LABEL: uitofp_load_4i32_to_4f32: 4334; SSE41: # %bb.0: 4335; SSE41-NEXT: movdqa (%rdi), %xmm0 4336; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1258291200,1258291200,1258291200,1258291200] 4337; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] 4338; SSE41-NEXT: psrld $16, %xmm0 4339; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] 4340; SSE41-NEXT: subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 4341; SSE41-NEXT: addps %xmm1, %xmm0 4342; SSE41-NEXT: retq 4343; 4344; AVX1-LABEL: uitofp_load_4i32_to_4f32: 4345; AVX1: # %bb.0: 4346; AVX1-NEXT: vmovdqa (%rdi), %xmm0 4347; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = 
xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] 4348; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 4349; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] 4350; AVX1-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 4351; AVX1-NEXT: vaddps %xmm0, %xmm1, %xmm0 4352; AVX1-NEXT: retq 4353; 4354; AVX2-LABEL: uitofp_load_4i32_to_4f32: 4355; AVX2: # %bb.0: 4356; AVX2-NEXT: vmovdqa (%rdi), %xmm0 4357; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1258291200,1258291200,1258291200,1258291200] 4358; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] 4359; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0 4360; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1392508928,1392508928,1392508928,1392508928] 4361; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] 4362; AVX2-NEXT: vbroadcastss {{.*#+}} xmm2 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11] 4363; AVX2-NEXT: vsubps %xmm2, %xmm0, %xmm0 4364; AVX2-NEXT: vaddps %xmm0, %xmm1, %xmm0 4365; AVX2-NEXT: retq 4366; 4367; AVX512F-LABEL: uitofp_load_4i32_to_4f32: 4368; AVX512F: # %bb.0: 4369; AVX512F-NEXT: vmovaps (%rdi), %xmm0 4370; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0 4371; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 4372; AVX512F-NEXT: vzeroupper 4373; AVX512F-NEXT: retq 4374; 4375; AVX512VL-LABEL: uitofp_load_4i32_to_4f32: 4376; AVX512VL: # %bb.0: 4377; AVX512VL-NEXT: vcvtudq2ps (%rdi), %xmm0 4378; AVX512VL-NEXT: retq 4379; 4380; AVX512DQ-LABEL: uitofp_load_4i32_to_4f32: 4381; AVX512DQ: # %bb.0: 4382; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0 4383; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0 4384; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 4385; AVX512DQ-NEXT: vzeroupper 4386; AVX512DQ-NEXT: retq 4387; 4388; AVX512VLDQ-LABEL: uitofp_load_4i32_to_4f32: 4389; AVX512VLDQ: # %bb.0: 4390; AVX512VLDQ-NEXT: vcvtudq2ps (%rdi), %xmm0 4391; AVX512VLDQ-NEXT: retq 4392 %ld = 
load <4 x i32>, ptr%a 4393 %cvt = uitofp <4 x i32> %ld to <4 x float> 4394 ret <4 x float> %cvt 4395} 4396 4397define <4 x float> @uitofp_load_4i16_to_4f32(ptr%a) { 4398; SSE2-LABEL: uitofp_load_4i16_to_4f32: 4399; SSE2: # %bb.0: 4400; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 4401; SSE2-NEXT: pxor %xmm1, %xmm1 4402; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 4403; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 4404; SSE2-NEXT: retq 4405; 4406; SSE41-LABEL: uitofp_load_4i16_to_4f32: 4407; SSE41: # %bb.0: 4408; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 4409; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0 4410; SSE41-NEXT: retq 4411; 4412; AVX-LABEL: uitofp_load_4i16_to_4f32: 4413; AVX: # %bb.0: 4414; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 4415; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 4416; AVX-NEXT: retq 4417 %ld = load <4 x i16>, ptr%a 4418 %cvt = uitofp <4 x i16> %ld to <4 x float> 4419 ret <4 x float> %cvt 4420} 4421 4422define <4 x float> @uitofp_load_4i8_to_4f32(ptr%a) { 4423; SSE2-LABEL: uitofp_load_4i8_to_4f32: 4424; SSE2: # %bb.0: 4425; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 4426; SSE2-NEXT: pxor %xmm1, %xmm1 4427; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 4428; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 4429; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 4430; SSE2-NEXT: retq 4431; 4432; SSE41-LABEL: uitofp_load_4i8_to_4f32: 4433; SSE41: # %bb.0: 4434; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 4435; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0 4436; SSE41-NEXT: retq 4437; 4438; AVX-LABEL: uitofp_load_4i8_to_4f32: 4439; AVX: # %bb.0: 4440; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = 
mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 4441; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 4442; AVX-NEXT: retq 4443 %ld = load <4 x i8>, ptr%a 4444 %cvt = uitofp <4 x i8> %ld to <4 x float> 4445 ret <4 x float> %cvt 4446} 4447 4448define <8 x float> @uitofp_load_8i64_to_8f32(ptr%a) { 4449; SSE2-LABEL: uitofp_load_8i64_to_8f32: 4450; SSE2: # %bb.0: 4451; SSE2-NEXT: movq 24(%rdi), %rax 4452; SSE2-NEXT: testq %rax, %rax 4453; SSE2-NEXT: js .LBB87_1 4454; SSE2-NEXT: # %bb.2: 4455; SSE2-NEXT: cvtsi2ss %rax, %xmm2 4456; SSE2-NEXT: jmp .LBB87_3 4457; SSE2-NEXT: .LBB87_1: 4458; SSE2-NEXT: movq %rax, %rcx 4459; SSE2-NEXT: shrq %rcx 4460; SSE2-NEXT: andl $1, %eax 4461; SSE2-NEXT: orq %rcx, %rax 4462; SSE2-NEXT: cvtsi2ss %rax, %xmm2 4463; SSE2-NEXT: addss %xmm2, %xmm2 4464; SSE2-NEXT: .LBB87_3: 4465; SSE2-NEXT: movq 16(%rdi), %rax 4466; SSE2-NEXT: testq %rax, %rax 4467; SSE2-NEXT: js .LBB87_4 4468; SSE2-NEXT: # %bb.5: 4469; SSE2-NEXT: cvtsi2ss %rax, %xmm1 4470; SSE2-NEXT: jmp .LBB87_6 4471; SSE2-NEXT: .LBB87_4: 4472; SSE2-NEXT: movq %rax, %rcx 4473; SSE2-NEXT: shrq %rcx 4474; SSE2-NEXT: andl $1, %eax 4475; SSE2-NEXT: orq %rcx, %rax 4476; SSE2-NEXT: cvtsi2ss %rax, %xmm1 4477; SSE2-NEXT: addss %xmm1, %xmm1 4478; SSE2-NEXT: .LBB87_6: 4479; SSE2-NEXT: movq (%rdi), %rax 4480; SSE2-NEXT: movq 8(%rdi), %rcx 4481; SSE2-NEXT: testq %rcx, %rcx 4482; SSE2-NEXT: js .LBB87_7 4483; SSE2-NEXT: # %bb.8: 4484; SSE2-NEXT: cvtsi2ss %rcx, %xmm3 4485; SSE2-NEXT: testq %rax, %rax 4486; SSE2-NEXT: jns .LBB87_11 4487; SSE2-NEXT: .LBB87_10: 4488; SSE2-NEXT: movq %rax, %rcx 4489; SSE2-NEXT: shrq %rcx 4490; SSE2-NEXT: andl $1, %eax 4491; SSE2-NEXT: orq %rcx, %rax 4492; SSE2-NEXT: cvtsi2ss %rax, %xmm0 4493; SSE2-NEXT: addss %xmm0, %xmm0 4494; SSE2-NEXT: jmp .LBB87_12 4495; SSE2-NEXT: .LBB87_7: 4496; SSE2-NEXT: movq %rcx, %rdx 4497; SSE2-NEXT: shrq %rdx 4498; SSE2-NEXT: andl $1, %ecx 4499; SSE2-NEXT: orq %rdx, %rcx 4500; SSE2-NEXT: cvtsi2ss %rcx, %xmm3 4501; 
SSE2-NEXT: addss %xmm3, %xmm3 4502; SSE2-NEXT: testq %rax, %rax 4503; SSE2-NEXT: js .LBB87_10 4504; SSE2-NEXT: .LBB87_11: 4505; SSE2-NEXT: cvtsi2ss %rax, %xmm0 4506; SSE2-NEXT: .LBB87_12: 4507; SSE2-NEXT: movq 56(%rdi), %rax 4508; SSE2-NEXT: testq %rax, %rax 4509; SSE2-NEXT: js .LBB87_13 4510; SSE2-NEXT: # %bb.14: 4511; SSE2-NEXT: cvtsi2ss %rax, %xmm5 4512; SSE2-NEXT: jmp .LBB87_15 4513; SSE2-NEXT: .LBB87_13: 4514; SSE2-NEXT: movq %rax, %rcx 4515; SSE2-NEXT: shrq %rcx 4516; SSE2-NEXT: andl $1, %eax 4517; SSE2-NEXT: orq %rcx, %rax 4518; SSE2-NEXT: cvtsi2ss %rax, %xmm5 4519; SSE2-NEXT: addss %xmm5, %xmm5 4520; SSE2-NEXT: .LBB87_15: 4521; SSE2-NEXT: movq 48(%rdi), %rax 4522; SSE2-NEXT: testq %rax, %rax 4523; SSE2-NEXT: js .LBB87_16 4524; SSE2-NEXT: # %bb.17: 4525; SSE2-NEXT: cvtsi2ss %rax, %xmm4 4526; SSE2-NEXT: jmp .LBB87_18 4527; SSE2-NEXT: .LBB87_16: 4528; SSE2-NEXT: movq %rax, %rcx 4529; SSE2-NEXT: shrq %rcx 4530; SSE2-NEXT: andl $1, %eax 4531; SSE2-NEXT: orq %rcx, %rax 4532; SSE2-NEXT: cvtsi2ss %rax, %xmm4 4533; SSE2-NEXT: addss %xmm4, %xmm4 4534; SSE2-NEXT: .LBB87_18: 4535; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 4536; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 4537; SSE2-NEXT: movq 40(%rdi), %rax 4538; SSE2-NEXT: testq %rax, %rax 4539; SSE2-NEXT: js .LBB87_19 4540; SSE2-NEXT: # %bb.20: 4541; SSE2-NEXT: xorps %xmm2, %xmm2 4542; SSE2-NEXT: cvtsi2ss %rax, %xmm2 4543; SSE2-NEXT: jmp .LBB87_21 4544; SSE2-NEXT: .LBB87_19: 4545; SSE2-NEXT: movq %rax, %rcx 4546; SSE2-NEXT: shrq %rcx 4547; SSE2-NEXT: andl $1, %eax 4548; SSE2-NEXT: orq %rcx, %rax 4549; SSE2-NEXT: xorps %xmm2, %xmm2 4550; SSE2-NEXT: cvtsi2ss %rax, %xmm2 4551; SSE2-NEXT: addss %xmm2, %xmm2 4552; SSE2-NEXT: .LBB87_21: 4553; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] 4554; SSE2-NEXT: unpcklps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1] 4555; SSE2-NEXT: movq 32(%rdi), %rax 4556; SSE2-NEXT: testq %rax, %rax 4557; SSE2-NEXT: js .LBB87_22 4558; 
SSE2-NEXT: # %bb.23: 4559; SSE2-NEXT: xorps %xmm1, %xmm1 4560; SSE2-NEXT: cvtsi2ss %rax, %xmm1 4561; SSE2-NEXT: jmp .LBB87_24 4562; SSE2-NEXT: .LBB87_22: 4563; SSE2-NEXT: movq %rax, %rcx 4564; SSE2-NEXT: shrq %rcx 4565; SSE2-NEXT: andl $1, %eax 4566; SSE2-NEXT: orq %rcx, %rax 4567; SSE2-NEXT: xorps %xmm1, %xmm1 4568; SSE2-NEXT: cvtsi2ss %rax, %xmm1 4569; SSE2-NEXT: addss %xmm1, %xmm1 4570; SSE2-NEXT: .LBB87_24: 4571; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 4572; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0] 4573; SSE2-NEXT: retq 4574; 4575; SSE41-LABEL: uitofp_load_8i64_to_8f32: 4576; SSE41: # %bb.0: 4577; SSE41-NEXT: movdqa (%rdi), %xmm4 4578; SSE41-NEXT: movdqa 16(%rdi), %xmm5 4579; SSE41-NEXT: movdqa 32(%rdi), %xmm6 4580; SSE41-NEXT: movdqa 48(%rdi), %xmm2 4581; SSE41-NEXT: pmovsxbq {{.*#+}} xmm7 = [1,1] 4582; SSE41-NEXT: movdqa %xmm4, %xmm0 4583; SSE41-NEXT: pand %xmm7, %xmm0 4584; SSE41-NEXT: movdqa %xmm4, %xmm1 4585; SSE41-NEXT: psrlq $1, %xmm1 4586; SSE41-NEXT: por %xmm0, %xmm1 4587; SSE41-NEXT: movdqa %xmm4, %xmm3 4588; SSE41-NEXT: movdqa %xmm4, %xmm0 4589; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm3 4590; SSE41-NEXT: pextrq $1, %xmm3, %rax 4591; SSE41-NEXT: xorps %xmm0, %xmm0 4592; SSE41-NEXT: cvtsi2ss %rax, %xmm0 4593; SSE41-NEXT: movq %xmm3, %rax 4594; SSE41-NEXT: xorps %xmm3, %xmm3 4595; SSE41-NEXT: cvtsi2ss %rax, %xmm3 4596; SSE41-NEXT: insertps {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[2,3] 4597; SSE41-NEXT: movdqa %xmm5, %xmm0 4598; SSE41-NEXT: pand %xmm7, %xmm0 4599; SSE41-NEXT: movdqa %xmm5, %xmm1 4600; SSE41-NEXT: psrlq $1, %xmm1 4601; SSE41-NEXT: por %xmm0, %xmm1 4602; SSE41-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm5[1,3] 4603; SSE41-NEXT: movaps %xmm5, %xmm0 4604; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm5 4605; SSE41-NEXT: movq %xmm5, %rax 4606; SSE41-NEXT: xorps %xmm0, %xmm0 4607; SSE41-NEXT: cvtsi2ss %rax, %xmm0 4608; SSE41-NEXT: insertps {{.*#+}} xmm3 = xmm3[0,1],xmm0[0],xmm3[3] 4609; SSE41-NEXT: pextrq $1, %xmm5, 
%rax 4610; SSE41-NEXT: xorps %xmm0, %xmm0 4611; SSE41-NEXT: cvtsi2ss %rax, %xmm0 4612; SSE41-NEXT: insertps {{.*#+}} xmm3 = xmm3[0,1,2],xmm0[0] 4613; SSE41-NEXT: movaps %xmm3, %xmm1 4614; SSE41-NEXT: addps %xmm3, %xmm1 4615; SSE41-NEXT: movaps %xmm4, %xmm0 4616; SSE41-NEXT: blendvps %xmm0, %xmm1, %xmm3 4617; SSE41-NEXT: movdqa %xmm6, %xmm0 4618; SSE41-NEXT: pand %xmm7, %xmm0 4619; SSE41-NEXT: movdqa %xmm6, %xmm1 4620; SSE41-NEXT: psrlq $1, %xmm1 4621; SSE41-NEXT: por %xmm0, %xmm1 4622; SSE41-NEXT: movdqa %xmm6, %xmm4 4623; SSE41-NEXT: movdqa %xmm6, %xmm0 4624; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm4 4625; SSE41-NEXT: pextrq $1, %xmm4, %rax 4626; SSE41-NEXT: xorps %xmm0, %xmm0 4627; SSE41-NEXT: cvtsi2ss %rax, %xmm0 4628; SSE41-NEXT: movq %xmm4, %rax 4629; SSE41-NEXT: xorps %xmm1, %xmm1 4630; SSE41-NEXT: cvtsi2ss %rax, %xmm1 4631; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[2,3] 4632; SSE41-NEXT: pand %xmm2, %xmm7 4633; SSE41-NEXT: movdqa %xmm2, %xmm4 4634; SSE41-NEXT: psrlq $1, %xmm4 4635; SSE41-NEXT: por %xmm7, %xmm4 4636; SSE41-NEXT: shufps {{.*#+}} xmm6 = xmm6[1,3],xmm2[1,3] 4637; SSE41-NEXT: movaps %xmm2, %xmm0 4638; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2 4639; SSE41-NEXT: movq %xmm2, %rax 4640; SSE41-NEXT: xorps %xmm0, %xmm0 4641; SSE41-NEXT: cvtsi2ss %rax, %xmm0 4642; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0],xmm1[3] 4643; SSE41-NEXT: pextrq $1, %xmm2, %rax 4644; SSE41-NEXT: xorps %xmm0, %xmm0 4645; SSE41-NEXT: cvtsi2ss %rax, %xmm0 4646; SSE41-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[0] 4647; SSE41-NEXT: movaps %xmm1, %xmm2 4648; SSE41-NEXT: addps %xmm1, %xmm2 4649; SSE41-NEXT: movaps %xmm6, %xmm0 4650; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm1 4651; SSE41-NEXT: movaps %xmm3, %xmm0 4652; SSE41-NEXT: retq 4653; 4654; AVX1-LABEL: uitofp_load_8i64_to_8f32: 4655; AVX1: # %bb.0: 4656; AVX1-NEXT: vmovaps (%rdi), %ymm0 4657; AVX1-NEXT: vmovaps 32(%rdi), %ymm1 4658; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1,1,1,1] 4659; AVX1-NEXT: 
vandps %ymm2, %ymm1, %ymm3 4660; AVX1-NEXT: vpsrlq $1, %xmm1, %xmm4 4661; AVX1-NEXT: vmovdqa 48(%rdi), %xmm5 4662; AVX1-NEXT: vpsrlq $1, %xmm5, %xmm6 4663; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm4, %ymm4 4664; AVX1-NEXT: vorps %ymm3, %ymm4, %ymm3 4665; AVX1-NEXT: vblendvpd %ymm1, %ymm3, %ymm1, %ymm3 4666; AVX1-NEXT: vpextrq $1, %xmm3, %rax 4667; AVX1-NEXT: vcvtsi2ss %rax, %xmm7, %xmm4 4668; AVX1-NEXT: vmovq %xmm3, %rax 4669; AVX1-NEXT: vcvtsi2ss %rax, %xmm7, %xmm6 4670; AVX1-NEXT: vinsertps {{.*#+}} xmm4 = xmm6[0],xmm4[0],xmm6[2,3] 4671; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3 4672; AVX1-NEXT: vmovq %xmm3, %rax 4673; AVX1-NEXT: vcvtsi2ss %rax, %xmm7, %xmm6 4674; AVX1-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm6[0],xmm4[3] 4675; AVX1-NEXT: vpextrq $1, %xmm3, %rax 4676; AVX1-NEXT: vcvtsi2ss %rax, %xmm7, %xmm3 4677; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0,1,2],xmm3[0] 4678; AVX1-NEXT: vaddps %xmm3, %xmm3, %xmm4 4679; AVX1-NEXT: vpackssdw %xmm5, %xmm1, %xmm1 4680; AVX1-NEXT: vblendvps %xmm1, %xmm4, %xmm3, %xmm1 4681; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm2 4682; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm3 4683; AVX1-NEXT: vmovdqa 16(%rdi), %xmm4 4684; AVX1-NEXT: vpsrlq $1, %xmm4, %xmm5 4685; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm3, %ymm3 4686; AVX1-NEXT: vorps %ymm2, %ymm3, %ymm2 4687; AVX1-NEXT: vblendvpd %ymm0, %ymm2, %ymm0, %ymm2 4688; AVX1-NEXT: vpextrq $1, %xmm2, %rax 4689; AVX1-NEXT: vcvtsi2ss %rax, %xmm7, %xmm3 4690; AVX1-NEXT: vmovq %xmm2, %rax 4691; AVX1-NEXT: vcvtsi2ss %rax, %xmm7, %xmm5 4692; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm5[0],xmm3[0],xmm5[2,3] 4693; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2 4694; AVX1-NEXT: vmovq %xmm2, %rax 4695; AVX1-NEXT: vcvtsi2ss %rax, %xmm7, %xmm5 4696; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm5[0],xmm3[3] 4697; AVX1-NEXT: vpextrq $1, %xmm2, %rax 4698; AVX1-NEXT: vcvtsi2ss %rax, %xmm7, %xmm2 4699; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[0] 4700; AVX1-NEXT: vaddps %xmm2, %xmm2, %xmm3 4701; AVX1-NEXT: 
vpackssdw %xmm4, %xmm0, %xmm0 4702; AVX1-NEXT: vblendvps %xmm0, %xmm3, %xmm2, %xmm0 4703; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 4704; AVX1-NEXT: retq 4705; 4706; AVX2-LABEL: uitofp_load_8i64_to_8f32: 4707; AVX2: # %bb.0: 4708; AVX2-NEXT: vmovaps (%rdi), %ymm0 4709; AVX2-NEXT: vmovdqa 32(%rdi), %ymm1 4710; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,1,1,1] 4711; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm3 4712; AVX2-NEXT: vpsrlq $1, %ymm1, %ymm4 4713; AVX2-NEXT: vpor %ymm3, %ymm4, %ymm3 4714; AVX2-NEXT: vblendvpd %ymm1, %ymm3, %ymm1, %ymm3 4715; AVX2-NEXT: vpextrq $1, %xmm3, %rax 4716; AVX2-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4 4717; AVX2-NEXT: vmovq %xmm3, %rax 4718; AVX2-NEXT: vcvtsi2ss %rax, %xmm5, %xmm5 4719; AVX2-NEXT: vinsertps {{.*#+}} xmm4 = xmm5[0],xmm4[0],xmm5[2,3] 4720; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm3 4721; AVX2-NEXT: vmovq %xmm3, %rax 4722; AVX2-NEXT: vcvtsi2ss %rax, %xmm6, %xmm5 4723; AVX2-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm5[0],xmm4[3] 4724; AVX2-NEXT: vpextrq $1, %xmm3, %rax 4725; AVX2-NEXT: vcvtsi2ss %rax, %xmm6, %xmm3 4726; AVX2-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0,1,2],xmm3[0] 4727; AVX2-NEXT: vaddps %xmm3, %xmm3, %xmm4 4728; AVX2-NEXT: vpackssdw 48(%rdi), %xmm1, %xmm1 4729; AVX2-NEXT: vblendvps %xmm1, %xmm4, %xmm3, %xmm1 4730; AVX2-NEXT: vandps %ymm2, %ymm0, %ymm2 4731; AVX2-NEXT: vpsrlq $1, %ymm0, %ymm3 4732; AVX2-NEXT: vpor %ymm2, %ymm3, %ymm2 4733; AVX2-NEXT: vblendvpd %ymm0, %ymm2, %ymm0, %ymm2 4734; AVX2-NEXT: vpextrq $1, %xmm2, %rax 4735; AVX2-NEXT: vcvtsi2ss %rax, %xmm6, %xmm3 4736; AVX2-NEXT: vmovq %xmm2, %rax 4737; AVX2-NEXT: vcvtsi2ss %rax, %xmm6, %xmm4 4738; AVX2-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3] 4739; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm2 4740; AVX2-NEXT: vmovq %xmm2, %rax 4741; AVX2-NEXT: vcvtsi2ss %rax, %xmm6, %xmm4 4742; AVX2-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3] 4743; AVX2-NEXT: vpextrq $1, %xmm2, %rax 4744; AVX2-NEXT: vcvtsi2ss %rax, %xmm6, %xmm2 4745; AVX2-NEXT: 
vinsertps {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[0] 4746; AVX2-NEXT: vaddps %xmm2, %xmm2, %xmm3 4747; AVX2-NEXT: vpackssdw 16(%rdi), %xmm0, %xmm0 4748; AVX2-NEXT: vblendvps %xmm0, %xmm3, %xmm2, %xmm0 4749; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 4750; AVX2-NEXT: retq 4751; 4752; AVX512F-LABEL: uitofp_load_8i64_to_8f32: 4753; AVX512F: # %bb.0: 4754; AVX512F-NEXT: vcvtusi2ssq 40(%rdi), %xmm0, %xmm0 4755; AVX512F-NEXT: vcvtusi2ssq 32(%rdi), %xmm1, %xmm1 4756; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 4757; AVX512F-NEXT: vcvtusi2ssq 48(%rdi), %xmm2, %xmm1 4758; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] 4759; AVX512F-NEXT: vcvtusi2ssq 56(%rdi), %xmm2, %xmm1 4760; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] 4761; AVX512F-NEXT: vcvtusi2ssq 8(%rdi), %xmm2, %xmm1 4762; AVX512F-NEXT: vcvtusi2ssq (%rdi), %xmm2, %xmm2 4763; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 4764; AVX512F-NEXT: vcvtusi2ssq 16(%rdi), %xmm3, %xmm2 4765; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 4766; AVX512F-NEXT: vcvtusi2ssq 24(%rdi), %xmm3, %xmm2 4767; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0] 4768; AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4769; AVX512F-NEXT: retq 4770; 4771; AVX512VL-LABEL: uitofp_load_8i64_to_8f32: 4772; AVX512VL: # %bb.0: 4773; AVX512VL-NEXT: vcvtusi2ssq 40(%rdi), %xmm0, %xmm0 4774; AVX512VL-NEXT: vcvtusi2ssq 32(%rdi), %xmm1, %xmm1 4775; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 4776; AVX512VL-NEXT: vcvtusi2ssq 48(%rdi), %xmm2, %xmm1 4777; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] 4778; AVX512VL-NEXT: vcvtusi2ssq 56(%rdi), %xmm2, %xmm1 4779; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] 4780; AVX512VL-NEXT: vcvtusi2ssq 8(%rdi), %xmm2, %xmm1 4781; AVX512VL-NEXT: vcvtusi2ssq (%rdi), %xmm2, %xmm2 4782; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 4783; 
AVX512VL-NEXT: vcvtusi2ssq 16(%rdi), %xmm3, %xmm2 4784; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 4785; AVX512VL-NEXT: vcvtusi2ssq 24(%rdi), %xmm3, %xmm2 4786; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0] 4787; AVX512VL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4788; AVX512VL-NEXT: retq 4789; 4790; AVX512DQ-LABEL: uitofp_load_8i64_to_8f32: 4791; AVX512DQ: # %bb.0: 4792; AVX512DQ-NEXT: vcvtuqq2ps (%rdi), %ymm0 4793; AVX512DQ-NEXT: retq 4794; 4795; AVX512VLDQ-LABEL: uitofp_load_8i64_to_8f32: 4796; AVX512VLDQ: # %bb.0: 4797; AVX512VLDQ-NEXT: vcvtuqq2ps (%rdi), %ymm0 4798; AVX512VLDQ-NEXT: retq 4799 %ld = load <8 x i64>, ptr%a 4800 %cvt = uitofp <8 x i64> %ld to <8 x float> 4801 ret <8 x float> %cvt 4802} 4803 4804define <8 x float> @uitofp_load_8i32_to_8f32(ptr%a) { 4805; SSE2-LABEL: uitofp_load_8i32_to_8f32: 4806; SSE2: # %bb.0: 4807; SSE2-NEXT: movdqa (%rdi), %xmm0 4808; SSE2-NEXT: movdqa 16(%rdi), %xmm1 4809; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535] 4810; SSE2-NEXT: movdqa %xmm0, %xmm3 4811; SSE2-NEXT: pand %xmm2, %xmm3 4812; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1258291200,1258291200,1258291200,1258291200] 4813; SSE2-NEXT: por %xmm4, %xmm3 4814; SSE2-NEXT: psrld $16, %xmm0 4815; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [1392508928,1392508928,1392508928,1392508928] 4816; SSE2-NEXT: por %xmm5, %xmm0 4817; SSE2-NEXT: movaps {{.*#+}} xmm6 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11] 4818; SSE2-NEXT: subps %xmm6, %xmm0 4819; SSE2-NEXT: addps %xmm3, %xmm0 4820; SSE2-NEXT: pand %xmm1, %xmm2 4821; SSE2-NEXT: por %xmm4, %xmm2 4822; SSE2-NEXT: psrld $16, %xmm1 4823; SSE2-NEXT: por %xmm5, %xmm1 4824; SSE2-NEXT: subps %xmm6, %xmm1 4825; SSE2-NEXT: addps %xmm2, %xmm1 4826; SSE2-NEXT: retq 4827; 4828; SSE41-LABEL: uitofp_load_8i32_to_8f32: 4829; SSE41: # %bb.0: 4830; SSE41-NEXT: movdqa (%rdi), %xmm0 4831; SSE41-NEXT: movdqa 16(%rdi), %xmm1 4832; SSE41-NEXT: movdqa {{.*#+}} xmm2 = 
[1258291200,1258291200,1258291200,1258291200] 4833; SSE41-NEXT: movdqa %xmm0, %xmm3 4834; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7] 4835; SSE41-NEXT: psrld $16, %xmm0 4836; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [1392508928,1392508928,1392508928,1392508928] 4837; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1],xmm0[2],xmm4[3],xmm0[4],xmm4[5],xmm0[6],xmm4[7] 4838; SSE41-NEXT: movaps {{.*#+}} xmm5 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11] 4839; SSE41-NEXT: subps %xmm5, %xmm0 4840; SSE41-NEXT: addps %xmm3, %xmm0 4841; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7] 4842; SSE41-NEXT: psrld $16, %xmm1 4843; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1],xmm1[2],xmm4[3],xmm1[4],xmm4[5],xmm1[6],xmm4[7] 4844; SSE41-NEXT: subps %xmm5, %xmm1 4845; SSE41-NEXT: addps %xmm2, %xmm1 4846; SSE41-NEXT: retq 4847; 4848; AVX1-LABEL: uitofp_load_8i32_to_8f32: 4849; AVX1: # %bb.0: 4850; AVX1-NEXT: vmovdqa (%rdi), %ymm0 4851; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 4852; AVX1-NEXT: vmovdqa 16(%rdi), %xmm2 4853; AVX1-NEXT: vpsrld $16, %xmm2, %xmm2 4854; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 4855; AVX1-NEXT: vcvtdq2ps %ymm1, %ymm1 4856; AVX1-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 4857; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 4858; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 4859; AVX1-NEXT: vaddps %ymm0, %ymm1, %ymm0 4860; AVX1-NEXT: retq 4861; 4862; AVX2-LABEL: uitofp_load_8i32_to_8f32: 4863; AVX2: # %bb.0: 4864; AVX2-NEXT: vmovdqa (%rdi), %ymm0 4865; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200] 4866; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 4867; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0 4868; AVX2-NEXT: 
vpbroadcastd {{.*#+}} ymm2 = [1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928] 4869; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15] 4870; AVX2-NEXT: vbroadcastss {{.*#+}} ymm2 = [5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11,5.49764202E+11] 4871; AVX2-NEXT: vsubps %ymm2, %ymm0, %ymm0 4872; AVX2-NEXT: vaddps %ymm0, %ymm1, %ymm0 4873; AVX2-NEXT: retq 4874; 4875; AVX512F-LABEL: uitofp_load_8i32_to_8f32: 4876; AVX512F: # %bb.0: 4877; AVX512F-NEXT: vmovaps (%rdi), %ymm0 4878; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0 4879; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 4880; AVX512F-NEXT: retq 4881; 4882; AVX512VL-LABEL: uitofp_load_8i32_to_8f32: 4883; AVX512VL: # %bb.0: 4884; AVX512VL-NEXT: vcvtudq2ps (%rdi), %ymm0 4885; AVX512VL-NEXT: retq 4886; 4887; AVX512DQ-LABEL: uitofp_load_8i32_to_8f32: 4888; AVX512DQ: # %bb.0: 4889; AVX512DQ-NEXT: vmovaps (%rdi), %ymm0 4890; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0 4891; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 4892; AVX512DQ-NEXT: retq 4893; 4894; AVX512VLDQ-LABEL: uitofp_load_8i32_to_8f32: 4895; AVX512VLDQ: # %bb.0: 4896; AVX512VLDQ-NEXT: vcvtudq2ps (%rdi), %ymm0 4897; AVX512VLDQ-NEXT: retq 4898 %ld = load <8 x i32>, ptr%a 4899 %cvt = uitofp <8 x i32> %ld to <8 x float> 4900 ret <8 x float> %cvt 4901} 4902 4903define <8 x float> @uitofp_load_8i16_to_8f32(ptr%a) { 4904; SSE2-LABEL: uitofp_load_8i16_to_8f32: 4905; SSE2: # %bb.0: 4906; SSE2-NEXT: movdqa (%rdi), %xmm1 4907; SSE2-NEXT: pxor %xmm2, %xmm2 4908; SSE2-NEXT: movdqa %xmm1, %xmm0 4909; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 4910; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 4911; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 
4912; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1 4913; SSE2-NEXT: retq 4914; 4915; SSE41-LABEL: uitofp_load_8i16_to_8f32: 4916; SSE41: # %bb.0: 4917; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 4918; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 4919; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0 4920; SSE41-NEXT: cvtdq2ps %xmm1, %xmm1 4921; SSE41-NEXT: retq 4922; 4923; AVX1-LABEL: uitofp_load_8i16_to_8f32: 4924; AVX1: # %bb.0: 4925; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 4926; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 4927; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4928; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 4929; AVX1-NEXT: retq 4930; 4931; AVX2-LABEL: uitofp_load_8i16_to_8f32: 4932; AVX2: # %bb.0: 4933; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 4934; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 4935; AVX2-NEXT: retq 4936; 4937; AVX512-LABEL: uitofp_load_8i16_to_8f32: 4938; AVX512: # %bb.0: 4939; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 4940; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0 4941; AVX512-NEXT: retq 4942 %ld = load <8 x i16>, ptr%a 4943 %cvt = uitofp <8 x i16> %ld to <8 x float> 4944 ret <8 x float> %cvt 4945} 4946 4947define <8 x float> @uitofp_load_8i8_to_8f32(ptr%a) { 4948; SSE2-LABEL: uitofp_load_8i8_to_8f32: 4949; SSE2: # %bb.0: 4950; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 4951; SSE2-NEXT: pxor %xmm2, %xmm2 4952; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 4953; SSE2-NEXT: movdqa %xmm1, %xmm0 4954; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 4955; SSE2-NEXT: 
cvtdq2ps %xmm0, %xmm0
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1
; SSE2-NEXT: retq
;
; SSE41-LABEL: uitofp_load_8i8_to_8f32:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT: cvtdq2ps %xmm1, %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: uitofp_load_8i8_to_8f32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: uitofp_load_8i8_to_8f32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: uitofp_load_8i8_to_8f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512-NEXT: retq
  %ld = load <8 x i8>, ptr %a
  %cvt = uitofp <8 x i8> %ld to <8 x float>
  ret <8 x float> %cvt
}

;
; Aggregates
;

; Packed struct whose field indices match the extractvalue uses below:
; field 0 = <8 x i8> (unused here), field 1 = <8 x i16> source, field 2 = destination ptr.
%Arguments = type <{ <8 x i8>, <8 x i16>, ptr }>
; Loads the whole aggregate, sitofp-converts its <8 x i16> field and stores
; the <8 x float> result through the aggregate's ptr field.
define void @aggregate_sitofp_8i16_to_8f32(ptr nocapture readonly %a0) {
; SSE2-LABEL: aggregate_sitofp_8i16_to_8f32:
; SSE2: # %bb.0:
; SSE2-NEXT: movq 24(%rdi), %rax
; SSE2-NEXT: movdqu 8(%rdi), %xmm0
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT: psrad $16, %xmm1
; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1
; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE2-NEXT: movaps %xmm0, 16(%rax)
; SSE2-NEXT: movaps %xmm1, (%rax)
; SSE2-NEXT: retq
;
; SSE41-LABEL: aggregate_sitofp_8i16_to_8f32:
; SSE41: # %bb.0:
; SSE41-NEXT: movq 24(%rdi), %rax
; SSE41-NEXT: pmovsxwd 16(%rdi), %xmm0
; SSE41-NEXT: pmovsxwd 8(%rdi), %xmm1
; SSE41-NEXT: cvtdq2ps %xmm1, %xmm1
; SSE41-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE41-NEXT: movaps %xmm0, 16(%rax)
; SSE41-NEXT: movaps %xmm1, (%rax)
; SSE41-NEXT: retq
;
; AVX1-LABEL: aggregate_sitofp_8i16_to_8f32:
; AVX1: # %bb.0:
; AVX1-NEXT: movq 24(%rdi), %rax
; AVX1-NEXT: vpmovsxwd 16(%rdi), %xmm0
; AVX1-NEXT: vpmovsxwd 8(%rdi), %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT: vmovaps %ymm0, (%rax)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: aggregate_sitofp_8i16_to_8f32:
; AVX2: # %bb.0:
; AVX2-NEXT: movq 24(%rdi), %rax
; AVX2-NEXT: vpmovsxwd 8(%rdi), %ymm0
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT: vmovaps %ymm0, (%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: aggregate_sitofp_8i16_to_8f32:
; AVX512: # %bb.0:
; AVX512-NEXT: movq 24(%rdi), %rax
; AVX512-NEXT: vpmovsxwd 8(%rdi), %ymm0
; AVX512-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512-NEXT: vmovaps %ymm0, (%rax)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = load %Arguments, ptr %a0, align 1
  %2 = extractvalue %Arguments %1, 1
  %3 = extractvalue %Arguments %1, 2
  %4 = sitofp <8 x i16> %2 to <8 x float>
  store <8 x float> %4, ptr %3, align 32
  ret void
}

; The next four tests convert a scalar integer argument and insert the result
; into lane 0 of an existing vector (should select the scalar cvtsi2ss/sd forms).

define <2 x double> @sitofp_i32_to_2f64(<2 x double> %a0, i32 %a1) nounwind {
; SSE-LABEL: sitofp_i32_to_2f64:
; SSE: # %bb.0:
; SSE-NEXT: cvtsi2sd %edi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sitofp_i32_to_2f64:
; AVX: # %bb.0:
; AVX-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0
; AVX-NEXT: retq
  %cvt = sitofp i32 %a1 to double
  %res = insertelement <2 x double> %a0, double %cvt, i32 0
  ret <2 x double> %res
}

define <4 x float> @sitofp_i32_to_4f32(<4 x float> %a0, i32 %a1) nounwind {
; SSE-LABEL: sitofp_i32_to_4f32:
; SSE: # %bb.0:
; SSE-NEXT: cvtsi2ss %edi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sitofp_i32_to_4f32:
; AVX: # %bb.0:
; AVX-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
; AVX-NEXT: retq
  %cvt = sitofp i32 %a1 to float
  %res = insertelement <4 x float> %a0, float %cvt, i32 0
  ret <4 x float> %res
}

define <2 x double> @sitofp_i64_to_2f64(<2 x double> %a0, i64 %a1) nounwind {
; SSE-LABEL: sitofp_i64_to_2f64:
; SSE: # %bb.0:
; SSE-NEXT: cvtsi2sd %rdi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sitofp_i64_to_2f64:
; AVX: # %bb.0:
; AVX-NEXT: vcvtsi2sd %rdi, %xmm0, %xmm0
; AVX-NEXT: retq
  %cvt = sitofp i64 %a1 to double
  %res = insertelement <2 x double> %a0, double %cvt, i32 0
  ret <2 x double> %res
}

define <4 x float> @sitofp_i64_to_4f32(<4 x float> %a0, i64 %a1) nounwind {
; SSE-LABEL: sitofp_i64_to_4f32:
; SSE: # %bb.0:
; SSE-NEXT: cvtsi2ss %rdi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sitofp_i64_to_4f32:
; AVX: # %bb.0:
; AVX-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
; AVX-NEXT: retq
  %cvt = sitofp i64 %a1 to float
  %res = insertelement <4 x float> %a0, float %cvt, i32 0
  ret <4 x float> %res
}

; Extract from int vector and convert to FP.

define float @extract0_sitofp_v4i32_f32(<4 x i32> %x) nounwind {
; SSE-LABEL: extract0_sitofp_v4i32_f32:
; SSE: # %bb.0:
; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: extract0_sitofp_v4i32_f32:
; AVX: # %bb.0:
; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT: retq
  %e = extractelement <4 x i32> %x, i32 0
  %r = sitofp i32 %e to float
  ret float %r
}

; Extracted element has a second (integer) use, so the scalar value must
; stay live in a GPR alongside the vector conversion.
define float @extract0_sitofp_v4i32_f32i_multiuse1(<4 x i32> %x) nounwind {
; SSE-LABEL: extract0_sitofp_v4i32_f32i_multiuse1:
; SSE: # %bb.0:
; SSE-NEXT: movd %xmm0, %eax
; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE-NEXT: incl %eax
; SSE-NEXT: cvtsi2ss %eax, %xmm1
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: extract0_sitofp_v4i32_f32i_multiuse1:
; AVX: # %bb.0:
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT: incl %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm1, %xmm1
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %e = extractelement <4 x i32> %x, i32 0
  %f = sitofp i32 %e to float
  %e1 = add i32 %e, 1
  %f1 = sitofp i32 %e1 to float
  %r = fdiv float %f, %f1
  ret float %r
}

; Extracted element is also stored to memory as an integer.
define float @extract0_sitofp_v4i32_f32_multiuse2(<4 x i32> %x, ptr %p) nounwind {
; SSE-LABEL: extract0_sitofp_v4i32_f32_multiuse2:
; SSE: # %bb.0:
; SSE-NEXT: cvtdq2ps %xmm0, %xmm1
; SSE-NEXT: movss %xmm0, (%rdi)
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: extract0_sitofp_v4i32_f32_multiuse2:
; AVX: # %bb.0:
; AVX-NEXT: vcvtdq2ps %xmm0, %xmm1
; AVX-NEXT: vmovss %xmm0, (%rdi)
; AVX-NEXT: vmovaps %xmm1, %xmm0
; AVX-NEXT: retq
  %e = extractelement <4 x i32> %x, i32 0
  %r = sitofp i32 %e to float
  store i32 %e, ptr %p
  ret float %r
}

define double @extract0_sitofp_v4i32_f64(<4 x i32> %x) nounwind {
; SSE-LABEL: extract0_sitofp_v4i32_f64:
; SSE: # %bb.0:
; SSE-NEXT: movd %xmm0, %eax
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2sd %eax, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: extract0_sitofp_v4i32_f64:
; AVX: # %bb.0:
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT: retq
  %e = extractelement <4 x i32> %x, i32 0
  %r = sitofp i32 %e to double
  ret double %r
}

define float @extract0_uitofp_v4i32_f32(<4 x i32> %x) nounwind {
; SSE-LABEL: extract0_uitofp_v4i32_f32:
; SSE: # %bb.0:
; SSE-NEXT: movd %xmm0, %eax
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2ss %rax, %xmm0
; SSE-NEXT: retq
;
; VEX-LABEL: extract0_uitofp_v4i32_f32:
; VEX: # %bb.0:
; VEX-NEXT: vmovd %xmm0, %eax
; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: extract0_uitofp_v4i32_f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: extract0_uitofp_v4i32_f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvtudq2ps %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: extract0_uitofp_v4i32_f32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: extract0_uitofp_v4i32_f32:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvtudq2ps %xmm0, %xmm0
; AVX512VLDQ-NEXT: retq
  %e = extractelement <4 x i32> %x, i32 0
  %r = uitofp i32 %e to float
  ret float %r
}

define double @extract0_uitofp_v4i32_f64(<4 x i32> %x) nounwind {
; SSE-LABEL: extract0_uitofp_v4i32_f64:
; SSE: # %bb.0:
; SSE-NEXT: movd %xmm0, %eax
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2sd %rax, %xmm0
; SSE-NEXT: retq
;
; VEX-LABEL: extract0_uitofp_v4i32_f64:
; VEX: # %bb.0:
; VEX-NEXT: vmovd %xmm0, %eax
; VEX-NEXT: vcvtsi2sd %rax, %xmm1, %xmm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: extract0_uitofp_v4i32_f64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: extract0_uitofp_v4i32_f64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvtudq2pd %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: extract0_uitofp_v4i32_f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: extract0_uitofp_v4i32_f64:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %xmm0
; AVX512VLDQ-NEXT: retq
  %e = extractelement <4 x i32> %x, i32 0
  %r = uitofp i32 %e to double
  ret double %r
}

; Extract non-zero element from int vector and convert to FP.

define float @extract3_sitofp_v4i32_f32(<4 x i32> %x) nounwind {
; SSE-LABEL: extract3_sitofp_v4i32_f32:
; SSE: # %bb.0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: extract3_sitofp_v4i32_f32:
; AVX: # %bb.0:
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT: retq
  %e = extractelement <4 x i32> %x, i32 3
  %r = sitofp i32 %e to float
  ret float %r
}

define double @extract3_sitofp_v4i32_f64(<4 x i32> %x) nounwind {
; SSE2-LABEL: extract3_sitofp_v4i32_f64:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2sd %eax, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: extract3_sitofp_v4i32_f64:
; SSE41: # %bb.0:
; SSE41-NEXT: extractps $3, %xmm0, %eax
; SSE41-NEXT: xorps %xmm0, %xmm0
; SSE41-NEXT: cvtsi2sd %eax, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: extract3_sitofp_v4i32_f64:
; AVX: # %bb.0:
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
; AVX-NEXT: retq
  %e = extractelement <4 x i32> %x, i32 3
  %r = sitofp i32 %e to double
  ret double %r
}

define float @extract3_uitofp_v4i32_f32(<4 x i32> %x) nounwind {
; SSE2-LABEL: extract3_uitofp_v4i32_f32:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: extract3_uitofp_v4i32_f32:
; SSE41: # %bb.0:
; SSE41-NEXT: extractps $3, %xmm0, %eax
; SSE41-NEXT: xorps %xmm0, %xmm0
; SSE41-NEXT: cvtsi2ss %rax, %xmm0
; SSE41-NEXT: retq
;
; VEX-LABEL: extract3_uitofp_v4i32_f32:
; VEX: # %bb.0:
; VEX-NEXT: vextractps $3, %xmm0, %eax
; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: extract3_uitofp_v4i32_f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: extract3_uitofp_v4i32_f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512VL-NEXT: vcvtudq2ps %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: extract3_uitofp_v4i32_f32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: extract3_uitofp_v4i32_f32:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512VLDQ-NEXT: vcvtudq2ps %xmm0, %xmm0
; AVX512VLDQ-NEXT: retq
  %e = extractelement <4 x i32> %x, i32 3
  %r = uitofp i32 %e to float
  ret float %r
}

define double @extract3_uitofp_v4i32_f64(<4 x i32> %x) nounwind {
; SSE2-LABEL: extract3_uitofp_v4i32_f64:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2sd %rax, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: extract3_uitofp_v4i32_f64:
; SSE41: # %bb.0:
; SSE41-NEXT: extractps $3, %xmm0, %eax
; SSE41-NEXT: xorps %xmm0, %xmm0
; SSE41-NEXT: cvtsi2sd %rax, %xmm0
; SSE41-NEXT: retq
;
; VEX-LABEL: extract3_uitofp_v4i32_f64:
; VEX: # %bb.0:
; VEX-NEXT: vextractps $3, %xmm0, %eax
; VEX-NEXT: vcvtsi2sd %rax, %xmm1, %xmm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: extract3_uitofp_v4i32_f64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: extract3_uitofp_v4i32_f64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512VL-NEXT: vcvtudq2pd %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: extract3_uitofp_v4i32_f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: extract3_uitofp_v4i32_f64:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %xmm0
; AVX512VLDQ-NEXT: retq
  %e = extractelement <4 x i32> %x, i32 3
  %r = uitofp i32 %e to double
  ret double %r
}

; Regression test for PR43609 (fn attribute #0 enables unsafe-fp-math):
; two <2 x i64> uitofp conversions plus a fast fadd of 0.5 each, results
; stored unaligned to consecutive <2 x double> slots at %x.
define void @PR43609(ptr nocapture %x, <2 x i64> %y) #0 {
; SSE2-LABEL: PR43609:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2,2]
; SSE2-NEXT: paddq %xmm0, %xmm1
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295]
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: pand %xmm2, %xmm3
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
; SSE2-NEXT: por %xmm4, %xmm3
; SSE2-NEXT: psrlq $32, %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
; SSE2-NEXT: por %xmm5, %xmm0
; SSE2-NEXT: movapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
; SSE2-NEXT: subpd %xmm6, %xmm0
; SSE2-NEXT: addpd %xmm3, %xmm0
; SSE2-NEXT: pand %xmm1, %xmm2
; SSE2-NEXT: por %xmm4, %xmm2
; SSE2-NEXT: psrlq $32, %xmm1
; SSE2-NEXT: por %xmm5, %xmm1
; SSE2-NEXT: subpd %xmm6, %xmm1
; SSE2-NEXT: addpd %xmm2, %xmm1
; SSE2-NEXT: movapd {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
; SSE2-NEXT: addpd %xmm2, %xmm0
; SSE2-NEXT: addpd %xmm2, %xmm1
; SSE2-NEXT: movupd %xmm0, (%rdi)
; SSE2-NEXT: movupd %xmm1, 16(%rdi)
; SSE2-NEXT: retq
;
; SSE41-LABEL: PR43609:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovsxbq {{.*#+}} xmm1 = [2,2]
; SSE41-NEXT: paddq %xmm0, %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: movdqa %xmm0, %xmm3
; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
; SSE41-NEXT: por %xmm4, %xmm3
; SSE41-NEXT: psrlq $32, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
; SSE41-NEXT: por %xmm5, %xmm0
; SSE41-NEXT: movapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
; SSE41-NEXT: subpd %xmm6, %xmm0
; SSE41-NEXT: addpd %xmm3, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; SSE41-NEXT: por %xmm4, %xmm2
; SSE41-NEXT: psrlq $32, %xmm1
; SSE41-NEXT: por %xmm5, %xmm1
; SSE41-NEXT: subpd %xmm6, %xmm1
; SSE41-NEXT: addpd %xmm2, %xmm1
; SSE41-NEXT: movapd {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
; SSE41-NEXT: addpd %xmm2, %xmm0
; SSE41-NEXT: addpd %xmm2, %xmm1
; SSE41-NEXT: movupd %xmm0, (%rdi)
; SSE41-NEXT: movupd %xmm1, 16(%rdi)
; SSE41-NEXT: retq
;
; AVX1-LABEL: PR43609:
; AVX1: # %bb.0:
; AVX1-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
; AVX1-NEXT: # xmm4 = mem[0,0]
; AVX1-NEXT: vpor %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0
; AVX1-NEXT: vmovddup {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
; AVX1-NEXT: # xmm5 = mem[0,0]
; AVX1-NEXT: vpor %xmm5, %xmm0, %xmm0
; AVX1-NEXT: vmovddup {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
; AVX1-NEXT: # xmm6 = mem[0,0]
; AVX1-NEXT: vsubpd %xmm6, %xmm0, %xmm0
; AVX1-NEXT: vaddpd %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT: vpor %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpsrlq $32, %xmm1, %xmm1
; AVX1-NEXT: vpor %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vsubpd %xmm6, %xmm1, %xmm1
; AVX1-NEXT: vaddpd %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
; AVX1-NEXT: # xmm2 = mem[0,0]
; AVX1-NEXT: vaddpd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vaddpd %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vmovupd %xmm0, (%rdi)
; AVX1-NEXT: vmovupd %xmm1, 16(%rdi)
; AVX1-NEXT: retq
;
; AVX2-LABEL: PR43609:
; AVX2: # %bb.0:
; AVX2-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpblendd {{.*#+}} xmm3 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
; AVX2-NEXT: vpor %xmm4, %xmm3, %xmm3
; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm0
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
; AVX2-NEXT: vpor %xmm5, %xmm0, %xmm0
; AVX2-NEXT: vmovddup {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
; AVX2-NEXT: # xmm6 = mem[0,0]
; AVX2-NEXT: vsubpd %xmm6, %xmm0, %xmm0
; AVX2-NEXT: vaddpd %xmm0, %xmm3, %xmm0
; AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX2-NEXT: vpor %xmm4, %xmm2, %xmm2
; AVX2-NEXT: vpsrlq $32, %xmm1, %xmm1
; AVX2-NEXT: vpor %xmm5, %xmm1, %xmm1
; AVX2-NEXT: vsubpd %xmm6, %xmm1, %xmm1
; AVX2-NEXT: vaddpd %xmm1, %xmm2, %xmm1
; AVX2-NEXT: vmovddup {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
; AVX2-NEXT: # xmm2 = mem[0,0]
; AVX2-NEXT: vaddpd %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vaddpd %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vmovupd %xmm0, (%rdi)
; AVX2-NEXT: vmovupd %xmm1, 16(%rdi)
; AVX2-NEXT: retq
;
; AVX512F-LABEL: PR43609:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpblendd {{.*#+}} xmm3 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
; AVX512F-NEXT: vpor %xmm4, %xmm3, %xmm3
; AVX512F-NEXT: vpsrlq $32, %xmm0, %xmm0
; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
; AVX512F-NEXT: vpor %xmm5, %xmm0, %xmm0
; AVX512F-NEXT: vmovddup {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
; AVX512F-NEXT: # xmm6 = mem[0,0]
; AVX512F-NEXT: vsubpd %xmm6, %xmm0, %xmm0
; AVX512F-NEXT: vaddpd %xmm0, %xmm3, %xmm0
; AVX512F-NEXT: vpblendd {{.*#+}} xmm2 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX512F-NEXT: vpor %xmm4, %xmm2, %xmm2
; AVX512F-NEXT: vpsrlq $32, %xmm1, %xmm1
; AVX512F-NEXT: vpor %xmm5, %xmm1, %xmm1
; AVX512F-NEXT: vsubpd %xmm6, %xmm1, %xmm1
; AVX512F-NEXT: vaddpd %xmm1, %xmm2, %xmm1
; AVX512F-NEXT: vmovddup {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
; AVX512F-NEXT: # xmm2 = mem[0,0]
; AVX512F-NEXT: vaddpd %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vaddpd %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vmovupd %xmm0, (%rdi)
; AVX512F-NEXT: vmovupd %xmm1, 16(%rdi)
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: PR43609:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm1
; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm3 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
; AVX512VL-NEXT: vpor %xmm4, %xmm3, %xmm3
; AVX512VL-NEXT: vpsrlq $32, %xmm0, %xmm0
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
; AVX512VL-NEXT: vpor %xmm5, %xmm0, %xmm0
; AVX512VL-NEXT: vmovddup {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
; AVX512VL-NEXT: # xmm6 = mem[0,0]
; AVX512VL-NEXT: vsubpd %xmm6, %xmm0, %xmm0
; AVX512VL-NEXT: vaddpd %xmm0, %xmm3, %xmm0
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm2 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX512VL-NEXT: vpor %xmm4, %xmm2, %xmm2
; AVX512VL-NEXT: vpsrlq $32, %xmm1, %xmm1
; AVX512VL-NEXT: vpor %xmm5, %xmm1, %xmm1
; AVX512VL-NEXT: vsubpd %xmm6, %xmm1, %xmm1
; AVX512VL-NEXT: vaddpd %xmm1, %xmm2, %xmm1
; AVX512VL-NEXT: vmovddup {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
; AVX512VL-NEXT: # xmm2 = mem[0,0]
; AVX512VL-NEXT: vaddpd %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT: vaddpd %xmm2, %xmm1, %xmm1
; AVX512VL-NEXT: vmovupd %xmm0, (%rdi)
; AVX512VL-NEXT: vmovupd %xmm1, 16(%rdi)
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: PR43609:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0
; AVX512DQ-NEXT: vcvtuqq2pd %zmm1, %zmm1
; AVX512DQ-NEXT: vmovddup {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
; AVX512DQ-NEXT: # xmm2 = mem[0,0]
; AVX512DQ-NEXT: vaddpd %xmm2, %xmm0, %xmm0
; AVX512DQ-NEXT: vaddpd %xmm2, %xmm1, %xmm1
; AVX512DQ-NEXT: vmovupd %xmm0, (%rdi)
; AVX512DQ-NEXT: vmovupd %xmm1, 16(%rdi)
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: PR43609:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm1
; AVX512VLDQ-NEXT: vcvtuqq2pd %xmm0, %xmm0
; AVX512VLDQ-NEXT: vcvtuqq2pd %xmm1, %xmm1
; AVX512VLDQ-NEXT: vmovddup {{.*#+}} xmm2 = [5.0E-1,5.0E-1]
; AVX512VLDQ-NEXT: # xmm2 = mem[0,0]
; AVX512VLDQ-NEXT: vaddpd %xmm2, %xmm0, %xmm0
; AVX512VLDQ-NEXT: vaddpd %xmm2, %xmm1, %xmm1
; AVX512VLDQ-NEXT: vmovupd %xmm0, (%rdi)
; AVX512VLDQ-NEXT: vmovupd %xmm1, 16(%rdi)
; AVX512VLDQ-NEXT: retq
  %step.add.epil = add <2 x i64> %y, <i64 2, i64 2>
  %t20 = uitofp <2 x i64> %y to <2 x double>
  %t21 = uitofp <2 x i64> %step.add.epil to <2 x double>
  %t22 = fadd fast <2 x double> %t20, <double 5.0e-01, double 5.0e-01>
  %t23 = fadd fast <2 x double> %t21, <double 5.0e-01, double 5.0e-01>
  store <2 x double> %t22, ptr %x, align 8
  %t26 = getelementptr inbounds double, ptr %x, i64 2
  store <2 x double> %t23, ptr %t26, align 8
  ret void
}

attributes #0 = { "unsafe-fp-math"="true" }