; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefix=X86-AVX
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64-SSE
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=X64-AVX

;PR29079

define <4 x float> @mask_ucvt_4i32_4f32(<4 x i32> %a) {
; X86-SSE-LABEL: mask_ucvt_4i32_4f32:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: mask_ucvt_4i32_4f32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: mask_ucvt_4i32_4f32:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: mask_ucvt_4i32_4f32:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %and = and <4 x i32> %a, <i32 127, i32 255, i32 4095, i32 65595>
  %cvt = uitofp <4 x i32> %and to <4 x float>
  ret <4 x float> %cvt
}

define <4 x double> @mask_ucvt_4i32_4f64(<4 x i32> %a) {
; X86-SSE-LABEL: mask_ucvt_4i32_4f64:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT:    cvtdq2pd %xmm0, %xmm2
; X86-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X86-SSE-NEXT:    cvtdq2pd %xmm0, %xmm1
; X86-SSE-NEXT:    movaps %xmm2, %xmm0
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: mask_ucvt_4i32_4f64:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT:    vcvtdq2pd %xmm0, %ymm0
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: mask_ucvt_4i32_4f64:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE-NEXT:    cvtdq2pd %xmm0, %xmm2
; X64-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; X64-SSE-NEXT:    cvtdq2pd %xmm0, %xmm1
; X64-SSE-NEXT:    movaps %xmm2, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: mask_ucvt_4i32_4f64:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    vcvtdq2pd %xmm0, %ymm0
; X64-AVX-NEXT:    retq
  %and = and <4 x i32> %a, <i32 127, i32 255, i32 4095, i32 65595>
  %cvt = uitofp <4 x i32> %and to <4 x double>
  ret <4 x double> %cvt
}

; Regression noticed in D56387
define <4 x float> @lshr_truncate_mask_ucvt_4i64_4f32(ptr %p0) {
; X86-SSE-LABEL: lshr_truncate_mask_ucvt_4i64_4f32:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movups (%eax), %xmm0
; X86-SSE-NEXT:    movups 16(%eax), %xmm1
; X86-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X86-SSE-NEXT:    psrld $16, %xmm0
; X86-SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; X86-SSE-NEXT:    mulps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: lshr_truncate_mask_ucvt_4i64_4f32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vmovups (%eax), %xmm0
; X86-AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],mem[0,2]
; X86-AVX-NEXT:    vpsrld $16, %xmm0, %xmm0
; X86-AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-AVX-NEXT:    vmulps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: lshr_truncate_mask_ucvt_4i64_4f32:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movups (%rdi), %xmm0
; X64-SSE-NEXT:    movups 16(%rdi), %xmm1
; X64-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-SSE-NEXT:    psrld $16, %xmm0
; X64-SSE-NEXT:    cvtdq2ps %xmm0, %xmm0
; X64-SSE-NEXT:    mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: lshr_truncate_mask_ucvt_4i64_4f32:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovups (%rdi), %xmm0
; X64-AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],mem[0,2]
; X64-AVX-NEXT:    vpsrld $16, %xmm0, %xmm0
; X64-AVX-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-AVX-NEXT:    vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %load = load <4 x i64>, ptr %p0, align 2
  %lshr = lshr <4 x i64> %load, <i64 16, i64 16, i64 16, i64 16>
  %and = and <4 x i64> %lshr, <i64 65535, i64 65535, i64 65535, i64 65535>
  %uitofp = uitofp <4 x i64> %and to <4 x float>
  %fmul = fmul <4 x float> %uitofp, <float 0x3EF0001000000000, float 0x3EF0001000000000, float 0x3EF0001000000000, float 0x3EF0001000000000>
  ret <4 x float> %fmul
}
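
; Note: in each test above, the mask proves every element being converted is
; in the range [0, 2^31), so the sign bit is known zero and the unsigned
; conversions can be lowered to the signed cvtdq2ps/cvtdq2pd instructions that
; the assertions check for, rather than a longer unsigned expansion.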