xref: /llvm-project/llvm/test/CodeGen/X86/pr116153.ll (revision e088249b74586590c9e143d85b97a175acc9465e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s
3
; Test body: takes a <16 x half> vector, builds a per-lane "ordered" (not-NaN)
; mask, narrows the first four mask lanes through the SSE2 packssdw intrinsic,
; widens the result with zeros to 256 bits and stores it through a null pointer.
; The assertion lines inside the function are autogenerated (see the NOTE at the
; top of the file); regenerate them with utils/update_llc_test_checks.py instead
; of editing them by hand.
4define void @_test_func(<16 x half> %0) #0 {
5; CHECK-LABEL: _test_func:
6; CHECK:       # %bb.0:
7; CHECK-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
8; CHECK-NEXT:    vcvtph2ps %xmm1, %xmm1
9; CHECK-NEXT:    xorl %eax, %eax
10; CHECK-NEXT:    vucomiss %xmm1, %xmm1
11; CHECK-NEXT:    movl $65535, %ecx # imm = 0xFFFF
12; CHECK-NEXT:    movl $0, %edx
13; CHECK-NEXT:    cmovnpl %ecx, %edx
14; CHECK-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
15; CHECK-NEXT:    vcvtph2ps %xmm1, %xmm1
16; CHECK-NEXT:    vucomiss %xmm1, %xmm1
17; CHECK-NEXT:    movl $0, %esi
18; CHECK-NEXT:    cmovnpl %ecx, %esi
19; CHECK-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[1,1,1,1,4,5,6,7]
20; CHECK-NEXT:    vcvtph2ps %xmm1, %xmm1
21; CHECK-NEXT:    vucomiss %xmm1, %xmm1
22; CHECK-NEXT:    movl $0, %edi
23; CHECK-NEXT:    cmovnpl %ecx, %edi
24; CHECK-NEXT:    vcvtph2ps %xmm0, %xmm0
25; CHECK-NEXT:    vucomiss %xmm0, %xmm0
26; CHECK-NEXT:    cmovnpl %ecx, %eax
27; CHECK-NEXT:    vmovd %eax, %xmm0
28; CHECK-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0
29; CHECK-NEXT:    vpinsrw $2, %esi, %xmm0, %xmm0
30; CHECK-NEXT:    vpinsrw $3, %edx, %xmm0, %xmm0
31; CHECK-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
32; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
33; CHECK-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
34; CHECK-NEXT:    vmovdqu %xmm1, 16
35; CHECK-NEXT:    vmovdqu %xmm0, 0
36; CHECK-NEXT:    vzeroupper
37; CHECK-NEXT:    retq
  ; Per-lane ordered compare against zero: a lane is true when the
  ; corresponding half in %0 is not NaN (the zero operand is never NaN).
38  %2 = fcmp ord <16 x half> %0, zeroinitializer
  ; Sign-extend each i1 to i32, giving all-ones for true and zero for false.
39  %3 = sext <16 x i1> %2 to <16 x i32>
  ; Keep only mask lanes 0-3; the remaining twelve lanes are unused.
40  %4 = shufflevector <16 x i32> %3, <16 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; Pack the four i32 lanes to i16 with signed saturation; the second operand
  ; is zero, so the upper four i16 lanes of the result are zero.
41  %5 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %4, <4 x i32> zeroinitializer)
  ; Widen to <16 x i16>: indices 0-7 select %5, indices 8-15 select the
  ; zero vector.
42  %6 = shufflevector <8 x i16> %5, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ; Reinterpret as 32 bytes and store to the null address with byte alignment;
  ; the expected output above shows this as two 16-byte unaligned stores to
  ; absolute addresses 0 and 16.
43  %7 = bitcast <16 x i16> %6 to <32 x i8>
44  store <32 x i8> %7, ptr null, align 1
45  ret void
46}
47
; SSE2 pack-with-signed-saturation intrinsic called by @_test_func:
; two <4 x i32> inputs, one <8 x i16> result.
48declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>)
49
; Attribute group #0 is attached to @_test_func and fixes the subtarget
; feature set for that function. It includes +f16c and +avx512f; the expected
; output uses vcvtph2ps for the half-to-float conversions.
50attributes #0 = { "target-features"="+aes,+avx,+avx2,+avx512f,+avx512vnni,+cmov,+crc32,+cx16,+cx8,+evex512,+f16c,+fma,+fxsr,+mmx,+pclmul,+popcnt,+prfchw,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" }
51