xref: /llvm-project/llvm/test/CodeGen/X86/avx512fp16-mscatter.ll (revision 7b3bbd83c0c24087072ec5b22a76799ab31f87d5)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw -mattr=+avx512vl -mattr=+avx512fp16 | FileCheck %s
3
; test_mscatter_v16f16: scatter of sixteen half values through per-lane addresses.
;
; The IR body builds a vector of 16 pointers with a getelementptr over half
; elements (%base indexed by the <16 x i32> %index) and hands %val to
; llvm.masked.scatter together with a constant all-true <16 x i1> mask.
;
; What the generated assertions below expect from llc:
;   * %rdi (presumably %base, the first pointer argument) is broadcast into
;     64-bit lanes of %zmm3.
;   * Each 8-lane half of the index vector is sign-extended to 64 bits
;     (vpmovsxdq), added to itself (i.e. doubled, presumably the scaling by the
;     2-byte half element size), and then added to the broadcast base.
;   * No scatter instruction appears. Instead there are 16 scalar vmovsh
;     stores, one per lane: each address is moved into %rax with vmovq or
;     vpextrq, and each half value is brought to the low element of an xmm
;     register with a shift or shuffle before being stored.
;
; The assertions are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of editing them
; by hand.
;
; NOTE(review): the call passes i32 4 as its third operand while the addresses
; come from a half-typed gep; if that operand is the alignment, confirm that 4
; (rather than 2) is what this test intends.
4define void @test_mscatter_v16f16(ptr %base, <16 x i32> %index, <16 x half> %val)
5; CHECK-LABEL: test_mscatter_v16f16:
6; CHECK:       # %bb.0:
7; CHECK-NEXT:    vpbroadcastq %rdi, %zmm3
8; CHECK-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
9; CHECK-NEXT:    vpmovsxdq %ymm2, %zmm2
10; CHECK-NEXT:    vpaddq %zmm2, %zmm2, %zmm2
11; CHECK-NEXT:    vpaddq %zmm2, %zmm3, %zmm2
12; CHECK-NEXT:    vpmovsxdq %ymm0, %zmm0
13; CHECK-NEXT:    vpaddq %zmm0, %zmm0, %zmm0
14; CHECK-NEXT:    vpaddq %zmm0, %zmm3, %zmm0
15; CHECK-NEXT:    vmovq %xmm0, %rax
16; CHECK-NEXT:    vmovsh %xmm1, (%rax)
17; CHECK-NEXT:    vpsrld $16, %xmm1, %xmm3
18; CHECK-NEXT:    vpextrq $1, %xmm0, %rax
19; CHECK-NEXT:    vmovsh %xmm3, (%rax)
20; CHECK-NEXT:    vmovshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
21; CHECK-NEXT:    vextracti128 $1, %ymm0, %xmm4
22; CHECK-NEXT:    vmovq %xmm4, %rax
23; CHECK-NEXT:    vmovsh %xmm3, (%rax)
24; CHECK-NEXT:    vpsrlq $48, %xmm1, %xmm3
25; CHECK-NEXT:    vpextrq $1, %xmm4, %rax
26; CHECK-NEXT:    vmovsh %xmm3, (%rax)
27; CHECK-NEXT:    vshufpd {{.*#+}} xmm3 = xmm1[1,0]
28; CHECK-NEXT:    vextracti32x4 $2, %zmm0, %xmm4
29; CHECK-NEXT:    vmovq %xmm4, %rax
30; CHECK-NEXT:    vmovsh %xmm3, (%rax)
31; CHECK-NEXT:    vpsrldq {{.*#+}} xmm3 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
32; CHECK-NEXT:    vpextrq $1, %xmm4, %rax
33; CHECK-NEXT:    vmovsh %xmm3, (%rax)
34; CHECK-NEXT:    vshufps {{.*#+}} xmm3 = xmm1[3,3,3,3]
35; CHECK-NEXT:    vextracti32x4 $3, %zmm0, %xmm0
36; CHECK-NEXT:    vmovq %xmm0, %rax
37; CHECK-NEXT:    vmovsh %xmm3, (%rax)
38; CHECK-NEXT:    vpsrldq {{.*#+}} xmm3 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
39; CHECK-NEXT:    vpextrq $1, %xmm0, %rax
40; CHECK-NEXT:    vmovsh %xmm3, (%rax)
41; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm0
42; CHECK-NEXT:    vmovq %xmm2, %rax
43; CHECK-NEXT:    vmovsh %xmm0, (%rax)
44; CHECK-NEXT:    vpsrld $16, %xmm0, %xmm1
45; CHECK-NEXT:    vpextrq $1, %xmm2, %rax
46; CHECK-NEXT:    vmovsh %xmm1, (%rax)
47; CHECK-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
48; CHECK-NEXT:    vextracti128 $1, %ymm2, %xmm3
49; CHECK-NEXT:    vmovq %xmm3, %rax
50; CHECK-NEXT:    vmovsh %xmm1, (%rax)
51; CHECK-NEXT:    vpsrlq $48, %xmm0, %xmm1
52; CHECK-NEXT:    vpextrq $1, %xmm3, %rax
53; CHECK-NEXT:    vmovsh %xmm1, (%rax)
54; CHECK-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
55; CHECK-NEXT:    vextracti32x4 $2, %zmm2, %xmm3
56; CHECK-NEXT:    vmovq %xmm3, %rax
57; CHECK-NEXT:    vmovsh %xmm1, (%rax)
58; CHECK-NEXT:    vpsrldq {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
59; CHECK-NEXT:    vpextrq $1, %xmm3, %rax
60; CHECK-NEXT:    vmovsh %xmm1, (%rax)
61; CHECK-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
62; CHECK-NEXT:    vextracti32x4 $3, %zmm2, %xmm2
63; CHECK-NEXT:    vmovq %xmm2, %rax
64; CHECK-NEXT:    vmovsh %xmm1, (%rax)
65; CHECK-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
66; CHECK-NEXT:    vpextrq $1, %xmm2, %rax
67; CHECK-NEXT:    vmovsh %xmm0, (%rax)
68; CHECK-NEXT:    vzeroupper
69; CHECK-NEXT:    retq
70{
71  %gep = getelementptr half, ptr %base, <16 x i32> %index
72  call void @llvm.masked.scatter.v16f16.v16p0(<16 x half> %val, <16 x ptr> %gep, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
73  ret void
74}
; Prototype of the masked-scatter intrinsic instance used by the test above:
; it takes the <16 x half> values to store, the <16 x ptr> destination
; addresses, an i32 (the alignment operand, per the LLVM LangRef description of
; llvm.masked.scatter), and a <16 x i1> per-lane mask; it returns nothing.
75declare void @llvm.masked.scatter.v16f16.v16p0(<16 x half> , <16 x ptr> , i32 , <16 x i1>)
76