; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw -mattr=+avx512vl -mattr=+avx512fp16 | FileCheck %s

; Scatter of sixteen half values through a vector of pointers formed from a
; scalar base and a <16 x i32> index vector, with every mask lane enabled.
;
; The expected code contains no hardware scatter instruction. It builds the
; sixteen 64-bit addresses in two zmm registers (indices sign-extended to
; 64 bits, doubled, then added to the broadcast base pointer) and then stores
; each lane separately: the address is moved into %rax, the matching half
; element is shifted or shuffled down to element 0 of an xmm register, and
; vmovsh writes it out.
define void @test_mscatter_v16f16(ptr %base, <16 x i32> %index, <16 x half> %val)
; CHECK-LABEL: test_mscatter_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastq %rdi, %zmm3
; CHECK-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
; CHECK-NEXT:    vpmovsxdq %ymm2, %zmm2
; CHECK-NEXT:    vpaddq %zmm2, %zmm2, %zmm2
; CHECK-NEXT:    vpaddq %zmm2, %zmm3, %zmm2
; CHECK-NEXT:    vpmovsxdq %ymm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm3, %zmm0
; CHECK-NEXT:    vmovq %xmm0, %rax
; CHECK-NEXT:    vmovsh %xmm1, (%rax)
; CHECK-NEXT:    vpsrld $16, %xmm1, %xmm3
; CHECK-NEXT:    vpextrq $1, %xmm0, %rax
; CHECK-NEXT:    vmovsh %xmm3, (%rax)
; CHECK-NEXT:    vmovshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
; CHECK-NEXT:    vextracti128 $1, %ymm0, %xmm4
; CHECK-NEXT:    vmovq %xmm4, %rax
; CHECK-NEXT:    vmovsh %xmm3, (%rax)
; CHECK-NEXT:    vpsrlq $48, %xmm1, %xmm3
; CHECK-NEXT:    vpextrq $1, %xmm4, %rax
; CHECK-NEXT:    vmovsh %xmm3, (%rax)
; CHECK-NEXT:    vshufpd {{.*#+}} xmm3 = xmm1[1,0]
; CHECK-NEXT:    vextracti32x4 $2, %zmm0, %xmm4
; CHECK-NEXT:    vmovq %xmm4, %rax
; CHECK-NEXT:    vmovsh %xmm3, (%rax)
; CHECK-NEXT:    vpsrldq {{.*#+}} xmm3 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    vpextrq $1, %xmm4, %rax
; CHECK-NEXT:    vmovsh %xmm3, (%rax)
; CHECK-NEXT:    vshufps {{.*#+}} xmm3 = xmm1[3,3,3,3]
; CHECK-NEXT:    vextracti32x4 $3, %zmm0, %xmm0
; CHECK-NEXT:    vmovq %xmm0, %rax
; CHECK-NEXT:    vmovsh %xmm3, (%rax)
; CHECK-NEXT:    vpsrldq {{.*#+}} xmm3 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    vpextrq $1, %xmm0, %rax
; CHECK-NEXT:    vmovsh %xmm3, (%rax)
; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm0
; CHECK-NEXT:    vmovq %xmm2, %rax
; CHECK-NEXT:    vmovsh %xmm0, (%rax)
; CHECK-NEXT:    vpsrld $16, %xmm0, %xmm1
; CHECK-NEXT:    vpextrq $1, %xmm2, %rax
; CHECK-NEXT:    vmovsh %xmm1, (%rax)
; CHECK-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-NEXT:    vextracti128 $1, %ymm2, %xmm3
; CHECK-NEXT:    vmovq %xmm3, %rax
; CHECK-NEXT:    vmovsh %xmm1, (%rax)
; CHECK-NEXT:    vpsrlq $48, %xmm0, %xmm1
; CHECK-NEXT:    vpextrq $1, %xmm3, %rax
; CHECK-NEXT:    vmovsh %xmm1, (%rax)
; CHECK-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; CHECK-NEXT:    vextracti32x4 $2, %zmm2, %xmm3
; CHECK-NEXT:    vmovq %xmm3, %rax
; CHECK-NEXT:    vmovsh %xmm1, (%rax)
; CHECK-NEXT:    vpsrldq {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    vpextrq $1, %xmm3, %rax
; CHECK-NEXT:    vmovsh %xmm1, (%rax)
; CHECK-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; CHECK-NEXT:    vextracti32x4 $3, %zmm2, %xmm2
; CHECK-NEXT:    vmovq %xmm2, %rax
; CHECK-NEXT:    vmovsh %xmm1, (%rax)
; CHECK-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    vpextrq $1, %xmm2, %rax
; CHECK-NEXT:    vmovsh %xmm0, (%rax)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
{
  ; One pointer per lane: %base advanced by index[i] elements of type half.
  %gep = getelementptr half, ptr %base, <16 x i32> %index
  ; All sixteen mask bits are set, so every lane of %val is written.
  call void @llvm.masked.scatter.v16f16.v16p0(<16 x half> %val, <16 x ptr> %gep, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}
declare void @llvm.masked.scatter.v16f16.v16p0(<16 x half> , <16 x ptr> , i32 , <16 x i1>)