; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vnni --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

; Codegen test for the four 512-bit llvm.x86.avx512.vpdp* intrinsics declared
; below (vpdpbusd, vpdpbusds, vpdpwssd, vpdpwssds), compiled with +avx512vnni
; for an i686 and an x86_64 triple. Every expected instruction is checked
; together with its machine-code encoding bytes.
;
; Each intrinsic gets two tests:
;   * an unmasked test: one call on three register operands, expected to
;     become a single instruction; the check lines are shared by both triples;
;   * a "mask" test: one call whose third operand is loaded from memory and
;     whose result is merged with %x0 under a mask, plus one call on register
;     operands whose result is zeroed under the same mask. The two triples
;     have separate check lines because the pointer and mask arguments are
;     read from the stack on i686 and from registers on x86_64.
;
; The check lines are generated; regenerate them with the script named on the
; first line instead of editing them by hand.

declare <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32>, <16 x i32>, <16 x i32>)

; Unmasked vpdpbusd: the intrinsic call is expected to lower to one
; register-form instruction that accumulates into zmm0.
; NOTE(review): "ask" in the function name looks like a truncated "mask"; this
; test uses no mask, and the unmasked vpdpbusds/vpdpwssd tests below carry no
; such infix. Confirm before renaming, since the label checks embed the name.
define <16 x i32> @test_int_x86_avx512_ask_vpdpbusd_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_ask_vpdpbusd_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vpdpbusd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x50,0xc2]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
  ret <16 x i32> %1
}

; Masked vpdpbusd. Returns two vectors:
;   field 0: intrinsic(%x0, %x1, load %x2p), with %x0 kept in lanes whose mask
;            bit is clear; expected as a memory-operand instruction with {%k1};
;   field 1: intrinsic(%x0, %x1, %x4), with zero in lanes whose mask bit is
;            clear; expected as a register instruction with {%k1} {z}.
; The expected code first copies zmm0 to zmm3 so that the second instruction
; can still accumulate into the original %x0 after the first overwrites zmm0.
define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusd_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, <16 x i32> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpbusd_512:
; X86: # %bb.0:
; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpdpbusd (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x50,0x00]
; X86-NEXT: vpdpbusd %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x50,0xda]
; X86-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpdpbusd_512:
; X64: # %bb.0:
; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpdpbusd (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x50,0x07]
; X64-NEXT: vpdpbusd %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x50,0xda]
; X64-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  ; Third operand comes from memory; the checks expect the load to be folded
  ; into the instruction's memory operand.
  %x2 = load <16 x i32>, ptr %x2p
  %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
  ; Merge form: unselected lanes take the accumulator input %x0.
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
  %4 = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4)
  ; Zeroing form: unselected lanes become zero.
  %5 = bitcast i16 %x3 to <16 x i1>
  %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer
  ; Pack both results so a single function exercises both masking forms.
  %res1 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %3, 0
  %res2 = insertvalue { <16 x i32>, <16 x i32> } %res1, <16 x i32> %6, 1
  ret { <16 x i32>, <16 x i32> } %res2
}

declare <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32>, <16 x i32>, <16 x i32>)

; Unmasked vpdpbusds: the intrinsic call is expected to lower to one
; register-form instruction that accumulates into zmm0.
define <16 x i32>@test_int_x86_avx512_vpdpbusds_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpdpbusds_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vpdpbusds %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x51,0xc2]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
  ret <16 x i32> %1
}

; Masked vpdpbusds. Returns two vectors:
;   field 0: intrinsic(%x0, %x1, load %x2p), with %x0 kept in lanes whose mask
;            bit is clear; expected as a memory-operand instruction with {%k1};
;   field 1: intrinsic(%x0, %x1, %x4), with zero in lanes whose mask bit is
;            clear; expected as a register instruction with {%k1} {z}.
; The expected code first copies zmm0 to zmm3 so that the second instruction
; can still accumulate into the original %x0 after the first overwrites zmm0.
define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusds_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, <16 x i32> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpbusds_512:
; X86: # %bb.0:
; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpdpbusds (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x51,0x00]
; X86-NEXT: vpdpbusds %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x51,0xda]
; X86-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpdpbusds_512:
; X64: # %bb.0:
; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpdpbusds (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x51,0x07]
; X64-NEXT: vpdpbusds %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x51,0xda]
; X64-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  ; Third operand comes from memory; the checks expect the load to be folded
  ; into the instruction's memory operand.
  %x2 = load <16 x i32>, ptr %x2p
  %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
  ; Merge form: unselected lanes take the accumulator input %x0.
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
  %4 = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4)
  ; Zeroing form: unselected lanes become zero.
  %5 = bitcast i16 %x3 to <16 x i1>
  %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer
  ; Pack both results so a single function exercises both masking forms.
  %res1 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %3, 0
  %res2 = insertvalue { <16 x i32>, <16 x i32> } %res1, <16 x i32> %6, 1
  ret { <16 x i32>, <16 x i32> } %res2
}

declare <16 x i32> @llvm.x86.avx512.vpdpwssd.512(<16 x i32>, <16 x i32>, <16 x i32>)

; Unmasked vpdpwssd: the intrinsic call is expected to lower to one
; register-form instruction that accumulates into zmm0.
define <16 x i32>@test_int_x86_avx512_vpdpwssd_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpdpwssd_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vpdpwssd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x52,0xc2]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i32> @llvm.x86.avx512.vpdpwssd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
  ret <16 x i32> %1
}

; Masked vpdpwssd. Returns two vectors:
;   field 0: intrinsic(%x0, %x1, load %x2p), with %x0 kept in lanes whose mask
;            bit is clear; expected as a memory-operand instruction with {%k1};
;   field 1: intrinsic(%x0, %x1, %x4), with zero in lanes whose mask bit is
;            clear; expected as a register instruction with {%k1} {z}.
; The expected code first copies zmm0 to zmm3 so that the second instruction
; can still accumulate into the original %x0 after the first overwrites zmm0.
define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpwssd_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, <16 x i32> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpwssd_512:
; X86: # %bb.0:
; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpdpwssd (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x52,0x00]
; X86-NEXT: vpdpwssd %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x52,0xda]
; X86-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpdpwssd_512:
; X64: # %bb.0:
; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpdpwssd (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x52,0x07]
; X64-NEXT: vpdpwssd %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x52,0xda]
; X64-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  ; Third operand comes from memory; the checks expect the load to be folded
  ; into the instruction's memory operand.
  %x2 = load <16 x i32>, ptr %x2p
  %1 = call <16 x i32> @llvm.x86.avx512.vpdpwssd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
  ; Merge form: unselected lanes take the accumulator input %x0.
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
  %4 = call <16 x i32> @llvm.x86.avx512.vpdpwssd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4)
  ; Zeroing form: unselected lanes become zero.
  %5 = bitcast i16 %x3 to <16 x i1>
  %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer
  ; Pack both results so a single function exercises both masking forms.
  %res1 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %3, 0
  %res2 = insertvalue { <16 x i32>, <16 x i32> } %res1, <16 x i32> %6, 1
  ret { <16 x i32>, <16 x i32> } %res2
}

declare <16 x i32> @llvm.x86.avx512.vpdpwssds.512(<16 x i32>, <16 x i32>, <16 x i32>)

; Unmasked vpdpwssds: the intrinsic call is expected to lower to one
; register-form instruction that accumulates into zmm0.
; NOTE(review): "ask" in the function name looks like a truncated "mask"; this
; test uses no mask, and the unmasked vpdpbusds/vpdpwssd tests above carry no
; such infix. Confirm before renaming, since the label checks embed the name.
define <16 x i32>@test_int_x86_avx512_ask_vpdpwssds_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_ask_vpdpwssds_512:
; CHECK: # %bb.0:
; CHECK-NEXT: vpdpwssds %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x53,0xc2]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i32> @llvm.x86.avx512.vpdpwssds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
  ret <16 x i32> %1
}

; Masked vpdpwssds. Returns two vectors:
;   field 0: intrinsic(%x0, %x1, load %x2p), with %x0 kept in lanes whose mask
;            bit is clear; expected as a memory-operand instruction with {%k1};
;   field 1: intrinsic(%x0, %x1, %x4), with zero in lanes whose mask bit is
;            clear; expected as a register instruction with {%k1} {z}.
; The expected code first copies zmm0 to zmm3 so that the second instruction
; can still accumulate into the original %x0 after the first overwrites zmm0.
define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpwssds_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, <16 x i32> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpdpwssds_512:
; X86: # %bb.0:
; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpdpwssds (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x53,0x00]
; X86-NEXT: vpdpwssds %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x53,0xda]
; X86-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpdpwssds_512:
; X64: # %bb.0:
; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpdpwssds (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x53,0x07]
; X64-NEXT: vpdpwssds %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x53,0xda]
; X64-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
; X64-NEXT: retq # encoding: [0xc3]
  ; Third operand comes from memory; the checks expect the load to be folded
  ; into the instruction's memory operand.
  %x2 = load <16 x i32>, ptr %x2p
  %1 = call <16 x i32> @llvm.x86.avx512.vpdpwssds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
  ; Merge form: unselected lanes take the accumulator input %x0.
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
  %4 = call <16 x i32> @llvm.x86.avx512.vpdpwssds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4)
  ; Zeroing form: unselected lanes become zero.
  %5 = bitcast i16 %x3 to <16 x i1>
  %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer
  ; Pack both results so a single function exercises both masking forms.
  %res1 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %3, 0
  %res2 = insertvalue { <16 x i32>, <16 x i32> } %res1, <16 x i32> %6, 1
  ret { <16 x i32>, <16 x i32> } %res2
}