1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vnni,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64 4 5declare <8 x i32> @llvm.x86.avx512.mask.vpdpbusd.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 6declare <8 x i32> @llvm.x86.avx512.maskz.vpdpbusd.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 7 8define <8 x i32>@test_int_x86_avx512_vpdpbusd_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) { 9; CHECK-LABEL: test_int_x86_avx512_vpdpbusd_256: 10; CHECK: # %bb.0: 11; CHECK-NEXT: vpdpbusd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x50,0xc2] 12; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 13 %res = call <8 x i32> @llvm.x86.avx512.mask.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 14 ret <8 x i32> %res 15} 16 17define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusd_256(<8 x i32> %x0, <8 x i32> %x1, ptr %x2p, <8 x i32> %x4, i8 %x3) { 18; X86-LABEL: test_int_x86_avx512_mask_vpdpbusd_256: 19; X86: # %bb.0: 20; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 21; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 22; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 23; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 24; X86-NEXT: vpdpbusd (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x50,0x00] 25; X86-NEXT: vpdpbusd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x50,0xda] 26; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] 27; X86-NEXT: retl # encoding: [0xc3] 28; 29; X64-LABEL: test_int_x86_avx512_mask_vpdpbusd_256: 30; X64: # %bb.0: 31; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 32; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 33; X64-NEXT: vpdpbusd (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x50,0x07] 34; X64-NEXT: vpdpbusd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x50,0xda] 35; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] 36; X64-NEXT: retq # encoding: [0xc3] 37 %x2 = load <8 x i32>, ptr %x2p 38 %res0 = call <8 x i32> @llvm.x86.avx512.mask.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 39 %res1 = call <8 x i32> @llvm.x86.avx512.maskz.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 %x3) 40 %res2 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %res0, 0 41 %res3 = insertvalue { <8 x i32>, <8 x i32> } %res2, <8 x i32> %res1, 1 42 ret { <8 x i32>, <8 x i32> } %res3 43} 44 45declare <4 x i32> @llvm.x86.avx512.mask.vpdpbusd.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 46declare <4 x i32> @llvm.x86.avx512.maskz.vpdpbusd.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 47 48define <4 x i32>@test_int_x86_avx512_vpdpbusd_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) { 49; CHECK-LABEL: test_int_x86_avx512_vpdpbusd_128: 50; CHECK: # %bb.0: 51; CHECK-NEXT: vpdpbusd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x50,0xc2] 52; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 53 %res = call <4 x i32> @llvm.x86.avx512.mask.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) 54 ret <4 x i32> %res 55} 56 57define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusd_128(<4 x i32> %x0, <4 x i32> %x1, ptr %x2p, <4 x i32> %x4, i8 %x3) { 58; X86-LABEL: test_int_x86_avx512_mask_vpdpbusd_128: 59; X86: # %bb.0: 60; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 61; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 62; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 63; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 64; X86-NEXT: vpdpbusd (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x50,0x00] 65; X86-NEXT: vpdpbusd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x50,0xda] 66; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 67; X86-NEXT: retl # encoding: [0xc3] 68; 69; X64-LABEL: test_int_x86_avx512_mask_vpdpbusd_128: 70; X64: # %bb.0: 71; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 72; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 73; X64-NEXT: vpdpbusd (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x50,0x07] 74; X64-NEXT: vpdpbusd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x50,0xda] 75; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 76; X64-NEXT: retq # encoding: [0xc3] 77 %x2 = load <4 x i32>, ptr %x2p 78 %res0 = call <4 x i32> @llvm.x86.avx512.mask.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 79 %res1 = call <4 x i32> @llvm.x86.avx512.maskz.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 %x3) 80 %res2 = insertvalue { <4 x i32>, <4 x i32> } poison, <4 x i32> %res0, 0 81 %res3 = insertvalue { <4 x i32>, <4 x i32> } %res2, <4 x i32> %res1, 1 82 ret { <4 x i32>, <4 x i32> } %res3 83} 84 85declare <8 x i32> @llvm.x86.avx512.mask.vpdpbusds.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 86declare <8 x i32> @llvm.x86.avx512.maskz.vpdpbusds.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 87 88define <8 x i32>@test_int_x86_avx512_vpdpbusds_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) { 89; CHECK-LABEL: test_int_x86_avx512_vpdpbusds_256: 90; CHECK: # %bb.0: 91; CHECK-NEXT: vpdpbusds %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x51,0xc2] 92; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 93 %res = call <8 x i32> @llvm.x86.avx512.mask.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 94 ret <8 x i32> %res 95} 96 97define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusds_256(<8 x i32> %x0, <8 x i32> %x1, ptr %x2p, <8 x i32> %x4, i8 %x3) { 98; X86-LABEL: test_int_x86_avx512_mask_vpdpbusds_256: 99; X86: # %bb.0: 100; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 101; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 102; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 103; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 104; X86-NEXT: vpdpbusds (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x51,0x00] 105; X86-NEXT: vpdpbusds %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x51,0xda] 106; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] 107; X86-NEXT: retl # encoding: [0xc3] 108; 109; X64-LABEL: test_int_x86_avx512_mask_vpdpbusds_256: 110; X64: # %bb.0: 111; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 112; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 113; X64-NEXT: vpdpbusds (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x51,0x07] 114; X64-NEXT: vpdpbusds %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x51,0xda] 115; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] 116; X64-NEXT: retq # encoding: [0xc3] 117 %x2 = load <8 x i32>, ptr %x2p 118 %res0 = call <8 x i32> @llvm.x86.avx512.mask.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 119 %res1 = call <8 x i32> @llvm.x86.avx512.maskz.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 %x3) 120 %res2 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %res0, 0 121 %res3 = insertvalue { <8 x i32>, <8 x i32> } %res2, <8 x i32> %res1, 1 122 ret { <8 x i32>, <8 x i32> } %res3 123} 124 125declare <4 x i32> @llvm.x86.avx512.mask.vpdpbusds.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 126declare <4 x i32> @llvm.x86.avx512.maskz.vpdpbusds.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 127 128define <4 x i32>@test_int_x86_avx512_vpdpbusds_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) { 129; CHECK-LABEL: test_int_x86_avx512_vpdpbusds_128: 130; CHECK: # %bb.0: 131; CHECK-NEXT: vpdpbusds %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x51,0xc2] 132; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 133 %res = call <4 x i32> @llvm.x86.avx512.mask.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) 134 ret <4 x i32> %res 135} 136 137define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusds_128(<4 x i32> %x0, <4 x i32> %x1, ptr %x2p, <4 x i32> %x4, i8 %x3) { 138; X86-LABEL: test_int_x86_avx512_mask_vpdpbusds_128: 139; X86: # %bb.0: 140; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 141; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 142; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 143; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 144; X86-NEXT: vpdpbusds (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x51,0x00] 145; X86-NEXT: vpdpbusds %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x51,0xda] 146; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 147; X86-NEXT: retl # encoding: [0xc3] 148; 149; X64-LABEL: test_int_x86_avx512_mask_vpdpbusds_128: 150; X64: # %bb.0: 151; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 152; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 153; X64-NEXT: vpdpbusds (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x51,0x07] 154; X64-NEXT: vpdpbusds %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x51,0xda] 155; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 156; X64-NEXT: retq # encoding: [0xc3] 157 %x2 = load <4 x i32>, ptr %x2p 158 %res0 = call <4 x i32> @llvm.x86.avx512.mask.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 159 %res1 = call <4 x i32> @llvm.x86.avx512.maskz.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 %x3) 160 %res2 = insertvalue { <4 x i32>, <4 x i32> } poison, <4 x i32> %res0, 0 161 %res3 = insertvalue { <4 x i32>, <4 x i32> } %res2, <4 x i32> %res1, 1 162 ret { <4 x i32>, <4 x i32> } %res3 163} 164 165declare <8 x i32> @llvm.x86.avx512.mask.vpdpwssd.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 166declare <8 x i32> @llvm.x86.avx512.maskz.vpdpwssd.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 167 168define <8 x i32>@test_int_x86_avx512_vpdpwssd_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) { 169; CHECK-LABEL: test_int_x86_avx512_vpdpwssd_256: 170; CHECK: # %bb.0: 171; CHECK-NEXT: vpdpwssd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x52,0xc2] 172; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 173 %res = call <8 x i32> @llvm.x86.avx512.mask.vpdpwssd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 174 ret <8 x i32> %res 175} 176 177define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpwssd_256(<8 x i32> %x0, <8 x i32> %x1, ptr %x2p, <8 x i32> %x4, i8 %x3) { 178; X86-LABEL: test_int_x86_avx512_mask_vpdpwssd_256: 179; X86: # %bb.0: 180; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 181; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 182; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 183; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 184; X86-NEXT: vpdpwssd (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x52,0x00] 185; X86-NEXT: vpdpwssd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x52,0xda] 186; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] 187; X86-NEXT: retl # encoding: [0xc3] 188; 189; X64-LABEL: test_int_x86_avx512_mask_vpdpwssd_256: 190; X64: # %bb.0: 191; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 192; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 193; X64-NEXT: vpdpwssd (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x52,0x07] 194; X64-NEXT: vpdpwssd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x52,0xda] 195; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] 196; X64-NEXT: retq # encoding: [0xc3] 197 %x2 = load <8 x i32>, ptr %x2p 198 %res0 = call <8 x i32> @llvm.x86.avx512.mask.vpdpwssd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 199 %res1 = call <8 x i32> @llvm.x86.avx512.maskz.vpdpwssd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 %x3) 200 %res2 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %res0, 0 201 %res3 = insertvalue { <8 x i32>, <8 x i32> } %res2, <8 x i32> %res1, 1 202 ret { <8 x i32>, <8 x i32> } %res3 203} 204 205declare <4 x i32> @llvm.x86.avx512.mask.vpdpwssd.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 206declare <4 x i32> @llvm.x86.avx512.maskz.vpdpwssd.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 207 208define <4 x i32>@test_int_x86_avx512_vpdpwssd_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) { 209; CHECK-LABEL: test_int_x86_avx512_vpdpwssd_128: 210; CHECK: # %bb.0: 211; CHECK-NEXT: vpdpwssd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x52,0xc2] 212; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 213 %res = call <4 x i32> @llvm.x86.avx512.mask.vpdpwssd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) 214 ret <4 x i32> %res 215} 216 217define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpwssd_128(<4 x i32> %x0, <4 x i32> %x1, ptr %x2p, <4 x i32> %x4, i8 %x3) { 218; X86-LABEL: test_int_x86_avx512_mask_vpdpwssd_128: 219; X86: # %bb.0: 220; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 221; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 222; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 223; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 224; X86-NEXT: vpdpwssd (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x52,0x00] 225; X86-NEXT: vpdpwssd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x52,0xda] 226; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 227; X86-NEXT: retl # encoding: [0xc3] 228; 229; X64-LABEL: test_int_x86_avx512_mask_vpdpwssd_128: 230; X64: # %bb.0: 231; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 232; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 233; X64-NEXT: vpdpwssd (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x52,0x07] 234; X64-NEXT: vpdpwssd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x52,0xda] 235; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 236; X64-NEXT: retq # encoding: [0xc3] 237 %x2 = load <4 x i32>, ptr %x2p 238 %res0 = call <4 x i32> @llvm.x86.avx512.mask.vpdpwssd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 239 %res1 = call <4 x i32> @llvm.x86.avx512.maskz.vpdpwssd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 %x3) 240 %res2 = insertvalue { <4 x i32>, <4 x i32> } poison, <4 x i32> %res0, 0 241 %res3 = insertvalue { <4 x i32>, <4 x i32> } %res2, <4 x i32> %res1, 1 242 ret { <4 x i32>, <4 x i32> } %res3 243} 244 245 246declare <8 x i32> @llvm.x86.avx512.mask.vpdpwssds.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 247declare <8 x i32> @llvm.x86.avx512.maskz.vpdpwssds.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) 248 249define <8 x i32>@test_int_x86_avx512_vpdpwssds_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) { 250; CHECK-LABEL: test_int_x86_avx512_vpdpwssds_256: 251; CHECK: # %bb.0: 252; CHECK-NEXT: vpdpwssds %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x53,0xc2] 253; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 254 %res = call <8 x i32> @llvm.x86.avx512.mask.vpdpwssds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) 255 ret <8 x i32> %res 256} 257 258define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpwssds_256(<8 x i32> %x0, <8 x i32> %x1, ptr %x2p, <8 x i32> %x4, i8 %x3) { 259; X86-LABEL: test_int_x86_avx512_mask_vpdpwssds_256: 260; X86: # %bb.0: 261; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 262; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 263; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 264; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 265; X86-NEXT: vpdpwssds (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x53,0x00] 266; X86-NEXT: vpdpwssds %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x53,0xda] 267; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] 268; X86-NEXT: retl # encoding: [0xc3] 269; 270; X64-LABEL: test_int_x86_avx512_mask_vpdpwssds_256: 271; X64: # %bb.0: 272; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 273; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 274; X64-NEXT: vpdpwssds (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x53,0x07] 275; X64-NEXT: vpdpwssds %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x53,0xda] 276; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] 277; X64-NEXT: retq # encoding: [0xc3] 278 %x2 = load <8 x i32>, ptr %x2p 279 %res0 = call <8 x i32> @llvm.x86.avx512.mask.vpdpwssds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) 280 %res1 = call <8 x i32> @llvm.x86.avx512.maskz.vpdpwssds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 %x3) 281 %res2 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %res0, 0 282 %res3 = insertvalue { <8 x i32>, <8 x i32> } %res2, <8 x i32> %res1, 1 283 ret { <8 x i32>, <8 x i32> } %res3 284} 285 286declare <4 x i32> @llvm.x86.avx512.mask.vpdpwssds.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 287declare <4 x i32> @llvm.x86.avx512.maskz.vpdpwssds.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) 288 289define <4 x i32>@test_int_x86_avx512_vpdpwssds_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) { 290; CHECK-LABEL: test_int_x86_avx512_vpdpwssds_128: 291; CHECK: # %bb.0: 292; CHECK-NEXT: vpdpwssds %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x53,0xc2] 293; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] 294 %res = call <4 x i32> @llvm.x86.avx512.mask.vpdpwssds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) 295 ret <4 x i32> %res 296} 297 298define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpwssds_128(<4 x i32> %x0, <4 x i32> %x1, ptr %x2p, <4 x i32> %x4, i8 %x3) { 299; X86-LABEL: test_int_x86_avx512_mask_vpdpwssds_128: 300; X86: # %bb.0: 301; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 302; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] 303; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] 304; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] 305; X86-NEXT: vpdpwssds (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x53,0x00] 306; X86-NEXT: vpdpwssds %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x53,0xda] 307; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 308; X86-NEXT: retl # encoding: [0xc3] 309; 310; X64-LABEL: test_int_x86_avx512_mask_vpdpwssds_128: 311; X64: # %bb.0: 312; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 313; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] 314; X64-NEXT: vpdpwssds (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x53,0x07] 315; X64-NEXT: vpdpwssds %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x53,0xda] 316; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] 317; X64-NEXT: retq # encoding: [0xc3] 318 %x2 = load <4 x i32>, ptr %x2p 319 %res0 = call <4 x i32> @llvm.x86.avx512.mask.vpdpwssds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) 320 %res1 = call <4 x i32> @llvm.x86.avx512.maskz.vpdpwssds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 %x3) 321 %res2 = insertvalue { <4 x i32>, <4 x i32> } poison, <4 x i32> %res0, 0 322 %res3 = insertvalue { <4 x i32>, <4 x i32> } %res2, <4 x i32> %res1, 1 323 ret { <4 x i32>, <4 x i32> } %res3 324} 325