; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=CHECK

; Every function below multiplies two <32 x i16> vectors with the 512-bit
; pmaddw.d intrinsic and adds the <16 x i32> result to the accumulator %a0.
; The expected assembly for each one is a single vpdpwssd followed by retq.
; The variants differ in the order of the add operands and in which
; multiplicand, if any, is loaded from memory.
; NOTE(review): llc is invoked with -disable-peephole, presumably so that the
; memory-operand forms below come from instruction selection rather than from
; a later folding pass - confirm against the commit that added the flag.

; Both multiplicands in registers; intrinsic result is the first add operand.
define <16 x i32> @test_pmaddwd_v32i16_add_v16i32(<16 x i32> %a0, <32 x i16> %a1, <32 x i16> %a2) {
; CHECK-LABEL: test_pmaddwd_v32i16_add_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpdpwssd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %madd = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %a1, <32 x i16> %a2)
  %sum = add <16 x i32> %madd, %a0
  ret <16 x i32> %sum
}

; Same as above with the add operands swapped (accumulator first).
define <16 x i32> @test_pmaddwd_v32i16_add_v16i32_commute(<16 x i32> %a0, <32 x i16> %a1, <32 x i16> %a2) {
; CHECK-LABEL: test_pmaddwd_v32i16_add_v16i32_commute:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpdpwssd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %madd = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %a1, <32 x i16> %a2)
  %sum = add <16 x i32> %a0, %madd
  ret <16 x i32> %sum
}

; First multiplicand is loaded from %p1; the expected instruction takes it
; directly as the (%rdi) memory operand.
define <16 x i32> @test_pmaddwd_v32i16_add_v16i32_load1(<16 x i32> %a0, ptr %p1, <32 x i16> %a2) {
; CHECK-LABEL: test_pmaddwd_v32i16_add_v16i32_load1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpdpwssd (%rdi), %zmm1, %zmm0
; CHECK-NEXT:    retq
  %mem1 = load <32 x i16>, ptr %p1
  %madd = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %mem1, <32 x i16> %a2)
  %sum = add <16 x i32> %madd, %a0
  ret <16 x i32> %sum
}

; Second multiplicand is loaded from %p2; the expected instruction is the
; same memory-operand form as in the load1 case.
define <16 x i32> @test_pmaddwd_v32i16_add_v16i32_load2(<16 x i32> %a0, <32 x i16> %a1, ptr %p2) {
; CHECK-LABEL: test_pmaddwd_v32i16_add_v16i32_load2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpdpwssd (%rdi), %zmm1, %zmm0
; CHECK-NEXT:    retq
  %mem2 = load <32 x i16>, ptr %p2
  %madd = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %a1, <32 x i16> %mem2)
  %sum = add <16 x i32> %madd, %a0
  ret <16 x i32> %sum
}

; Loaded first multiplicand combined with the swapped add operand order.
define <16 x i32> @test_pmaddwd_v32i16_add_v16i32_commute_load1(<16 x i32> %a0, ptr %p1, <32 x i16> %a2) {
; CHECK-LABEL: test_pmaddwd_v32i16_add_v16i32_commute_load1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpdpwssd (%rdi), %zmm1, %zmm0
; CHECK-NEXT:    retq
  %mem1 = load <32 x i16>, ptr %p1
  %madd = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %mem1, <32 x i16> %a2)
  %sum = add <16 x i32> %a0, %madd
  ret <16 x i32> %sum
}

; Loaded second multiplicand combined with the swapped add operand order.
define <16 x i32> @test_pmaddwd_v32i16_add_v16i32_commute_load2(<16 x i32> %a0, <32 x i16> %a1, ptr %p2) {
; CHECK-LABEL: test_pmaddwd_v32i16_add_v16i32_commute_load2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpdpwssd (%rdi), %zmm1, %zmm0
; CHECK-NEXT:    retq
  %mem2 = load <32 x i16>, ptr %p2
  %madd = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %a1, <32 x i16> %mem2)
  %sum = add <16 x i32> %a0, %madd
  ret <16 x i32> %sum
}

; Intrinsic used by every test above: two <32 x i16> inputs, <16 x i32> result.
declare <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16>, <32 x i16>)