; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN:   FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN:   FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff \
; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN:   FileCheck %s

; This test case aims to test the vector multiply instructions on Power10.
; This includes the low order and high order versions of vector multiply.
; The low order version operates on doublewords, whereas the high order version
; operates on signed and unsigned words and doublewords.
; This file also includes 128 bit vector multiply instructions.

define <2 x i64> @test_vmulld(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vmulld:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulld v2, v3, v2
; CHECK-NEXT:    blr
entry:
  %mul = mul <2 x i64> %b, %a
  ret <2 x i64> %mul
}

define <2 x i64> @test_vmulhsd(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vmulhsd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulhsd v2, v3, v2
; CHECK-NEXT:    blr
entry:
  %0 = sext <2 x i64> %a to <2 x i128>
  %1 = sext <2 x i64> %b to <2 x i128>
  %mul = mul <2 x i128> %1, %0
  %shr = lshr <2 x i128> %mul, <i128 64, i128 64>
  %tr = trunc <2 x i128> %shr to <2 x i64>
  ret <2 x i64> %tr
}

define <2 x i64> @test_vmulhud(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vmulhud:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulhud v2, v3, v2
; CHECK-NEXT:    blr
entry:
  %0 = zext <2 x i64> %a to <2 x i128>
  %1 = zext <2 x i64> %b to <2 x i128>
  %mul = mul <2 x i128> %1, %0
  %shr = lshr <2 x i128> %mul, <i128 64, i128 64>
  %tr = trunc <2 x i128> %shr to <2 x i64>
  ret <2 x i64> %tr
}

define <4 x i32> @test_vmulhsw(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmulhsw:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulhsw v2, v3, v2
; CHECK-NEXT:    blr
entry:
  %0 = sext <4 x i32> %a to <4 x i64>
  %1 = sext <4 x i32> %b to <4 x i64>
  %mul = mul <4 x i64> %1, %0
  %shr = lshr <4 x i64> %mul, <i64 32, i64 32, i64 32, i64 32>
  %tr = trunc <4 x i64> %shr to <4 x i32>
  ret <4 x i32> %tr
}

define <4 x i32> @test_vmulhuw(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmulhuw:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulhuw v2, v3, v2
; CHECK-NEXT:    blr
entry:
  %0 = zext <4 x i32> %a to <4 x i64>
  %1 = zext <4 x i32> %b to <4 x i64>
  %mul = mul <4 x i64> %1, %0
  %shr = lshr <4 x i64> %mul, <i64 32, i64 32, i64 32, i64 32>
  %tr = trunc <4 x i64> %shr to <4 x i32>
  ret <4 x i32> %tr
}

; Test the vector multiply high intrinsics.
declare <4 x i32> @llvm.ppc.altivec.vmulhsw(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.ppc.altivec.vmulhuw(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.ppc.altivec.vmulhsd(<2 x i64>, <2 x i64>)
declare <2 x i64> @llvm.ppc.altivec.vmulhud(<2 x i64>, <2 x i64>)

define <4 x i32> @test_vmulhsw_intrinsic(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmulhsw_intrinsic:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulhsw v2, v2, v3
; CHECK-NEXT:    blr
entry:
  %mulh = tail call <4 x i32> @llvm.ppc.altivec.vmulhsw(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %mulh
}

define <4 x i32> @test_vmulhuw_intrinsic(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmulhuw_intrinsic:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulhuw v2, v2, v3
; CHECK-NEXT:    blr
entry:
  %mulh = tail call <4 x i32> @llvm.ppc.altivec.vmulhuw(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %mulh
}

define <2 x i64> @test_vmulhsd_intrinsic(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vmulhsd_intrinsic:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulhsd v2, v2, v3
; CHECK-NEXT:    blr
entry:
  %mulh = tail call <2 x i64> @llvm.ppc.altivec.vmulhsd(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %mulh
}

define <2 x i64> @test_vmulhud_intrinsic(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vmulhud_intrinsic:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulhud v2, v2, v3
; CHECK-NEXT:    blr
entry:
  %mulh = tail call <2 x i64> @llvm.ppc.altivec.vmulhud(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %mulh
}

declare <1 x i128> @llvm.ppc.altivec.vmuleud(<2 x i64>, <2 x i64>) nounwind readnone
declare <1 x i128> @llvm.ppc.altivec.vmuloud(<2 x i64>, <2 x i64>) nounwind readnone
declare <1 x i128> @llvm.ppc.altivec.vmulesd(<2 x i64>, <2 x i64>) nounwind readnone
declare <1 x i128> @llvm.ppc.altivec.vmulosd(<2 x i64>, <2 x i64>) nounwind readnone
declare <1 x i128> @llvm.ppc.altivec.vmsumcud(<2 x i64>, <2 x i64>, <1 x i128>) nounwind readnone

define <1 x i128> @test_vmuleud(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
; CHECK-LABEL: test_vmuleud:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmuleud v2, v2, v3
; CHECK-NEXT:    blr
  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmuleud(<2 x i64> %x, <2 x i64> %y)
  ret <1 x i128> %tmp
}

define <1 x i128> @test_vmuloud(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
; CHECK-LABEL: test_vmuloud:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmuloud v2, v2, v3
; CHECK-NEXT:    blr
  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmuloud(<2 x i64> %x, <2 x i64> %y)
  ret <1 x i128> %tmp
}

define <1 x i128> @test_vmulesd(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
; CHECK-LABEL: test_vmulesd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmulesd v2, v2, v3
; CHECK-NEXT:    blr
  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmulesd(<2 x i64> %x, <2 x i64> %y)
  ret <1 x i128> %tmp
}

define <1 x i128> @test_vmulosd(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
; CHECK-LABEL: test_vmulosd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmulosd v2, v2, v3
; CHECK-NEXT:    blr
  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmulosd(<2 x i64> %x, <2 x i64> %y)
  ret <1 x i128> %tmp
}

define <1 x i128> @test_vmsumcud(<2 x i64> %x, <2 x i64> %y, <1 x i128> %z) nounwind readnone {
; CHECK-LABEL: test_vmsumcud:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmsumcud v2, v2, v3, v4
; CHECK-NEXT:    blr
  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vmsumcud(<2 x i64> %x, <2 x i64> %y, <1 x i128> %z)
  ret <1 x i128> %tmp
}