; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s --mattr=+sve2 -o - | FileCheck %s

; Integer complex multiplication on scalable vectors, compiled for AArch64 with
; SVE2 enabled.  Every function below follows the same recipe:
;   1. split each interleaved operand into its real and imaginary halves,
;   2. real = b.re * a.re - a.im * b.im
;      imag = b.re * a.im + b.im * a.re
;   3. interleave the two results back into a single vector.
; The assertions expect each multiply to be emitted as a zeroed accumulator
; followed by two cmla instructions (immediates #0 and #90).

target triple = "aarch64"

; Expected to transform: one <vscale x 4 x i32> operand pair, one cmla pair.
define <vscale x 4 x i32> @complex_mul_v4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: complex_mul_v4i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z2.s, #0 // =0x0
; CHECK-NEXT: cmla z2.s, z1.s, z0.s, #0
; CHECK-NEXT: cmla z2.s, z1.s, z0.s, #90
; CHECK-NEXT: mov z0.d, z2.d
; CHECK-NEXT: ret
entry:
  ; Split both operands into (real, imaginary) lanes.
  %a.parts = tail call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave2.nxv4i32(<vscale x 4 x i32> %a)
  %a.re = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %a.parts, 0
  %a.im = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %a.parts, 1
  %b.parts = tail call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave2.nxv4i32(<vscale x 4 x i32> %b)
  %b.re = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %b.parts, 0
  %b.im = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %b.parts, 1
  ; Imaginary part: b.re * a.im + b.im * a.re
  %bi.ar = mul <vscale x 2 x i32> %b.im, %a.re
  %br.ai = mul <vscale x 2 x i32> %b.re, %a.im
  %imag.sum = add <vscale x 2 x i32> %br.ai, %bi.ar
  ; Real part: b.re * a.re - a.im * b.im
  %br.ar = mul <vscale x 2 x i32> %b.re, %a.re
  %ai.bi = mul <vscale x 2 x i32> %a.im, %b.im
  %real.diff = sub <vscale x 2 x i32> %br.ar, %ai.bi
  ; Re-interleave as (real, imag) pairs.
  %result = tail call <vscale x 4 x i32> @llvm.vector.interleave2.nxv4i32(<vscale x 2 x i32> %real.diff, <vscale x 2 x i32> %imag.sum)
  ret <vscale x 4 x i32> %result
}

; Expected to transform: <vscale x 8 x i32> operands, two cmla pairs.
define <vscale x 8 x i32> @complex_mul_v8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b) {
; CHECK-LABEL: complex_mul_v8i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z4.s, #0 // =0x0
; CHECK-NEXT: mov z5.d, z4.d
; CHECK-NEXT: cmla z4.s, z3.s, z1.s, #0
; CHECK-NEXT: cmla z5.s, z2.s, z0.s, #0
; CHECK-NEXT: cmla z4.s, z3.s, z1.s, #90
; CHECK-NEXT: cmla z5.s, z2.s, z0.s, #90
; CHECK-NEXT: mov z1.d, z4.d
; CHECK-NEXT: mov z0.d, z5.d
; CHECK-NEXT: ret
entry:
  ; Split both operands into (real, imaginary) lanes.
  %a.parts = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %a)
  %a.re = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %a.parts, 0
  %a.im = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %a.parts, 1
  %b.parts = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %b)
  %b.re = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %b.parts, 0
  %b.im = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } %b.parts, 1
  ; Imaginary part: b.re * a.im + b.im * a.re
  %bi.ar = mul <vscale x 4 x i32> %b.im, %a.re
  %br.ai = mul <vscale x 4 x i32> %b.re, %a.im
  %imag.sum = add <vscale x 4 x i32> %br.ai, %bi.ar
  ; Real part: b.re * a.re - a.im * b.im
  %br.ar = mul <vscale x 4 x i32> %b.re, %a.re
  %ai.bi = mul <vscale x 4 x i32> %a.im, %b.im
  %real.diff = sub <vscale x 4 x i32> %br.ar, %ai.bi
  ; Re-interleave as (real, imag) pairs.
  %result = tail call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> %real.diff, <vscale x 4 x i32> %imag.sum)
  ret <vscale x 8 x i32> %result
}

; Expected to transform: <vscale x 16 x i32> operands, four cmla pairs.
define <vscale x 16 x i32> @complex_mul_v16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b) {
; CHECK-LABEL: complex_mul_v16i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z24.s, #0 // =0x0
; CHECK-NEXT: mov z25.d, z24.d
; CHECK-NEXT: mov z26.d, z24.d
; CHECK-NEXT: mov z27.d, z24.d
; CHECK-NEXT: cmla z24.s, z7.s, z3.s, #0
; CHECK-NEXT: cmla z25.s, z4.s, z0.s, #0
; CHECK-NEXT: cmla z26.s, z5.s, z1.s, #0
; CHECK-NEXT: cmla z27.s, z6.s, z2.s, #0
; CHECK-NEXT: cmla z24.s, z7.s, z3.s, #90
; CHECK-NEXT: cmla z25.s, z4.s, z0.s, #90
; CHECK-NEXT: cmla z26.s, z5.s, z1.s, #90
; CHECK-NEXT: cmla z27.s, z6.s, z2.s, #90
; CHECK-NEXT: mov z3.d, z24.d
; CHECK-NEXT: mov z0.d, z25.d
; CHECK-NEXT: mov z1.d, z26.d
; CHECK-NEXT: mov z2.d, z27.d
; CHECK-NEXT: ret
entry:
  ; Split both operands into (real, imaginary) lanes.
  %a.parts = tail call { <vscale x 8 x i32>, <vscale x 8 x i32> } @llvm.vector.deinterleave2.nxv16i32(<vscale x 16 x i32> %a)
  %a.re = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32> } %a.parts, 0
  %a.im = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32> } %a.parts, 1
  %b.parts = tail call { <vscale x 8 x i32>, <vscale x 8 x i32> } @llvm.vector.deinterleave2.nxv16i32(<vscale x 16 x i32> %b)
  %b.re = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32> } %b.parts, 0
  %b.im = extractvalue { <vscale x 8 x i32>, <vscale x 8 x i32> } %b.parts, 1
  ; Imaginary part: b.re * a.im + b.im * a.re
  %bi.ar = mul <vscale x 8 x i32> %b.im, %a.re
  %br.ai = mul <vscale x 8 x i32> %b.re, %a.im
  %imag.sum = add <vscale x 8 x i32> %br.ai, %bi.ar
  ; Real part: b.re * a.re - a.im * b.im
  %br.ar = mul <vscale x 8 x i32> %b.re, %a.re
  %ai.bi = mul <vscale x 8 x i32> %a.im, %b.im
  %real.diff = sub <vscale x 8 x i32> %br.ar, %ai.bi
  ; Re-interleave as (real, imag) pairs.
  %result = tail call <vscale x 16 x i32> @llvm.vector.interleave2.nxv16i32(<vscale x 8 x i32> %real.diff, <vscale x 8 x i32> %imag.sum)
  ret <vscale x 16 x i32> %result
}

; Intrinsic declarations, grouped by the full (interleaved) vector width.
declare { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave2.nxv4i32(<vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.vector.interleave2.nxv4i32(<vscale x 2 x i32>, <vscale x 2 x i32>)

declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32>)
declare <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare { <vscale x 8 x i32>, <vscale x 8 x i32> } @llvm.vector.deinterleave2.nxv16i32(<vscale x 16 x i32>)
declare <vscale x 16 x i32> @llvm.vector.interleave2.nxv16i32(<vscale x 8 x i32>, <vscale x 8 x i32>)