; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s --mattr=+sve -o - | FileCheck %s

target triple = "aarch64"

; Each function below spells out a complex multiply on deinterleaved
; real/imaginary lanes:
;   real = a.real*b.real - a.imag*b.imag
;   imag = a.real*b.imag + a.imag*b.real
; and the CHECK lines verify the whole pattern is lowered to a pair of
; FCMLA instructions per register (rotations #0 and #90) instead of the
; scalar mul/add/sub chain.

; Expected to transform
; Single-register case: one zeroed accumulator, one #0/#90 FCMLA pair.
define <vscale x 2 x double> @complex_mul_v2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: complex_mul_v2f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov z2.d, #0 // =0x0
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fcmla z2.d, p0/m, z1.d, z0.d, #0
; CHECK-NEXT:    fcmla z2.d, p0/m, z1.d, z0.d, #90
; CHECK-NEXT:    mov z0.d, z2.d
; CHECK-NEXT:    ret
entry:
  %a.deinterleaved = tail call { <vscale x 1 x double>, <vscale x 1 x double> } @llvm.vector.deinterleave2.nxv2f64(<vscale x 2 x double> %a)
  %a.real = extractvalue { <vscale x 1 x double>, <vscale x 1 x double> } %a.deinterleaved, 0
  %a.imag = extractvalue { <vscale x 1 x double>, <vscale x 1 x double> } %a.deinterleaved, 1
  %b.deinterleaved = tail call { <vscale x 1 x double>, <vscale x 1 x double> } @llvm.vector.deinterleave2.nxv2f64(<vscale x 2 x double> %b)
  %b.real = extractvalue { <vscale x 1 x double>, <vscale x 1 x double> } %b.deinterleaved, 0
  %b.imag = extractvalue { <vscale x 1 x double>, <vscale x 1 x double> } %b.deinterleaved, 1
  ; %2 = imaginary part: a.real*b.imag + a.imag*b.real
  %0 = fmul fast <vscale x 1 x double> %b.imag, %a.real
  %1 = fmul fast <vscale x 1 x double> %b.real, %a.imag
  %2 = fadd fast <vscale x 1 x double> %1, %0
  ; %5 = real part: a.real*b.real - a.imag*b.imag
  %3 = fmul fast <vscale x 1 x double> %b.real, %a.real
  %4 = fmul fast <vscale x 1 x double> %a.imag, %b.imag
  %5 = fsub fast <vscale x 1 x double> %3, %4
  %interleaved.vec = tail call <vscale x 2 x double> @llvm.vector.interleave2.nxv2f64(<vscale x 1 x double> %5, <vscale x 1 x double> %2)
  ret <vscale x 2 x double> %interleaved.vec
}

; Expected to transform
; Two-register case: the transform is applied per register pair
; (z4/z5 accumulators), still one #0/#90 FCMLA pair each.
define <vscale x 4 x double> @complex_mul_v4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b) {
; CHECK-LABEL: complex_mul_v4f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov z4.d, #0 // =0x0
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z5.d, z4.d
; CHECK-NEXT:    fcmla z4.d, p0/m, z3.d, z1.d, #0
; CHECK-NEXT:    fcmla z5.d, p0/m, z2.d, z0.d, #0
; CHECK-NEXT:    fcmla z4.d, p0/m, z3.d, z1.d, #90
; CHECK-NEXT:    fcmla z5.d, p0/m, z2.d, z0.d, #90
; CHECK-NEXT:    mov z1.d, z4.d
; CHECK-NEXT:    mov z0.d, z5.d
; CHECK-NEXT:    ret
entry:
  %a.deinterleaved = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
  %a.real = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %a.deinterleaved, 0
  %a.imag = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %a.deinterleaved, 1
  %b.deinterleaved = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %b)
  %b.real = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %b.deinterleaved, 0
  %b.imag = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %b.deinterleaved, 1
  ; %2 = imaginary part: a.real*b.imag + a.imag*b.real
  %0 = fmul fast <vscale x 2 x double> %b.imag, %a.real
  %1 = fmul fast <vscale x 2 x double> %b.real, %a.imag
  %2 = fadd fast <vscale x 2 x double> %1, %0
  ; %5 = real part: a.real*b.real - a.imag*b.imag
  %3 = fmul fast <vscale x 2 x double> %b.real, %a.real
  %4 = fmul fast <vscale x 2 x double> %a.imag, %b.imag
  %5 = fsub fast <vscale x 2 x double> %3, %4
  %interleaved.vec = tail call <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double> %5, <vscale x 2 x double> %2)
  ret <vscale x 4 x double> %interleaved.vec
}

; Expected to transform
; Four-register case: four accumulators (z24-z27), each getting its own
; #0/#90 FCMLA pair.
define <vscale x 8 x double> @complex_mul_v8f64(<vscale x 8 x double> %a, <vscale x 8 x double> %b) {
; CHECK-LABEL: complex_mul_v8f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov z24.d, #0 // =0x0
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z25.d, z24.d
; CHECK-NEXT:    mov z26.d, z24.d
; CHECK-NEXT:    mov z27.d, z24.d
; CHECK-NEXT:    fcmla z24.d, p0/m, z7.d, z3.d, #0
; CHECK-NEXT:    fcmla z25.d, p0/m, z4.d, z0.d, #0
; CHECK-NEXT:    fcmla z26.d, p0/m, z5.d, z1.d, #0
; CHECK-NEXT:    fcmla z27.d, p0/m, z6.d, z2.d, #0
; CHECK-NEXT:    fcmla z24.d, p0/m, z7.d, z3.d, #90
; CHECK-NEXT:    fcmla z25.d, p0/m, z4.d, z0.d, #90
; CHECK-NEXT:    fcmla z26.d, p0/m, z5.d, z1.d, #90
; CHECK-NEXT:    fcmla z27.d, p0/m, z6.d, z2.d, #90
; CHECK-NEXT:    mov z3.d, z24.d
; CHECK-NEXT:    mov z0.d, z25.d
; CHECK-NEXT:    mov z1.d, z26.d
; CHECK-NEXT:    mov z2.d, z27.d
; CHECK-NEXT:    ret
entry:
  %a.deinterleaved = tail call { <vscale x 4 x double>, <vscale x 4 x double> } @llvm.vector.deinterleave2.nxv8f64(<vscale x 8 x double> %a)
  %a.real = extractvalue { <vscale x 4 x double>, <vscale x 4 x double> } %a.deinterleaved, 0
  %a.imag = extractvalue { <vscale x 4 x double>, <vscale x 4 x double> } %a.deinterleaved, 1
  %b.deinterleaved = tail call { <vscale x 4 x double>, <vscale x 4 x double> } @llvm.vector.deinterleave2.nxv8f64(<vscale x 8 x double> %b)
  %b.real = extractvalue { <vscale x 4 x double>, <vscale x 4 x double> } %b.deinterleaved, 0
  %b.imag = extractvalue { <vscale x 4 x double>, <vscale x 4 x double> } %b.deinterleaved, 1
  ; %2 = imaginary part: a.real*b.imag + a.imag*b.real
  %0 = fmul fast <vscale x 4 x double> %b.imag, %a.real
  %1 = fmul fast <vscale x 4 x double> %b.real, %a.imag
  %2 = fadd fast <vscale x 4 x double> %1, %0
  ; %5 = real part: a.real*b.real - a.imag*b.imag
  %3 = fmul fast <vscale x 4 x double> %b.real, %a.real
  %4 = fmul fast <vscale x 4 x double> %a.imag, %b.imag
  %5 = fsub fast <vscale x 4 x double> %3, %4
  %interleaved.vec = tail call <vscale x 8 x double> @llvm.vector.interleave2.nxv8f64(<vscale x 4 x double> %5, <vscale x 4 x double> %2)
  ret <vscale x 8 x double> %interleaved.vec
}

declare { <vscale x 1 x double>, <vscale x 1 x double> } @llvm.vector.deinterleave2.nxv2f64(<vscale x 2 x double>)
declare <vscale x 2 x double> @llvm.vector.interleave2.nxv2f64(<vscale x 1 x double>, <vscale x 1 x double>)

declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double>)
declare <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double>, <vscale x 2 x double>)

declare { <vscale x 4 x double>, <vscale x 4 x double> } @llvm.vector.deinterleave2.nxv8f64(<vscale x 8 x double>)
declare <vscale x 8 x double> @llvm.vector.interleave2.nxv8f64(<vscale x 4 x double>, <vscale x 4 x double>)