; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s --mattr=+sve -o - | FileCheck %s

; Codegen test for complex multiplication on scalable half-precision vectors.
; Every function below follows the same shape:
;   1. deinterleave2 splits %a and %b into two halves, named real (result 0)
;      and imag (result 1);
;   2. the complex product is formed with fast-math fmul/fadd/fsub;
;   3. interleave2 recombines the real and imaginary results.
; The FileCheck assertions pin the assembly produced for each vector width.

target triple = "aarch64"

; Expected to transform
; NOTE(review): unlike the wider cases in this file, the assertions recorded
; for this function contain no fcmla; they show uzp1/uzp2 + uunpklo followed by
; fmul/fmad/fnmsb and a zip/uzp1 recombination. Confirm whether the
; "Expected to transform" label still reflects the intent for
; <vscale x 4 x half>.
define <vscale x 4 x half> @complex_mul_v4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: complex_mul_v4f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uzp2 z2.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uzp2 z3.s, z1.s, z0.s
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: uunpklo z2.d, z2.s
; CHECK-NEXT: uunpklo z3.d, z3.s
; CHECK-NEXT: uzp1 z1.s, z1.s, z0.s
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: movprfx z4, z3
; CHECK-NEXT: fmul z4.h, p0/m, z4.h, z0.h
; CHECK-NEXT: fmul z3.h, p0/m, z3.h, z2.h
; CHECK-NEXT: fmad z2.h, p0/m, z1.h, z4.h
; CHECK-NEXT: fnmsb z0.h, p0/m, z1.h, z3.h
; CHECK-NEXT: zip2 z1.d, z0.d, z2.d
; CHECK-NEXT: zip1 z0.d, z0.d, z2.d
; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
entry:
  ; Split each operand into its real (result 0) and imag (result 1) halves.
  %a.deinterleaved = tail call { <vscale x 2 x half>, <vscale x 2 x half> } @llvm.vector.deinterleave2.nxv4f16(<vscale x 4 x half> %a)
  %a.real = extractvalue { <vscale x 2 x half>, <vscale x 2 x half> } %a.deinterleaved, 0
  %a.imag = extractvalue { <vscale x 2 x half>, <vscale x 2 x half> } %a.deinterleaved, 1
  %b.deinterleaved = tail call { <vscale x 2 x half>, <vscale x 2 x half> } @llvm.vector.deinterleave2.nxv4f16(<vscale x 4 x half> %b)
  %b.real = extractvalue { <vscale x 2 x half>, <vscale x 2 x half> } %b.deinterleaved, 0
  %b.imag = extractvalue { <vscale x 2 x half>, <vscale x 2 x half> } %b.deinterleaved, 1
  ; Imaginary part (%2): b.imag * a.real + b.real * a.imag
  %0 = fmul fast <vscale x 2 x half> %b.imag, %a.real
  %1 = fmul fast <vscale x 2 x half> %b.real, %a.imag
  %2 = fadd fast <vscale x 2 x half> %1, %0
  ; Real part (%5): b.real * a.real - a.imag * b.imag
  %3 = fmul fast <vscale x 2 x half> %b.real, %a.real
  %4 = fmul fast <vscale x 2 x half> %a.imag, %b.imag
  %5 = fsub fast <vscale x 2 x half> %3, %4
  ; Recombine as interleave2(real, imag).
  %interleaved.vec = tail call <vscale x 4 x half> @llvm.vector.interleave2.nxv4f16(<vscale x 2 x half> %5, <vscale x 2 x half> %2)
  ret <vscale x 4 x half> %interleaved.vec
}

; Expected to transform
; The recorded output is a single zero-initialised accumulator (z2) updated by
; an fcmla pair with rotations #0 and #90.
define <vscale x 8 x half> @complex_mul_v8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: complex_mul_v8f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z2.h, #0 // =0x0
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: fcmla z2.h, p0/m, z1.h, z0.h, #0
; CHECK-NEXT: fcmla z2.h, p0/m, z1.h, z0.h, #90
; CHECK-NEXT: mov z0.d, z2.d
; CHECK-NEXT: ret
entry:
  ; Split each operand into its real (result 0) and imag (result 1) halves.
  %a.deinterleaved = tail call { <vscale x 4 x half>, <vscale x 4 x half> } @llvm.vector.deinterleave2.nxv8f16(<vscale x 8 x half> %a)
  %a.real = extractvalue { <vscale x 4 x half>, <vscale x 4 x half> } %a.deinterleaved, 0
  %a.imag = extractvalue { <vscale x 4 x half>, <vscale x 4 x half> } %a.deinterleaved, 1
  %b.deinterleaved = tail call { <vscale x 4 x half>, <vscale x 4 x half> } @llvm.vector.deinterleave2.nxv8f16(<vscale x 8 x half> %b)
  %b.real = extractvalue { <vscale x 4 x half>, <vscale x 4 x half> } %b.deinterleaved, 0
  %b.imag = extractvalue { <vscale x 4 x half>, <vscale x 4 x half> } %b.deinterleaved, 1
  ; Imaginary part (%2): b.imag * a.real + b.real * a.imag
  %0 = fmul fast <vscale x 4 x half> %b.imag, %a.real
  %1 = fmul fast <vscale x 4 x half> %b.real, %a.imag
  %2 = fadd fast <vscale x 4 x half> %1, %0
  ; Real part (%5): b.real * a.real - a.imag * b.imag
  %3 = fmul fast <vscale x 4 x half> %b.real, %a.real
  %4 = fmul fast <vscale x 4 x half> %a.imag, %b.imag
  %5 = fsub fast <vscale x 4 x half> %3, %4
  ; Recombine as interleave2(real, imag).
  %interleaved.vec = tail call <vscale x 8 x half> @llvm.vector.interleave2.nxv8f16(<vscale x 4 x half> %5, <vscale x 4 x half> %2)
  ret <vscale x 8 x half> %interleaved.vec
}

; Expected to transform
; The recorded output uses two zero-initialised accumulators (z4, z5), each
; updated by an fcmla pair with rotations #0 and #90.
define <vscale x 16 x half> @complex_mul_v16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b) {
; CHECK-LABEL: complex_mul_v16f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z4.h, #0 // =0x0
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z5.d, z4.d
; CHECK-NEXT: fcmla z4.h, p0/m, z3.h, z1.h, #0
; CHECK-NEXT: fcmla z5.h, p0/m, z2.h, z0.h, #0
; CHECK-NEXT: fcmla z4.h, p0/m, z3.h, z1.h, #90
; CHECK-NEXT: fcmla z5.h, p0/m, z2.h, z0.h, #90
; CHECK-NEXT: mov z1.d, z4.d
; CHECK-NEXT: mov z0.d, z5.d
; CHECK-NEXT: ret
entry:
  ; Split each operand into its real (result 0) and imag (result 1) halves.
  %a.deinterleaved = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.vector.deinterleave2.nxv16f16(<vscale x 16 x half> %a)
  %a.real = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } %a.deinterleaved, 0
  %a.imag = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } %a.deinterleaved, 1
  %b.deinterleaved = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.vector.deinterleave2.nxv16f16(<vscale x 16 x half> %b)
  %b.real = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } %b.deinterleaved, 0
  %b.imag = extractvalue { <vscale x 8 x half>, <vscale x 8 x half> } %b.deinterleaved, 1
  ; Imaginary part (%2): b.imag * a.real + b.real * a.imag
  %0 = fmul fast <vscale x 8 x half> %b.imag, %a.real
  %1 = fmul fast <vscale x 8 x half> %b.real, %a.imag
  %2 = fadd fast <vscale x 8 x half> %1, %0
  ; Real part (%5): b.real * a.real - a.imag * b.imag
  %3 = fmul fast <vscale x 8 x half> %b.real, %a.real
  %4 = fmul fast <vscale x 8 x half> %a.imag, %b.imag
  %5 = fsub fast <vscale x 8 x half> %3, %4
  ; Recombine as interleave2(real, imag).
  %interleaved.vec = tail call <vscale x 16 x half> @llvm.vector.interleave2.nxv16f16(<vscale x 8 x half> %5, <vscale x 8 x half> %2)
  ret <vscale x 16 x half> %interleaved.vec
}

; Expected to transform
; The recorded output uses four zero-initialised accumulators (z24-z27), each
; updated by an fcmla pair with rotations #0 and #90.
define <vscale x 32 x half> @complex_mul_v32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %b) {
; CHECK-LABEL: complex_mul_v32f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z24.h, #0 // =0x0
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z25.d, z24.d
; CHECK-NEXT: mov z26.d, z24.d
; CHECK-NEXT: mov z27.d, z24.d
; CHECK-NEXT: fcmla z24.h, p0/m, z7.h, z3.h, #0
; CHECK-NEXT: fcmla z25.h, p0/m, z4.h, z0.h, #0
; CHECK-NEXT: fcmla z26.h, p0/m, z5.h, z1.h, #0
; CHECK-NEXT: fcmla z27.h, p0/m, z6.h, z2.h, #0
; CHECK-NEXT: fcmla z24.h, p0/m, z7.h, z3.h, #90
; CHECK-NEXT: fcmla z25.h, p0/m, z4.h, z0.h, #90
; CHECK-NEXT: fcmla z26.h, p0/m, z5.h, z1.h, #90
; CHECK-NEXT: fcmla z27.h, p0/m, z6.h, z2.h, #90
; CHECK-NEXT: mov z3.d, z24.d
; CHECK-NEXT: mov z0.d, z25.d
; CHECK-NEXT: mov z1.d, z26.d
; CHECK-NEXT: mov z2.d, z27.d
; CHECK-NEXT: ret
entry:
  ; Split each operand into its real (result 0) and imag (result 1) halves.
  %a.deinterleaved = tail call { <vscale x 16 x half>, <vscale x 16 x half> } @llvm.vector.deinterleave2.nxv32f16(<vscale x 32 x half> %a)
  %a.real = extractvalue { <vscale x 16 x half>, <vscale x 16 x half> } %a.deinterleaved, 0
  %a.imag = extractvalue { <vscale x 16 x half>, <vscale x 16 x half> } %a.deinterleaved, 1
  %b.deinterleaved = tail call { <vscale x 16 x half>, <vscale x 16 x half> } @llvm.vector.deinterleave2.nxv32f16(<vscale x 32 x half> %b)
  %b.real = extractvalue { <vscale x 16 x half>, <vscale x 16 x half> } %b.deinterleaved, 0
  %b.imag = extractvalue { <vscale x 16 x half>, <vscale x 16 x half> } %b.deinterleaved, 1
  ; Imaginary part (%2): b.imag * a.real + b.real * a.imag
  %0 = fmul fast <vscale x 16 x half> %b.imag, %a.real
  %1 = fmul fast <vscale x 16 x half> %b.real, %a.imag
  %2 = fadd fast <vscale x 16 x half> %1, %0
  ; Real part (%5): b.real * a.real - a.imag * b.imag
  %3 = fmul fast <vscale x 16 x half> %b.real, %a.real
  %4 = fmul fast <vscale x 16 x half> %a.imag, %b.imag
  %5 = fsub fast <vscale x 16 x half> %3, %4
  ; Recombine as interleave2(real, imag).
  %interleaved.vec = tail call <vscale x 32 x half> @llvm.vector.interleave2.nxv32f16(<vscale x 16 x half> %5, <vscale x 16 x half> %2)
  ret <vscale x 32 x half> %interleaved.vec
}

; Intrinsic declarations, one deinterleave2/interleave2 pair per vector width
; exercised above.
declare { <vscale x 2 x half>, <vscale x 2 x half> } @llvm.vector.deinterleave2.nxv4f16(<vscale x 4 x half>)
declare <vscale x 4 x half> @llvm.vector.interleave2.nxv4f16(<vscale x 2 x half>, <vscale x 2 x half>)

declare { <vscale x 4 x half>, <vscale x 4 x half> } @llvm.vector.deinterleave2.nxv8f16(<vscale x 8 x half>)
declare <vscale x 8 x half> @llvm.vector.interleave2.nxv8f16(<vscale x 4 x half>, <vscale x 4 x half>)

declare { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.vector.deinterleave2.nxv16f16(<vscale x 16 x half>)
declare <vscale x 16 x half> @llvm.vector.interleave2.nxv16f16(<vscale x 8 x half>, <vscale x 8 x half>)

declare { <vscale x 16 x half>, <vscale x 16 x half> } @llvm.vector.deinterleave2.nxv32f16(<vscale x 32 x half>)
declare <vscale x 32 x half> @llvm.vector.interleave2.nxv32f16(<vscale x 16 x half>, <vscale x 16 x half>)