; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s --mattr=+sve2 -o - | FileCheck %s

; Complex multiplication of interleaved i16 values held in scalable vectors,
; compiled with +sve2 for the aarch64 triple.
;
; Every function below has the same shape:
;   1. %a and %b are each split in two with llvm.vector.deinterleave2; result 0
;      is used as the real parts and result 1 as the imaginary parts.
;   2. The product is formed lane-wise:
;        real = b.real * a.real - a.imag * b.imag
;        imag = b.real * a.imag + b.imag * a.real
;   3. {real, imag} are merged back with llvm.vector.interleave2 and returned.
;
; Only the vector width differs between the functions. The assertions record
; whether the backend emits cmla (rotations #0 and #90) for that width. They
; are autogenerated (see the note on the first line), so regenerate them with
; the script rather than editing them by hand.

target triple = "aarch64"

; Expected to not transform as the type's minimum size is less than 128 bits.
; <vscale x 4 x i16> has a minimum size of 4 * 16 = 64 bits. The asserted
; output contains no cmla: it is built from uunpk*/uzp*, mul/mla/msb and zip*.
define <vscale x 4 x i16> @complex_mul_v4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b) {
; CHECK-LABEL: complex_mul_v4i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uunpkhi z2.d, z0.s
; CHECK-NEXT:    uunpklo z0.d, z0.s
; CHECK-NEXT:    uunpkhi z3.d, z1.s
; CHECK-NEXT:    uunpklo z1.d, z1.s
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    uzp1 z4.d, z0.d, z2.d
; CHECK-NEXT:    uzp2 z0.d, z0.d, z2.d
; CHECK-NEXT:    uzp1 z2.d, z1.d, z3.d
; CHECK-NEXT:    uzp2 z1.d, z1.d, z3.d
; CHECK-NEXT:    mul z5.d, z2.d, z0.d
; CHECK-NEXT:    mul z2.d, z2.d, z4.d
; CHECK-NEXT:    movprfx z3, z5
; CHECK-NEXT:    mla z3.d, p0/m, z1.d, z4.d
; CHECK-NEXT:    msb z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT:    zip2 z1.d, z0.d, z3.d
; CHECK-NEXT:    zip1 z0.d, z0.d, z3.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
entry:
  ; Split both operands into their real / imaginary halves.
  %a.deinterleaved = tail call { <vscale x 2 x i16>, <vscale x 2 x i16> } @llvm.vector.deinterleave2.nxv4i16(<vscale x 4 x i16> %a)
  %a.real = extractvalue { <vscale x 2 x i16>, <vscale x 2 x i16> } %a.deinterleaved, 0
  %a.imag = extractvalue { <vscale x 2 x i16>, <vscale x 2 x i16> } %a.deinterleaved, 1
  %b.deinterleaved = tail call { <vscale x 2 x i16>, <vscale x 2 x i16> } @llvm.vector.deinterleave2.nxv4i16(<vscale x 4 x i16> %b)
  %b.real = extractvalue { <vscale x 2 x i16>, <vscale x 2 x i16> } %b.deinterleaved, 0
  %b.imag = extractvalue { <vscale x 2 x i16>, <vscale x 2 x i16> } %b.deinterleaved, 1
  ; %2 = imaginary part: b.real * a.imag + b.imag * a.real
  %0 = mul <vscale x 2 x i16> %b.imag, %a.real
  %1 = mul <vscale x 2 x i16> %b.real, %a.imag
  %2 = add <vscale x 2 x i16> %1, %0
  ; %5 = real part: b.real * a.real - a.imag * b.imag
  %3 = mul <vscale x 2 x i16> %b.real, %a.real
  %4 = mul <vscale x 2 x i16> %a.imag, %b.imag
  %5 = sub <vscale x 2 x i16> %3, %4
  ; Re-interleave as {real, imag}.
  %interleaved.vec = tail call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> %5, <vscale x 2 x i16> %2)
  ret <vscale x 4 x i16> %interleaved.vec
}

; Expected to transform
; <vscale x 8 x i16> has a minimum size of 8 * 16 = 128 bits. The asserted
; output is a zeroed accumulator (z2) updated by one cmla with rotation #0 and
; one with rotation #90, then moved into z0.
define <vscale x 8 x i16> @complex_mul_v8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: complex_mul_v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov z2.h, #0 // =0x0
; CHECK-NEXT:    cmla z2.h, z1.h, z0.h, #0
; CHECK-NEXT:    cmla z2.h, z1.h, z0.h, #90
; CHECK-NEXT:    mov z0.d, z2.d
; CHECK-NEXT:    ret
entry:
  ; Split both operands into their real / imaginary halves.
  %a.deinterleaved = tail call { <vscale x 4 x i16>, <vscale x 4 x i16> } @llvm.vector.deinterleave2.nxv8i16(<vscale x 8 x i16> %a)
  %a.real = extractvalue { <vscale x 4 x i16>, <vscale x 4 x i16> } %a.deinterleaved, 0
  %a.imag = extractvalue { <vscale x 4 x i16>, <vscale x 4 x i16> } %a.deinterleaved, 1
  %b.deinterleaved = tail call { <vscale x 4 x i16>, <vscale x 4 x i16> } @llvm.vector.deinterleave2.nxv8i16(<vscale x 8 x i16> %b)
  %b.real = extractvalue { <vscale x 4 x i16>, <vscale x 4 x i16> } %b.deinterleaved, 0
  %b.imag = extractvalue { <vscale x 4 x i16>, <vscale x 4 x i16> } %b.deinterleaved, 1
  ; %2 = imaginary part: b.real * a.imag + b.imag * a.real
  %0 = mul <vscale x 4 x i16> %b.imag, %a.real
  %1 = mul <vscale x 4 x i16> %b.real, %a.imag
  %2 = add <vscale x 4 x i16> %1, %0
  ; %5 = real part: b.real * a.real - a.imag * b.imag
  %3 = mul <vscale x 4 x i16> %b.real, %a.real
  %4 = mul <vscale x 4 x i16> %a.imag, %b.imag
  %5 = sub <vscale x 4 x i16> %3, %4
  ; Re-interleave as {real, imag}.
  %interleaved.vec = tail call <vscale x 8 x i16> @llvm.vector.interleave2.nxv8i16(<vscale x 4 x i16> %5, <vscale x 4 x i16> %2)
  ret <vscale x 8 x i16> %interleaved.vec
}

; Expected to transform
; <vscale x 16 x i16> has a minimum size of 16 * 16 = 256 bits. The asserted
; output uses two zeroed accumulators (z4, z5), each updated by a cmla #0 /
; cmla #90 pair, with results moved into z1 and z0.
define <vscale x 16 x i16> @complex_mul_v16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b) {
; CHECK-LABEL: complex_mul_v16i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov z4.h, #0 // =0x0
; CHECK-NEXT:    mov z5.d, z4.d
; CHECK-NEXT:    cmla z4.h, z3.h, z1.h, #0
; CHECK-NEXT:    cmla z5.h, z2.h, z0.h, #0
; CHECK-NEXT:    cmla z4.h, z3.h, z1.h, #90
; CHECK-NEXT:    cmla z5.h, z2.h, z0.h, #90
; CHECK-NEXT:    mov z1.d, z4.d
; CHECK-NEXT:    mov z0.d, z5.d
; CHECK-NEXT:    ret
entry:
  ; Split both operands into their real / imaginary halves.
  %a.deinterleaved = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.vector.deinterleave2.nxv16i16(<vscale x 16 x i16> %a)
  %a.real = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } %a.deinterleaved, 0
  %a.imag = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } %a.deinterleaved, 1
  %b.deinterleaved = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.vector.deinterleave2.nxv16i16(<vscale x 16 x i16> %b)
  %b.real = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } %b.deinterleaved, 0
  %b.imag = extractvalue { <vscale x 8 x i16>, <vscale x 8 x i16> } %b.deinterleaved, 1
  ; %2 = imaginary part: b.real * a.imag + b.imag * a.real
  %0 = mul <vscale x 8 x i16> %b.imag, %a.real
  %1 = mul <vscale x 8 x i16> %b.real, %a.imag
  %2 = add <vscale x 8 x i16> %1, %0
  ; %5 = real part: b.real * a.real - a.imag * b.imag
  %3 = mul <vscale x 8 x i16> %b.real, %a.real
  %4 = mul <vscale x 8 x i16> %a.imag, %b.imag
  %5 = sub <vscale x 8 x i16> %3, %4
  ; Re-interleave as {real, imag}.
  %interleaved.vec = tail call <vscale x 16 x i16> @llvm.vector.interleave2.nxv16i16(<vscale x 8 x i16> %5, <vscale x 8 x i16> %2)
  ret <vscale x 16 x i16> %interleaved.vec
}

; Expected to transform
; <vscale x 32 x i16> has a minimum size of 32 * 16 = 512 bits. The asserted
; output uses four zeroed accumulators (z24-z27), each updated by a cmla #0 /
; cmla #90 pair, with results moved into z0-z3.
define <vscale x 32 x i16> @complex_mul_v32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b) {
; CHECK-LABEL: complex_mul_v32i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov z24.h, #0 // =0x0
; CHECK-NEXT:    mov z25.d, z24.d
; CHECK-NEXT:    mov z26.d, z24.d
; CHECK-NEXT:    mov z27.d, z24.d
; CHECK-NEXT:    cmla z24.h, z7.h, z3.h, #0
; CHECK-NEXT:    cmla z25.h, z4.h, z0.h, #0
; CHECK-NEXT:    cmla z26.h, z5.h, z1.h, #0
; CHECK-NEXT:    cmla z27.h, z6.h, z2.h, #0
; CHECK-NEXT:    cmla z24.h, z7.h, z3.h, #90
; CHECK-NEXT:    cmla z25.h, z4.h, z0.h, #90
; CHECK-NEXT:    cmla z26.h, z5.h, z1.h, #90
; CHECK-NEXT:    cmla z27.h, z6.h, z2.h, #90
; CHECK-NEXT:    mov z3.d, z24.d
; CHECK-NEXT:    mov z0.d, z25.d
; CHECK-NEXT:    mov z1.d, z26.d
; CHECK-NEXT:    mov z2.d, z27.d
; CHECK-NEXT:    ret
entry:
  ; Split both operands into their real / imaginary halves.
  %a.deinterleaved = tail call { <vscale x 16 x i16>, <vscale x 16 x i16> } @llvm.vector.deinterleave2.nxv32i16(<vscale x 32 x i16> %a)
  %a.real = extractvalue { <vscale x 16 x i16>, <vscale x 16 x i16> } %a.deinterleaved, 0
  %a.imag = extractvalue { <vscale x 16 x i16>, <vscale x 16 x i16> } %a.deinterleaved, 1
  %b.deinterleaved = tail call { <vscale x 16 x i16>, <vscale x 16 x i16> } @llvm.vector.deinterleave2.nxv32i16(<vscale x 32 x i16> %b)
  %b.real = extractvalue { <vscale x 16 x i16>, <vscale x 16 x i16> } %b.deinterleaved, 0
  %b.imag = extractvalue { <vscale x 16 x i16>, <vscale x 16 x i16> } %b.deinterleaved, 1
  ; %2 = imaginary part: b.real * a.imag + b.imag * a.real
  %0 = mul <vscale x 16 x i16> %b.imag, %a.real
  %1 = mul <vscale x 16 x i16> %b.real, %a.imag
  %2 = add <vscale x 16 x i16> %1, %0
  ; %5 = real part: b.real * a.real - a.imag * b.imag
  %3 = mul <vscale x 16 x i16> %b.real, %a.real
  %4 = mul <vscale x 16 x i16> %a.imag, %b.imag
  %5 = sub <vscale x 16 x i16> %3, %4
  ; Re-interleave as {real, imag}.
  %interleaved.vec = tail call <vscale x 32 x i16> @llvm.vector.interleave2.nxv32i16(<vscale x 16 x i16> %5, <vscale x 16 x i16> %2)
  ret <vscale x 32 x i16> %interleaved.vec
}

; Declarations of the deinterleave2 / interleave2 intrinsics called above, one
; pair per vector width.
declare { <vscale x 2 x i16>, <vscale x 2 x i16> } @llvm.vector.deinterleave2.nxv4i16(<vscale x 4 x i16>)
declare <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16>, <vscale x 2 x i16>)

declare { <vscale x 4 x i16>, <vscale x 4 x i16> } @llvm.vector.deinterleave2.nxv8i16(<vscale x 8 x i16>)
declare <vscale x 8 x i16> @llvm.vector.interleave2.nxv8i16(<vscale x 4 x i16>, <vscale x 4 x i16>)

declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.vector.deinterleave2.nxv16i16(<vscale x 16 x i16>)
declare <vscale x 16 x i16> @llvm.vector.interleave2.nxv16i16(<vscale x 8 x i16>, <vscale x 8 x i16>)

declare { <vscale x 16 x i16>, <vscale x 16 x i16> } @llvm.vector.deinterleave2.nxv32i16(<vscale x 32 x i16>)
declare <vscale x 32 x i16> @llvm.vector.interleave2.nxv32i16(<vscale x 16 x i16>, <vscale x 16 x i16>)