; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s --mattr=+sve -o - | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

; a * b + c
define <vscale x 4 x double> @mull_add(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c) {
; CHECK-LABEL: mull_add:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uzp2 z6.d, z0.d, z1.d
; CHECK-NEXT:    uzp1 z0.d, z0.d, z1.d
; CHECK-NEXT:    uzp2 z1.d, z2.d, z3.d
; CHECK-NEXT:    uzp1 z2.d, z2.d, z3.d
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmul z7.d, z0.d, z1.d
; CHECK-NEXT:    fmul z1.d, z6.d, z1.d
; CHECK-NEXT:    movprfx z3, z7
; CHECK-NEXT:    fmla z3.d, p0/m, z6.d, z2.d
; CHECK-NEXT:    fnmsb z0.d, p0/m, z2.d, z1.d
; CHECK-NEXT:    uzp2 z1.d, z4.d, z5.d
; CHECK-NEXT:    uzp1 z2.d, z4.d, z5.d
; CHECK-NEXT:    fadd z2.d, z2.d, z0.d
; CHECK-NEXT:    fadd z1.d, z3.d, z1.d
; CHECK-NEXT:    zip1 z0.d, z2.d, z1.d
; CHECK-NEXT:    zip2 z1.d, z2.d, z1.d
; CHECK-NEXT:    ret
entry:
  %strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
  %0 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 0
  %1 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 1
  %strided.vec29 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %b)
  %2 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec29, 0
  %3 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec29, 1
  %4 = fmul contract <vscale x 2 x double> %0, %3
  %5 = fmul contract <vscale x 2 x double> %1, %2
  %6 = fadd contract <vscale x 2 x double> %5, %4
  %7 = fmul contract <vscale x 2 x double> %0, %2
  %8 = fmul contract <vscale x 2 x double> %1, %3
  %9 = fsub contract <vscale x 2 x double> %7, %8
  %strided.vec31 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %c)
  %10 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec31, 0
  %11 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec31, 1
  %12 = fadd contract <vscale x 2 x double> %10, %9
  %13 = fadd contract <vscale x 2 x double> %6, %11
  %interleaved.vec = tail call <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double> %12, <vscale x 2 x double> %13)
  ret <vscale x 4 x double> %interleaved.vec
}

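; In these tests each <vscale x 4 x double> operand holds interleaved complex
; doubles: llvm.vector.deinterleave2 splits it into real and imaginary halves
; and llvm.vector.interleave2 rebuilds the result. Unlike @mull_add above, the
; two-product tests below are expected (per the autogenerated CHECK lines) to
; lower each complex multiply to a pair of FCMLA instructions.
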
; a * b + c * d
define <vscale x 4 x double> @mul_add_mull(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
; CHECK-LABEL: mul_add_mull:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov z24.d, #0 // =0x0
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z25.d, z24.d
; CHECK-NEXT:    mov z26.d, z24.d
; CHECK-NEXT:    mov z27.d, z24.d
; CHECK-NEXT:    fcmla z24.d, p0/m, z7.d, z5.d, #0
; CHECK-NEXT:    fcmla z25.d, p0/m, z2.d, z0.d, #0
; CHECK-NEXT:    fcmla z26.d, p0/m, z3.d, z1.d, #0
; CHECK-NEXT:    fcmla z27.d, p0/m, z6.d, z4.d, #0
; CHECK-NEXT:    fcmla z24.d, p0/m, z7.d, z5.d, #90
; CHECK-NEXT:    fcmla z25.d, p0/m, z2.d, z0.d, #90
; CHECK-NEXT:    fcmla z26.d, p0/m, z3.d, z1.d, #90
; CHECK-NEXT:    fcmla z27.d, p0/m, z6.d, z4.d, #90
; CHECK-NEXT:    fadd z1.d, z26.d, z24.d
; CHECK-NEXT:    fadd z0.d, z25.d, z27.d
; CHECK-NEXT:    ret
entry:
  %strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
  %0 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 0
  %1 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 1
  %strided.vec52 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %b)
  %2 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec52, 0
  %3 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec52, 1
  %4 = fmul contract <vscale x 2 x double> %0, %3
  %5 = fmul contract <vscale x 2 x double> %1, %2
  %6 = fadd contract <vscale x 2 x double> %5, %4
  %7 = fmul contract <vscale x 2 x double> %0, %2
  %8 = fmul contract <vscale x 2 x double> %1, %3
  %9 = fsub contract <vscale x 2 x double> %7, %8
  %strided.vec54 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %c)
  %10 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec54, 0
  %11 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec54, 1
  %strided.vec56 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %d)
  %12 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec56, 0
  %13 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec56, 1
  %14 = fmul contract <vscale x 2 x double> %10, %13
  %15 = fmul contract <vscale x 2 x double> %11, %12
  %16 = fadd contract <vscale x 2 x double> %15, %14
  %17 = fmul contract <vscale x 2 x double> %10, %12
  %18 = fmul contract <vscale x 2 x double> %11, %13
  %19 = fsub contract <vscale x 2 x double> %17, %18
  %20 = fadd contract <vscale x 2 x double> %9, %19
  %21 = fadd contract <vscale x 2 x double> %6, %16
  %interleaved.vec = tail call <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double> %20, <vscale x 2 x double> %21)
  ret <vscale x 4 x double> %interleaved.vec
}

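; Same computation as @mul_add_mull except that the two products are
; subtracted; the expected code differs only in the trailing fsub instructions.
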
; a * b - c * d
define <vscale x 4 x double> @mul_sub_mull(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
; CHECK-LABEL: mul_sub_mull:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov z24.d, #0 // =0x0
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z25.d, z24.d
; CHECK-NEXT:    mov z26.d, z24.d
; CHECK-NEXT:    mov z27.d, z24.d
; CHECK-NEXT:    fcmla z24.d, p0/m, z7.d, z5.d, #0
; CHECK-NEXT:    fcmla z25.d, p0/m, z2.d, z0.d, #0
; CHECK-NEXT:    fcmla z26.d, p0/m, z3.d, z1.d, #0
; CHECK-NEXT:    fcmla z27.d, p0/m, z6.d, z4.d, #0
; CHECK-NEXT:    fcmla z24.d, p0/m, z7.d, z5.d, #90
; CHECK-NEXT:    fcmla z25.d, p0/m, z2.d, z0.d, #90
; CHECK-NEXT:    fcmla z26.d, p0/m, z3.d, z1.d, #90
; CHECK-NEXT:    fcmla z27.d, p0/m, z6.d, z4.d, #90
; CHECK-NEXT:    fsub z1.d, z26.d, z24.d
; CHECK-NEXT:    fsub z0.d, z25.d, z27.d
; CHECK-NEXT:    ret
entry:
  %strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
  %0 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 0
  %1 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 1
  %strided.vec52 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %b)
  %2 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec52, 0
  %3 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec52, 1
  %4 = fmul contract <vscale x 2 x double> %0, %3
  %5 = fmul contract <vscale x 2 x double> %1, %2
  %6 = fadd contract <vscale x 2 x double> %5, %4
  %7 = fmul contract <vscale x 2 x double> %0, %2
  %8 = fmul contract <vscale x 2 x double> %1, %3
  %9 = fsub contract <vscale x 2 x double> %7, %8
  %strided.vec54 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %c)
  %10 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec54, 0
  %11 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec54, 1
  %strided.vec56 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %d)
  %12 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec56, 0
  %13 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec56, 1
  %14 = fmul contract <vscale x 2 x double> %10, %13
  %15 = fmul contract <vscale x 2 x double> %11, %12
  %16 = fadd contract <vscale x 2 x double> %15, %14
  %17 = fmul contract <vscale x 2 x double> %10, %12
  %18 = fmul contract <vscale x 2 x double> %11, %13
  %19 = fsub contract <vscale x 2 x double> %17, %18
  %20 = fsub contract <vscale x 2 x double> %9, %19
  %21 = fsub contract <vscale x 2 x double> %6, %16
  %interleaved.vec = tail call <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double> %20, <vscale x 2 x double> %21)
  ret <vscale x 4 x double> %interleaved.vec
}

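; The second product takes conj(c); in the expected code this shows up as
; FCMLA rotations #0/#270 (instead of #0/#90) on the c * d partial products.
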
; a * b + conj(c) * d
define <vscale x 4 x double> @mul_conj_mull(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
; CHECK-LABEL: mul_conj_mull:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov z24.d, #0 // =0x0
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z25.d, z24.d
; CHECK-NEXT:    mov z26.d, z24.d
; CHECK-NEXT:    mov z27.d, z24.d
; CHECK-NEXT:    fcmla z24.d, p0/m, z5.d, z7.d, #0
; CHECK-NEXT:    fcmla z25.d, p0/m, z2.d, z0.d, #0
; CHECK-NEXT:    fcmla z26.d, p0/m, z3.d, z1.d, #0
; CHECK-NEXT:    fcmla z27.d, p0/m, z4.d, z6.d, #0
; CHECK-NEXT:    fcmla z24.d, p0/m, z5.d, z7.d, #270
; CHECK-NEXT:    fcmla z25.d, p0/m, z2.d, z0.d, #90
; CHECK-NEXT:    fcmla z26.d, p0/m, z3.d, z1.d, #90
; CHECK-NEXT:    fcmla z27.d, p0/m, z4.d, z6.d, #270
; CHECK-NEXT:    fadd z1.d, z26.d, z24.d
; CHECK-NEXT:    fadd z0.d, z25.d, z27.d
; CHECK-NEXT:    ret
entry:
  %strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
  %0 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 0
  %1 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 1
  %strided.vec60 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %b)
  %2 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec60, 0
  %3 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec60, 1
  %4 = fmul contract <vscale x 2 x double> %0, %3
  %5 = fmul contract <vscale x 2 x double> %1, %2
  %6 = fadd contract <vscale x 2 x double> %5, %4
  %7 = fmul contract <vscale x 2 x double> %0, %2
  %8 = fmul contract <vscale x 2 x double> %1, %3
  %9 = fsub contract <vscale x 2 x double> %7, %8
  %strided.vec62 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %c)
  %10 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec62, 0
  %11 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec62, 1
  %strided.vec64 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %d)
  %12 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec64, 0
  %13 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec64, 1
  %14 = fmul contract <vscale x 2 x double> %10, %13
  %15 = fmul contract <vscale x 2 x double> %11, %12
  %16 = fsub contract <vscale x 2 x double> %14, %15
  %17 = fmul contract <vscale x 2 x double> %10, %12
  %18 = fmul contract <vscale x 2 x double> %11, %13
  %19 = fadd contract <vscale x 2 x double> %17, %18
  %20 = fadd contract <vscale x 2 x double> %9, %19
  %21 = fadd contract <vscale x 2 x double> %6, %16
  %interleaved.vec = tail call <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double> %20, <vscale x 2 x double> %21)
  ret <vscale x 4 x double> %interleaved.vec
}

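; The multiplication by 1i is expressed below with llvm.copysign on
; zeroinitializer rather than a plain fneg. As the expected code shows, this
; form is not matched to FCMLA and is lowered with uzp1/uzp2 shuffles and
; fmul/fmla/fnmls arithmetic instead.
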
; a * b + 1i * c * d
define <vscale x 4 x double> @mul_add_rot_mull(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
; CHECK-LABEL: mul_add_rot_mull:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    uzp2 z24.d, z4.d, z5.d
; CHECK-NEXT:    mov z25.d, #0 // =0x0
; CHECK-NEXT:    uzp1 z4.d, z4.d, z5.d
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z26.d, z24.d
; CHECK-NEXT:    and z25.d, z25.d, #0x7fffffffffffffff
; CHECK-NEXT:    and z26.d, z26.d, #0x8000000000000000
; CHECK-NEXT:    orr z5.d, z25.d, z26.d
; CHECK-NEXT:    fadd z5.d, z4.d, z5.d
; CHECK-NEXT:    and z4.d, z4.d, #0x8000000000000000
; CHECK-NEXT:    orr z4.d, z25.d, z4.d
; CHECK-NEXT:    uzp2 z25.d, z0.d, z1.d
; CHECK-NEXT:    uzp1 z0.d, z0.d, z1.d
; CHECK-NEXT:    uzp2 z1.d, z2.d, z3.d
; CHECK-NEXT:    uzp1 z2.d, z2.d, z3.d
; CHECK-NEXT:    fsub z4.d, z4.d, z24.d
; CHECK-NEXT:    uzp2 z24.d, z6.d, z7.d
; CHECK-NEXT:    uzp1 z6.d, z6.d, z7.d
; CHECK-NEXT:    fmul z26.d, z0.d, z1.d
; CHECK-NEXT:    fmul z1.d, z25.d, z1.d
; CHECK-NEXT:    fmul z3.d, z4.d, z24.d
; CHECK-NEXT:    fmul z24.d, z5.d, z24.d
; CHECK-NEXT:    movprfx z7, z26
; CHECK-NEXT:    fmla z7.d, p0/m, z25.d, z2.d
; CHECK-NEXT:    fnmsb z0.d, p0/m, z2.d, z1.d
; CHECK-NEXT:    movprfx z1, z3
; CHECK-NEXT:    fmla z1.d, p0/m, z6.d, z5.d
; CHECK-NEXT:    movprfx z2, z24
; CHECK-NEXT:    fnmls z2.d, p0/m, z4.d, z6.d
; CHECK-NEXT:    fadd z2.d, z0.d, z2.d
; CHECK-NEXT:    fadd z1.d, z7.d, z1.d
; CHECK-NEXT:    zip1 z0.d, z2.d, z1.d
; CHECK-NEXT:    zip2 z1.d, z2.d, z1.d
; CHECK-NEXT:    ret
entry:
  %strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
  %0 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 0
  %1 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec, 1
  %strided.vec78 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %b)
  %2 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec78, 0
  %3 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec78, 1
  %4 = fmul contract <vscale x 2 x double> %0, %3
  %5 = fmul contract <vscale x 2 x double> %1, %2
  %6 = fadd contract <vscale x 2 x double> %5, %4
  %7 = fmul contract <vscale x 2 x double> %0, %2
  %8 = fmul contract <vscale x 2 x double> %1, %3
  %9 = fsub contract <vscale x 2 x double> %7, %8
  %strided.vec80 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %c)
  %10 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec80, 0
  %11 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec80, 1
  %12 = tail call contract <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> %11)
  %13 = fadd contract <vscale x 2 x double> %10, %12
  %14 = tail call contract <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x double> %10)
  %15 = fsub contract <vscale x 2 x double> %14, %11
  %strided.vec82 = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %d)
  %16 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec82, 0
  %17 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %strided.vec82, 1
  %18 = fmul contract <vscale x 2 x double> %15, %17
  %19 = fmul contract <vscale x 2 x double> %16, %13
  %20 = fadd contract <vscale x 2 x double> %19, %18
  %21 = fmul contract <vscale x 2 x double> %15, %16
  %22 = fmul contract <vscale x 2 x double> %13, %17
  %23 = fsub contract <vscale x 2 x double> %21, %22
  %24 = fadd contract <vscale x 2 x double> %9, %23
  %25 = fadd contract <vscale x 2 x double> %6, %20
  %interleaved.vec = tail call <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double> %24, <vscale x 2 x double> %25)
  ret <vscale x 4 x double> %interleaved.vec
}

declare { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double>)
declare <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)