; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s --mattr=+complxnum,+neon -o - | FileCheck %s

; Fixed-width <4 x double> tests for complex multiply-accumulate patterns built
; from fast-math fmul/fadd/fsub. Every input vector is split with shufflevector
; into its even lanes (real parts) and odd lanes (imaginary parts); the two
; result halves are interleaved again with mask <0, 2, 1, 3> before returning.

target triple = "aarch64"

; a * b + c
; Expected output: fcmla #0 / #90 pairs that accumulate into v4/v5.
define <4 x double> @mull_add(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
; CHECK-LABEL: mull_add:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcmla v4.2d, v0.2d, v2.2d, #0
; CHECK-NEXT:    fcmla v5.2d, v1.2d, v3.2d, #0
; CHECK-NEXT:    fcmla v4.2d, v0.2d, v2.2d, #90
; CHECK-NEXT:    fcmla v5.2d, v1.2d, v3.2d, #90
; CHECK-NEXT:    mov v0.16b, v4.16b
; CHECK-NEXT:    mov v1.16b, v5.16b
; CHECK-NEXT:    ret
entry:
  %strided.vec = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec28 = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 1, i32 3>
  %strided.vec30 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec31 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 1, i32 3>
  %0 = fmul fast <2 x double> %strided.vec31, %strided.vec
  %1 = fmul fast <2 x double> %strided.vec30, %strided.vec28
  %2 = fadd fast <2 x double> %0, %1
  %3 = fmul fast <2 x double> %strided.vec30, %strided.vec
  %strided.vec33 = shufflevector <4 x double> %c, <4 x double> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec34 = shufflevector <4 x double> %c, <4 x double> poison, <2 x i32> <i32 1, i32 3>
  %4 = fadd fast <2 x double> %strided.vec33, %3
  %5 = fmul fast <2 x double> %strided.vec31, %strided.vec28
  %6 = fsub fast <2 x double> %4, %5
  %7 = fadd fast <2 x double> %2, %strided.vec34
  %interleaved.vec = shufflevector <2 x double> %6, <2 x double> %7, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x double> %interleaved.vec
}

; a * b + c * d
; Expected output: both products accumulate into zero-initialised v16/v17 with
; fcmla #0 / #90.
define <4 x double> @mul_add_mull(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> %d) {
; CHECK-LABEL: mul_add_mull:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    movi v16.2d, #0000000000000000
; CHECK-NEXT:    movi v17.2d, #0000000000000000
; CHECK-NEXT:    fcmla v17.2d, v6.2d, v4.2d, #0
; CHECK-NEXT:    fcmla v16.2d, v7.2d, v5.2d, #0
; CHECK-NEXT:    fcmla v17.2d, v0.2d, v2.2d, #0
; CHECK-NEXT:    fcmla v16.2d, v1.2d, v3.2d, #0
; CHECK-NEXT:    fcmla v17.2d, v6.2d, v4.2d, #90
; CHECK-NEXT:    fcmla v16.2d, v7.2d, v5.2d, #90
; CHECK-NEXT:    fcmla v17.2d, v0.2d, v2.2d, #90
; CHECK-NEXT:    fcmla v16.2d, v1.2d, v3.2d, #90
; CHECK-NEXT:    mov v0.16b, v17.16b
; CHECK-NEXT:    mov v1.16b, v16.16b
; CHECK-NEXT:    ret
entry:
  %strided.vec = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec51 = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 1, i32 3>
  %strided.vec53 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec54 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 1, i32 3>
  %0 = fmul fast <2 x double> %strided.vec54, %strided.vec
  %1 = fmul fast <2 x double> %strided.vec53, %strided.vec51
  %2 = fmul fast <2 x double> %strided.vec53, %strided.vec
  %3 = fmul fast <2 x double> %strided.vec54, %strided.vec51
  %strided.vec56 = shufflevector <4 x double> %c, <4 x double> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec57 = shufflevector <4 x double> %c, <4 x double> poison, <2 x i32> <i32 1, i32 3>
  %strided.vec59 = shufflevector <4 x double> %d, <4 x double> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec60 = shufflevector <4 x double> %d, <4 x double> poison, <2 x i32> <i32 1, i32 3>
  %4 = fmul fast <2 x double> %strided.vec60, %strided.vec56
  %5 = fmul fast <2 x double> %strided.vec59, %strided.vec57
  %6 = fmul fast <2 x double> %strided.vec59, %strided.vec56
  %7 = fmul fast <2 x double> %strided.vec60, %strided.vec57
  %8 = fadd fast <2 x double> %7, %3
  %9 = fadd fast <2 x double> %6, %2
  %10 = fsub fast <2 x double> %9, %8
  %11 = fadd fast <2 x double> %0, %1
  %12 = fadd fast <2 x double> %11, %5
  %13 = fadd fast <2 x double> %12, %4
  %interleaved.vec = shufflevector <2 x double> %10, <2 x double> %13, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x double> %interleaved.vec
}

; a * b - c * d
; Expected output: a * b uses fcmla #0 / #90, the subtracted c * d term uses
; fcmla #180 / #270.
define <4 x double> @mul_sub_mull(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> %d) {
; CHECK-LABEL: mul_sub_mull:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    movi v16.2d, #0000000000000000
; CHECK-NEXT:    movi v17.2d, #0000000000000000
; CHECK-NEXT:    fcmla v17.2d, v6.2d, v4.2d, #270
; CHECK-NEXT:    fcmla v16.2d, v7.2d, v5.2d, #270
; CHECK-NEXT:    fcmla v17.2d, v0.2d, v2.2d, #0
; CHECK-NEXT:    fcmla v16.2d, v1.2d, v3.2d, #0
; CHECK-NEXT:    fcmla v17.2d, v6.2d, v4.2d, #180
; CHECK-NEXT:    fcmla v16.2d, v7.2d, v5.2d, #180
; CHECK-NEXT:    fcmla v17.2d, v0.2d, v2.2d, #90
; CHECK-NEXT:    fcmla v16.2d, v1.2d, v3.2d, #90
; CHECK-NEXT:    mov v0.16b, v17.16b
; CHECK-NEXT:    mov v1.16b, v16.16b
; CHECK-NEXT:    ret
entry:
  %strided.vec = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec53 = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 1, i32 3>
  %strided.vec55 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec56 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 1, i32 3>
  %0 = fmul fast <2 x double> %strided.vec56, %strided.vec
  %1 = fmul fast <2 x double> %strided.vec55, %strided.vec53
  %2 = fmul fast <2 x double> %strided.vec55, %strided.vec
  %3 = fmul fast <2 x double> %strided.vec56, %strided.vec53
  %strided.vec58 = shufflevector <4 x double> %c, <4 x double> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec59 = shufflevector <4 x double> %c, <4 x double> poison, <2 x i32> <i32 1, i32 3>
  %strided.vec61 = shufflevector <4 x double> %d, <4 x double> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec62 = shufflevector <4 x double> %d, <4 x double> poison, <2 x i32> <i32 1, i32 3>
  %4 = fmul fast <2 x double> %strided.vec62, %strided.vec59
  %5 = fmul fast <2 x double> %strided.vec61, %strided.vec58
  %6 = fadd fast <2 x double> %5, %3
  %7 = fadd fast <2 x double> %4, %2
  %8 = fsub fast <2 x double> %7, %6
  %9 = fmul fast <2 x double> %strided.vec61, %strided.vec59
  %10 = fmul fast <2 x double> %strided.vec62, %strided.vec58
  %11 = fadd fast <2 x double> %10, %9
  %12 = fadd fast <2 x double> %0, %1
  %13 = fsub fast <2 x double> %12, %11
  %interleaved.vec = shufflevector <2 x double> %8, <2 x double> %13, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x double> %interleaved.vec
}

; a * b + conj(c) * d
; Expected output: a * b uses fcmla #0 / #90, the conjugated term uses
; fcmla #0 / #270.
define <4 x double> @mul_conj_mull(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> %d) {
; CHECK-LABEL: mul_conj_mull:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    movi v16.2d, #0000000000000000
; CHECK-NEXT:    movi v17.2d, #0000000000000000
; CHECK-NEXT:    fcmla v17.2d, v0.2d, v2.2d, #0
; CHECK-NEXT:    fcmla v16.2d, v1.2d, v3.2d, #0
; CHECK-NEXT:    fcmla v17.2d, v0.2d, v2.2d, #90
; CHECK-NEXT:    fcmla v16.2d, v1.2d, v3.2d, #90
; CHECK-NEXT:    fcmla v17.2d, v4.2d, v6.2d, #0
; CHECK-NEXT:    fcmla v16.2d, v5.2d, v7.2d, #0
; CHECK-NEXT:    fcmla v17.2d, v4.2d, v6.2d, #270
; CHECK-NEXT:    fcmla v16.2d, v5.2d, v7.2d, #270
; CHECK-NEXT:    mov v0.16b, v17.16b
; CHECK-NEXT:    mov v1.16b, v16.16b
; CHECK-NEXT:    ret
entry:
  %strided.vec = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec59 = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 1, i32 3>
  %strided.vec61 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec62 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 1, i32 3>
  %0 = fmul fast <2 x double> %strided.vec62, %strided.vec
  %1 = fmul fast <2 x double> %strided.vec61, %strided.vec59
  %2 = fmul fast <2 x double> %strided.vec61, %strided.vec
  %strided.vec64 = shufflevector <4 x double> %c, <4 x double> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec65 = shufflevector <4 x double> %c, <4 x double> poison, <2 x i32> <i32 1, i32 3>
  %strided.vec67 = shufflevector <4 x double> %d, <4 x double> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec68 = shufflevector <4 x double> %d, <4 x double> poison, <2 x i32> <i32 1, i32 3>
  %3 = fmul fast <2 x double> %strided.vec68, %strided.vec64
  %4 = fmul fast <2 x double> %strided.vec67, %strided.vec64
  %5 = fmul fast <2 x double> %strided.vec68, %strided.vec65
  %6 = fmul fast <2 x double> %strided.vec62, %strided.vec59
  %7 = fsub fast <2 x double> %2, %6
  %8 = fadd fast <2 x double> %7, %4
  %9 = fadd fast <2 x double> %8, %5
  %10 = fadd fast <2 x double> %0, %1
  %11 = fmul fast <2 x double> %strided.vec67, %strided.vec65
  %12 = fsub fast <2 x double> %10, %11
  %13 = fadd fast <2 x double> %12, %3
  %interleaved.vec = shufflevector <2 x double> %9, <2 x double> %13, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x double> %interleaved.vec
}

; a * b + 1i * c * d
; The IR forms the full product a * b (%0..%3) and adds c * d multiplied by 1i:
;   real = Re(a * b) - Im(c * d)
;   imag = Im(a * b) + Re(c * d)
; The expected output below contains no fcmla; it is built from zip1/zip2,
; fmul, fmla, fmls and fneg.
define <4 x double> @mul_add_rot_mull(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> %d) {
; CHECK-LABEL: mul_add_rot_mull:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    zip2 v16.2d, v2.2d, v3.2d
; CHECK-NEXT:    zip2 v17.2d, v0.2d, v1.2d
; CHECK-NEXT:    zip1 v2.2d, v2.2d, v3.2d
; CHECK-NEXT:    zip2 v18.2d, v4.2d, v5.2d
; CHECK-NEXT:    zip1 v19.2d, v6.2d, v7.2d
; CHECK-NEXT:    zip1 v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    zip1 v1.2d, v4.2d, v5.2d
; CHECK-NEXT:    zip2 v5.2d, v6.2d, v7.2d
; CHECK-NEXT:    fmul v3.2d, v16.2d, v17.2d
; CHECK-NEXT:    fmul v4.2d, v2.2d, v17.2d
; CHECK-NEXT:    fmla v3.2d, v18.2d, v19.2d
; CHECK-NEXT:    fmla v4.2d, v0.2d, v16.2d
; CHECK-NEXT:    fmla v3.2d, v1.2d, v5.2d
; CHECK-NEXT:    fmla v4.2d, v1.2d, v19.2d
; CHECK-NEXT:    fneg v3.2d, v3.2d
; CHECK-NEXT:    fmls v4.2d, v18.2d, v5.2d
; CHECK-NEXT:    fmla v3.2d, v0.2d, v2.2d
; CHECK-NEXT:    zip1 v0.2d, v3.2d, v4.2d
; CHECK-NEXT:    zip2 v1.2d, v3.2d, v4.2d
; CHECK-NEXT:    ret
entry:
  %strided.vec = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec79 = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 1, i32 3>
  %strided.vec81 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec82 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 1, i32 3>
  %0 = fmul fast <2 x double> %strided.vec82, %strided.vec
  %1 = fmul fast <2 x double> %strided.vec81, %strided.vec79
  %2 = fmul fast <2 x double> %strided.vec81, %strided.vec
  %3 = fmul fast <2 x double> %strided.vec82, %strided.vec79
  %strided.vec84 = shufflevector <4 x double> %c, <4 x double> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec85 = shufflevector <4 x double> %c, <4 x double> poison, <2 x i32> <i32 1, i32 3>
  %strided.vec87 = shufflevector <4 x double> %d, <4 x double> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec88 = shufflevector <4 x double> %d, <4 x double> poison, <2 x i32> <i32 1, i32 3>
  %4 = fmul fast <2 x double> %strided.vec87, %strided.vec84
  %5 = fmul fast <2 x double> %strided.vec87, %strided.vec85
  %6 = fmul fast <2 x double> %strided.vec88, %strided.vec84
  %7 = fadd fast <2 x double> %5, %3
  %8 = fadd fast <2 x double> %7, %6
  %9 = fsub fast <2 x double> %2, %8
  %10 = fadd fast <2 x double> %0, %1
  %11 = fadd fast <2 x double> %10, %4
  %12 = fmul fast <2 x double> %strided.vec88, %strided.vec85
  %13 = fsub fast <2 x double> %11, %12
  %interleaved.vec = shufflevector <2 x double> %9, <2 x double> %13, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x double> %interleaved.vec
}