; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16 -o - | FileCheck %s

; Overview
; --------
; Every <4 x float> argument is handled as two interleaved complex numbers:
; a shufflevector with mask <0, 2> extracts the real lanes, one with mask
; <1, 3> extracts the imaginary lanes, and the final shufflevector with mask
; <0, 2, 1, 3> re-interleaves the <2 x float> real and imaginary results.
;
; "Expected to transform" marks functions whose assertions contain
; fcmla/fcadd instructions. "Expected to not transform" marks functions whose
; assertions contain no fcmla/fcadd and instead keep the ext/zip shuffles with
; scalar-style fmul/fmla (presumably because the complex deinterleaving
; transformation rejects the pattern - confirm against the pass).

target triple = "aarch64"

; (a * b) * c
; %5 / %4 are the real / imaginary lanes of a * b; %11 / %8 are the real /
; imaginary lanes of that product multiplied by c.
; Expected to transform
define <4 x float> @mul_mul(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: mul_mul:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v4.2d, #0000000000000000
; CHECK-NEXT: movi v3.2d, #0000000000000000
; CHECK-NEXT: fcmla v4.4s, v1.4s, v0.4s, #0
; CHECK-NEXT: fcmla v4.4s, v1.4s, v0.4s, #90
; CHECK-NEXT: fcmla v3.4s, v2.4s, v4.4s, #0
; CHECK-NEXT: fcmla v3.4s, v2.4s, v4.4s, #90
; CHECK-NEXT: mov v0.16b, v3.16b
; CHECK-NEXT: ret
entry:
  %strided.vec = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec151 = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %strided.vec153 = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec154 = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %0 = fmul fast <2 x float> %strided.vec154, %strided.vec151
  %1 = fmul fast <2 x float> %strided.vec153, %strided.vec
  %2 = fmul fast <2 x float> %strided.vec154, %strided.vec
  %3 = fmul fast <2 x float> %strided.vec153, %strided.vec151
  %4 = fadd fast <2 x float> %3, %2
  %5 = fsub fast <2 x float> %1, %0
  %strided.vec156 = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec157 = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %6 = fmul fast <2 x float> %4, %strided.vec156
  %7 = fmul fast <2 x float> %5, %strided.vec157
  %8 = fadd fast <2 x float> %6, %7
  %9 = fmul fast <2 x float> %strided.vec156, %5
  %10 = fmul fast <2 x float> %4, %strided.vec157
  %11 = fsub fast <2 x float> %9, %10
  %interleaved.vec = shufflevector <2 x float> %11, <2 x float> %8, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x float> %interleaved.vec
}

; Multiplies c by an operand whose real lanes are taken from (b - c) and whose
; imaginary lanes are taken from (b - a), i.e. the two halves of that operand
; do not come from a single vector.
; Expected to not transform
define <4 x float> @add_mul(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: add_mul:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fsub v0.4s, v1.4s, v0.4s
; CHECK-NEXT: fsub v1.4s, v1.4s, v2.4s
; CHECK-NEXT: ext v3.16b, v2.16b, v2.16b, #8
; CHECK-NEXT: ext v4.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: ext v5.16b, v1.16b, v1.16b, #8
; CHECK-NEXT: zip2 v0.2s, v0.2s, v4.2s
; CHECK-NEXT: zip2 v4.2s, v2.2s, v3.2s
; CHECK-NEXT: zip1 v1.2s, v1.2s, v5.2s
; CHECK-NEXT: zip1 v2.2s, v2.2s, v3.2s
; CHECK-NEXT: fmul v5.2s, v4.2s, v0.2s
; CHECK-NEXT: fmul v3.2s, v1.2s, v4.2s
; CHECK-NEXT: fneg v4.2s, v5.2s
; CHECK-NEXT: fmla v3.2s, v0.2s, v2.2s
; CHECK-NEXT: fmla v4.2s, v1.2s, v2.2s
; CHECK-NEXT: zip1 v0.4s, v4.4s, v3.4s
; CHECK-NEXT: ret
entry:
  %0 = fsub fast <4 x float> %b, %c
  %1 = shufflevector <4 x float> %0, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec58 = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec59 = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %2 = fmul fast <2 x float> %1, %strided.vec59
  %3 = fsub fast <4 x float> %b, %a
  %4 = shufflevector <4 x float> %3, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %5 = fmul fast <2 x float> %strided.vec58, %4
  %6 = fadd fast <2 x float> %5, %2
  %7 = fmul fast <2 x float> %strided.vec58, %1
  %8 = fmul fast <2 x float> %strided.vec59, %4
  %9 = fsub fast <2 x float> %7, %8
  %interleaved.vec = shufflevector <2 x float> %9, <2 x float> %6, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x float> %interleaved.vec
}

; ((b * c) rotated by 270 degrees) * a
; %2 is the imaginary part of b * c and %6 is the negated real part of b * c
; (b.imag * c.imag - b.real * c.real). The pair (%2, %6) is then used as the
; (real, imaginary) operand of a complex multiply with a.
; Expected to not transform
define <4 x float> @mul_mul270_mul(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: mul_mul270_mul:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ext v3.16b, v2.16b, v2.16b, #8
; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8
; CHECK-NEXT: zip1 v5.2s, v2.2s, v3.2s
; CHECK-NEXT: zip1 v6.2s, v1.2s, v4.2s
; CHECK-NEXT: zip2 v2.2s, v2.2s, v3.2s
; CHECK-NEXT: zip2 v1.2s, v1.2s, v4.2s
; CHECK-NEXT: ext v3.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: fmul v7.2s, v6.2s, v5.2s
; CHECK-NEXT: fneg v4.2s, v7.2s
; CHECK-NEXT: zip2 v7.2s, v0.2s, v3.2s
; CHECK-NEXT: zip1 v0.2s, v0.2s, v3.2s
; CHECK-NEXT: fmla v4.2s, v2.2s, v1.2s
; CHECK-NEXT: fmul v1.2s, v1.2s, v5.2s
; CHECK-NEXT: fmul v3.2s, v4.2s, v7.2s
; CHECK-NEXT: fmla v1.2s, v2.2s, v6.2s
; CHECK-NEXT: fmul v2.2s, v4.2s, v0.2s
; CHECK-NEXT: fneg v3.2s, v3.2s
; CHECK-NEXT: fmla v2.2s, v7.2s, v1.2s
; CHECK-NEXT: fmla v3.2s, v0.2s, v1.2s
; CHECK-NEXT: zip1 v0.4s, v3.4s, v2.4s
; CHECK-NEXT: ret
entry:
  %strided.vec = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec81 = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %strided.vec83 = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec84 = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %0 = fmul fast <2 x float> %strided.vec84, %strided.vec
  %1 = fmul fast <2 x float> %strided.vec83, %strided.vec81
  %2 = fadd fast <2 x float> %1, %0
  %strided.vec86 = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec87 = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %3 = fmul fast <2 x float> %2, %strided.vec87
  %4 = fmul fast <2 x float> %strided.vec84, %strided.vec81
  %5 = fmul fast <2 x float> %strided.vec83, %strided.vec
  %6 = fsub fast <2 x float> %4, %5
  %7 = fmul fast <2 x float> %6, %strided.vec86
  %8 = fadd fast <2 x float> %3, %7
  %9 = fmul fast <2 x float> %2, %strided.vec86
  %10 = fmul fast <2 x float> %6, %strided.vec87
  %11 = fsub fast <2 x float> %9, %10
  %interleaved.vec = shufflevector <2 x float> %11, <2 x float> %8, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x float> %interleaved.vec
}

; (a * b) * a
; The operand a feeds both the inner and the outer multiply. %2 / %6 are the
; real / imaginary lanes of a * b; %11 / %8 are those of the final product.
; Expected to transform
define <4 x float> @mul_triangle(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: mul_triangle:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v3.2d, #0000000000000000
; CHECK-NEXT: movi v2.2d, #0000000000000000
; CHECK-NEXT: fcmla v3.4s, v0.4s, v1.4s, #0
; CHECK-NEXT: fcmla v3.4s, v0.4s, v1.4s, #90
; CHECK-NEXT: fcmla v2.4s, v3.4s, v0.4s, #0
; CHECK-NEXT: fcmla v2.4s, v3.4s, v0.4s, #90
; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: ret
entry:
  %strided.vec = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec35 = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %strided.vec37 = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec38 = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %0 = fmul fast <2 x float> %strided.vec37, %strided.vec
  %1 = fmul fast <2 x float> %strided.vec38, %strided.vec35
  %2 = fsub fast <2 x float> %0, %1
  %3 = fmul fast <2 x float> %2, %strided.vec35
  %4 = fmul fast <2 x float> %strided.vec38, %strided.vec
  %5 = fmul fast <2 x float> %strided.vec35, %strided.vec37
  %6 = fadd fast <2 x float> %4, %5
  %7 = fmul fast <2 x float> %6, %strided.vec
  %8 = fadd fast <2 x float> %3, %7
  %9 = fmul fast <2 x float> %2, %strided.vec
  %10 = fmul fast <2 x float> %6, %strided.vec35
  %11 = fsub fast <2 x float> %9, %10
  %interleaved.vec = shufflevector <2 x float> %11, <2 x float> %8, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x float> %interleaved.vec
}


; d * (b * a) * (c * a)
; The operand a feeds two separate multiplies whose results are later combined.
; %5 / %2 are the real / imaginary lanes of a * b, %10 / %11 those of
; d * (a * b), %18 / %14 those of c * a, and %23 / %20 those of the result.
; Expected to transform
define <4 x float> @mul_diamond(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d) {
; CHECK-LABEL: mul_diamond:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v4.2d, #0000000000000000
; CHECK-NEXT: movi v5.2d, #0000000000000000
; CHECK-NEXT: movi v6.2d, #0000000000000000
; CHECK-NEXT: fcmla v4.4s, v0.4s, v1.4s, #0
; CHECK-NEXT: fcmla v6.4s, v0.4s, v2.4s, #0
; CHECK-NEXT: fcmla v4.4s, v0.4s, v1.4s, #90
; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: fcmla v6.4s, v0.4s, v2.4s, #90
; CHECK-NEXT: fcmla v5.4s, v3.4s, v4.4s, #0
; CHECK-NEXT: fcmla v5.4s, v3.4s, v4.4s, #90
; CHECK-NEXT: fcmla v1.4s, v5.4s, v6.4s, #0
; CHECK-NEXT: fcmla v1.4s, v5.4s, v6.4s, #90
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: ret
entry:
  %a.real = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %a.imag = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %b.real = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %b.imag = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %c.real = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %c.imag = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %d.real = shufflevector <4 x float> %d, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %d.imag = shufflevector <4 x float> %d, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %0 = fmul fast <2 x float> %a.imag, %b.real
  %1 = fmul fast <2 x float> %a.real, %b.imag
  %2 = fadd fast <2 x float> %1, %0
  %3 = fmul fast <2 x float> %a.real, %b.real
  %4 = fmul fast <2 x float> %b.imag, %a.imag
  %5 = fsub fast <2 x float> %3, %4
  %6 = fmul fast <2 x float> %d.real, %5
  %7 = fmul fast <2 x float> %2, %d.imag
  %8 = fmul fast <2 x float> %d.real, %2
  %9 = fmul fast <2 x float> %5, %d.imag
  %10 = fsub fast <2 x float> %6, %7
  %11 = fadd fast <2 x float> %8, %9
  %12 = fmul fast <2 x float> %c.real, %a.imag
  %13 = fmul fast <2 x float> %c.imag, %a.real
  %14 = fadd fast <2 x float> %13, %12
  %15 = fmul fast <2 x float> %14, %10
  %16 = fmul fast <2 x float> %c.real, %a.real
  %17 = fmul fast <2 x float> %c.imag, %a.imag
  %18 = fsub fast <2 x float> %16, %17
  %19 = fmul fast <2 x float> %18, %11
  %20 = fadd fast <2 x float> %15, %19
  %21 = fmul fast <2 x float> %18, %10
  %22 = fmul fast <2 x float> %14, %11
  %23 = fsub fast <2 x float> %21, %22
  %interleaved.vec = shufflevector <2 x float> %23, <2 x float> %20, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x float> %interleaved.vec
}

; (a * c) + ((a * b) rotated by 90 degrees)
; x = a * b is (%xr, %xi), y = a * c is (%yr, %yi), and the result is
; (%yr - %xi, %yi + %xr). The assertions show two fcmla pairs followed by a
; single fcadd with rotation #90.
; Expected to transform
define <4 x float> @mul_add90_mul(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: mul_add90_mul:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v3.2d, #0000000000000000
; CHECK-NEXT: movi v4.2d, #0000000000000000
; CHECK-NEXT: fcmla v4.4s, v0.4s, v2.4s, #0
; CHECK-NEXT: fcmla v3.4s, v0.4s, v1.4s, #0
; CHECK-NEXT: fcmla v4.4s, v0.4s, v2.4s, #90
; CHECK-NEXT: fcmla v3.4s, v0.4s, v1.4s, #90
; CHECK-NEXT: fcadd v0.4s, v4.4s, v3.4s, #90
; CHECK-NEXT: ret
entry:
  %ar = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %ai = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %br = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %bi = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %cr = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %ci = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 1, i32 3>

  %i6 = fmul fast <2 x float> %br, %ar
  %i7 = fmul fast <2 x float> %bi, %ai
  %xr = fsub fast <2 x float> %i6, %i7
  %i9 = fmul fast <2 x float> %bi, %ar
  %i10 = fmul fast <2 x float> %br, %ai
  %xi = fadd fast <2 x float> %i9, %i10

  %j6 = fmul fast <2 x float> %cr, %ar
  %j7 = fmul fast <2 x float> %ci, %ai
  %yr = fsub fast <2 x float> %j6, %j7
  %j9 = fmul fast <2 x float> %ci, %ar
  %j10 = fmul fast <2 x float> %cr, %ai
  %yi = fadd fast <2 x float> %j9, %j10

  %zr = fsub fast <2 x float> %yr, %xi
  %zi = fadd fast <2 x float> %yi, %xr
  %interleaved.vec = shufflevector <2 x float> %zr, <2 x float> %zi, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x float> %interleaved.vec
}

; Same shape as mul_add90_mul, but the second product is incomplete: the
; partial products %j6 and %j9 are commented out and %yr / %yi reuse %i6 / %i9
; (computed from b, not c) instead. (%yr, %yi) is therefore not a full complex
; multiply of a by c.
; Expected to not transform
define <4 x float> @mul_triangle_addmul(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: mul_triangle_addmul:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ext v3.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8
; CHECK-NEXT: zip1 v5.2s, v0.2s, v3.2s
; CHECK-NEXT: zip1 v6.2s, v1.2s, v4.2s
; CHECK-NEXT: zip2 v1.2s, v1.2s, v4.2s
; CHECK-NEXT: ext v4.16b, v2.16b, v2.16b, #8
; CHECK-NEXT: zip2 v0.2s, v0.2s, v3.2s
; CHECK-NEXT: fmul v7.2s, v6.2s, v5.2s
; CHECK-NEXT: fmul v5.2s, v1.2s, v5.2s
; CHECK-NEXT: zip1 v3.2s, v2.2s, v4.2s
; CHECK-NEXT: zip2 v2.2s, v2.2s, v4.2s
; CHECK-NEXT: fmov d4, d7
; CHECK-NEXT: fmov d16, d5
; CHECK-NEXT: fmls v7.2s, v0.2s, v2.2s
; CHECK-NEXT: fmla v5.2s, v0.2s, v3.2s
; CHECK-NEXT: fmls v4.2s, v0.2s, v1.2s
; CHECK-NEXT: fmla v16.2s, v0.2s, v6.2s
; CHECK-NEXT: fsub v0.2s, v7.2s, v16.2s
; CHECK-NEXT: fadd v1.2s, v5.2s, v4.2s
; CHECK-NEXT: zip1 v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
entry:
  %ar = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %ai = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %br = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %bi = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %cr = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %ci = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 1, i32 3>

  %i6 = fmul fast <2 x float> %br, %ar
  %i7 = fmul fast <2 x float> %bi, %ai
  %xr = fsub fast <2 x float> %i6, %i7
  %i9 = fmul fast <2 x float> %bi, %ar
  %i10 = fmul fast <2 x float> %br, %ai
  %xi = fadd fast <2 x float> %i9, %i10

  ;%j6 = fmul fast <2 x float> %cr, %ar
  %j7 = fmul fast <2 x float> %ci, %ai
  %yr = fsub fast <2 x float> %i6, %j7
  ;%j9 = fmul fast <2 x float> %ci, %ar
  %j10 = fmul fast <2 x float> %cr, %ai
  %yi = fadd fast <2 x float> %i9, %j10

  %zr = fsub fast <2 x float> %yr, %xi
  %zi = fadd fast <2 x float> %yi, %xr
  %interleaved.vec = shufflevector <2 x float> %zr, <2 x float> %zi, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x float> %interleaved.vec
}

; (a * b) * a, as in mul_triangle, except that the deinterleaved real and
; imaginary parts of the inner product (%2 and %6) have an additional use:
; they are concatenated (mask <0, 1, 2, 3>) and stored through %p.
; Expected to not transform
define <4 x float> @mul_triangle_multiuses(<4 x float> %a, <4 x float> %b, ptr %p) {
; CHECK-LABEL: mul_triangle_multiuses:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8
; CHECK-NEXT: zip2 v4.2s, v0.2s, v2.2s
; CHECK-NEXT: zip1 v5.2s, v1.2s, v3.2s
; CHECK-NEXT: zip1 v0.2s, v0.2s, v2.2s
; CHECK-NEXT: zip2 v1.2s, v1.2s, v3.2s
; CHECK-NEXT: fmul v2.2s, v4.2s, v5.2s
; CHECK-NEXT: fmul v3.2s, v1.2s, v4.2s
; CHECK-NEXT: fmla v2.2s, v0.2s, v1.2s
; CHECK-NEXT: fneg v1.2s, v3.2s
; CHECK-NEXT: fmul v3.2s, v2.2s, v4.2s
; CHECK-NEXT: fmla v1.2s, v0.2s, v5.2s
; CHECK-NEXT: fmul v5.2s, v2.2s, v0.2s
; CHECK-NEXT: fneg v3.2s, v3.2s
; CHECK-NEXT: fmla v5.2s, v4.2s, v1.2s
; CHECK-NEXT: fmla v3.2s, v0.2s, v1.2s
; CHECK-NEXT: mov v1.d[1], v2.d[0]
; CHECK-NEXT: zip1 v0.4s, v3.4s, v5.4s
; CHECK-NEXT: str q1, [x0]
; CHECK-NEXT: ret
entry:
  %strided.vec = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec35 = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %strided.vec37 = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %strided.vec38 = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %0 = fmul fast <2 x float> %strided.vec37, %strided.vec
  %1 = fmul fast <2 x float> %strided.vec38, %strided.vec35
  %2 = fsub fast <2 x float> %0, %1
  %3 = fmul fast <2 x float> %2, %strided.vec35
  %4 = fmul fast <2 x float> %strided.vec38, %strided.vec
  %5 = fmul fast <2 x float> %strided.vec35, %strided.vec37
  %6 = fadd fast <2 x float> %4, %5
  %otheruse = shufflevector <2 x float> %2, <2 x float> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  store <4 x float> %otheruse, ptr %p
  %7 = fmul fast <2 x float> %6, %strided.vec
  %8 = fadd fast <2 x float> %3, %7
  %9 = fmul fast <2 x float> %2, %strided.vec
  %10 = fmul fast <2 x float> %6, %strided.vec35
  %11 = fsub fast <2 x float> %9, %10
  %interleaved.vec = shufflevector <2 x float> %11, <2 x float> %8, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x float> %interleaved.vec
}

; (a * b) + c
; The same operation (fadd with the matching half of c) is applied to the real
; lanes (%5) and the imaginary lanes (%2) of a * b. The assertions show the
; fcmla pair accumulating directly into v2 with no zeroing movi.
; Expected to transform
define <4 x float> @mul_addequal(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: mul_addequal:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcmla v2.4s, v1.4s, v0.4s, #0
; CHECK-NEXT: fcmla v2.4s, v1.4s, v0.4s, #90
; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: ret
entry:
  %strided.vec = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %a.imag = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %b.real = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %b.imag = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %0 = fmul fast <2 x float> %b.imag, %strided.vec
  %1 = fmul fast <2 x float> %b.real, %a.imag
  %2 = fadd fast <2 x float> %1, %0
  %3 = fmul fast <2 x float> %b.real, %strided.vec
  %4 = fmul fast <2 x float> %a.imag, %b.imag
  %5 = fsub fast <2 x float> %3, %4
  %c.real = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %c.imag = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %6 = fadd fast <2 x float> %5, %c.real
  %7 = fadd fast <2 x float> %2, %c.imag
  %interleaved.vec = shufflevector <2 x float> %6, <2 x float> %7, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x float> %interleaved.vec
}

; (a * b) - c
; The same fsub is applied to the real lanes (%5) and the imaginary lanes (%2)
; of a * b. The assertions show a fcmla pair followed by one full-width fsub.
; Expected to transform
define <4 x float> @mul_subequal(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: mul_subequal:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v3.2d, #0000000000000000
; CHECK-NEXT: fcmla v3.4s, v1.4s, v0.4s, #0
; CHECK-NEXT: fcmla v3.4s, v1.4s, v0.4s, #90
; CHECK-NEXT: fsub v0.4s, v3.4s, v2.4s
; CHECK-NEXT: ret
entry:
  %strided.vec = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %a.imag = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %b.real = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %b.imag = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %0 = fmul fast <2 x float> %b.imag, %strided.vec
  %1 = fmul fast <2 x float> %b.real, %a.imag
  %2 = fadd fast <2 x float> %1, %0
  %3 = fmul fast <2 x float> %b.real, %strided.vec
  %4 = fmul fast <2 x float> %a.imag, %b.imag
  %5 = fsub fast <2 x float> %3, %4
  %c.real = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %c.imag = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %6 = fsub fast <2 x float> %5, %c.real
  %7 = fsub fast <2 x float> %2, %c.imag
  %interleaved.vec = shufflevector <2 x float> %6, <2 x float> %7, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x float> %interleaved.vec
}


; (a * b) multiplied lane-wise by c
; The real lanes of a * b (%5) are multiplied by c.real and the imaginary lanes
; (%2) by c.imag; this is an element-wise fmul, not a complex multiply. The
; assertions show a fcmla pair for a * b followed by one full-width fmul.
; Expected to transform
define <4 x float> @mul_mulequal(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: mul_mulequal:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v3.2d, #0000000000000000
; CHECK-NEXT: fcmla v3.4s, v1.4s, v0.4s, #0
; CHECK-NEXT: fcmla v3.4s, v1.4s, v0.4s, #90
; CHECK-NEXT: fmul v0.4s, v3.4s, v2.4s
; CHECK-NEXT: ret
entry:
  %strided.vec = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %a.imag = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %b.real = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %b.imag = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %0 = fmul fast <2 x float> %b.imag, %strided.vec
  %1 = fmul fast <2 x float> %b.real, %a.imag
  %2 = fadd fast <2 x float> %1, %0
  %3 = fmul fast <2 x float> %b.real, %strided.vec
  %4 = fmul fast <2 x float> %a.imag, %b.imag
  %5 = fsub fast <2 x float> %3, %4
  %c.real = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %c.imag = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %6 = fmul fast <2 x float> %5, %c.real
  %7 = fmul fast <2 x float> %2, %c.imag
  %interleaved.vec = shufflevector <2 x float> %6, <2 x float> %7, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x float> %interleaved.vec
}

; (a * b) divided lane-wise by c
; Identical in shape to mul_mulequal but with fdiv as the trailing operation.
; The assertions contain no fcmla; the divides are done on the deinterleaved
; 2-lane halves before a final zip1.
; Expected to not transform
define <4 x float> @mul_divequal(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: mul_divequal:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ext v3.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8
; CHECK-NEXT: zip2 v5.2s, v0.2s, v3.2s
; CHECK-NEXT: zip2 v6.2s, v1.2s, v4.2s
; CHECK-NEXT: zip1 v0.2s, v0.2s, v3.2s
; CHECK-NEXT: zip1 v1.2s, v1.2s, v4.2s
; CHECK-NEXT: ext v3.16b, v2.16b, v2.16b, #8
; CHECK-NEXT: fmul v7.2s, v5.2s, v6.2s
; CHECK-NEXT: fneg v4.2s, v7.2s
; CHECK-NEXT: zip1 v7.2s, v2.2s, v3.2s
; CHECK-NEXT: zip2 v2.2s, v2.2s, v3.2s
; CHECK-NEXT: fmla v4.2s, v0.2s, v1.2s
; CHECK-NEXT: fmul v0.2s, v6.2s, v0.2s
; CHECK-NEXT: fmla v0.2s, v5.2s, v1.2s
; CHECK-NEXT: fdiv v4.2s, v4.2s, v7.2s
; CHECK-NEXT: fdiv v0.2s, v0.2s, v2.2s
; CHECK-NEXT: zip1 v0.4s, v4.4s, v0.4s
; CHECK-NEXT: ret
entry:
  %strided.vec = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %a.imag = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %b.real = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %b.imag = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %0 = fmul fast <2 x float> %b.imag, %strided.vec
  %1 = fmul fast <2 x float> %b.real, %a.imag
  %2 = fadd fast <2 x float> %1, %0
  %3 = fmul fast <2 x float> %b.real, %strided.vec
  %4 = fmul fast <2 x float> %a.imag, %b.imag
  %5 = fsub fast <2 x float> %3, %4
  %c.real = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %c.imag = shufflevector <4 x float> %c, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %6 = fdiv fast <2 x float> %5, %c.real
  %7 = fdiv fast <2 x float> %2, %c.imag
  %interleaved.vec = shufflevector <2 x float> %6, <2 x float> %7, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x float> %interleaved.vec
}

; -(a * b)
; Both the real lanes (%5) and the imaginary lanes (%2) of a * b are negated.
; The assertions show the negation folded into the fcmla rotations (#180 and
; #270) with no separate fneg.
; Expected to transform
define <4 x float> @mul_negequal(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: mul_negequal:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v2.2d, #0000000000000000
; CHECK-NEXT: fcmla v2.4s, v1.4s, v0.4s, #180
; CHECK-NEXT: fcmla v2.4s, v1.4s, v0.4s, #270
; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: ret
entry:
  %strided.vec = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %a.imag = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %b.real = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %b.imag = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %0 = fmul fast <2 x float> %b.imag, %strided.vec
  %1 = fmul fast <2 x float> %b.real, %a.imag
  %2 = fadd fast <2 x float> %1, %0
  %3 = fmul fast <2 x float> %b.real, %strided.vec
  %4 = fmul fast <2 x float> %a.imag, %b.imag
  %5 = fsub fast <2 x float> %3, %4
  %6 = fneg fast <2 x float> %5
  %7 = fneg fast <2 x float> %2
  %interleaved.vec = shufflevector <2 x float> %6, <2 x float> %7, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x float> %interleaved.vec
}