; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s --mattr=+mve.fp -o - | FileCheck %s

; Complex multiplication of interleaved f32 vectors (real, imag, real, imag, ...).
;
; Every function below follows the same shape:
;   1. shufflevector splits %a and %b into their even (real) and odd
;      (imaginary) lanes;
;   2. real = b.real * a.real - a.imag * b.imag
;      imag = b.real * a.imag + b.imag * a.real      (all operations are `fast`)
;   3. a final shufflevector re-interleaves the real and imaginary results.
;
; The assertions record whether llc lowers this pattern to the MVE
; vcmul.f32 / vcmla.f32 pair (rotations #0 and #90) or leaves it as scalar code.

target triple = "thumbv8.1m.main-none-none-eabi"

; Expected to not transform
; <2 x float> carries a single (real, imag) pair; the assertions expect scalar
; vmul/vfma/vfnms on s-registers and no vcmul/vcmla.
define arm_aapcs_vfpcc <2 x float> @complex_mul_v2f32(<2 x float> %a, <2 x float> %b) {
; CHECK-LABEL: complex_mul_v2f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmul.f32 s9, s5, s0
; CHECK-NEXT:    vmul.f32 s8, s1, s5
; CHECK-NEXT:    vfma.f32 s9, s4, s1
; CHECK-NEXT:    vfnms.f32 s8, s4, s0
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %a.real = shufflevector <2 x float> %a, <2 x float> poison, <1 x i32> <i32 0>
  %a.imag = shufflevector <2 x float> %a, <2 x float> poison, <1 x i32> <i32 1>
  %b.real = shufflevector <2 x float> %b, <2 x float> poison, <1 x i32> <i32 0>
  %b.imag = shufflevector <2 x float> %b, <2 x float> poison, <1 x i32> <i32 1>
  %0 = fmul fast <1 x float> %b.imag, %a.real
  %1 = fmul fast <1 x float> %b.real, %a.imag
  %2 = fadd fast <1 x float> %1, %0
  %3 = fmul fast <1 x float> %b.real, %a.real
  %4 = fmul fast <1 x float> %a.imag, %b.imag
  %5 = fsub fast <1 x float> %3, %4
  %interleaved.vec = shufflevector <1 x float> %5, <1 x float> %2, <2 x i32> <i32 0, i32 1>
  ret <2 x float> %interleaved.vec
}

; Expected to transform
; <4 x float> holds two complex values; the assertions expect one
; vcmul.f32 #0 followed by one vcmla.f32 #90 on q-registers.
define arm_aapcs_vfpcc <4 x float> @complex_mul_v4f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: complex_mul_v4f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmul.f32 q2, q0, q1, #0
; CHECK-NEXT:    vcmla.f32 q2, q0, q1, #90
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %a.real = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %a.imag = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %b.real = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
  %b.imag = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  %0 = fmul fast <2 x float> %b.imag, %a.real
  %1 = fmul fast <2 x float> %b.real, %a.imag
  %2 = fadd fast <2 x float> %1, %0
  %3 = fmul fast <2 x float> %b.real, %a.real
  %4 = fmul fast <2 x float> %a.imag, %b.imag
  %5 = fsub fast <2 x float> %3, %4
  %interleaved.vec = shufflevector <2 x float> %5, <2 x float> %2, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x float> %interleaved.vec
}

; Expected to transform
; <8 x float> holds four complex values; the assertions expect two
; vcmul/vcmla pairs, with d8/d9 (q4) saved and restored around them.
define arm_aapcs_vfpcc <8 x float> @complex_mul_v8f32(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: complex_mul_v8f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vcmul.f32 q4, q0, q2, #0
; CHECK-NEXT:    vcmla.f32 q4, q0, q2, #90
; CHECK-NEXT:    vcmul.f32 q2, q1, q3, #0
; CHECK-NEXT:    vcmla.f32 q2, q1, q3, #90
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vmov q1, q2
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    bx lr
entry:
  %a.real = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %a.imag = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %b.real = shufflevector <8 x float> %b, <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %b.imag = shufflevector <8 x float> %b, <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %0 = fmul fast <4 x float> %b.imag, %a.real
  %1 = fmul fast <4 x float> %b.real, %a.imag
  %2 = fadd fast <4 x float> %1, %0
  %3 = fmul fast <4 x float> %b.real, %a.real
  %4 = fmul fast <4 x float> %a.imag, %b.imag
  %5 = fsub fast <4 x float> %3, %4
  %interleaved.vec = shufflevector <4 x float> %5, <4 x float> %2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  ret <8 x float> %interleaved.vec
}

; Expected to transform
; <16 x float> holds eight complex values; the assertions expect four
; vcmul/vcmla pairs. One operand of each pair is loaded with vldrw.u32 from
; sp+64 .. sp+112, i.e. just above the 64 bytes pushed for d8-d15
; (presumably the part of the arguments that is passed on the stack).
define arm_aapcs_vfpcc <16 x float> @complex_mul_v16f32(<16 x float> %a, <16 x float> %b) {
; CHECK-LABEL: complex_mul_v16f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT:    add r3, sp, #64
; CHECK-NEXT:    add r2, sp, #80
; CHECK-NEXT:    vldrw.u32 q5, [r3]
; CHECK-NEXT:    add r1, sp, #96
; CHECK-NEXT:    add r0, sp, #112
; CHECK-NEXT:    vcmul.f32 q4, q0, q5, #0
; CHECK-NEXT:    vcmla.f32 q4, q0, q5, #90
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vcmul.f32 q5, q1, q0, #0
; CHECK-NEXT:    vcmla.f32 q5, q1, q0, #90
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vmov q1, q5
; CHECK-NEXT:    vcmul.f32 q6, q2, q0, #0
; CHECK-NEXT:    vcmla.f32 q6, q2, q0, #90
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vmov q2, q6
; CHECK-NEXT:    vcmul.f32 q7, q3, q0, #0
; CHECK-NEXT:    vcmla.f32 q7, q3, q0, #90
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vmov q3, q7
; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT:    bx lr
entry:
  %a.real = shufflevector <16 x float> %a, <16 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %a.imag = shufflevector <16 x float> %a, <16 x float> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %b.real = shufflevector <16 x float> %b, <16 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %b.imag = shufflevector <16 x float> %b, <16 x float> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %0 = fmul fast <8 x float> %b.imag, %a.real
  %1 = fmul fast <8 x float> %b.real, %a.imag
  %2 = fadd fast <8 x float> %1, %0
  %3 = fmul fast <8 x float> %b.real, %a.real
  %4 = fmul fast <8 x float> %a.imag, %b.imag
  %5 = fsub fast <8 x float> %3, %4
  %interleaved.vec = shufflevector <8 x float> %5, <8 x float> %2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ret <16 x float> %interleaved.vec
}