; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve,-vfp2 -o - %s | FileCheck %s --check-prefix=CHECK-NOFP
; RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -o - %s | FileCheck --check-prefix=CHECK-FP %s

; This file tests that we expand floating point operations correctly,
; even if we do not have an FPU.

; <8 x half> fadd.
; Without FP support (first RUN line) every lane is extracted with vmov.u16,
; widened through __aeabi_h2f, added with __aeabi_fadd, narrowed again through
; __aeabi_f2h and inserted into the result vector.
; With +mve.fp (second RUN line) the whole operation is a single vadd.f16.
define arm_aapcs_vfpcc <8 x half> @vector_add_f16(<8 x half> %lhs, <8 x half> %rhs) {
; CHECK-NOFP-LABEL: vector_add_f16:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    .save {r4, lr}
; CHECK-NOFP-NEXT:    push {r4, lr}
; CHECK-NOFP-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
; CHECK-NOFP-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
; CHECK-NOFP-NEXT:    vmov.u16 r0, q1[0]
; CHECK-NOFP-NEXT:    vmov q5, q1
; CHECK-NOFP-NEXT:    vmov q4, q0
; CHECK-NOFP-NEXT:    bl __aeabi_h2f
; CHECK-NOFP-NEXT:    mov r4, r0
; CHECK-NOFP-NEXT:    vmov.u16 r0, q4[0]
; CHECK-NOFP-NEXT:    bl __aeabi_h2f
; CHECK-NOFP-NEXT:    mov r1, r4
; CHECK-NOFP-NEXT:    bl __aeabi_fadd
; CHECK-NOFP-NEXT:    bl __aeabi_f2h
; CHECK-NOFP-NEXT:    vmov.16 q6[0], r0
; CHECK-NOFP-NEXT:    vmov.u16 r0, q5[1]
; CHECK-NOFP-NEXT:    bl __aeabi_h2f
; CHECK-NOFP-NEXT:    mov r4, r0
; CHECK-NOFP-NEXT:    vmov.u16 r0, q4[1]
; CHECK-NOFP-NEXT:    bl __aeabi_h2f
; CHECK-NOFP-NEXT:    mov r1, r4
; CHECK-NOFP-NEXT:    bl __aeabi_fadd
; CHECK-NOFP-NEXT:    bl __aeabi_f2h
; CHECK-NOFP-NEXT:    vmov.16 q6[1], r0
; CHECK-NOFP-NEXT:    vmov.u16 r0, q5[2]
; CHECK-NOFP-NEXT:    bl __aeabi_h2f
; CHECK-NOFP-NEXT:    mov r4, r0
; CHECK-NOFP-NEXT:    vmov.u16 r0, q4[2]
; CHECK-NOFP-NEXT:    bl __aeabi_h2f
; CHECK-NOFP-NEXT:    mov r1, r4
; CHECK-NOFP-NEXT:    bl __aeabi_fadd
; CHECK-NOFP-NEXT:    bl __aeabi_f2h
; CHECK-NOFP-NEXT:    vmov.16 q6[2], r0
; CHECK-NOFP-NEXT:    vmov.u16 r0, q5[3]
; CHECK-NOFP-NEXT:    bl __aeabi_h2f
; CHECK-NOFP-NEXT:    mov r4, r0
; CHECK-NOFP-NEXT:    vmov.u16 r0, q4[3]
; CHECK-NOFP-NEXT:    bl __aeabi_h2f
; CHECK-NOFP-NEXT:    mov r1, r4
; CHECK-NOFP-NEXT:    bl __aeabi_fadd
; CHECK-NOFP-NEXT:    bl __aeabi_f2h
; CHECK-NOFP-NEXT:    vmov.16 q6[3], r0
; CHECK-NOFP-NEXT:    vmov.u16 r0, q5[4]
; CHECK-NOFP-NEXT:    bl __aeabi_h2f
; CHECK-NOFP-NEXT:    mov r4, r0
; CHECK-NOFP-NEXT:    vmov.u16 r0, q4[4]
; CHECK-NOFP-NEXT:    bl __aeabi_h2f
; CHECK-NOFP-NEXT:    mov r1, r4
; CHECK-NOFP-NEXT:    bl __aeabi_fadd
; CHECK-NOFP-NEXT:    bl __aeabi_f2h
; CHECK-NOFP-NEXT:    vmov.16 q6[4], r0
; CHECK-NOFP-NEXT:    vmov.u16 r0, q5[5]
; CHECK-NOFP-NEXT:    bl __aeabi_h2f
; CHECK-NOFP-NEXT:    mov r4, r0
; CHECK-NOFP-NEXT:    vmov.u16 r0, q4[5]
; CHECK-NOFP-NEXT:    bl __aeabi_h2f
; CHECK-NOFP-NEXT:    mov r1, r4
; CHECK-NOFP-NEXT:    bl __aeabi_fadd
; CHECK-NOFP-NEXT:    bl __aeabi_f2h
; CHECK-NOFP-NEXT:    vmov.16 q6[5], r0
; CHECK-NOFP-NEXT:    vmov.u16 r0, q5[6]
; CHECK-NOFP-NEXT:    bl __aeabi_h2f
; CHECK-NOFP-NEXT:    mov r4, r0
; CHECK-NOFP-NEXT:    vmov.u16 r0, q4[6]
; CHECK-NOFP-NEXT:    bl __aeabi_h2f
; CHECK-NOFP-NEXT:    mov r1, r4
; CHECK-NOFP-NEXT:    bl __aeabi_fadd
; CHECK-NOFP-NEXT:    bl __aeabi_f2h
; CHECK-NOFP-NEXT:    vmov.16 q6[6], r0
; CHECK-NOFP-NEXT:    vmov.u16 r0, q5[7]
; CHECK-NOFP-NEXT:    bl __aeabi_h2f
; CHECK-NOFP-NEXT:    mov r4, r0
; CHECK-NOFP-NEXT:    vmov.u16 r0, q4[7]
; CHECK-NOFP-NEXT:    bl __aeabi_h2f
; CHECK-NOFP-NEXT:    mov r1, r4
; CHECK-NOFP-NEXT:    bl __aeabi_fadd
; CHECK-NOFP-NEXT:    bl __aeabi_f2h
; CHECK-NOFP-NEXT:    vmov.16 q6[7], r0
; CHECK-NOFP-NEXT:    vmov q0, q6
; CHECK-NOFP-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
; CHECK-NOFP-NEXT:    pop {r4, pc}
;
; CHECK-FP-LABEL: vector_add_f16:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vadd.f16 q0, q0, q1
; CHECK-FP-NEXT:    bx lr
entry:
  %sum = fadd <8 x half> %lhs, %rhs
  ret <8 x half> %sum
}

; <4 x float> fadd.
; Without FP support (first RUN line) the lanes are moved to core registers
; and added one at a time with __aeabi_fadd, then moved back into s16-s19.
; With +mve.fp (second RUN line) the whole operation is a single vadd.f32.
define arm_aapcs_vfpcc <4 x float> @vector_add_f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK-NOFP-LABEL: vector_add_f32:
; CHECK-NOFP:       @ %bb.0: @ %entry
; CHECK-NOFP-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NOFP-NEXT:    push {r4, r5, r7, lr}
; CHECK-NOFP-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NOFP-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NOFP-NEXT:    vmov q4, q1
; CHECK-NOFP-NEXT:    vmov q5, q0
; CHECK-NOFP-NEXT:    vmov r4, r0, d11
; CHECK-NOFP-NEXT:    vmov r5, r1, d9
; CHECK-NOFP-NEXT:    bl __aeabi_fadd
; CHECK-NOFP-NEXT:    vmov s19, r0
; CHECK-NOFP-NEXT:    mov r0, r4
; CHECK-NOFP-NEXT:    mov r1, r5
; CHECK-NOFP-NEXT:    bl __aeabi_fadd
; CHECK-NOFP-NEXT:    vmov s18, r0
; CHECK-NOFP-NEXT:    vmov r4, r0, d10
; CHECK-NOFP-NEXT:    vmov r5, r1, d8
; CHECK-NOFP-NEXT:    bl __aeabi_fadd
; CHECK-NOFP-NEXT:    vmov s17, r0
; CHECK-NOFP-NEXT:    mov r0, r4
; CHECK-NOFP-NEXT:    mov r1, r5
; CHECK-NOFP-NEXT:    bl __aeabi_fadd
; CHECK-NOFP-NEXT:    vmov s16, r0
; CHECK-NOFP-NEXT:    vmov q0, q4
; CHECK-NOFP-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NOFP-NEXT:    pop {r4, r5, r7, pc}
;
; CHECK-FP-LABEL: vector_add_f32:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vadd.f32 q0, q0, q1
; CHECK-FP-NEXT:    bx lr
entry:
  %sum = fadd <4 x float> %lhs, %rhs
  ret <4 x float> %sum
}
