; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

; Checks that a reassociable (fast) fadd of %a with either
;   - vcmlaq(rot, zeroinitializer, %b, %c), or
;   - vcmulq(rot, %b, %c)
; is folded into a single vcmla accumulating into %a, for all four
; rotations (#0/#90/#180/#270) and for both fadd operand orders, and
; that no fold happens when the fadd lacks the fast flag or when the
; accumulator register is also a multiplicand (same_register_f32).

declare <8 x half> @llvm.arm.mve.vcmlaq.v8f16(i32, <8 x half>, <8 x half>, <8 x half>)
declare <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32, <4 x float>, <4 x float>, <4 x float>)
declare <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32, <8 x half>, <8 x half>)
declare <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32, <4 x float>, <4 x float>)


define arm_aapcs_vfpcc <4 x float> @reassoc_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: reassoc_f32x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f32 q0, q1, q2, #0
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 0, <4 x float> zeroinitializer, <4 x float> %b, <4 x float> %c)
  %res = fadd fast <4 x float> %d, %a
  ret <4 x float> %res
}

define arm_aapcs_vfpcc <4 x float> @reassoc_c_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: reassoc_c_f32x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f32 q0, q1, q2, #90
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 1, <4 x float> zeroinitializer, <4 x float> %b, <4 x float> %c)
  %res = fadd fast <4 x float> %a, %d
  ret <4 x float> %res
}

define arm_aapcs_vfpcc <8 x half> @reassoc_f16x4(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: reassoc_f16x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f16 q0, q1, q2, #180
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <8 x half> @llvm.arm.mve.vcmlaq.v8f16(i32 2, <8 x half> zeroinitializer, <8 x half> %b, <8 x half> %c)
  %res = fadd fast <8 x half> %d, %a
  ret <8 x half> %res
}

define arm_aapcs_vfpcc <8 x half> @reassoc_c_f16x4(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: reassoc_c_f16x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f16 q0, q1, q2, #270
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <8 x half> @llvm.arm.mve.vcmlaq.v8f16(i32 3, <8 x half> zeroinitializer, <8 x half> %b, <8 x half> %c)
  %res = fadd fast <8 x half> %a, %d
  ret <8 x half> %res
}

define arm_aapcs_vfpcc <4 x float> @reassoc_nonfast_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: reassoc_nonfast_f32x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.i32 q3, #0x0
; CHECK-NEXT:    vcmla.f32 q3, q1, q2, #0
; CHECK-NEXT:    vadd.f32 q0, q3, q0
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 0, <4 x float> zeroinitializer, <4 x float> %b, <4 x float> %c)
  %res = fadd <4 x float> %d, %a
  ret <4 x float> %res
}



define arm_aapcs_vfpcc <4 x float> @muladd_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: muladd_f32x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f32 q0, q1, q2, #0
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 0, <4 x float> %b, <4 x float> %c)
  %res = fadd fast <4 x float> %d, %a
  ret <4 x float> %res
}

define arm_aapcs_vfpcc <4 x float> @muladd_c_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: muladd_c_f32x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f32 q0, q1, q2, #90
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 1, <4 x float> %b, <4 x float> %c)
  %res = fadd fast <4 x float> %a, %d
  ret <4 x float> %res
}

define arm_aapcs_vfpcc <8 x half> @muladd_f16x4(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: muladd_f16x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f16 q0, q1, q2, #180
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32 2, <8 x half> %b, <8 x half> %c)
  %res = fadd fast <8 x half> %d, %a
  ret <8 x half> %res
}

define arm_aapcs_vfpcc <8 x half> @muladd_c_f16x4(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: muladd_c_f16x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f16 q0, q1, q2, #270
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32 3, <8 x half> %b, <8 x half> %c)
  %res = fadd fast <8 x half> %a, %d
  ret <8 x half> %res
}

define arm_aapcs_vfpcc <4 x float> @muladd_nonfast_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: muladd_nonfast_f32x4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmul.f32 q3, q1, q2, #0
; CHECK-NEXT:    vadd.f32 q0, q3, q0
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 0, <4 x float> %b, <4 x float> %c)
  %res = fadd <4 x float> %d, %a
  ret <4 x float> %res
}

define arm_aapcs_vfpcc <8 x half> @same_register_f16(<8 x half> %a) {
; CHECK-LABEL: same_register_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmla.f16 q0, q0, q0, #0
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <8 x half> @llvm.arm.mve.vcmlaq.v8f16(i32 0, <8 x half> zeroinitializer, <8 x half> %a, <8 x half> %a)
  %res = fadd fast <8 x half> %d, %a
  ret <8 x half> %res
}

define arm_aapcs_vfpcc <4 x float> @same_register_f32(<4 x float> %a) {
; CHECK-LABEL: same_register_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q1, q0
; CHECK-NEXT:    vcmla.f32 q1, q0, q0, #0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %d = tail call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 0, <4 x float> zeroinitializer, <4 x float> %a, <4 x float> %a)
  %res = fadd fast <4 x float> %d, %a
  ret <4 x float> %res
}