1*ca603d25SDavid Green // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 2*ca603d25SDavid Green // RUN: %clang_cc1 -triple arm64 -target-feature +neon \ 3207e5cccSFangrui Song // RUN: -target-feature +v8.3a \ 4207e5cccSFangrui Song // RUN: -target-feature +fullfp16 \ 5*ca603d25SDavid Green // RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes="mem2reg,instsimplify" | FileCheck %s 6207e5cccSFangrui Song 7207e5cccSFangrui Song // REQUIRES: aarch64-registered-target 8207e5cccSFangrui Song 9207e5cccSFangrui Song #include <arm_neon.h> 10207e5cccSFangrui Song 11*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x half> @test_vcmla_f16( 12*ca603d25SDavid Green // CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) #[[ATTR0:[0-9]+]] { 13*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 14*ca603d25SDavid Green // CHECK-NEXT: [[VCMLA_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[RHS]]) 15*ca603d25SDavid Green // CHECK-NEXT: ret <4 x half> [[VCMLA_F163_I]] 16*ca603d25SDavid Green // 17207e5cccSFangrui Song float16x4_t test_vcmla_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) { 18207e5cccSFangrui Song return vcmla_f16(acc, lhs, rhs); 19207e5cccSFangrui Song } 20207e5cccSFangrui Song 21*ca603d25SDavid Green // CHECK-LABEL: define dso_local <2 x float> @test_vcmla_f32( 22*ca603d25SDavid Green // CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0]] { 23*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 24*ca603d25SDavid Green // CHECK-NEXT: [[VCMLA_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]]) 25*ca603d25SDavid Green // CHECK-NEXT: ret <2 x float> [[VCMLA_F323_I]] 26*ca603d25SDavid Green // 27207e5cccSFangrui Song float32x2_t test_vcmla_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) { 28207e5cccSFangrui Song return vcmla_f32(acc, lhs, rhs); 29207e5cccSFangrui Song } 30207e5cccSFangrui Song 31*ca603d25SDavid Green // CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_f16( 32*ca603d25SDavid Green // CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) #[[ATTR0]] { 33*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 34*ca603d25SDavid Green // CHECK-NEXT: [[VCMLAQ_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[RHS]]) 35*ca603d25SDavid Green // CHECK-NEXT: ret <8 x half> [[VCMLAQ_F163_I]] 36*ca603d25SDavid Green // 37207e5cccSFangrui Song float16x8_t test_vcmlaq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) { 38207e5cccSFangrui Song return vcmlaq_f16(acc, lhs, rhs); 39207e5cccSFangrui Song } 40207e5cccSFangrui Song 41*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_f32( 42*ca603d25SDavid Green // CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] { 43*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 44*ca603d25SDavid Green // CHECK-NEXT: [[VCMLAQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[RHS]]) 45*ca603d25SDavid Green // CHECK-NEXT: ret <4 x float> [[VCMLAQ_F323_I]] 46*ca603d25SDavid Green // 47207e5cccSFangrui Song float32x4_t test_vcmlaq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) { 48207e5cccSFangrui Song return vcmlaq_f32(acc, lhs, rhs); 49207e5cccSFangrui Song } 50207e5cccSFangrui Song 51*ca603d25SDavid Green // CHECK-LABEL: define dso_local <2 x double> @test_vcmlaq_f64( 52*ca603d25SDavid Green // CHECK-SAME: <2 x double> noundef [[ACC:%.*]], <2 x double> noundef [[LHS:%.*]], <2 x double> noundef [[RHS:%.*]]) #[[ATTR0]] { 53*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 54*ca603d25SDavid Green // CHECK-NEXT: [[VCMLAQ_F643_I:%.*]] = call <2 x double> @llvm.aarch64.neon.vcmla.rot0.v2f64(<2 x double> [[ACC]], <2 x double> [[LHS]], <2 x double> [[RHS]]) 55*ca603d25SDavid Green // CHECK-NEXT: ret <2 x double> [[VCMLAQ_F643_I]] 56*ca603d25SDavid Green // 57207e5cccSFangrui Song float64x2_t test_vcmlaq_f64(float64x2_t acc, float64x2_t lhs, float64x2_t rhs) { 58207e5cccSFangrui Song return vcmlaq_f64(acc, lhs, rhs); 59207e5cccSFangrui Song } 60207e5cccSFangrui Song 61*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot90_f16( 62*ca603d25SDavid Green // CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) #[[ATTR0]] { 63*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 64*ca603d25SDavid Green // CHECK-NEXT: [[VCMLA_ROT90_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[RHS]]) 65*ca603d25SDavid Green // CHECK-NEXT: ret <4 x half> [[VCMLA_ROT90_F163_I]] 66*ca603d25SDavid Green // 67207e5cccSFangrui Song float16x4_t test_vcmla_rot90_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) { 68207e5cccSFangrui Song return vcmla_rot90_f16(acc, lhs, rhs); 69207e5cccSFangrui Song } 70207e5cccSFangrui Song 71*ca603d25SDavid Green // CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot90_f32( 72*ca603d25SDavid Green // CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0]] { 73*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 74*ca603d25SDavid Green // CHECK-NEXT: [[VCMLA_ROT90_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]]) 75*ca603d25SDavid Green // CHECK-NEXT: ret <2 x float> [[VCMLA_ROT90_F323_I]] 76*ca603d25SDavid Green // 77207e5cccSFangrui Song float32x2_t test_vcmla_rot90_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) { 78207e5cccSFangrui Song return vcmla_rot90_f32(acc, lhs, rhs); 79207e5cccSFangrui Song } 80207e5cccSFangrui Song 81*ca603d25SDavid Green // CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot90_f16( 82*ca603d25SDavid Green // CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) #[[ATTR0]] { 83*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 84*ca603d25SDavid Green // CHECK-NEXT: [[VCMLAQ_ROT90_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[RHS]]) 85*ca603d25SDavid Green // CHECK-NEXT: ret <8 x half> [[VCMLAQ_ROT90_F163_I]] 86*ca603d25SDavid Green // 87207e5cccSFangrui Song float16x8_t test_vcmlaq_rot90_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) { 88207e5cccSFangrui Song return vcmlaq_rot90_f16(acc, lhs, rhs); 89207e5cccSFangrui Song } 90207e5cccSFangrui Song 91*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot90_f32( 92*ca603d25SDavid Green // CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] { 93*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 94*ca603d25SDavid Green // CHECK-NEXT: [[VCMLAQ_ROT90_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[RHS]]) 95*ca603d25SDavid Green // CHECK-NEXT: ret <4 x float> [[VCMLAQ_ROT90_F323_I]] 96*ca603d25SDavid Green // 97207e5cccSFangrui Song float32x4_t test_vcmlaq_rot90_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) { 98207e5cccSFangrui Song return vcmlaq_rot90_f32(acc, lhs, rhs); 99207e5cccSFangrui Song } 100207e5cccSFangrui Song 101*ca603d25SDavid Green // CHECK-LABEL: define dso_local <2 x double> @test_vcmlaq_rot90_f64( 102*ca603d25SDavid Green // CHECK-SAME: <2 x double> noundef [[ACC:%.*]], <2 x double> noundef [[LHS:%.*]], <2 x double> noundef [[RHS:%.*]]) #[[ATTR0]] { 103*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 104*ca603d25SDavid Green // CHECK-NEXT: [[VCMLAQ_ROT90_F643_I:%.*]] = call <2 x double> @llvm.aarch64.neon.vcmla.rot90.v2f64(<2 x double> [[ACC]], <2 x double> [[LHS]], <2 x double> [[RHS]]) 105*ca603d25SDavid Green // CHECK-NEXT: ret <2 x double> [[VCMLAQ_ROT90_F643_I]] 106*ca603d25SDavid Green // 107207e5cccSFangrui Song float64x2_t test_vcmlaq_rot90_f64(float64x2_t acc, float64x2_t lhs, float64x2_t rhs) { 108207e5cccSFangrui Song return vcmlaq_rot90_f64(acc, lhs, rhs); 109207e5cccSFangrui Song } 110207e5cccSFangrui Song 111*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot180_f16( 112*ca603d25SDavid Green // CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) #[[ATTR0]] { 113*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 114*ca603d25SDavid Green // CHECK-NEXT: [[VCMLA_ROT180_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[RHS]]) 115*ca603d25SDavid Green // CHECK-NEXT: ret <4 x half> [[VCMLA_ROT180_F163_I]] 116*ca603d25SDavid Green // 117207e5cccSFangrui Song float16x4_t test_vcmla_rot180_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) { 118207e5cccSFangrui Song return vcmla_rot180_f16(acc, lhs, rhs); 119207e5cccSFangrui Song } 120207e5cccSFangrui Song 121*ca603d25SDavid Green // CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot180_f32( 122*ca603d25SDavid Green // CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0]] { 123*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 124*ca603d25SDavid Green // CHECK-NEXT: [[VCMLA_ROT180_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]]) 125*ca603d25SDavid Green // CHECK-NEXT: ret <2 x float> [[VCMLA_ROT180_F323_I]] 126*ca603d25SDavid Green // 127207e5cccSFangrui Song float32x2_t test_vcmla_rot180_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) { 128207e5cccSFangrui Song return vcmla_rot180_f32(acc, lhs, rhs); 129207e5cccSFangrui Song } 130207e5cccSFangrui Song 131*ca603d25SDavid Green // CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot180_f16( 132*ca603d25SDavid Green // CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) #[[ATTR0]] { 133*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 134*ca603d25SDavid Green // CHECK-NEXT: [[VCMLAQ_ROT180_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[RHS]]) 135*ca603d25SDavid Green // CHECK-NEXT: ret <8 x half> [[VCMLAQ_ROT180_F163_I]] 136*ca603d25SDavid Green // 137207e5cccSFangrui Song float16x8_t test_vcmlaq_rot180_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) { 138207e5cccSFangrui Song return vcmlaq_rot180_f16(acc, lhs, rhs); 139207e5cccSFangrui Song } 140207e5cccSFangrui Song 141*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot180_f32( 142*ca603d25SDavid Green // CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] { 143*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 144*ca603d25SDavid Green // CHECK-NEXT: [[VCMLAQ_ROT180_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[RHS]]) 145*ca603d25SDavid Green // CHECK-NEXT: ret <4 x float> [[VCMLAQ_ROT180_F323_I]] 146*ca603d25SDavid Green // 147207e5cccSFangrui Song float32x4_t test_vcmlaq_rot180_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) { 148207e5cccSFangrui Song return vcmlaq_rot180_f32(acc, lhs, rhs); 149207e5cccSFangrui Song } 150207e5cccSFangrui Song 151*ca603d25SDavid Green // CHECK-LABEL: define dso_local <2 x double> @test_vcmlaq_rot180_f64( 152*ca603d25SDavid Green // CHECK-SAME: <2 x double> noundef [[ACC:%.*]], <2 x double> noundef [[LHS:%.*]], <2 x double> noundef [[RHS:%.*]]) #[[ATTR0]] { 153*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 154*ca603d25SDavid Green // CHECK-NEXT: [[VCMLAQ_ROT180_F643_I:%.*]] = call <2 x double> @llvm.aarch64.neon.vcmla.rot180.v2f64(<2 x double> [[ACC]], <2 x double> [[LHS]], <2 x double> [[RHS]]) 155*ca603d25SDavid Green // CHECK-NEXT: ret <2 x double> [[VCMLAQ_ROT180_F643_I]] 156*ca603d25SDavid Green // 157207e5cccSFangrui Song float64x2_t test_vcmlaq_rot180_f64(float64x2_t acc, float64x2_t lhs, float64x2_t rhs) { 158207e5cccSFangrui Song return vcmlaq_rot180_f64(acc, lhs, rhs); 159207e5cccSFangrui Song } 160207e5cccSFangrui Song 161*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot270_f16( 162*ca603d25SDavid Green // CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) #[[ATTR0]] { 163*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 164*ca603d25SDavid Green // CHECK-NEXT: [[VCMLA_ROT270_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[RHS]]) 165*ca603d25SDavid Green // CHECK-NEXT: ret <4 x half> [[VCMLA_ROT270_F163_I]] 166*ca603d25SDavid Green // 167207e5cccSFangrui Song float16x4_t test_vcmla_rot270_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) { 168207e5cccSFangrui Song return vcmla_rot270_f16(acc, lhs, rhs); 169207e5cccSFangrui Song } 170207e5cccSFangrui Song 171*ca603d25SDavid Green // CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot270_f32( 172*ca603d25SDavid Green // CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0]] { 173*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 174*ca603d25SDavid Green // CHECK-NEXT: [[VCMLA_ROT270_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[RHS]]) 175*ca603d25SDavid Green // CHECK-NEXT: ret <2 x float> [[VCMLA_ROT270_F323_I]] 176*ca603d25SDavid Green // 177207e5cccSFangrui Song float32x2_t test_vcmla_rot270_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) { 178207e5cccSFangrui Song return vcmla_rot270_f32(acc, lhs, rhs); 179207e5cccSFangrui Song } 180207e5cccSFangrui Song 181*ca603d25SDavid Green // CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot270_f16( 182*ca603d25SDavid Green // CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) #[[ATTR0]] { 183*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 184*ca603d25SDavid Green // CHECK-NEXT: [[VCMLAQ_ROT270_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[RHS]]) 185*ca603d25SDavid Green // CHECK-NEXT: ret <8 x half> [[VCMLAQ_ROT270_F163_I]] 186*ca603d25SDavid Green // 187207e5cccSFangrui Song float16x8_t test_vcmlaq_rot270_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) { 188207e5cccSFangrui Song return vcmlaq_rot270_f16(acc, lhs, rhs); 189207e5cccSFangrui Song } 190207e5cccSFangrui Song 191*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot270_f32( 192*ca603d25SDavid Green // CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] { 193*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 194*ca603d25SDavid Green // CHECK-NEXT: [[VCMLAQ_ROT270_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[RHS]]) 195*ca603d25SDavid Green // CHECK-NEXT: ret <4 x float> [[VCMLAQ_ROT270_F323_I]] 196*ca603d25SDavid Green // 197207e5cccSFangrui Song float32x4_t test_vcmlaq_rot270_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) { 198207e5cccSFangrui Song return vcmlaq_rot270_f32(acc, lhs, rhs); 199207e5cccSFangrui Song } 200207e5cccSFangrui Song 201*ca603d25SDavid Green // CHECK-LABEL: define dso_local <2 x double> @test_vcmlaq_rot270_f64( 202*ca603d25SDavid Green // CHECK-SAME: <2 x double> noundef [[ACC:%.*]], <2 x double> noundef [[LHS:%.*]], <2 x double> noundef [[RHS:%.*]]) #[[ATTR0]] { 203*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 204*ca603d25SDavid Green // CHECK-NEXT: [[VCMLAQ_ROT270_F643_I:%.*]] = call <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double> [[ACC]], <2 x double> [[LHS]], <2 x double> [[RHS]]) 205*ca603d25SDavid Green // CHECK-NEXT: ret <2 x double> [[VCMLAQ_ROT270_F643_I]] 206*ca603d25SDavid Green // 207207e5cccSFangrui Song float64x2_t test_vcmlaq_rot270_f64(float64x2_t acc, float64x2_t lhs, float64x2_t rhs) { 208207e5cccSFangrui Song return vcmlaq_rot270_f64(acc, lhs, rhs); 209207e5cccSFangrui Song } 210207e5cccSFangrui Song 211*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x half> @test_vcmla_lane_f16( 212*ca603d25SDavid Green // CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) #[[ATTR0]] { 213*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 214*ca603d25SDavid Green // CHECK-NEXT: [[__REINT_150:%.*]] = alloca <4 x half>, align 8 215*ca603d25SDavid Green // CHECK-NEXT: [[__REINT1_150:%.*]] = alloca <2 x i32>, align 8 216*ca603d25SDavid Green // CHECK-NEXT: store <4 x half> [[RHS]], ptr [[__REINT_150]], align 8 217*ca603d25SDavid Green // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[__REINT_150]], align 8 218*ca603d25SDavid Green // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1 219*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i32> poison, i32 [[VGET_LANE]], i32 0 220*ca603d25SDavid Green // CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[__REINT_150]], align 8 221*ca603d25SDavid Green // CHECK-NEXT: [[VGET_LANE3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 222*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <2 x i32> [[VECINIT]], i32 [[VGET_LANE3]], i32 1 223*ca603d25SDavid Green // CHECK-NEXT: store <2 x i32> [[VECINIT5]], ptr [[__REINT1_150]], align 8 224*ca603d25SDavid Green // CHECK-NEXT: [[TMP2:%.*]] = load <4 x half>, ptr [[__REINT1_150]], align 8 225*ca603d25SDavid Green // CHECK-NEXT: [[VCMLA_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP2]]) 226*ca603d25SDavid Green // CHECK-NEXT: ret <4 x half> [[VCMLA_F163_I]] 227*ca603d25SDavid Green // 228207e5cccSFangrui Song float16x4_t test_vcmla_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) { 229207e5cccSFangrui Song return vcmla_lane_f16(acc, lhs, rhs, 1); 230207e5cccSFangrui Song } 231207e5cccSFangrui Song 232207e5cccSFangrui Song // ACLE says this exists, but it won't map to a single instruction if lane > 1. 233*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x half> @test_vcmla_laneq_f16( 234*ca603d25SDavid Green // CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) #[[ATTR0]] { 235*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 236*ca603d25SDavid Green // CHECK-NEXT: [[__REINT_154:%.*]] = alloca <8 x half>, align 16 237*ca603d25SDavid Green // CHECK-NEXT: [[__REINT1_154:%.*]] = alloca <2 x i32>, align 8 238*ca603d25SDavid Green // CHECK-NEXT: store <8 x half> [[RHS]], ptr [[__REINT_154]], align 16 239*ca603d25SDavid Green // CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[__REINT_154]], align 16 240*ca603d25SDavid Green // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3 241*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i32> poison, i32 [[VGETQ_LANE]], i32 0 242*ca603d25SDavid Green // CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[__REINT_154]], align 16 243*ca603d25SDavid Green // CHECK-NEXT: [[VGETQ_LANE3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 244*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <2 x i32> [[VECINIT]], i32 [[VGETQ_LANE3]], i32 1 245*ca603d25SDavid Green // CHECK-NEXT: store <2 x i32> [[VECINIT5]], ptr [[__REINT1_154]], align 8 246*ca603d25SDavid Green // CHECK-NEXT: [[TMP2:%.*]] = load <4 x half>, ptr [[__REINT1_154]], align 8 247*ca603d25SDavid Green // CHECK-NEXT: [[VCMLA_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP2]]) 248*ca603d25SDavid Green // CHECK-NEXT: ret <4 x half> [[VCMLA_F163_I]] 249*ca603d25SDavid Green // 250207e5cccSFangrui Song float16x4_t test_vcmla_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) { 251207e5cccSFangrui Song return vcmla_laneq_f16(acc, lhs, rhs, 3); 252207e5cccSFangrui Song } 253207e5cccSFangrui Song 254*ca603d25SDavid Green // CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_lane_f16( 255*ca603d25SDavid Green // CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) #[[ATTR0]] { 256*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 257*ca603d25SDavid Green // CHECK-NEXT: [[__REINT_152:%.*]] = alloca <4 x half>, align 8 258*ca603d25SDavid Green // CHECK-NEXT: [[__REINT1_152:%.*]] = alloca <4 x i32>, align 16 259*ca603d25SDavid Green // CHECK-NEXT: store <4 x half> [[RHS]], ptr [[__REINT_152]], align 8 260*ca603d25SDavid Green // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[__REINT_152]], align 8 261*ca603d25SDavid Green // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1 262*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x i32> poison, i32 [[VGET_LANE]], i32 0 263*ca603d25SDavid Green // CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[__REINT_152]], align 8 264*ca603d25SDavid Green // CHECK-NEXT: [[VGET_LANE3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 265*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[VGET_LANE3]], i32 1 266*ca603d25SDavid Green // CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[__REINT_152]], align 8 267*ca603d25SDavid Green // CHECK-NEXT: [[VGET_LANE8:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1 268*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT10:%.*]] = insertelement <4 x i32> [[VECINIT5]], i32 [[VGET_LANE8]], i32 2 269*ca603d25SDavid Green // CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr [[__REINT_152]], align 8 270*ca603d25SDavid Green // CHECK-NEXT: [[VGET_LANE13:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1 271*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT15:%.*]] = insertelement <4 x i32> [[VECINIT10]], i32 [[VGET_LANE13]], i32 3 272*ca603d25SDavid Green // CHECK-NEXT: store <4 x i32> [[VECINIT15]], ptr [[__REINT1_152]], align 16 273*ca603d25SDavid Green // CHECK-NEXT: [[TMP4:%.*]] = load <8 x half>, ptr [[__REINT1_152]], align 16 274*ca603d25SDavid Green // CHECK-NEXT: [[VCMLAQ_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP4]]) 275*ca603d25SDavid Green // CHECK-NEXT: ret <8 x half> [[VCMLAQ_F163_I]] 276*ca603d25SDavid Green // 277207e5cccSFangrui Song float16x8_t test_vcmlaq_lane_f16(float16x8_t acc, float16x8_t lhs, float16x4_t rhs) { 278207e5cccSFangrui Song return vcmlaq_lane_f16(acc, lhs, rhs, 1); 279207e5cccSFangrui Song } 280207e5cccSFangrui Song 281*ca603d25SDavid Green // CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_laneq_f16( 282*ca603d25SDavid Green // CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) #[[ATTR0]] { 283*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 284*ca603d25SDavid Green // CHECK-NEXT: [[__REINT_156:%.*]] = alloca <8 x half>, align 16 285*ca603d25SDavid Green // CHECK-NEXT: [[__REINT1_156:%.*]] = alloca <4 x i32>, align 16 286*ca603d25SDavid Green // CHECK-NEXT: store <8 x half> [[RHS]], ptr [[__REINT_156]], align 16 287*ca603d25SDavid Green // CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[__REINT_156]], align 16 288*ca603d25SDavid Green // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3 289*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x i32> poison, i32 [[VGETQ_LANE]], i32 0 290*ca603d25SDavid Green // CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[__REINT_156]], align 16 291*ca603d25SDavid Green // CHECK-NEXT: [[VGETQ_LANE3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 292*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[VGETQ_LANE3]], i32 1 293*ca603d25SDavid Green // CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[__REINT_156]], align 16 294*ca603d25SDavid Green // CHECK-NEXT: [[VGETQ_LANE8:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3 295*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT10:%.*]] = insertelement <4 x i32> [[VECINIT5]], i32 [[VGETQ_LANE8]], i32 2 296*ca603d25SDavid Green // CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[__REINT_156]], align 16 297*ca603d25SDavid Green // CHECK-NEXT: [[VGETQ_LANE13:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3 298*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT15:%.*]] = insertelement <4 x i32> [[VECINIT10]], i32 [[VGETQ_LANE13]], i32 3 299*ca603d25SDavid Green // CHECK-NEXT: store <4 x i32> [[VECINIT15]], ptr [[__REINT1_156]], align 16 300*ca603d25SDavid Green // CHECK-NEXT: [[TMP4:%.*]] = load <8 x half>, ptr [[__REINT1_156]], align 16 301*ca603d25SDavid Green // CHECK-NEXT: [[VCMLAQ_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP4]]) 302*ca603d25SDavid Green // CHECK-NEXT: ret <8 x half> [[VCMLAQ_F163_I]] 303*ca603d25SDavid Green // 304207e5cccSFangrui Song float16x8_t test_vcmlaq_laneq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) { 305207e5cccSFangrui Song return vcmlaq_laneq_f16(acc, lhs, rhs, 3); 306207e5cccSFangrui Song } 307207e5cccSFangrui Song 308*ca603d25SDavid Green // CHECK-LABEL: define dso_local <2 x float> @test_vcmla_lane_f32( 309*ca603d25SDavid Green // CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0]] { 310*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 311*ca603d25SDavid Green // CHECK-NEXT: [[__REINT_182:%.*]] = alloca <2 x float>, align 8 312*ca603d25SDavid Green // CHECK-NEXT: [[__REINT1_182:%.*]] = alloca <1 x i64>, align 8 313*ca603d25SDavid Green // CHECK-NEXT: store <2 x float> [[RHS]], ptr [[__REINT_182]], align 8 314*ca603d25SDavid Green // CHECK-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr [[__REINT_182]], align 8 315*ca603d25SDavid Green // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP0]], i32 0 316*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <1 x i64> poison, i64 [[VGET_LANE]], i32 0 317*ca603d25SDavid Green // CHECK-NEXT: store <1 x i64> [[VECINIT]], ptr [[__REINT1_182]], align 8 318*ca603d25SDavid Green // CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[__REINT1_182]], align 8 319*ca603d25SDavid Green // CHECK-NEXT: [[VCMLA_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]]) 320*ca603d25SDavid Green // CHECK-NEXT: ret <2 x float> [[VCMLA_F323_I]] 321*ca603d25SDavid Green // 322207e5cccSFangrui Song float32x2_t test_vcmla_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) { 323207e5cccSFangrui Song return vcmla_lane_f32(acc, lhs, rhs, 0); 324207e5cccSFangrui Song } 325207e5cccSFangrui Song 326207e5cccSFangrui Song // ACLE says this exists, but it won't map to a single instruction if lane > 1. 327*ca603d25SDavid Green // CHECK-LABEL: define dso_local <2 x float> @test_vcmla_laneq_f32( 328*ca603d25SDavid Green // CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] { 329*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 330*ca603d25SDavid Green // CHECK-NEXT: [[__REINT_186:%.*]] = alloca <4 x float>, align 16 331*ca603d25SDavid Green // CHECK-NEXT: [[__REINT1_186:%.*]] = alloca <1 x i64>, align 8 332*ca603d25SDavid Green // CHECK-NEXT: store <4 x float> [[RHS]], ptr [[__REINT_186]], align 16 333*ca603d25SDavid Green // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[__REINT_186]], align 16 334*ca603d25SDavid Green // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1 335*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <1 x i64> poison, i64 [[VGETQ_LANE]], i32 0 336*ca603d25SDavid Green // CHECK-NEXT: store <1 x i64> [[VECINIT]], ptr [[__REINT1_186]], align 8 337*ca603d25SDavid Green // CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[__REINT1_186]], align 8 338*ca603d25SDavid Green // CHECK-NEXT: [[VCMLA_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]]) 339*ca603d25SDavid Green // CHECK-NEXT: ret <2 x float> [[VCMLA_F323_I]] 340*ca603d25SDavid Green // 341207e5cccSFangrui Song float32x2_t test_vcmla_laneq_f32(float32x2_t acc, float32x2_t lhs, float32x4_t rhs) { 342207e5cccSFangrui Song return vcmla_laneq_f32(acc, lhs, rhs, 1); 343207e5cccSFangrui Song } 344207e5cccSFangrui Song 345*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_lane_f32( 346*ca603d25SDavid Green // CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0]] { 347*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 348*ca603d25SDavid Green // CHECK-NEXT: [[__REINT_184:%.*]] = alloca <2 x float>, align 8 349*ca603d25SDavid Green // CHECK-NEXT: [[__REINT1_184:%.*]] = alloca <2 x i64>, align 16 350*ca603d25SDavid Green // CHECK-NEXT: store <2 x float> [[RHS]], ptr [[__REINT_184]], align 8 351*ca603d25SDavid Green // CHECK-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr [[__REINT_184]], align 8 352*ca603d25SDavid Green // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP0]], i32 0 353*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[VGET_LANE]], i32 0 354*ca603d25SDavid Green // CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[__REINT_184]], align 8 355*ca603d25SDavid Green // CHECK-NEXT: [[VGET_LANE3:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0 356*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <2 x i64> [[VECINIT]], i64 [[VGET_LANE3]], i32 1 357*ca603d25SDavid Green // CHECK-NEXT: store <2 x i64> [[VECINIT5]], ptr [[__REINT1_184]], align 16 358*ca603d25SDavid Green // CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[__REINT1_184]], align 16 359*ca603d25SDavid Green // CHECK-NEXT: [[VCMLAQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]]) 360*ca603d25SDavid Green // CHECK-NEXT: ret <4 x float> [[VCMLAQ_F323_I]] 361*ca603d25SDavid Green // 362207e5cccSFangrui Song float32x4_t test_vcmlaq_lane_f32(float32x4_t acc, float32x4_t lhs, float32x2_t rhs) { 363207e5cccSFangrui Song return vcmlaq_lane_f32(acc, lhs, rhs, 0); 364207e5cccSFangrui Song } 365207e5cccSFangrui Song 366*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_laneq_f32( 367*ca603d25SDavid Green // CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] { 368*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 369*ca603d25SDavid Green // CHECK-NEXT: [[__REINT_188:%.*]] = alloca <4 x float>, align 16 370*ca603d25SDavid Green // CHECK-NEXT: [[__REINT1_188:%.*]] = alloca <2 x i64>, align 16 371*ca603d25SDavid Green // CHECK-NEXT: store <4 x float> [[RHS]], ptr [[__REINT_188]], align 16 372*ca603d25SDavid Green // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[__REINT_188]], align 16 373*ca603d25SDavid Green // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1 374*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[VGETQ_LANE]], i32 0 375*ca603d25SDavid Green // CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[__REINT_188]], align 16 376*ca603d25SDavid Green // CHECK-NEXT: [[VGETQ_LANE3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 377*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <2 x i64> [[VECINIT]], i64 [[VGETQ_LANE3]], i32 1 378*ca603d25SDavid Green // CHECK-NEXT: store <2 x i64> [[VECINIT5]], ptr [[__REINT1_188]], align 16 379*ca603d25SDavid Green // CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[__REINT1_188]], align 16 380*ca603d25SDavid Green // CHECK-NEXT: [[VCMLAQ_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]]) 381*ca603d25SDavid Green // CHECK-NEXT: ret <4 x float> [[VCMLAQ_F323_I]] 382*ca603d25SDavid Green // 383207e5cccSFangrui Song float32x4_t test_vcmlaq_laneq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) { 384207e5cccSFangrui Song return vcmlaq_laneq_f32(acc, lhs, rhs, 1); 385207e5cccSFangrui Song } 386207e5cccSFangrui Song 387*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot90_lane_f16( 388*ca603d25SDavid Green // CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) #[[ATTR0]] { 389*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 390*ca603d25SDavid Green // CHECK-NEXT: [[__REINT_174:%.*]] = alloca <4 x half>, align 8 391*ca603d25SDavid Green // CHECK-NEXT: [[__REINT1_174:%.*]] = alloca <2 x i32>, align 8 392*ca603d25SDavid Green // CHECK-NEXT: store <4 x half> [[RHS]], ptr [[__REINT_174]], align 8 393*ca603d25SDavid Green // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[__REINT_174]], align 8 394*ca603d25SDavid Green // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1 395*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i32> poison, i32 [[VGET_LANE]], i32 0 396*ca603d25SDavid Green // CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[__REINT_174]], align 8 397*ca603d25SDavid Green // CHECK-NEXT: [[VGET_LANE3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 398*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <2 x i32> [[VECINIT]], i32 [[VGET_LANE3]], i32 1 399*ca603d25SDavid Green // CHECK-NEXT: store <2 x i32> [[VECINIT5]], ptr [[__REINT1_174]], align 8 400*ca603d25SDavid Green // CHECK-NEXT: [[TMP2:%.*]] = load <4 x half>, ptr [[__REINT1_174]], align 8 401*ca603d25SDavid Green // CHECK-NEXT: [[VCMLA_ROT90_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP2]]) 402*ca603d25SDavid Green // CHECK-NEXT: ret <4 x half> [[VCMLA_ROT90_F163_I]] 403*ca603d25SDavid Green // 404207e5cccSFangrui Song float16x4_t test_vcmla_rot90_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) { 405207e5cccSFangrui Song return vcmla_rot90_lane_f16(acc, lhs, rhs, 1); 406207e5cccSFangrui Song } 407207e5cccSFangrui Song 408207e5cccSFangrui Song // ACLE says this exists, but it won't map to a single instruction if lane > 1. 409*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot90_laneq_f16( 410*ca603d25SDavid Green // CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) #[[ATTR0]] { 411*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 412*ca603d25SDavid Green // CHECK-NEXT: [[__REINT_178:%.*]] = alloca <8 x half>, align 16 413*ca603d25SDavid Green // CHECK-NEXT: [[__REINT1_178:%.*]] = alloca <2 x i32>, align 8 414*ca603d25SDavid Green // CHECK-NEXT: store <8 x half> [[RHS]], ptr [[__REINT_178]], align 16 415*ca603d25SDavid Green // CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[__REINT_178]], align 16 416*ca603d25SDavid Green // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3 417*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i32> poison, i32 [[VGETQ_LANE]], i32 0 418*ca603d25SDavid Green // CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[__REINT_178]], align 16 419*ca603d25SDavid Green // CHECK-NEXT: [[VGETQ_LANE3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 420*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <2 x i32> [[VECINIT]], i32 [[VGETQ_LANE3]], i32 1 421*ca603d25SDavid Green // CHECK-NEXT: store <2 x i32> [[VECINIT5]], ptr [[__REINT1_178]], align 8 422*ca603d25SDavid Green // CHECK-NEXT: [[TMP2:%.*]] = load <4 x half>, ptr [[__REINT1_178]], align 8 423*ca603d25SDavid Green // CHECK-NEXT: [[VCMLA_ROT90_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP2]]) 424*ca603d25SDavid Green // CHECK-NEXT: ret <4 x half> [[VCMLA_ROT90_F163_I]] 425*ca603d25SDavid Green // 426207e5cccSFangrui Song float16x4_t test_vcmla_rot90_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) { 427207e5cccSFangrui Song return vcmla_rot90_laneq_f16(acc, lhs, rhs, 3); 428207e5cccSFangrui Song } 429207e5cccSFangrui Song 430*ca603d25SDavid Green // CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot90_lane_f16( 431*ca603d25SDavid Green // CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) #[[ATTR0]] { 432*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 433*ca603d25SDavid Green // CHECK-NEXT: [[__REINT_176:%.*]] = alloca <4 x half>, align 8 434*ca603d25SDavid Green // CHECK-NEXT: [[__REINT1_176:%.*]] = alloca <4 x i32>, align 16 435*ca603d25SDavid Green // CHECK-NEXT: store <4 x half> [[RHS]], ptr [[__REINT_176]], align 8 436*ca603d25SDavid Green // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[__REINT_176]], align 8 437*ca603d25SDavid Green // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1 438*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x i32> poison, i32 [[VGET_LANE]], i32 0 439*ca603d25SDavid Green // CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[__REINT_176]], align 8 440*ca603d25SDavid Green // CHECK-NEXT: [[VGET_LANE3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 441*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[VGET_LANE3]], i32 1 442*ca603d25SDavid Green // CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[__REINT_176]], align 8 443*ca603d25SDavid Green // CHECK-NEXT: [[VGET_LANE8:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1 444*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT10:%.*]] = insertelement <4 x i32> [[VECINIT5]], i32 [[VGET_LANE8]], i32 2 445*ca603d25SDavid Green // CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr [[__REINT_176]], align 8 446*ca603d25SDavid Green // CHECK-NEXT: [[VGET_LANE13:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1 447*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT15:%.*]] = insertelement <4 x i32> [[VECINIT10]], i32 [[VGET_LANE13]], i32 3 448*ca603d25SDavid Green // CHECK-NEXT: store <4 x i32> [[VECINIT15]], ptr [[__REINT1_176]], align 16 449*ca603d25SDavid Green // CHECK-NEXT: [[TMP4:%.*]] = load <8 x half>, ptr [[__REINT1_176]], align 16 450*ca603d25SDavid Green // CHECK-NEXT: [[VCMLAQ_ROT90_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP4]]) 451*ca603d25SDavid Green // CHECK-NEXT: ret <8 x half> [[VCMLAQ_ROT90_F163_I]] 452*ca603d25SDavid Green // 453207e5cccSFangrui Song float16x8_t test_vcmlaq_rot90_lane_f16(float16x8_t acc, float16x8_t lhs, float16x4_t rhs) { 454207e5cccSFangrui Song return vcmlaq_rot90_lane_f16(acc, lhs, rhs, 1); 455207e5cccSFangrui Song } 456207e5cccSFangrui Song 457*ca603d25SDavid Green // CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot90_laneq_f16( 458*ca603d25SDavid Green // CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) #[[ATTR0]] { 459*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 460*ca603d25SDavid Green // CHECK-NEXT: [[__REINT_180:%.*]] = alloca <8 x half>, align 16 461*ca603d25SDavid Green // CHECK-NEXT: [[__REINT1_180:%.*]] = alloca <4 x i32>, align 16 462*ca603d25SDavid Green // CHECK-NEXT: store <8 x half> [[RHS]], ptr [[__REINT_180]], align 16 463*ca603d25SDavid Green // CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[__REINT_180]], align 16 464*ca603d25SDavid Green // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3 465*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x i32> poison, i32 [[VGETQ_LANE]], i32 0 466*ca603d25SDavid Green // CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[__REINT_180]], align 16 467*ca603d25SDavid Green // CHECK-NEXT: [[VGETQ_LANE3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 468*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[VGETQ_LANE3]], i32 1 469*ca603d25SDavid Green // CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[__REINT_180]], align 16 470*ca603d25SDavid Green // CHECK-NEXT: [[VGETQ_LANE8:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3 471*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT10:%.*]] = insertelement <4 x i32> [[VECINIT5]], i32 [[VGETQ_LANE8]], i32 2 472*ca603d25SDavid Green // CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[__REINT_180]], align 16 473*ca603d25SDavid Green // CHECK-NEXT: [[VGETQ_LANE13:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3 474*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT15:%.*]] = insertelement <4 x i32> [[VECINIT10]], i32 [[VGETQ_LANE13]], i32 3 475*ca603d25SDavid Green // CHECK-NEXT: store <4 x i32> [[VECINIT15]], ptr [[__REINT1_180]], align 16 476*ca603d25SDavid Green // CHECK-NEXT: [[TMP4:%.*]] = load <8 x half>, ptr [[__REINT1_180]], align 16 477*ca603d25SDavid Green // CHECK-NEXT: [[VCMLAQ_ROT90_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP4]]) 478*ca603d25SDavid Green // CHECK-NEXT: ret <8 x half> [[VCMLAQ_ROT90_F163_I]] 479*ca603d25SDavid Green // 480207e5cccSFangrui Song float16x8_t test_vcmlaq_rot90_laneq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) { 481207e5cccSFangrui Song return vcmlaq_rot90_laneq_f16(acc, lhs, rhs, 3); 482207e5cccSFangrui Song } 483207e5cccSFangrui Song 484*ca603d25SDavid Green // CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot90_lane_f32( 485*ca603d25SDavid Green // CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0]] { 486*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 487*ca603d25SDavid Green // CHECK-NEXT: [[__REINT_206:%.*]] = alloca <2 x float>, align 8 488*ca603d25SDavid Green // CHECK-NEXT: [[__REINT1_206:%.*]] = alloca <1 x i64>, align 8 489*ca603d25SDavid Green // CHECK-NEXT: store <2 x float> [[RHS]], ptr [[__REINT_206]], align 8 490*ca603d25SDavid Green // CHECK-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr [[__REINT_206]], align 8 491*ca603d25SDavid Green // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP0]], i32 0 492*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <1 x i64> poison, i64 [[VGET_LANE]], i32 0 493*ca603d25SDavid Green // CHECK-NEXT: store <1 x i64> [[VECINIT]], ptr [[__REINT1_206]], align 8 494*ca603d25SDavid Green // CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[__REINT1_206]], align 8 495*ca603d25SDavid Green // CHECK-NEXT: [[VCMLA_ROT90_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]]) 496*ca603d25SDavid Green // CHECK-NEXT: ret <2 x float> [[VCMLA_ROT90_F323_I]] 497*ca603d25SDavid Green // 498207e5cccSFangrui Song float32x2_t test_vcmla_rot90_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) { 499207e5cccSFangrui Song return vcmla_rot90_lane_f32(acc, lhs, rhs, 0); 500207e5cccSFangrui Song } 501207e5cccSFangrui Song 502207e5cccSFangrui Song // ACLE says this exists, but it won't map to a single instruction if lane > 1. 503*ca603d25SDavid Green // CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot90_laneq_f32( 504*ca603d25SDavid Green // CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] { 505*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 506*ca603d25SDavid Green // CHECK-NEXT: [[__REINT_210:%.*]] = alloca <4 x float>, align 16 507*ca603d25SDavid Green // CHECK-NEXT: [[__REINT1_210:%.*]] = alloca <1 x i64>, align 8 508*ca603d25SDavid Green // CHECK-NEXT: store <4 x float> [[RHS]], ptr [[__REINT_210]], align 16 509*ca603d25SDavid Green // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[__REINT_210]], align 16 510*ca603d25SDavid Green // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1 511*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <1 x i64> poison, i64 [[VGETQ_LANE]], i32 0 512*ca603d25SDavid Green // CHECK-NEXT: store <1 x i64> [[VECINIT]], ptr [[__REINT1_210]], align 8 513*ca603d25SDavid Green // CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[__REINT1_210]], align 8 514*ca603d25SDavid Green // CHECK-NEXT: [[VCMLA_ROT90_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]]) 515*ca603d25SDavid Green // CHECK-NEXT: ret <2 x float> [[VCMLA_ROT90_F323_I]] 516*ca603d25SDavid Green // 517207e5cccSFangrui Song float32x2_t test_vcmla_rot90_laneq_f32(float32x2_t acc, float32x2_t lhs, float32x4_t rhs) { 518207e5cccSFangrui Song return vcmla_rot90_laneq_f32(acc, lhs, rhs, 1); 519207e5cccSFangrui Song } 520207e5cccSFangrui Song 521*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot90_lane_f32( 522*ca603d25SDavid Green // CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0]] { 523*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 524*ca603d25SDavid Green // CHECK-NEXT: [[__REINT_208:%.*]] = alloca <2 x float>, align 8 525*ca603d25SDavid Green // CHECK-NEXT: [[__REINT1_208:%.*]] = alloca <2 x i64>, align 16 526*ca603d25SDavid Green // CHECK-NEXT: store <2 x float> [[RHS]], ptr [[__REINT_208]], align 8 527*ca603d25SDavid Green // CHECK-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr [[__REINT_208]], align 8 528*ca603d25SDavid Green // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP0]], i32 0 529*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[VGET_LANE]], i32 0 530*ca603d25SDavid Green // CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[__REINT_208]], align 8 531*ca603d25SDavid Green // CHECK-NEXT: [[VGET_LANE3:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0 532*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <2 x i64> [[VECINIT]], i64 [[VGET_LANE3]], i32 1 533*ca603d25SDavid Green // CHECK-NEXT: store <2 x i64> [[VECINIT5]], ptr [[__REINT1_208]], align 16 534*ca603d25SDavid Green // CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[__REINT1_208]], align 16 535*ca603d25SDavid Green // CHECK-NEXT: [[VCMLAQ_ROT90_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]]) 536*ca603d25SDavid Green // CHECK-NEXT: ret <4 x float> [[VCMLAQ_ROT90_F323_I]] 537*ca603d25SDavid Green // 538207e5cccSFangrui Song float32x4_t test_vcmlaq_rot90_lane_f32(float32x4_t acc, float32x4_t lhs, float32x2_t rhs) { 539207e5cccSFangrui Song return vcmlaq_rot90_lane_f32(acc, lhs, rhs, 0); 540207e5cccSFangrui Song } 541207e5cccSFangrui Song 542*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot90_laneq_f32( 543*ca603d25SDavid Green // CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] { 544*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 545*ca603d25SDavid Green // CHECK-NEXT: [[__REINT_212:%.*]] = alloca <4 x float>, align 16 546*ca603d25SDavid Green // CHECK-NEXT: [[__REINT1_212:%.*]] = alloca <2 x i64>, align 16 547*ca603d25SDavid Green // CHECK-NEXT: store <4 x float> [[RHS]], ptr [[__REINT_212]], align 16 548*ca603d25SDavid Green // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[__REINT_212]], align 16 549*ca603d25SDavid Green // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1 550*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[VGETQ_LANE]], i32 0 551*ca603d25SDavid Green // CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[__REINT_212]], align 16 552*ca603d25SDavid Green // CHECK-NEXT: [[VGETQ_LANE3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 553*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <2 x i64> [[VECINIT]], i64 [[VGETQ_LANE3]], i32 1 554*ca603d25SDavid Green // CHECK-NEXT: store <2 x i64> [[VECINIT5]], ptr [[__REINT1_212]], align 16 555*ca603d25SDavid Green // CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[__REINT1_212]], align 16 556*ca603d25SDavid Green // CHECK-NEXT: [[VCMLAQ_ROT90_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]]) 557*ca603d25SDavid Green // CHECK-NEXT: ret <4 x float> [[VCMLAQ_ROT90_F323_I]] 558*ca603d25SDavid Green // 559207e5cccSFangrui Song float32x4_t test_vcmlaq_rot90_laneq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) { 560207e5cccSFangrui Song return vcmlaq_rot90_laneq_f32(acc, lhs, rhs, 1); 561207e5cccSFangrui Song } 562207e5cccSFangrui Song 563*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot180_lane_f16( 564*ca603d25SDavid Green // CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) #[[ATTR0]] { 565*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 566*ca603d25SDavid Green // CHECK-NEXT: [[__REINT_158:%.*]] = alloca <4 x half>, align 8 567*ca603d25SDavid Green // CHECK-NEXT: [[__REINT1_158:%.*]] = alloca <2 x i32>, align 8 568*ca603d25SDavid Green // CHECK-NEXT: store <4 x half> [[RHS]], ptr [[__REINT_158]], align 8 569*ca603d25SDavid Green // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[__REINT_158]], align 8 570*ca603d25SDavid Green // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1 571*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i32> poison, i32 [[VGET_LANE]], i32 0 572*ca603d25SDavid Green // CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[__REINT_158]], align 8 573*ca603d25SDavid Green // CHECK-NEXT: [[VGET_LANE3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 574*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <2 x i32> [[VECINIT]], i32 [[VGET_LANE3]], i32 1 575*ca603d25SDavid Green // CHECK-NEXT: store <2 x i32> [[VECINIT5]], ptr [[__REINT1_158]], align 8 576*ca603d25SDavid Green // CHECK-NEXT: [[TMP2:%.*]] = load <4 x half>, ptr [[__REINT1_158]], align 8 577*ca603d25SDavid Green // CHECK-NEXT: [[VCMLA_ROT180_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP2]]) 578*ca603d25SDavid Green // CHECK-NEXT: ret <4 x half> [[VCMLA_ROT180_F163_I]] 579*ca603d25SDavid Green // 580207e5cccSFangrui Song float16x4_t test_vcmla_rot180_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) { 581207e5cccSFangrui Song return vcmla_rot180_lane_f16(acc, lhs, rhs, 1); 582207e5cccSFangrui Song } 583207e5cccSFangrui Song 584207e5cccSFangrui Song // ACLE says this exists, but it won't map to a single instruction if lane > 1. 585*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot180_laneq_f16( 586*ca603d25SDavid Green // CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) #[[ATTR0]] { 587*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 588*ca603d25SDavid Green // CHECK-NEXT: [[__REINT_162:%.*]] = alloca <8 x half>, align 16 589*ca603d25SDavid Green // CHECK-NEXT: [[__REINT1_162:%.*]] = alloca <2 x i32>, align 8 590*ca603d25SDavid Green // CHECK-NEXT: store <8 x half> [[RHS]], ptr [[__REINT_162]], align 16 591*ca603d25SDavid Green // CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[__REINT_162]], align 16 592*ca603d25SDavid Green // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3 593*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i32> poison, i32 [[VGETQ_LANE]], i32 0 594*ca603d25SDavid Green // CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[__REINT_162]], align 16 595*ca603d25SDavid Green // CHECK-NEXT: [[VGETQ_LANE3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 596*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <2 x i32> [[VECINIT]], i32 [[VGETQ_LANE3]], i32 1 597*ca603d25SDavid Green // CHECK-NEXT: store <2 x i32> [[VECINIT5]], ptr [[__REINT1_162]], align 8 598*ca603d25SDavid Green // CHECK-NEXT: [[TMP2:%.*]] = load <4 x half>, ptr [[__REINT1_162]], align 8 599*ca603d25SDavid Green // CHECK-NEXT: [[VCMLA_ROT180_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP2]]) 600*ca603d25SDavid Green // CHECK-NEXT: ret <4 x half> [[VCMLA_ROT180_F163_I]] 601*ca603d25SDavid Green // 602207e5cccSFangrui Song float16x4_t test_vcmla_rot180_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) { 603207e5cccSFangrui Song return vcmla_rot180_laneq_f16(acc, lhs, rhs, 3); 604207e5cccSFangrui Song } 605207e5cccSFangrui Song 606*ca603d25SDavid Green // CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot180_lane_f16( 607*ca603d25SDavid Green // CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) #[[ATTR0]] { 608*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 609*ca603d25SDavid Green // CHECK-NEXT: [[__REINT_160:%.*]] = alloca <4 x half>, align 8 610*ca603d25SDavid Green // CHECK-NEXT: [[__REINT1_160:%.*]] = alloca <4 x i32>, align 16 611*ca603d25SDavid Green // CHECK-NEXT: store <4 x half> [[RHS]], ptr [[__REINT_160]], align 8 612*ca603d25SDavid Green // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[__REINT_160]], align 8 613*ca603d25SDavid Green // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1 614*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x i32> poison, i32 [[VGET_LANE]], i32 0 615*ca603d25SDavid Green // CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[__REINT_160]], align 8 616*ca603d25SDavid Green // CHECK-NEXT: [[VGET_LANE3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 617*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[VGET_LANE3]], i32 1 618*ca603d25SDavid Green // CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[__REINT_160]], align 8 619*ca603d25SDavid Green // CHECK-NEXT: [[VGET_LANE8:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1 620*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT10:%.*]] = insertelement <4 x i32> [[VECINIT5]], i32 [[VGET_LANE8]], i32 2 621*ca603d25SDavid Green // CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr [[__REINT_160]], align 8 622*ca603d25SDavid Green // CHECK-NEXT: [[VGET_LANE13:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1 623*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT15:%.*]] = insertelement <4 x i32> [[VECINIT10]], i32 [[VGET_LANE13]], i32 3 624*ca603d25SDavid Green // CHECK-NEXT: store <4 x i32> [[VECINIT15]], ptr [[__REINT1_160]], align 16 625*ca603d25SDavid Green // CHECK-NEXT: [[TMP4:%.*]] = load <8 x half>, ptr [[__REINT1_160]], align 16 626*ca603d25SDavid Green // CHECK-NEXT: [[VCMLAQ_ROT180_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP4]]) 627*ca603d25SDavid Green // CHECK-NEXT: ret <8 x half> [[VCMLAQ_ROT180_F163_I]] 628*ca603d25SDavid Green // 629207e5cccSFangrui Song float16x8_t test_vcmlaq_rot180_lane_f16(float16x8_t acc, float16x8_t lhs, float16x4_t rhs) { 630207e5cccSFangrui Song return vcmlaq_rot180_lane_f16(acc, lhs, rhs, 1); 631207e5cccSFangrui Song } 632207e5cccSFangrui Song 633*ca603d25SDavid Green // CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot180_laneq_f16( 634*ca603d25SDavid Green // CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) #[[ATTR0]] { 635*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 636*ca603d25SDavid Green // CHECK-NEXT: [[__REINT_164:%.*]] = alloca <8 x half>, align 16 637*ca603d25SDavid Green // CHECK-NEXT: [[__REINT1_164:%.*]] = alloca <4 x i32>, align 16 638*ca603d25SDavid Green // CHECK-NEXT: store <8 x half> [[RHS]], ptr [[__REINT_164]], align 16 639*ca603d25SDavid Green // CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[__REINT_164]], align 16 640*ca603d25SDavid Green // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3 641*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x i32> poison, i32 [[VGETQ_LANE]], i32 0 642*ca603d25SDavid Green // CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[__REINT_164]], align 16 643*ca603d25SDavid Green // CHECK-NEXT: [[VGETQ_LANE3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 644*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[VGETQ_LANE3]], i32 1 645*ca603d25SDavid Green // CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[__REINT_164]], align 16 646*ca603d25SDavid Green // CHECK-NEXT: [[VGETQ_LANE8:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3 647*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT10:%.*]] = insertelement <4 x i32> [[VECINIT5]], i32 [[VGETQ_LANE8]], i32 2 648*ca603d25SDavid Green // CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[__REINT_164]], align 16 649*ca603d25SDavid Green // CHECK-NEXT: [[VGETQ_LANE13:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3 650*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT15:%.*]] = insertelement <4 x i32> [[VECINIT10]], i32 [[VGETQ_LANE13]], i32 3 651*ca603d25SDavid Green // CHECK-NEXT: store <4 x i32> [[VECINIT15]], ptr [[__REINT1_164]], align 16 652*ca603d25SDavid Green // CHECK-NEXT: [[TMP4:%.*]] = load <8 x half>, ptr [[__REINT1_164]], align 16 653*ca603d25SDavid Green // CHECK-NEXT: [[VCMLAQ_ROT180_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP4]]) 654*ca603d25SDavid Green // CHECK-NEXT: ret <8 x half> [[VCMLAQ_ROT180_F163_I]] 655*ca603d25SDavid Green // 656207e5cccSFangrui Song float16x8_t test_vcmlaq_rot180_laneq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) { 657207e5cccSFangrui Song return vcmlaq_rot180_laneq_f16(acc, lhs, rhs, 3); 658207e5cccSFangrui Song } 659207e5cccSFangrui Song 660*ca603d25SDavid Green // CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot180_lane_f32( 661*ca603d25SDavid Green // CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0]] { 662*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 663*ca603d25SDavid Green // CHECK-NEXT: [[__REINT_190:%.*]] = alloca <2 x float>, align 8 664*ca603d25SDavid Green // CHECK-NEXT: [[__REINT1_190:%.*]] = alloca <1 x i64>, align 8 665*ca603d25SDavid Green // CHECK-NEXT: store <2 x float> [[RHS]], ptr [[__REINT_190]], align 8 666*ca603d25SDavid Green // CHECK-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr [[__REINT_190]], align 8 667*ca603d25SDavid Green // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP0]], i32 0 668*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <1 x i64> poison, i64 [[VGET_LANE]], i32 0 669*ca603d25SDavid Green // CHECK-NEXT: store <1 x i64> [[VECINIT]], ptr [[__REINT1_190]], align 8 670*ca603d25SDavid Green // CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[__REINT1_190]], align 8 671*ca603d25SDavid Green // CHECK-NEXT: [[VCMLA_ROT180_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]]) 672*ca603d25SDavid Green // CHECK-NEXT: ret <2 x float> [[VCMLA_ROT180_F323_I]] 673*ca603d25SDavid Green // 674207e5cccSFangrui Song float32x2_t test_vcmla_rot180_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) { 675207e5cccSFangrui Song return vcmla_rot180_lane_f32(acc, lhs, rhs, 0); 676207e5cccSFangrui Song } 677207e5cccSFangrui Song 678207e5cccSFangrui Song // ACLE says this exists, but it won't map to a single instruction if lane > 1. 679*ca603d25SDavid Green // CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot180_laneq_f32( 680*ca603d25SDavid Green // CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] { 681*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 682*ca603d25SDavid Green // CHECK-NEXT: [[__REINT_194:%.*]] = alloca <4 x float>, align 16 683*ca603d25SDavid Green // CHECK-NEXT: [[__REINT1_194:%.*]] = alloca <1 x i64>, align 8 684*ca603d25SDavid Green // CHECK-NEXT: store <4 x float> [[RHS]], ptr [[__REINT_194]], align 16 685*ca603d25SDavid Green // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[__REINT_194]], align 16 686*ca603d25SDavid Green // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1 687*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <1 x i64> poison, i64 [[VGETQ_LANE]], i32 0 688*ca603d25SDavid Green // CHECK-NEXT: store <1 x i64> [[VECINIT]], ptr [[__REINT1_194]], align 8 689*ca603d25SDavid Green // CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[__REINT1_194]], align 8 690*ca603d25SDavid Green // CHECK-NEXT: [[VCMLA_ROT180_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]]) 691*ca603d25SDavid Green // CHECK-NEXT: ret <2 x float> [[VCMLA_ROT180_F323_I]] 692*ca603d25SDavid Green // 693207e5cccSFangrui Song float32x2_t test_vcmla_rot180_laneq_f32(float32x2_t acc, float32x2_t lhs, float32x4_t rhs) { 694207e5cccSFangrui Song return vcmla_rot180_laneq_f32(acc, lhs, rhs, 1); 695207e5cccSFangrui Song } 696207e5cccSFangrui Song 697*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot180_lane_f32( 698*ca603d25SDavid Green // CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0]] { 699*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 700*ca603d25SDavid Green // CHECK-NEXT: [[__REINT_192:%.*]] = alloca <2 x float>, align 8 701*ca603d25SDavid Green // CHECK-NEXT: [[__REINT1_192:%.*]] = alloca <2 x i64>, align 16 702*ca603d25SDavid Green // CHECK-NEXT: store <2 x float> [[RHS]], ptr [[__REINT_192]], align 8 703*ca603d25SDavid Green // CHECK-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr [[__REINT_192]], align 8 704*ca603d25SDavid Green // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP0]], i32 0 705*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[VGET_LANE]], i32 0 706*ca603d25SDavid Green // CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[__REINT_192]], align 8 707*ca603d25SDavid Green // CHECK-NEXT: [[VGET_LANE3:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0 708*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <2 x i64> [[VECINIT]], i64 [[VGET_LANE3]], i32 1 709*ca603d25SDavid Green // CHECK-NEXT: store <2 x i64> [[VECINIT5]], ptr [[__REINT1_192]], align 16 710*ca603d25SDavid Green // CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[__REINT1_192]], align 16 711*ca603d25SDavid Green // CHECK-NEXT: [[VCMLAQ_ROT180_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]]) 712*ca603d25SDavid Green // CHECK-NEXT: ret <4 x float> [[VCMLAQ_ROT180_F323_I]] 713*ca603d25SDavid Green // 714207e5cccSFangrui Song float32x4_t test_vcmlaq_rot180_lane_f32(float32x4_t acc, float32x4_t lhs, float32x2_t rhs) { 715207e5cccSFangrui Song return vcmlaq_rot180_lane_f32(acc, lhs, rhs, 0); 716207e5cccSFangrui Song } 717207e5cccSFangrui Song 718*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot180_laneq_f32( 719*ca603d25SDavid Green // CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] { 720*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 721*ca603d25SDavid Green // CHECK-NEXT: [[__REINT_196:%.*]] = alloca <4 x float>, align 16 722*ca603d25SDavid Green // CHECK-NEXT: [[__REINT1_196:%.*]] = alloca <2 x i64>, align 16 723*ca603d25SDavid Green // CHECK-NEXT: store <4 x float> [[RHS]], ptr [[__REINT_196]], align 16 724*ca603d25SDavid Green // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[__REINT_196]], align 16 725*ca603d25SDavid Green // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1 726*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[VGETQ_LANE]], i32 0 727*ca603d25SDavid Green // CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[__REINT_196]], align 16 728*ca603d25SDavid Green // CHECK-NEXT: [[VGETQ_LANE3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 729*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <2 x i64> [[VECINIT]], i64 [[VGETQ_LANE3]], i32 1 730*ca603d25SDavid Green // CHECK-NEXT: store <2 x i64> [[VECINIT5]], ptr [[__REINT1_196]], align 16 731*ca603d25SDavid Green // CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[__REINT1_196]], align 16 732*ca603d25SDavid Green // CHECK-NEXT: [[VCMLAQ_ROT180_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]]) 733*ca603d25SDavid Green // CHECK-NEXT: ret <4 x float> [[VCMLAQ_ROT180_F323_I]] 734*ca603d25SDavid Green // 735207e5cccSFangrui Song float32x4_t test_vcmlaq_rot180_laneq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) { 736207e5cccSFangrui Song return vcmlaq_rot180_laneq_f32(acc, lhs, rhs, 1); 737207e5cccSFangrui Song } 738207e5cccSFangrui Song 739*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot270_lane_f16( 740*ca603d25SDavid Green // CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) #[[ATTR0]] { 741*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 742*ca603d25SDavid Green // CHECK-NEXT: [[__REINT_166:%.*]] = alloca <4 x half>, align 8 743*ca603d25SDavid Green // CHECK-NEXT: [[__REINT1_166:%.*]] = alloca <2 x i32>, align 8 744*ca603d25SDavid Green // CHECK-NEXT: store <4 x half> [[RHS]], ptr [[__REINT_166]], align 8 745*ca603d25SDavid Green // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[__REINT_166]], align 8 746*ca603d25SDavid Green // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1 747*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i32> poison, i32 [[VGET_LANE]], i32 0 748*ca603d25SDavid Green // CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[__REINT_166]], align 8 749*ca603d25SDavid Green // CHECK-NEXT: [[VGET_LANE3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 750*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <2 x i32> [[VECINIT]], i32 [[VGET_LANE3]], i32 1 751*ca603d25SDavid Green // CHECK-NEXT: store <2 x i32> [[VECINIT5]], ptr [[__REINT1_166]], align 8 752*ca603d25SDavid Green // CHECK-NEXT: [[TMP2:%.*]] = load <4 x half>, ptr [[__REINT1_166]], align 8 753*ca603d25SDavid Green // CHECK-NEXT: [[VCMLA_ROT270_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP2]]) 754*ca603d25SDavid Green // CHECK-NEXT: ret <4 x half> [[VCMLA_ROT270_F163_I]] 755*ca603d25SDavid Green // 756207e5cccSFangrui Song float16x4_t test_vcmla_rot270_lane_f16(float16x4_t acc, float16x4_t lhs, float16x4_t rhs) { 757207e5cccSFangrui Song return vcmla_rot270_lane_f16(acc, lhs, rhs, 1); 758207e5cccSFangrui Song } 759207e5cccSFangrui Song 760207e5cccSFangrui Song // ACLE says this exists, but it won't map to a single instruction if lane > 1. 761*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x half> @test_vcmla_rot270_laneq_f16( 762*ca603d25SDavid Green // CHECK-SAME: <4 x half> noundef [[ACC:%.*]], <4 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) #[[ATTR0]] { 763*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 764*ca603d25SDavid Green // CHECK-NEXT: [[__REINT_170:%.*]] = alloca <8 x half>, align 16 765*ca603d25SDavid Green // CHECK-NEXT: [[__REINT1_170:%.*]] = alloca <2 x i32>, align 8 766*ca603d25SDavid Green // CHECK-NEXT: store <8 x half> [[RHS]], ptr [[__REINT_170]], align 16 767*ca603d25SDavid Green // CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[__REINT_170]], align 16 768*ca603d25SDavid Green // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3 769*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i32> poison, i32 [[VGETQ_LANE]], i32 0 770*ca603d25SDavid Green // CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[__REINT_170]], align 16 771*ca603d25SDavid Green // CHECK-NEXT: [[VGETQ_LANE3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 772*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <2 x i32> [[VECINIT]], i32 [[VGETQ_LANE3]], i32 1 773*ca603d25SDavid Green // CHECK-NEXT: store <2 x i32> [[VECINIT5]], ptr [[__REINT1_170]], align 8 774*ca603d25SDavid Green // CHECK-NEXT: [[TMP2:%.*]] = load <4 x half>, ptr [[__REINT1_170]], align 8 775*ca603d25SDavid Green // CHECK-NEXT: [[VCMLA_ROT270_F163_I:%.*]] = call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> [[ACC]], <4 x half> [[LHS]], <4 x half> [[TMP2]]) 776*ca603d25SDavid Green // CHECK-NEXT: ret <4 x half> [[VCMLA_ROT270_F163_I]] 777*ca603d25SDavid Green // 778207e5cccSFangrui Song float16x4_t test_vcmla_rot270_laneq_f16(float16x4_t acc, float16x4_t lhs, float16x8_t rhs) { 779207e5cccSFangrui Song return vcmla_rot270_laneq_f16(acc, lhs, rhs, 3); 780207e5cccSFangrui Song } 781207e5cccSFangrui Song 782*ca603d25SDavid Green // CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot270_lane_f16( 783*ca603d25SDavid Green // CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <4 x half> noundef [[RHS:%.*]]) #[[ATTR0]] { 784*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 785*ca603d25SDavid Green // CHECK-NEXT: [[__REINT_168:%.*]] = alloca <4 x half>, align 8 786*ca603d25SDavid Green // CHECK-NEXT: [[__REINT1_168:%.*]] = alloca <4 x i32>, align 16 787*ca603d25SDavid Green // CHECK-NEXT: store <4 x half> [[RHS]], ptr [[__REINT_168]], align 8 788*ca603d25SDavid Green // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[__REINT_168]], align 8 789*ca603d25SDavid Green // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1 790*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x i32> poison, i32 [[VGET_LANE]], i32 0 791*ca603d25SDavid Green // CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[__REINT_168]], align 8 792*ca603d25SDavid Green // CHECK-NEXT: [[VGET_LANE3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 793*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[VGET_LANE3]], i32 1 794*ca603d25SDavid Green // CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[__REINT_168]], align 8 795*ca603d25SDavid Green // CHECK-NEXT: [[VGET_LANE8:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1 796*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT10:%.*]] = insertelement <4 x i32> [[VECINIT5]], i32 [[VGET_LANE8]], i32 2 797*ca603d25SDavid Green // CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr [[__REINT_168]], align 8 798*ca603d25SDavid Green // CHECK-NEXT: [[VGET_LANE13:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1 799*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT15:%.*]] = insertelement <4 x i32> [[VECINIT10]], i32 [[VGET_LANE13]], i32 3 800*ca603d25SDavid Green // CHECK-NEXT: store <4 x i32> [[VECINIT15]], ptr [[__REINT1_168]], align 16 801*ca603d25SDavid Green // CHECK-NEXT: [[TMP4:%.*]] = load <8 x half>, ptr [[__REINT1_168]], align 16 802*ca603d25SDavid Green // CHECK-NEXT: [[VCMLAQ_ROT270_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP4]]) 803*ca603d25SDavid Green // CHECK-NEXT: ret <8 x half> [[VCMLAQ_ROT270_F163_I]] 804*ca603d25SDavid Green // 805207e5cccSFangrui Song float16x8_t test_vcmlaq_rot270_lane_f16(float16x8_t acc, float16x8_t lhs, float16x4_t rhs) { 806207e5cccSFangrui Song return vcmlaq_rot270_lane_f16(acc, lhs, rhs, 1); 807207e5cccSFangrui Song } 808207e5cccSFangrui Song 809*ca603d25SDavid Green // CHECK-LABEL: define dso_local <8 x half> @test_vcmlaq_rot270_laneq_f16( 810*ca603d25SDavid Green // CHECK-SAME: <8 x half> noundef [[ACC:%.*]], <8 x half> noundef [[LHS:%.*]], <8 x half> noundef [[RHS:%.*]]) #[[ATTR0]] { 811*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 812*ca603d25SDavid Green // CHECK-NEXT: [[__REINT_172:%.*]] = alloca <8 x half>, align 16 813*ca603d25SDavid Green // CHECK-NEXT: [[__REINT1_172:%.*]] = alloca <4 x i32>, align 16 814*ca603d25SDavid Green // CHECK-NEXT: store <8 x half> [[RHS]], ptr [[__REINT_172]], align 16 815*ca603d25SDavid Green // CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[__REINT_172]], align 16 816*ca603d25SDavid Green // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3 817*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x i32> poison, i32 [[VGETQ_LANE]], i32 0 818*ca603d25SDavid Green // CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[__REINT_172]], align 16 819*ca603d25SDavid Green // CHECK-NEXT: [[VGETQ_LANE3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 820*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[VGETQ_LANE3]], i32 1 821*ca603d25SDavid Green // CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[__REINT_172]], align 16 822*ca603d25SDavid Green // CHECK-NEXT: [[VGETQ_LANE8:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3 823*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT10:%.*]] = insertelement <4 x i32> [[VECINIT5]], i32 [[VGETQ_LANE8]], i32 2 824*ca603d25SDavid Green // CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[__REINT_172]], align 16 825*ca603d25SDavid Green // CHECK-NEXT: [[VGETQ_LANE13:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3 826*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT15:%.*]] = insertelement <4 x i32> [[VECINIT10]], i32 [[VGETQ_LANE13]], i32 3 827*ca603d25SDavid Green // CHECK-NEXT: store <4 x i32> [[VECINIT15]], ptr [[__REINT1_172]], align 16 828*ca603d25SDavid Green // CHECK-NEXT: [[TMP4:%.*]] = load <8 x half>, ptr [[__REINT1_172]], align 16 829*ca603d25SDavid Green // CHECK-NEXT: [[VCMLAQ_ROT270_F163_I:%.*]] = call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> [[ACC]], <8 x half> [[LHS]], <8 x half> [[TMP4]]) 830*ca603d25SDavid Green // CHECK-NEXT: ret <8 x half> [[VCMLAQ_ROT270_F163_I]] 831*ca603d25SDavid Green // 832207e5cccSFangrui Song float16x8_t test_vcmlaq_rot270_laneq_f16(float16x8_t acc, float16x8_t lhs, float16x8_t rhs) { 833207e5cccSFangrui Song return vcmlaq_rot270_laneq_f16(acc, lhs, rhs, 3); 834207e5cccSFangrui Song } 835207e5cccSFangrui Song 836*ca603d25SDavid Green // CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot270_lane_f32( 837*ca603d25SDavid Green // CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0]] { 838*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 839*ca603d25SDavid Green // CHECK-NEXT: [[__REINT_198:%.*]] = alloca <2 x float>, align 8 840*ca603d25SDavid Green // CHECK-NEXT: [[__REINT1_198:%.*]] = alloca <1 x i64>, align 8 841*ca603d25SDavid Green // CHECK-NEXT: store <2 x float> [[RHS]], ptr [[__REINT_198]], align 8 842*ca603d25SDavid Green // CHECK-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr [[__REINT_198]], align 8 843*ca603d25SDavid Green // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP0]], i32 0 844*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <1 x i64> poison, i64 [[VGET_LANE]], i32 0 845*ca603d25SDavid Green // CHECK-NEXT: store <1 x i64> [[VECINIT]], ptr [[__REINT1_198]], align 8 846*ca603d25SDavid Green // CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[__REINT1_198]], align 8 847*ca603d25SDavid Green // CHECK-NEXT: [[VCMLA_ROT270_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]]) 848*ca603d25SDavid Green // CHECK-NEXT: ret <2 x float> [[VCMLA_ROT270_F323_I]] 849*ca603d25SDavid Green // 850207e5cccSFangrui Song float32x2_t test_vcmla_rot270_lane_f32(float32x2_t acc, float32x2_t lhs, float32x2_t rhs) { 851207e5cccSFangrui Song return vcmla_rot270_lane_f32(acc, lhs, rhs, 0); 852207e5cccSFangrui Song } 853207e5cccSFangrui Song 854207e5cccSFangrui Song // ACLE says this exists, but it won't map to a single instruction if lane > 1. 855*ca603d25SDavid Green // CHECK-LABEL: define dso_local <2 x float> @test_vcmla_rot270_laneq_f32( 856*ca603d25SDavid Green // CHECK-SAME: <2 x float> noundef [[ACC:%.*]], <2 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] { 857*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 858*ca603d25SDavid Green // CHECK-NEXT: [[__REINT_202:%.*]] = alloca <4 x float>, align 16 859*ca603d25SDavid Green // CHECK-NEXT: [[__REINT1_202:%.*]] = alloca <1 x i64>, align 8 860*ca603d25SDavid Green // CHECK-NEXT: store <4 x float> [[RHS]], ptr [[__REINT_202]], align 16 861*ca603d25SDavid Green // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[__REINT_202]], align 16 862*ca603d25SDavid Green // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1 863*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <1 x i64> poison, i64 [[VGETQ_LANE]], i32 0 864*ca603d25SDavid Green // CHECK-NEXT: store <1 x i64> [[VECINIT]], ptr [[__REINT1_202]], align 8 865*ca603d25SDavid Green // CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[__REINT1_202]], align 8 866*ca603d25SDavid Green // CHECK-NEXT: [[VCMLA_ROT270_F323_I:%.*]] = call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> [[ACC]], <2 x float> [[LHS]], <2 x float> [[TMP1]]) 867*ca603d25SDavid Green // CHECK-NEXT: ret <2 x float> [[VCMLA_ROT270_F323_I]] 868*ca603d25SDavid Green // 869207e5cccSFangrui Song float32x2_t test_vcmla_rot270_laneq_f32(float32x2_t acc, float32x2_t lhs, float32x4_t rhs) { 870207e5cccSFangrui Song return vcmla_rot270_laneq_f32(acc, lhs, rhs, 1); 871207e5cccSFangrui Song } 872207e5cccSFangrui Song 873*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot270_lane_f32( 874*ca603d25SDavid Green // CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <2 x float> noundef [[RHS:%.*]]) #[[ATTR0]] { 875*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 876*ca603d25SDavid Green // CHECK-NEXT: [[__REINT_200:%.*]] = alloca <2 x float>, align 8 877*ca603d25SDavid Green // CHECK-NEXT: [[__REINT1_200:%.*]] = alloca <2 x i64>, align 16 878*ca603d25SDavid Green // CHECK-NEXT: store <2 x float> [[RHS]], ptr [[__REINT_200]], align 8 879*ca603d25SDavid Green // CHECK-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr [[__REINT_200]], align 8 880*ca603d25SDavid Green // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP0]], i32 0 881*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[VGET_LANE]], i32 0 882*ca603d25SDavid Green // CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[__REINT_200]], align 8 883*ca603d25SDavid Green // CHECK-NEXT: [[VGET_LANE3:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0 884*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <2 x i64> [[VECINIT]], i64 [[VGET_LANE3]], i32 1 885*ca603d25SDavid Green // CHECK-NEXT: store <2 x i64> [[VECINIT5]], ptr [[__REINT1_200]], align 16 886*ca603d25SDavid Green // CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[__REINT1_200]], align 16 887*ca603d25SDavid Green // CHECK-NEXT: [[VCMLAQ_ROT270_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]]) 888*ca603d25SDavid Green // CHECK-NEXT: ret <4 x float> [[VCMLAQ_ROT270_F323_I]] 889*ca603d25SDavid Green // 890207e5cccSFangrui Song float32x4_t test_vcmlaq_rot270_lane_f32(float32x4_t acc, float32x4_t lhs, float32x2_t rhs) { 891207e5cccSFangrui Song return vcmlaq_rot270_lane_f32(acc, lhs, rhs, 0); 892207e5cccSFangrui Song } 893207e5cccSFangrui Song 894*ca603d25SDavid Green // CHECK-LABEL: define dso_local <4 x float> @test_vcmlaq_rot270_laneq_f32( 895*ca603d25SDavid Green // CHECK-SAME: <4 x float> noundef [[ACC:%.*]], <4 x float> noundef [[LHS:%.*]], <4 x float> noundef [[RHS:%.*]]) #[[ATTR0]] { 896*ca603d25SDavid Green // CHECK-NEXT: [[ENTRY:.*:]] 897*ca603d25SDavid Green // CHECK-NEXT: [[__REINT_204:%.*]] = alloca <4 x float>, align 16 898*ca603d25SDavid Green // CHECK-NEXT: [[__REINT1_204:%.*]] = alloca <2 x i64>, align 16 899*ca603d25SDavid Green // CHECK-NEXT: store <4 x float> [[RHS]], ptr [[__REINT_204]], align 16 900*ca603d25SDavid Green // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[__REINT_204]], align 16 901*ca603d25SDavid Green // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP0]], i32 1 902*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT:%.*]] = insertelement <2 x i64> poison, i64 [[VGETQ_LANE]], i32 0 903*ca603d25SDavid Green // CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[__REINT_204]], align 16 904*ca603d25SDavid Green // CHECK-NEXT: [[VGETQ_LANE3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 905*ca603d25SDavid Green // CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <2 x i64> [[VECINIT]], i64 [[VGETQ_LANE3]], i32 1 906*ca603d25SDavid Green // CHECK-NEXT: store <2 x i64> [[VECINIT5]], ptr [[__REINT1_204]], align 16 907*ca603d25SDavid Green // CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[__REINT1_204]], align 16 908*ca603d25SDavid Green // CHECK-NEXT: [[VCMLAQ_ROT270_F323_I:%.*]] = call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> [[ACC]], <4 x float> [[LHS]], <4 x float> [[TMP2]]) 909*ca603d25SDavid Green // CHECK-NEXT: ret <4 x float> [[VCMLAQ_ROT270_F323_I]] 910*ca603d25SDavid Green // 911207e5cccSFangrui Song float32x4_t test_vcmlaq_rot270_laneq_f32(float32x4_t acc, float32x4_t lhs, float32x4_t rhs) { 912207e5cccSFangrui Song return vcmlaq_rot270_laneq_f32(acc, lhs, rhs, 1); 913207e5cccSFangrui Song } 914