1*207e5cccSFangrui Song // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\ 2*207e5cccSFangrui Song // RUN: -flax-vector-conversions=none -disable-O0-optnone -emit-llvm -o - %s \ 3*207e5cccSFangrui Song // RUN: | opt -S -passes=mem2reg \ 4*207e5cccSFangrui Song // RUN: | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=UNCONSTRAINED %s 5*207e5cccSFangrui Song // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\ 6*207e5cccSFangrui Song // RUN: -ffp-exception-behavior=maytrap -DEXCEPT=1 \ 7*207e5cccSFangrui Song // RUN: -flax-vector-conversions=none -disable-O0-optnone -emit-llvm -o - %s \ 8*207e5cccSFangrui Song // RUN: | opt -S -passes=mem2reg \ 9*207e5cccSFangrui Song // RUN: | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=CONSTRAINED --implicit-check-not=fpexcept.maytrap %s 10*207e5cccSFangrui Song // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\ 11*207e5cccSFangrui Song // RUN: -flax-vector-conversions=none -disable-O0-optnone -emit-llvm -o - %s \ 12*207e5cccSFangrui Song // RUN: | opt -S -passes=mem2reg | llc -o=- - \ 13*207e5cccSFangrui Song // RUN: | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s 14*207e5cccSFangrui Song // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\ 15*207e5cccSFangrui Song // RUN: -ffp-exception-behavior=maytrap -DEXCEPT=1 \ 16*207e5cccSFangrui Song // RUN: -flax-vector-conversions=none -disable-O0-optnone -emit-llvm -o - %s \ 17*207e5cccSFangrui Song // RUN: | opt -S -passes=mem2reg | llc -o=- - \ 18*207e5cccSFangrui Song // RUN: | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM --implicit-check-not=fpexcept.maytrap %s 19*207e5cccSFangrui Song 20*207e5cccSFangrui Song // REQUIRES: aarch64-registered-target 21*207e5cccSFangrui Song 22*207e5cccSFangrui Song // Test that the constrained intrinsics are picking up the exception 23*207e5cccSFangrui Song // metadata from the AST instead of the global default from the command line. 24*207e5cccSFangrui Song // Any cases of "fpexcept.maytrap" in this test are clang bugs. 25*207e5cccSFangrui Song 26*207e5cccSFangrui Song #if EXCEPT 27*207e5cccSFangrui Song #pragma float_control(except, on) 28*207e5cccSFangrui Song #endif 29*207e5cccSFangrui Song 30*207e5cccSFangrui Song #include <arm_neon.h> 31*207e5cccSFangrui Song 32*207e5cccSFangrui Song // COMMON-LABEL: test_vsqrt_f16 33*207e5cccSFangrui Song // UNCONSTRAINED: [[SQR:%.*]] = call <4 x half> @llvm.sqrt.v4f16(<4 x half> %a) 34*207e5cccSFangrui Song // CONSTRAINED: [[SQR:%.*]] = call <4 x half> @llvm.experimental.constrained.sqrt.v4f16(<4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") 35*207e5cccSFangrui Song // CHECK-ASM: fsqrt v{{[0-9]+}}.4h, v{{[0-9]+}}.4h 36*207e5cccSFangrui Song // COMMONIR: ret <4 x half> [[SQR]] 37*207e5cccSFangrui Song float16x4_t test_vsqrt_f16(float16x4_t a) { 38*207e5cccSFangrui Song return vsqrt_f16(a); 39*207e5cccSFangrui Song } 40*207e5cccSFangrui Song 41*207e5cccSFangrui Song // COMMON-LABEL: test_vsqrtq_f16 42*207e5cccSFangrui Song // UNCONSTRAINED: [[SQR:%.*]] = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %a) 43*207e5cccSFangrui Song // CONSTRAINED: [[SQR:%.*]] = call <8 x half> @llvm.experimental.constrained.sqrt.v8f16(<8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") 44*207e5cccSFangrui Song // CHECK-ASM: fsqrt v{{[0-9]+}}.8h, v{{[0-9]+}}.8h 45*207e5cccSFangrui Song // COMMONIR: ret <8 x half> [[SQR]] 46*207e5cccSFangrui Song float16x8_t test_vsqrtq_f16(float16x8_t a) { 47*207e5cccSFangrui Song return vsqrtq_f16(a); 48*207e5cccSFangrui Song } 49*207e5cccSFangrui Song 50*207e5cccSFangrui Song // COMMON-LABEL: test_vfma_f16 51*207e5cccSFangrui Song // UNCONSTRAINED: [[ADD:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a) 52*207e5cccSFangrui Song // CONSTRAINED: [[ADD:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") 53*207e5cccSFangrui Song // CHECK-ASM: fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h 54*207e5cccSFangrui Song // COMMONIR: ret <4 x half> [[ADD]] 55*207e5cccSFangrui Song float16x4_t test_vfma_f16(float16x4_t a, float16x4_t b, float16x4_t c) { 56*207e5cccSFangrui Song return vfma_f16(a, b, c); 57*207e5cccSFangrui Song } 58*207e5cccSFangrui Song 59*207e5cccSFangrui Song // COMMON-LABEL: test_vfmaq_f16 60*207e5cccSFangrui Song // UNCONSTRAINED: [[ADD:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a) 61*207e5cccSFangrui Song // CONSTRAINED: [[ADD:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") 62*207e5cccSFangrui Song // CHECK-ASM: fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h 63*207e5cccSFangrui Song // COMMONIR: ret <8 x half> [[ADD]] 64*207e5cccSFangrui Song float16x8_t test_vfmaq_f16(float16x8_t a, float16x8_t b, float16x8_t c) { 65*207e5cccSFangrui Song return vfmaq_f16(a, b, c); 66*207e5cccSFangrui Song } 67*207e5cccSFangrui Song 68*207e5cccSFangrui Song // COMMON-LABEL: test_vfms_f16 69*207e5cccSFangrui Song // COMMONIR: [[SUB:%.*]] = fneg <4 x half> %b 70*207e5cccSFangrui Song // UNCONSTRAINED: [[ADD:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[SUB]], <4 x half> %c, <4 x half> %a) 71*207e5cccSFangrui Song // CONSTRAINED: [[ADD:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[SUB]], <4 x half> %c, <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") 72*207e5cccSFangrui Song // CHECK-ASM: fmls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h 73*207e5cccSFangrui Song // COMMONIR: ret <4 x half> [[ADD]] 74*207e5cccSFangrui Song float16x4_t test_vfms_f16(float16x4_t a, float16x4_t b, float16x4_t c) { 75*207e5cccSFangrui Song return vfms_f16(a, b, c); 76*207e5cccSFangrui Song } 77*207e5cccSFangrui Song 78*207e5cccSFangrui Song // COMMON-LABEL: test_vfmsq_f16 79*207e5cccSFangrui Song // COMMONIR: [[SUB:%.*]] = fneg <8 x half> %b 80*207e5cccSFangrui Song // UNCONSTRAINED: [[ADD:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[SUB]], <8 x half> %c, <8 x half> %a) 81*207e5cccSFangrui Song // CONSTRAINED: [[ADD:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[SUB]], <8 x half> %c, <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") 82*207e5cccSFangrui Song // CHECK-ASM: fmls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h 83*207e5cccSFangrui Song // COMMONIR: ret <8 x half> [[ADD]] 84*207e5cccSFangrui Song float16x8_t test_vfmsq_f16(float16x8_t a, float16x8_t b, float16x8_t c) { 85*207e5cccSFangrui Song return vfmsq_f16(a, b, c); 86*207e5cccSFangrui Song } 87*207e5cccSFangrui Song 88*207e5cccSFangrui Song // COMMON-LABEL: test_vfma_lane_f16 89*207e5cccSFangrui Song // COMMONIR: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8> 90*207e5cccSFangrui Song // COMMONIR: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8> 91*207e5cccSFangrui Song // COMMONIR: [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8> 92*207e5cccSFangrui Song // COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half> 93*207e5cccSFangrui Song // COMMONIR: [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> 94*207e5cccSFangrui Song // COMMONIR: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half> 95*207e5cccSFangrui Song // COMMONIR: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> 96*207e5cccSFangrui Song // UNCONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]]) 97*207e5cccSFangrui Song // CONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.strict") 98*207e5cccSFangrui Song // CHECK-ASM: fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}] 99*207e5cccSFangrui Song // COMMONIR: ret <4 x half> [[FMLA]] 100*207e5cccSFangrui Song float16x4_t test_vfma_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c) { 101*207e5cccSFangrui Song return vfma_lane_f16(a, b, c, 3); 102*207e5cccSFangrui Song } 103*207e5cccSFangrui Song 104*207e5cccSFangrui Song // COMMON-LABEL: test_vfmaq_lane_f16 105*207e5cccSFangrui Song // COMMONIR: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8> 106*207e5cccSFangrui Song // COMMONIR: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8> 107*207e5cccSFangrui Song // COMMONIR: [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8> 108*207e5cccSFangrui Song // COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half> 109*207e5cccSFangrui Song // COMMONIR: [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 110*207e5cccSFangrui Song // COMMONIR: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half> 111*207e5cccSFangrui Song // COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half> 112*207e5cccSFangrui Song // UNCONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]]) 113*207e5cccSFangrui Song // CONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.strict") 114*207e5cccSFangrui Song // CHECK-ASM: fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}] 115*207e5cccSFangrui Song // COMMONIR: ret <8 x half> [[FMLA]] 116*207e5cccSFangrui Song float16x8_t test_vfmaq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c) { 117*207e5cccSFangrui Song return vfmaq_lane_f16(a, b, c, 3); 118*207e5cccSFangrui Song } 119*207e5cccSFangrui Song 120*207e5cccSFangrui Song // COMMON-LABEL: test_vfma_laneq_f16 121*207e5cccSFangrui Song // COMMONIR: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8> 122*207e5cccSFangrui Song // COMMONIR: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8> 123*207e5cccSFangrui Song // COMMONIR: [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8> 124*207e5cccSFangrui Song // COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> 125*207e5cccSFangrui Song // COMMONIR: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half> 126*207e5cccSFangrui Song // COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half> 127*207e5cccSFangrui Song // COMMONIR: [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <4 x i32> <i32 7, i32 7, i32 7, i32 7> 128*207e5cccSFangrui Song // UNCONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]]) 129*207e5cccSFangrui Song // CONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict") 130*207e5cccSFangrui Song // CHECK-ASM: fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}] 131*207e5cccSFangrui Song // COMMONIR: ret <4 x half> [[FMLA]] 132*207e5cccSFangrui Song float16x4_t test_vfma_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c) { 133*207e5cccSFangrui Song return vfma_laneq_f16(a, b, c, 7); 134*207e5cccSFangrui Song } 135*207e5cccSFangrui Song 136*207e5cccSFangrui Song // COMMON-LABEL: test_vfmaq_laneq_f16 137*207e5cccSFangrui Song // COMMONIR: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8> 138*207e5cccSFangrui Song // COMMONIR: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8> 139*207e5cccSFangrui Song // COMMONIR: [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8> 140*207e5cccSFangrui Song // COMMONIR: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half> 141*207e5cccSFangrui Song // COMMONIR: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half> 142*207e5cccSFangrui Song // COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half> 143*207e5cccSFangrui Song // COMMONIR: [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> 144*207e5cccSFangrui Song // UNCONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]]) 145*207e5cccSFangrui Song // CONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict") 146*207e5cccSFangrui Song // CHECK-ASM: fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}] 147*207e5cccSFangrui Song // COMMONIR: ret <8 x half> [[FMLA]] 148*207e5cccSFangrui Song float16x8_t test_vfmaq_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c) { 149*207e5cccSFangrui Song return vfmaq_laneq_f16(a, b, c, 7); 150*207e5cccSFangrui Song } 151*207e5cccSFangrui Song 152*207e5cccSFangrui Song // COMMON-LABEL: test_vfma_n_f16 153*207e5cccSFangrui Song // COMMONIR: [[TMP0:%.*]] = insertelement <4 x half> poison, half %c, i32 0 154*207e5cccSFangrui Song // COMMONIR: [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half %c, i32 1 155*207e5cccSFangrui Song // COMMONIR: [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half %c, i32 2 156*207e5cccSFangrui Song // COMMONIR: [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half %c, i32 3 157*207e5cccSFangrui Song // UNCONSTRAINED: [[FMA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> [[TMP3]], <4 x half> %a) 158*207e5cccSFangrui Song // CONSTRAINED: [[FMA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> %b, <4 x half> [[TMP3]], <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") 159*207e5cccSFangrui Song // CHECK-ASM: fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}] 160*207e5cccSFangrui Song // COMMONIR: ret <4 x half> [[FMA]] 161*207e5cccSFangrui Song float16x4_t test_vfma_n_f16(float16x4_t a, float16x4_t b, float16_t c) { 162*207e5cccSFangrui Song return vfma_n_f16(a, b, c); 163*207e5cccSFangrui Song } 164*207e5cccSFangrui Song 165*207e5cccSFangrui Song // COMMON-LABEL: test_vfmaq_n_f16 166*207e5cccSFangrui Song // COMMONIR: [[TMP0:%.*]] = insertelement <8 x half> poison, half %c, i32 0 167*207e5cccSFangrui Song // COMMONIR: [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half %c, i32 1 168*207e5cccSFangrui Song // COMMONIR: [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half %c, i32 2 169*207e5cccSFangrui Song // COMMONIR: [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half %c, i32 3 170*207e5cccSFangrui Song // COMMONIR: [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half %c, i32 4 171*207e5cccSFangrui Song // COMMONIR: [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half %c, i32 5 172*207e5cccSFangrui Song // COMMONIR: [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half %c, i32 6 173*207e5cccSFangrui Song // COMMONIR: [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half %c, i32 7 174*207e5cccSFangrui Song // UNCONSTRAINED: [[FMA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> [[TMP7]], <8 x half> %a) 175*207e5cccSFangrui Song // CONSTRAINED: [[FMA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> %b, <8 x half> [[TMP7]], <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") 176*207e5cccSFangrui Song // CHECK-ASM: fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}] 177*207e5cccSFangrui Song // COMMONIR: ret <8 x half> [[FMA]] 178*207e5cccSFangrui Song float16x8_t test_vfmaq_n_f16(float16x8_t a, float16x8_t b, float16_t c) { 179*207e5cccSFangrui Song return vfmaq_n_f16(a, b, c); 180*207e5cccSFangrui Song } 181*207e5cccSFangrui Song 182*207e5cccSFangrui Song // COMMON-LABEL: test_vfmah_lane_f16 183*207e5cccSFangrui Song // COMMONIR: [[EXTR:%.*]] = extractelement <4 x half> %c, i32 3 184*207e5cccSFangrui Song // UNCONSTRAINED: [[FMA:%.*]] = call half @llvm.fma.f16(half %b, half [[EXTR]], half %a) 185*207e5cccSFangrui Song // CONSTRAINED: [[FMA:%.*]] = call half @llvm.experimental.constrained.fma.f16(half %b, half [[EXTR]], half %a, metadata !"round.tonearest", metadata !"fpexcept.strict") 186*207e5cccSFangrui Song // CHECK-ASM: fmla h{{[0-9]+}}, h{{[0-9]+}}, v{{[0-9]+}}.h[{{[0-9]+}}] 187*207e5cccSFangrui Song // COMMONIR: ret half [[FMA]] 188*207e5cccSFangrui Song float16_t test_vfmah_lane_f16(float16_t a, float16_t b, float16x4_t c) { 189*207e5cccSFangrui Song return vfmah_lane_f16(a, b, c, 3); 190*207e5cccSFangrui Song } 191*207e5cccSFangrui Song 192*207e5cccSFangrui Song // COMMON-LABEL: test_vfmah_laneq_f16 193*207e5cccSFangrui Song // COMMONIR: [[EXTR:%.*]] = extractelement <8 x half> %c, i32 7 194*207e5cccSFangrui Song // UNCONSTRAINED: [[FMA:%.*]] = call half @llvm.fma.f16(half %b, half [[EXTR]], half %a) 195*207e5cccSFangrui Song // CONSTRAINED: [[FMA:%.*]] = call half @llvm.experimental.constrained.fma.f16(half %b, half [[EXTR]], half %a, metadata !"round.tonearest", metadata !"fpexcept.strict") 196*207e5cccSFangrui Song // CHECK-ASM: fmla h{{[0-9]+}}, h{{[0-9]+}}, v{{[0-9]+}}.h[{{[0-9]+}}] 197*207e5cccSFangrui Song // COMMONIR: ret half [[FMA]] 198*207e5cccSFangrui Song float16_t test_vfmah_laneq_f16(float16_t a, float16_t b, float16x8_t c) { 199*207e5cccSFangrui Song return vfmah_laneq_f16(a, b, c, 7); 200*207e5cccSFangrui Song } 201*207e5cccSFangrui Song 202*207e5cccSFangrui Song // COMMON-LABEL: test_vfms_lane_f16 203*207e5cccSFangrui Song // COMMONIR: [[SUB:%.*]] = fneg <4 x half> %b 204*207e5cccSFangrui Song // COMMONIR: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8> 205*207e5cccSFangrui Song // COMMONIR: [[TMP1:%.*]] = bitcast <4 x half> [[SUB]] to <8 x i8> 206*207e5cccSFangrui Song // COMMONIR: [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8> 207*207e5cccSFangrui Song // COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half> 208*207e5cccSFangrui Song // COMMONIR: [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> 209*207e5cccSFangrui Song // COMMONIR: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half> 210*207e5cccSFangrui Song // COMMONIR: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> 211*207e5cccSFangrui Song // UNCONSTRAINED: [[FMA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]]) 212*207e5cccSFangrui Song // CONSTRAINED: [[FMA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.strict") 213*207e5cccSFangrui Song // CHECK-ASM: fmls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}] 214*207e5cccSFangrui Song // COMMONIR: ret <4 x half> [[FMA]] 215*207e5cccSFangrui Song float16x4_t test_vfms_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c) { 216*207e5cccSFangrui Song return vfms_lane_f16(a, b, c, 3); 217*207e5cccSFangrui Song } 218*207e5cccSFangrui Song 219*207e5cccSFangrui Song // COMMON-LABEL: test_vfmsq_lane_f16 220*207e5cccSFangrui Song // COMMONIR: [[SUB:%.*]] = fneg <8 x half> %b 221*207e5cccSFangrui Song // COMMONIR: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8> 222*207e5cccSFangrui Song // COMMONIR: [[TMP1:%.*]] = bitcast <8 x half> [[SUB]] to <16 x i8> 223*207e5cccSFangrui Song // COMMONIR: [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8> 224*207e5cccSFangrui Song // COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half> 225*207e5cccSFangrui Song // COMMONIR: [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 226*207e5cccSFangrui Song // COMMONIR: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half> 227*207e5cccSFangrui Song // COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half> 228*207e5cccSFangrui Song // UNCONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]]) 229*207e5cccSFangrui Song // CONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.strict") 230*207e5cccSFangrui Song // CHECK-ASM: fmls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}] 231*207e5cccSFangrui Song // COMMONIR: ret <8 x half> [[FMLA]] 232*207e5cccSFangrui Song float16x8_t test_vfmsq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c) { 233*207e5cccSFangrui Song return vfmsq_lane_f16(a, b, c, 3); 234*207e5cccSFangrui Song } 235*207e5cccSFangrui Song 236*207e5cccSFangrui Song // COMMON-LABEL: test_vfms_laneq_f16 237*207e5cccSFangrui Song // COMMONIR: [[SUB:%.*]] = fneg <4 x half> %b 238*207e5cccSFangrui Song // CHECK-ASM-NOT: fneg 239*207e5cccSFangrui Song // COMMONIR: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8> 240*207e5cccSFangrui Song // COMMONIR: [[TMP1:%.*]] = bitcast <4 x half> [[SUB]] to <8 x i8> 241*207e5cccSFangrui Song // COMMONIR: [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8> 242*207e5cccSFangrui Song // COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> 243*207e5cccSFangrui Song // COMMONIR: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half> 244*207e5cccSFangrui Song // COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half> 245*207e5cccSFangrui Song // COMMONIR: [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <4 x i32> <i32 7, i32 7, i32 7, i32 7> 246*207e5cccSFangrui Song // UNCONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]]) 247*207e5cccSFangrui Song // CONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict") 248*207e5cccSFangrui Song // CHECK-ASM: fmls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}] 249*207e5cccSFangrui Song // COMMONIR: ret <4 x half> [[FMLA]] 250*207e5cccSFangrui Song float16x4_t test_vfms_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c) { 251*207e5cccSFangrui Song return vfms_laneq_f16(a, b, c, 7); 252*207e5cccSFangrui Song } 253*207e5cccSFangrui Song 254*207e5cccSFangrui Song // COMMON-LABEL: test_vfmsq_laneq_f16 255*207e5cccSFangrui Song // COMMONIR: [[SUB:%.*]] = fneg <8 x half> %b 256*207e5cccSFangrui Song // CHECK-ASM-NOT: fneg 257*207e5cccSFangrui Song // COMMONIR: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8> 258*207e5cccSFangrui Song // COMMONIR: [[TMP1:%.*]] = bitcast <8 x half> [[SUB]] to <16 x i8> 259*207e5cccSFangrui Song // COMMONIR: [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8> 260*207e5cccSFangrui Song // COMMONIR: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half> 261*207e5cccSFangrui Song // COMMONIR: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half> 262*207e5cccSFangrui Song // COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half> 263*207e5cccSFangrui Song // COMMONIR: [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> 264*207e5cccSFangrui Song // UNCONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]]) 265*207e5cccSFangrui Song // CONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict") 266*207e5cccSFangrui Song // CHECK-ASM: fmls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}] 267*207e5cccSFangrui Song // COMMONIR: ret <8 x half> [[FMLA]] 268*207e5cccSFangrui Song float16x8_t test_vfmsq_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c) { 269*207e5cccSFangrui Song return vfmsq_laneq_f16(a, b, c, 7); 270*207e5cccSFangrui Song } 271*207e5cccSFangrui Song 272*207e5cccSFangrui Song // COMMON-LABEL: test_vfms_n_f16 273*207e5cccSFangrui Song // COMMONIR: [[SUB:%.*]] = fneg <4 x half> %b 274*207e5cccSFangrui Song // COMMONIR: [[TMP0:%.*]] = insertelement <4 x half> poison, half %c, i32 0 275*207e5cccSFangrui Song // COMMONIR: [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half %c, i32 1 276*207e5cccSFangrui Song // COMMONIR: [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half %c, i32 2 277*207e5cccSFangrui Song // COMMONIR: [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half %c, i32 3 278*207e5cccSFangrui Song // UNCONSTRAINED: [[FMA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[SUB]], <4 x half> [[TMP3]], <4 x half> %a) 279*207e5cccSFangrui Song // CONSTRAINED: [[FMA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[SUB]], <4 x half> [[TMP3]], <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") 280*207e5cccSFangrui Song // CHECK-ASM: fmls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}] 281*207e5cccSFangrui Song // COMMONIR: ret <4 x half> [[FMA]] 282*207e5cccSFangrui Song float16x4_t test_vfms_n_f16(float16x4_t a, float16x4_t b, float16_t c) { 283*207e5cccSFangrui Song return vfms_n_f16(a, b, c); 284*207e5cccSFangrui Song } 285*207e5cccSFangrui Song 286*207e5cccSFangrui Song // COMMON-LABEL: test_vfmsq_n_f16 287*207e5cccSFangrui Song // COMMONIR: [[SUB:%.*]] = fneg <8 x half> %b 288*207e5cccSFangrui Song // COMMONIR: [[TMP0:%.*]] = insertelement <8 x half> poison, half %c, i32 0 289*207e5cccSFangrui Song // COMMONIR: [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half %c, i32 1 290*207e5cccSFangrui Song // COMMONIR: [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half %c, i32 2 291*207e5cccSFangrui Song // COMMONIR: [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half %c, i32 3 292*207e5cccSFangrui Song // COMMONIR: [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half %c, i32 4 293*207e5cccSFangrui Song // COMMONIR: [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half %c, i32 5 294*207e5cccSFangrui Song // COMMONIR: [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half %c, i32 6 295*207e5cccSFangrui Song // COMMONIR: [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half %c, i32 7 296*207e5cccSFangrui Song // UNCONSTRAINED: [[FMA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[SUB]], <8 x half> [[TMP7]], <8 x half> %a) 297*207e5cccSFangrui Song // CONSTRAINED: [[FMA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[SUB]], <8 x half> [[TMP7]], <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") 298*207e5cccSFangrui Song // CHECK-ASM: fmls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}] 299*207e5cccSFangrui Song // COMMONIR: ret <8 x half> [[FMA]] 300*207e5cccSFangrui Song float16x8_t test_vfmsq_n_f16(float16x8_t a, float16x8_t b, float16_t c) { 301*207e5cccSFangrui Song return vfmsq_n_f16(a, b, c); 302*207e5cccSFangrui Song } 303*207e5cccSFangrui Song 304*207e5cccSFangrui Song // COMMON-LABEL: test_vfmsh_lane_f16 305*207e5cccSFangrui Song // UNCONSTRAINED: [[TMP0:%.*]] = fpext half %b to float 306*207e5cccSFangrui Song // CONSTRAINED: [[TMP0:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half %b, metadata !"fpexcept.strict") 307*207e5cccSFangrui Song // CHECK-ASM: fcvt s{{[0-9]+}}, h{{[0-9]+}} 308*207e5cccSFangrui Song // COMMONIR: [[TMP1:%.*]] = fneg float [[TMP0]] 309*207e5cccSFangrui Song // CHECK-ASM: fneg s{{[0-9]+}}, s{{[0-9]+}} 310*207e5cccSFangrui Song // UNCONSTRAINED: [[SUB:%.*]] = fptrunc float [[TMP1]] to half 311*207e5cccSFangrui Song // CONSTRAINED: [[SUB:%.*]] = call half @llvm.experimental.constrained.fptrunc.f16.f32(float [[TMP1]], metadata !"round.tonearest", metadata !"fpexcept.strict") 312*207e5cccSFangrui Song // CHECK-ASM: fcvt h{{[0-9]+}}, s{{[0-9]+}} 313*207e5cccSFangrui Song // COMMONIR: [[EXTR:%.*]] = extractelement <4 x half> %c, i32 3 314*207e5cccSFangrui Song // UNCONSTRAINED: [[FMA:%.*]] = call half @llvm.fma.f16(half [[SUB]], half [[EXTR]], half %a) 315*207e5cccSFangrui Song // CONSTRAINED: [[FMA:%.*]] = call half @llvm.experimental.constrained.fma.f16(half [[SUB]], half [[EXTR]], half %a, metadata !"round.tonearest", metadata !"fpexcept.strict") 316*207e5cccSFangrui Song // CHECK-ASM: fmla h{{[0-9]+}}, h{{[0-9]+}}, v{{[0-9]+}}.h[{{[0-9]+}}] 317*207e5cccSFangrui Song // COMMONIR: ret half [[FMA]] 318*207e5cccSFangrui Song float16_t test_vfmsh_lane_f16(float16_t a, float16_t b, float16x4_t c) { 319*207e5cccSFangrui Song return vfmsh_lane_f16(a, b, c, 3); 320*207e5cccSFangrui Song } 321*207e5cccSFangrui Song 322*207e5cccSFangrui Song // COMMON-LABEL: test_vfmsh_laneq_f16 323*207e5cccSFangrui Song // UNCONSTRAINED: [[TMP0:%.*]] = fpext half %b to float 324*207e5cccSFangrui Song // CONSTRAINED: [[TMP0:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half %b, metadata !"fpexcept.strict") 325*207e5cccSFangrui Song // CHECK-ASM: fcvt s{{[0-9]+}}, h{{[0-9]+}} 326*207e5cccSFangrui Song // COMMONIR: [[TMP1:%.*]] = fneg float [[TMP0]] 327*207e5cccSFangrui Song // CHECK-ASM: fneg s{{[0-9]+}}, s{{[0-9]+}} 328*207e5cccSFangrui Song // UNCONSTRAINED: [[SUB:%.*]] = fptrunc float [[TMP1]] to half 329*207e5cccSFangrui Song // CONSTRAINED: [[SUB:%.*]] = call half @llvm.experimental.constrained.fptrunc.f16.f32(float [[TMP1]], metadata !"round.tonearest", metadata !"fpexcept.strict") 330*207e5cccSFangrui Song // CHECK-ASM: fcvt h{{[0-9]+}}, s{{[0-9]+}} 331*207e5cccSFangrui Song // COMMONIR: [[EXTR:%.*]] = extractelement <8 x half> %c, i32 7 332*207e5cccSFangrui Song // UNCONSTRAINED: [[FMA:%.*]] = call half @llvm.fma.f16(half [[SUB]], half [[EXTR]], half %a) 333*207e5cccSFangrui Song // CONSTRAINED: [[FMA:%.*]] = call half @llvm.experimental.constrained.fma.f16(half [[SUB]], half [[EXTR]], half %a, metadata !"round.tonearest", metadata !"fpexcept.strict") 334*207e5cccSFangrui Song // CHECK-ASM: fmla h{{[0-9]+}}, h{{[0-9]+}}, v{{[0-9]+}}.h[{{[0-9]+}}] 335*207e5cccSFangrui Song // COMMONIR: ret half [[FMA]] 336*207e5cccSFangrui Song float16_t test_vfmsh_laneq_f16(float16_t a, float16_t b, float16x8_t c) { 337*207e5cccSFangrui Song return vfmsh_laneq_f16(a, b, c, 7); 338*207e5cccSFangrui Song } 339