xref: /llvm-project/clang/test/CodeGen/AArch64/v8.2a-neon-intrinsics-constrained.c (revision 207e5ccceec8d3cc3f32723e78f2a142bc61b07d)
1*207e5cccSFangrui Song // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\
2*207e5cccSFangrui Song // RUN: -flax-vector-conversions=none -disable-O0-optnone -emit-llvm -o - %s \
3*207e5cccSFangrui Song // RUN: | opt -S -passes=mem2reg \
4*207e5cccSFangrui Song // RUN: | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=UNCONSTRAINED %s
5*207e5cccSFangrui Song // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\
6*207e5cccSFangrui Song // RUN: -ffp-exception-behavior=maytrap -DEXCEPT=1 \
7*207e5cccSFangrui Song // RUN: -flax-vector-conversions=none -disable-O0-optnone -emit-llvm -o - %s \
8*207e5cccSFangrui Song // RUN: | opt -S -passes=mem2reg \
9*207e5cccSFangrui Song // RUN: | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=CONSTRAINED --implicit-check-not=fpexcept.maytrap %s
10*207e5cccSFangrui Song // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\
11*207e5cccSFangrui Song // RUN: -flax-vector-conversions=none -disable-O0-optnone -emit-llvm -o - %s \
12*207e5cccSFangrui Song // RUN: | opt -S -passes=mem2reg | llc -o=- - \
13*207e5cccSFangrui Song // RUN: | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s
14*207e5cccSFangrui Song // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\
15*207e5cccSFangrui Song // RUN: -ffp-exception-behavior=maytrap -DEXCEPT=1 \
16*207e5cccSFangrui Song // RUN: -flax-vector-conversions=none -disable-O0-optnone -emit-llvm -o - %s \
17*207e5cccSFangrui Song // RUN: | opt -S -passes=mem2reg | llc -o=- - \
18*207e5cccSFangrui Song // RUN: | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM --implicit-check-not=fpexcept.maytrap  %s
19*207e5cccSFangrui Song 
20*207e5cccSFangrui Song // REQUIRES: aarch64-registered-target
21*207e5cccSFangrui Song 
22*207e5cccSFangrui Song // Test that the constrained intrinsics are picking up the exception
23*207e5cccSFangrui Song // metadata from the AST instead of the global default from the command line.
24*207e5cccSFangrui Song // Any cases of "fpexcept.maytrap" in this test are clang bugs.
25*207e5cccSFangrui Song 
// EXCEPT is only defined (-DEXCEPT=1) by the invocations at the top of this
// file that also pass -ffp-exception-behavior=maytrap. In those runs the
// pragma below is active, and the constrained checks expect "fpexcept.strict"
// metadata rather than the command-line "maytrap" default.
26*207e5cccSFangrui Song #if EXCEPT
27*207e5cccSFangrui Song #pragma float_control(except, on)
28*207e5cccSFangrui Song #endif
29*207e5cccSFangrui Song 
30*207e5cccSFangrui Song #include <arm_neon.h>
31*207e5cccSFangrui Song 
32*207e5cccSFangrui Song // COMMON-LABEL: test_vsqrt_f16
33*207e5cccSFangrui Song // UNCONSTRAINED:  [[SQR:%.*]] = call <4 x half> @llvm.sqrt.v4f16(<4 x half> %a)
34*207e5cccSFangrui Song // CONSTRAINED:    [[SQR:%.*]] = call <4 x half> @llvm.experimental.constrained.sqrt.v4f16(<4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
35*207e5cccSFangrui Song // CHECK-ASM:      fsqrt v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
36*207e5cccSFangrui Song // COMMONIR:       ret <4 x half> [[SQR]]
// Calls vsqrt_f16 on a 4-lane half vector. The checks above expect a
// sqrt.v4f16 call (constrained form in the EXCEPT runs) and a .4h fsqrt.
37*207e5cccSFangrui Song float16x4_t test_vsqrt_f16(float16x4_t a) {
38*207e5cccSFangrui Song   return vsqrt_f16(a);
39*207e5cccSFangrui Song }
40*207e5cccSFangrui Song 
41*207e5cccSFangrui Song // COMMON-LABEL: test_vsqrtq_f16
42*207e5cccSFangrui Song // UNCONSTRAINED:  [[SQR:%.*]] = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %a)
43*207e5cccSFangrui Song // CONSTRAINED:    [[SQR:%.*]] = call <8 x half> @llvm.experimental.constrained.sqrt.v8f16(<8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
44*207e5cccSFangrui Song // CHECK-ASM:      fsqrt v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
45*207e5cccSFangrui Song // COMMONIR:       ret <8 x half> [[SQR]]
// Calls vsqrtq_f16 on an 8-lane half vector. The checks above expect a
// sqrt.v8f16 call (constrained form in the EXCEPT runs) and a .8h fsqrt.
46*207e5cccSFangrui Song float16x8_t test_vsqrtq_f16(float16x8_t a) {
47*207e5cccSFangrui Song   return vsqrtq_f16(a);
48*207e5cccSFangrui Song }
49*207e5cccSFangrui Song 
50*207e5cccSFangrui Song // COMMON-LABEL: test_vfma_f16
51*207e5cccSFangrui Song // UNCONSTRAINED:  [[ADD:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a)
52*207e5cccSFangrui Song // CONSTRAINED:    [[ADD:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
53*207e5cccSFangrui Song // CHECK-ASM:      fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
54*207e5cccSFangrui Song // COMMONIR:       ret <4 x half> [[ADD]]
// Calls vfma_f16(a, b, c). The expected fma.v4f16 call takes its operands in
// the order (b, c, a), and the expected asm is a .4h fmla.
55*207e5cccSFangrui Song float16x4_t test_vfma_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
56*207e5cccSFangrui Song   return vfma_f16(a, b, c);
57*207e5cccSFangrui Song }
58*207e5cccSFangrui Song 
59*207e5cccSFangrui Song // COMMON-LABEL: test_vfmaq_f16
60*207e5cccSFangrui Song // UNCONSTRAINED:  [[ADD:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a)
61*207e5cccSFangrui Song // CONSTRAINED:    [[ADD:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
62*207e5cccSFangrui Song // CHECK-ASM:      fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
63*207e5cccSFangrui Song // COMMONIR:       ret <8 x half> [[ADD]]
// Calls vfmaq_f16(a, b, c). The expected fma.v8f16 call takes its operands in
// the order (b, c, a), and the expected asm is a .8h fmla.
64*207e5cccSFangrui Song float16x8_t test_vfmaq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
65*207e5cccSFangrui Song   return vfmaq_f16(a, b, c);
66*207e5cccSFangrui Song }
67*207e5cccSFangrui Song 
68*207e5cccSFangrui Song // COMMON-LABEL: test_vfms_f16
69*207e5cccSFangrui Song // COMMONIR:       [[SUB:%.*]] = fneg <4 x half> %b
70*207e5cccSFangrui Song // UNCONSTRAINED:  [[ADD:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[SUB]], <4 x half> %c, <4 x half> %a)
71*207e5cccSFangrui Song // CONSTRAINED:    [[ADD:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[SUB]], <4 x half> %c, <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
72*207e5cccSFangrui Song // CHECK-ASM:      fmls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
73*207e5cccSFangrui Song // COMMONIR:       ret <4 x half> [[ADD]]
// Calls vfms_f16(a, b, c). The expected IR negates b with fneg and feeds the
// result to fma.v4f16 together with c and a; the expected asm is a .4h fmls.
74*207e5cccSFangrui Song float16x4_t test_vfms_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
75*207e5cccSFangrui Song   return vfms_f16(a, b, c);
76*207e5cccSFangrui Song }
77*207e5cccSFangrui Song 
78*207e5cccSFangrui Song // COMMON-LABEL: test_vfmsq_f16
79*207e5cccSFangrui Song // COMMONIR:       [[SUB:%.*]] = fneg <8 x half> %b
80*207e5cccSFangrui Song // UNCONSTRAINED:  [[ADD:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[SUB]], <8 x half> %c, <8 x half> %a)
81*207e5cccSFangrui Song // CONSTRAINED:    [[ADD:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[SUB]], <8 x half> %c, <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
82*207e5cccSFangrui Song // CHECK-ASM:      fmls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
83*207e5cccSFangrui Song // COMMONIR:       ret <8 x half> [[ADD]]
// Calls vfmsq_f16(a, b, c). The expected IR negates b with fneg and feeds the
// result to fma.v8f16 together with c and a; the expected asm is a .8h fmls.
84*207e5cccSFangrui Song float16x8_t test_vfmsq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
85*207e5cccSFangrui Song   return vfmsq_f16(a, b, c);
86*207e5cccSFangrui Song }
87*207e5cccSFangrui Song 
88*207e5cccSFangrui Song // COMMON-LABEL: test_vfma_lane_f16
89*207e5cccSFangrui Song // COMMONIR:      [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
90*207e5cccSFangrui Song // COMMONIR:      [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
91*207e5cccSFangrui Song // COMMONIR:      [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
92*207e5cccSFangrui Song // COMMONIR:      [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
93*207e5cccSFangrui Song // COMMONIR:      [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
94*207e5cccSFangrui Song // COMMONIR:      [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
95*207e5cccSFangrui Song // COMMONIR:      [[TMP5:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
96*207e5cccSFangrui Song // UNCONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]])
97*207e5cccSFangrui Song // CONSTRAINED:   [[FMLA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.strict")
98*207e5cccSFangrui Song // CHECK-ASM:     fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
99*207e5cccSFangrui Song // COMMONIR:      ret <4 x half> [[FMLA]]
// Calls vfma_lane_f16 with lane index 3. The expected IR splats lane 3 of c
// with a shufflevector and passes it as the second fma.v4f16 operand; the
// expected asm is an indexed .4h fmla.
100*207e5cccSFangrui Song float16x4_t test_vfma_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
101*207e5cccSFangrui Song   return vfma_lane_f16(a, b, c, 3);
102*207e5cccSFangrui Song }
103*207e5cccSFangrui Song 
104*207e5cccSFangrui Song // COMMON-LABEL: test_vfmaq_lane_f16
105*207e5cccSFangrui Song // COMMONIR:      [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
106*207e5cccSFangrui Song // COMMONIR:      [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
107*207e5cccSFangrui Song // COMMONIR:      [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
108*207e5cccSFangrui Song // COMMONIR:      [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
109*207e5cccSFangrui Song // COMMONIR:      [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
110*207e5cccSFangrui Song // COMMONIR:      [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
111*207e5cccSFangrui Song // COMMONIR:      [[TMP5:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
112*207e5cccSFangrui Song // UNCONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]])
113*207e5cccSFangrui Song // CONSTRAINED:   [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.strict")
114*207e5cccSFangrui Song // CHECK-ASM:     fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
115*207e5cccSFangrui Song // COMMONIR:      ret <8 x half> [[FMLA]]
// Calls vfmaq_lane_f16 with lane index 3. The expected IR broadcasts lane 3
// of the 4-lane c to eight lanes with a shufflevector before the fma.v8f16
// call; the expected asm is an indexed .8h fmla.
116*207e5cccSFangrui Song float16x8_t test_vfmaq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c) {
117*207e5cccSFangrui Song   return vfmaq_lane_f16(a, b, c, 3);
118*207e5cccSFangrui Song }
119*207e5cccSFangrui Song 
120*207e5cccSFangrui Song // COMMON-LABEL: test_vfma_laneq_f16
121*207e5cccSFangrui Song // COMMONIR:      [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
122*207e5cccSFangrui Song // COMMONIR:      [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
123*207e5cccSFangrui Song // COMMONIR:      [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
124*207e5cccSFangrui Song // COMMONIR:      [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
125*207e5cccSFangrui Song // COMMONIR:      [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
126*207e5cccSFangrui Song // COMMONIR:      [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
127*207e5cccSFangrui Song // COMMONIR:      [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
128*207e5cccSFangrui Song // UNCONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]])
129*207e5cccSFangrui Song // CONSTRAINED:   [[FMLA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict")
130*207e5cccSFangrui Song // CHECK-ASM:     fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
131*207e5cccSFangrui Song // COMMONIR:      ret <4 x half> [[FMLA]]
// Calls vfma_laneq_f16 with lane index 7. The expected IR narrows lane 7 of
// the 8-lane c to a 4-lane splat, which is the first fma.v4f16 operand
// (followed by b, then a); the expected asm is an indexed .4h fmla.
132*207e5cccSFangrui Song float16x4_t test_vfma_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c) {
133*207e5cccSFangrui Song   return vfma_laneq_f16(a, b, c, 7);
134*207e5cccSFangrui Song }
135*207e5cccSFangrui Song 
136*207e5cccSFangrui Song // COMMON-LABEL: test_vfmaq_laneq_f16
137*207e5cccSFangrui Song // COMMONIR:      [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
138*207e5cccSFangrui Song // COMMONIR:      [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
139*207e5cccSFangrui Song // COMMONIR:      [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
140*207e5cccSFangrui Song // COMMONIR:      [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
141*207e5cccSFangrui Song // COMMONIR:      [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
142*207e5cccSFangrui Song // COMMONIR:      [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
143*207e5cccSFangrui Song // COMMONIR:      [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
144*207e5cccSFangrui Song // UNCONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]])
145*207e5cccSFangrui Song // CONSTRAINED:   [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict")
146*207e5cccSFangrui Song // CHECK-ASM:     fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
147*207e5cccSFangrui Song // COMMONIR:      ret <8 x half> [[FMLA]]
// Calls vfmaq_laneq_f16 with lane index 7. The expected IR splats lane 7 of
// c across eight lanes, which is the first fma.v8f16 operand (followed by b,
// then a); the expected asm is an indexed .8h fmla.
148*207e5cccSFangrui Song float16x8_t test_vfmaq_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
149*207e5cccSFangrui Song   return vfmaq_laneq_f16(a, b, c, 7);
150*207e5cccSFangrui Song }
151*207e5cccSFangrui Song 
152*207e5cccSFangrui Song // COMMON-LABEL: test_vfma_n_f16
153*207e5cccSFangrui Song // COMMONIR:      [[TMP0:%.*]] = insertelement <4 x half> poison, half %c, i32 0
154*207e5cccSFangrui Song // COMMONIR:      [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half %c, i32 1
155*207e5cccSFangrui Song // COMMONIR:      [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half %c, i32 2
156*207e5cccSFangrui Song // COMMONIR:      [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half %c, i32 3
157*207e5cccSFangrui Song // UNCONSTRAINED: [[FMA:%.*]]  = call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> [[TMP3]], <4 x half> %a)
158*207e5cccSFangrui Song // CONSTRAINED:   [[FMA:%.*]]  = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> %b, <4 x half> [[TMP3]], <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
159*207e5cccSFangrui Song // CHECK-ASM:     fmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
160*207e5cccSFangrui Song // COMMONIR:      ret <4 x half> [[FMA]]
// Calls vfma_n_f16 with a scalar multiplier. The expected IR builds a 4-lane
// vector from c with four insertelement steps and passes it to fma.v4f16;
// the expected asm is an indexed .4h fmla.
161*207e5cccSFangrui Song float16x4_t test_vfma_n_f16(float16x4_t a, float16x4_t b, float16_t c) {
162*207e5cccSFangrui Song   return vfma_n_f16(a, b, c);
163*207e5cccSFangrui Song }
164*207e5cccSFangrui Song 
165*207e5cccSFangrui Song // COMMON-LABEL: test_vfmaq_n_f16
166*207e5cccSFangrui Song // COMMONIR:      [[TMP0:%.*]] = insertelement <8 x half> poison, half %c, i32 0
167*207e5cccSFangrui Song // COMMONIR:      [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half %c, i32 1
168*207e5cccSFangrui Song // COMMONIR:      [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half %c, i32 2
169*207e5cccSFangrui Song // COMMONIR:      [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half %c, i32 3
170*207e5cccSFangrui Song // COMMONIR:      [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half %c, i32 4
171*207e5cccSFangrui Song // COMMONIR:      [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half %c, i32 5
172*207e5cccSFangrui Song // COMMONIR:      [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half %c, i32 6
173*207e5cccSFangrui Song // COMMONIR:      [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half %c, i32 7
174*207e5cccSFangrui Song // UNCONSTRAINED: [[FMA:%.*]]  = call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> [[TMP7]], <8 x half> %a)
175*207e5cccSFangrui Song // CONSTRAINED:   [[FMA:%.*]]  = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> %b, <8 x half> [[TMP7]], <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
176*207e5cccSFangrui Song // CHECK-ASM:     fmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
177*207e5cccSFangrui Song // COMMONIR:      ret <8 x half> [[FMA]]
// Calls vfmaq_n_f16 with a scalar multiplier. The expected IR builds an
// 8-lane vector from c with eight insertelement steps and passes it to
// fma.v8f16; the expected asm is an indexed .8h fmla.
178*207e5cccSFangrui Song float16x8_t test_vfmaq_n_f16(float16x8_t a, float16x8_t b, float16_t c) {
179*207e5cccSFangrui Song   return vfmaq_n_f16(a, b, c);
180*207e5cccSFangrui Song }
181*207e5cccSFangrui Song 
182*207e5cccSFangrui Song // COMMON-LABEL: test_vfmah_lane_f16
183*207e5cccSFangrui Song // COMMONIR:      [[EXTR:%.*]] = extractelement <4 x half> %c, i32 3
184*207e5cccSFangrui Song // UNCONSTRAINED: [[FMA:%.*]]  = call half @llvm.fma.f16(half %b, half [[EXTR]], half %a)
185*207e5cccSFangrui Song // CONSTRAINED:   [[FMA:%.*]]  = call half @llvm.experimental.constrained.fma.f16(half %b, half [[EXTR]], half %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
186*207e5cccSFangrui Song // CHECK-ASM:     fmla h{{[0-9]+}}, h{{[0-9]+}}, v{{[0-9]+}}.h[{{[0-9]+}}]
187*207e5cccSFangrui Song // COMMONIR:      ret half [[FMA]]
// Calls the scalar vfmah_lane_f16 with lane index 3. The expected IR extracts
// lane 3 of c and calls fma.f16 with (b, extracted lane, a); the expected asm
// is an indexed fmla on h registers.
188*207e5cccSFangrui Song float16_t test_vfmah_lane_f16(float16_t a, float16_t b, float16x4_t c) {
189*207e5cccSFangrui Song   return vfmah_lane_f16(a, b, c, 3);
190*207e5cccSFangrui Song }
191*207e5cccSFangrui Song 
192*207e5cccSFangrui Song // COMMON-LABEL: test_vfmah_laneq_f16
193*207e5cccSFangrui Song // COMMONIR:      [[EXTR:%.*]] = extractelement <8 x half> %c, i32 7
194*207e5cccSFangrui Song // UNCONSTRAINED: [[FMA:%.*]]  = call half @llvm.fma.f16(half %b, half [[EXTR]], half %a)
195*207e5cccSFangrui Song // CONSTRAINED:   [[FMA:%.*]]  = call half @llvm.experimental.constrained.fma.f16(half %b, half [[EXTR]], half %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
196*207e5cccSFangrui Song // CHECK-ASM:     fmla h{{[0-9]+}}, h{{[0-9]+}}, v{{[0-9]+}}.h[{{[0-9]+}}]
197*207e5cccSFangrui Song // COMMONIR:      ret half [[FMA]]
// Calls the scalar vfmah_laneq_f16 with lane index 7. The expected IR
// extracts lane 7 of the 8-lane c and calls fma.f16 with (b, extracted lane,
// a); the expected asm is an indexed fmla on h registers.
198*207e5cccSFangrui Song float16_t test_vfmah_laneq_f16(float16_t a, float16_t b, float16x8_t c) {
199*207e5cccSFangrui Song   return vfmah_laneq_f16(a, b, c, 7);
200*207e5cccSFangrui Song }
201*207e5cccSFangrui Song 
202*207e5cccSFangrui Song // COMMON-LABEL: test_vfms_lane_f16
203*207e5cccSFangrui Song // COMMONIR:      [[SUB:%.*]]  = fneg <4 x half> %b
204*207e5cccSFangrui Song // COMMONIR:      [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
205*207e5cccSFangrui Song // COMMONIR:      [[TMP1:%.*]] = bitcast <4 x half> [[SUB]] to <8 x i8>
206*207e5cccSFangrui Song // COMMONIR:      [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
207*207e5cccSFangrui Song // COMMONIR:      [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
208*207e5cccSFangrui Song // COMMONIR:      [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
209*207e5cccSFangrui Song // COMMONIR:      [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
210*207e5cccSFangrui Song // COMMONIR:      [[TMP5:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
211*207e5cccSFangrui Song // UNCONSTRAINED: [[FMA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]])
212*207e5cccSFangrui Song // CONSTRAINED:   [[FMA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[TMP4]], <4 x half> [[LANE]], <4 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.strict")
213*207e5cccSFangrui Song // CHECK-ASM:     fmls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
214*207e5cccSFangrui Song // COMMONIR:      ret <4 x half> [[FMA]]
// Calls vfms_lane_f16 with lane index 3. The expected IR negates b with fneg,
// splats lane 3 of c and calls fma.v4f16; the expected asm is an indexed
// .4h fmls.
215*207e5cccSFangrui Song float16x4_t test_vfms_lane_f16(float16x4_t a, float16x4_t b, float16x4_t c) {
216*207e5cccSFangrui Song   return vfms_lane_f16(a, b, c, 3);
217*207e5cccSFangrui Song }
218*207e5cccSFangrui Song 
219*207e5cccSFangrui Song // COMMON-LABEL: test_vfmsq_lane_f16
220*207e5cccSFangrui Song // COMMONIR:      [[SUB:%.*]]  = fneg <8 x half> %b
221*207e5cccSFangrui Song // COMMONIR:      [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
222*207e5cccSFangrui Song // COMMONIR:      [[TMP1:%.*]] = bitcast <8 x half> [[SUB]] to <16 x i8>
223*207e5cccSFangrui Song // COMMONIR:      [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
224*207e5cccSFangrui Song // COMMONIR:      [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
225*207e5cccSFangrui Song // COMMONIR:      [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
226*207e5cccSFangrui Song // COMMONIR:      [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
227*207e5cccSFangrui Song // COMMONIR:      [[TMP5:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
228*207e5cccSFangrui Song // UNCONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]])
229*207e5cccSFangrui Song // CONSTRAINED:   [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[TMP4]], <8 x half> [[LANE]], <8 x half> [[TMP5]], metadata !"round.tonearest", metadata !"fpexcept.strict")
230*207e5cccSFangrui Song // CHECK-ASM:     fmls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
231*207e5cccSFangrui Song // COMMONIR:      ret <8 x half> [[FMLA]]
// Calls vfmsq_lane_f16 with lane index 3. The expected IR negates b with
// fneg, broadcasts lane 3 of the 4-lane c to eight lanes and calls
// fma.v8f16; the expected asm is an indexed .8h fmls.
232*207e5cccSFangrui Song float16x8_t test_vfmsq_lane_f16(float16x8_t a, float16x8_t b, float16x4_t c) {
233*207e5cccSFangrui Song   return vfmsq_lane_f16(a, b, c, 3);
234*207e5cccSFangrui Song }
235*207e5cccSFangrui Song 
236*207e5cccSFangrui Song // COMMON-LABEL: test_vfms_laneq_f16
237*207e5cccSFangrui Song // COMMONIR:      [[SUB:%.*]]  = fneg <4 x half> %b
238*207e5cccSFangrui Song // CHECK-ASM-NOT: fneg
239*207e5cccSFangrui Song // COMMONIR:      [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
240*207e5cccSFangrui Song // COMMONIR:      [[TMP1:%.*]] = bitcast <4 x half> [[SUB]] to <8 x i8>
241*207e5cccSFangrui Song // COMMONIR:      [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
242*207e5cccSFangrui Song // COMMONIR:      [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
243*207e5cccSFangrui Song // COMMONIR:      [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
244*207e5cccSFangrui Song // COMMONIR:      [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
245*207e5cccSFangrui Song // COMMONIR:      [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
246*207e5cccSFangrui Song // UNCONSTRAINED: [[FMLA:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]])
247*207e5cccSFangrui Song // CONSTRAINED:   [[FMLA:%.*]] = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[LANE]], <4 x half> [[TMP4]], <4 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict")
248*207e5cccSFangrui Song // CHECK-ASM:     fmls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
249*207e5cccSFangrui Song // COMMONIR:      ret <4 x half> [[FMLA]]
// Calls vfms_laneq_f16 with lane index 7. The expected IR still contains an
// fneg of b, but the asm checks require that no fneg appears before the
// indexed .4h fmls.
250*207e5cccSFangrui Song float16x4_t test_vfms_laneq_f16(float16x4_t a, float16x4_t b, float16x8_t c) {
251*207e5cccSFangrui Song   return vfms_laneq_f16(a, b, c, 7);
252*207e5cccSFangrui Song }
253*207e5cccSFangrui Song 
254*207e5cccSFangrui Song // COMMON-LABEL: test_vfmsq_laneq_f16
255*207e5cccSFangrui Song // COMMONIR:      [[SUB:%.*]]  = fneg <8 x half> %b
256*207e5cccSFangrui Song // CHECK-ASM-NOT: fneg
257*207e5cccSFangrui Song // COMMONIR:      [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
258*207e5cccSFangrui Song // COMMONIR:      [[TMP1:%.*]] = bitcast <8 x half> [[SUB]] to <16 x i8>
259*207e5cccSFangrui Song // COMMONIR:      [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
260*207e5cccSFangrui Song // COMMONIR:      [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
261*207e5cccSFangrui Song // COMMONIR:      [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
262*207e5cccSFangrui Song // COMMONIR:      [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
263*207e5cccSFangrui Song // COMMONIR:      [[LANE:%.*]] = shufflevector <8 x half> [[TMP5]], <8 x half> [[TMP5]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
264*207e5cccSFangrui Song // UNCONSTRAINED: [[FMLA:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]])
265*207e5cccSFangrui Song // CONSTRAINED:   [[FMLA:%.*]] = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[LANE]], <8 x half> [[TMP4]], <8 x half> [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict")
266*207e5cccSFangrui Song // CHECK-ASM:     fmls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
267*207e5cccSFangrui Song // COMMONIR:      ret <8 x half> [[FMLA]]
// Calls vfmsq_laneq_f16 with lane index 7. The expected IR still contains an
// fneg of b, but the asm checks require that no fneg appears before the
// indexed .8h fmls.
268*207e5cccSFangrui Song float16x8_t test_vfmsq_laneq_f16(float16x8_t a, float16x8_t b, float16x8_t c) {
269*207e5cccSFangrui Song   return vfmsq_laneq_f16(a, b, c, 7);
270*207e5cccSFangrui Song }
271*207e5cccSFangrui Song 
272*207e5cccSFangrui Song // COMMON-LABEL: test_vfms_n_f16
273*207e5cccSFangrui Song // COMMONIR:      [[SUB:%.*]]  = fneg <4 x half> %b
274*207e5cccSFangrui Song // COMMONIR:      [[TMP0:%.*]] = insertelement <4 x half> poison, half %c, i32 0
275*207e5cccSFangrui Song // COMMONIR:      [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half %c, i32 1
276*207e5cccSFangrui Song // COMMONIR:      [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half %c, i32 2
277*207e5cccSFangrui Song // COMMONIR:      [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half %c, i32 3
278*207e5cccSFangrui Song // UNCONSTRAINED: [[FMA:%.*]]  = call <4 x half> @llvm.fma.v4f16(<4 x half> [[SUB]], <4 x half> [[TMP3]], <4 x half> %a)
279*207e5cccSFangrui Song // CONSTRAINED:   [[FMA:%.*]]  = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> [[SUB]], <4 x half> [[TMP3]], <4 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
280*207e5cccSFangrui Song // CHECK-ASM:     fmls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[{{[0-9]+}}]
281*207e5cccSFangrui Song // COMMONIR:      ret <4 x half> [[FMA]]
// Calls vfms_n_f16 with a scalar multiplier. The expected IR negates b with
// fneg, builds a 4-lane vector from c via insertelement and calls fma.v4f16;
// the expected asm is an indexed .4h fmls.
282*207e5cccSFangrui Song float16x4_t test_vfms_n_f16(float16x4_t a, float16x4_t b, float16_t c) {
283*207e5cccSFangrui Song   return vfms_n_f16(a, b, c);
284*207e5cccSFangrui Song }
285*207e5cccSFangrui Song 
286*207e5cccSFangrui Song // COMMON-LABEL: test_vfmsq_n_f16
287*207e5cccSFangrui Song // COMMONIR:      [[SUB:%.*]]  = fneg <8 x half> %b
288*207e5cccSFangrui Song // COMMONIR:      [[TMP0:%.*]] = insertelement <8 x half> poison, half %c, i32 0
289*207e5cccSFangrui Song // COMMONIR:      [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half %c, i32 1
290*207e5cccSFangrui Song // COMMONIR:      [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half %c, i32 2
291*207e5cccSFangrui Song // COMMONIR:      [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half %c, i32 3
292*207e5cccSFangrui Song // COMMONIR:      [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half %c, i32 4
293*207e5cccSFangrui Song // COMMONIR:      [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half %c, i32 5
294*207e5cccSFangrui Song // COMMONIR:      [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half %c, i32 6
295*207e5cccSFangrui Song // COMMONIR:      [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half %c, i32 7
296*207e5cccSFangrui Song // UNCONSTRAINED: [[FMA:%.*]]  = call <8 x half> @llvm.fma.v8f16(<8 x half> [[SUB]], <8 x half> [[TMP7]], <8 x half> %a)
297*207e5cccSFangrui Song // CONSTRAINED:   [[FMA:%.*]]  = call <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half> [[SUB]], <8 x half> [[TMP7]], <8 x half> %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
298*207e5cccSFangrui Song // CHECK-ASM:     fmls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[{{[0-9]+}}]
299*207e5cccSFangrui Song // COMMONIR:      ret <8 x half> [[FMA]]
// Calls vfmsq_n_f16 with a scalar multiplier. The expected IR negates b with
// fneg, builds an 8-lane vector from c via insertelement and calls
// fma.v8f16; the expected asm is an indexed .8h fmls.
300*207e5cccSFangrui Song float16x8_t test_vfmsq_n_f16(float16x8_t a, float16x8_t b, float16_t c) {
301*207e5cccSFangrui Song   return vfmsq_n_f16(a, b, c);
302*207e5cccSFangrui Song }
303*207e5cccSFangrui Song 
304*207e5cccSFangrui Song // COMMON-LABEL: test_vfmsh_lane_f16
305*207e5cccSFangrui Song // UNCONSTRAINED: [[TMP0:%.*]] = fpext half %b to float
306*207e5cccSFangrui Song // CONSTRAINED:   [[TMP0:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half %b, metadata !"fpexcept.strict")
307*207e5cccSFangrui Song // CHECK-ASM:     fcvt s{{[0-9]+}}, h{{[0-9]+}}
308*207e5cccSFangrui Song // COMMONIR:      [[TMP1:%.*]] = fneg float [[TMP0]]
309*207e5cccSFangrui Song // CHECK-ASM:     fneg s{{[0-9]+}}, s{{[0-9]+}}
310*207e5cccSFangrui Song // UNCONSTRAINED: [[SUB:%.*]]  = fptrunc float [[TMP1]] to half
311*207e5cccSFangrui Song // CONSTRAINED:   [[SUB:%.*]]  = call half @llvm.experimental.constrained.fptrunc.f16.f32(float [[TMP1]], metadata !"round.tonearest", metadata !"fpexcept.strict")
312*207e5cccSFangrui Song // CHECK-ASM:     fcvt h{{[0-9]+}}, s{{[0-9]+}}
313*207e5cccSFangrui Song // COMMONIR:      [[EXTR:%.*]] = extractelement <4 x half> %c, i32 3
314*207e5cccSFangrui Song // UNCONSTRAINED: [[FMA:%.*]]  = call half @llvm.fma.f16(half [[SUB]], half [[EXTR]], half %a)
315*207e5cccSFangrui Song // CONSTRAINED:   [[FMA:%.*]]  = call half @llvm.experimental.constrained.fma.f16(half [[SUB]], half [[EXTR]], half %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
316*207e5cccSFangrui Song // CHECK-ASM:     fmla h{{[0-9]+}}, h{{[0-9]+}}, v{{[0-9]+}}.h[{{[0-9]+}}]
317*207e5cccSFangrui Song // COMMONIR:      ret half [[FMA]]
// Calls the scalar vfmsh_lane_f16 with lane index 3. The expected IR negates
// b by extending it to float, applying fneg and truncating back to half,
// then calls fma.f16 with lane 3 of c. The expected asm is
// fcvt / fneg / fcvt followed by an indexed fmla (not fmls).
318*207e5cccSFangrui Song float16_t test_vfmsh_lane_f16(float16_t a, float16_t b, float16x4_t c) {
319*207e5cccSFangrui Song   return vfmsh_lane_f16(a, b, c, 3);
320*207e5cccSFangrui Song }
321*207e5cccSFangrui Song 
322*207e5cccSFangrui Song // COMMON-LABEL: test_vfmsh_laneq_f16
323*207e5cccSFangrui Song // UNCONSTRAINED: [[TMP0:%.*]] = fpext half %b to float
324*207e5cccSFangrui Song // CONSTRAINED:   [[TMP0:%.*]] = call float @llvm.experimental.constrained.fpext.f32.f16(half %b, metadata !"fpexcept.strict")
325*207e5cccSFangrui Song // CHECK-ASM:     fcvt s{{[0-9]+}}, h{{[0-9]+}}
326*207e5cccSFangrui Song // COMMONIR:      [[TMP1:%.*]] = fneg float [[TMP0]]
327*207e5cccSFangrui Song // CHECK-ASM:     fneg s{{[0-9]+}}, s{{[0-9]+}}
328*207e5cccSFangrui Song // UNCONSTRAINED: [[SUB:%.*]]  = fptrunc float [[TMP1]] to half
329*207e5cccSFangrui Song // CONSTRAINED:   [[SUB:%.*]]  = call half @llvm.experimental.constrained.fptrunc.f16.f32(float [[TMP1]], metadata !"round.tonearest", metadata !"fpexcept.strict")
330*207e5cccSFangrui Song // CHECK-ASM:     fcvt h{{[0-9]+}}, s{{[0-9]+}}
331*207e5cccSFangrui Song // COMMONIR:      [[EXTR:%.*]] = extractelement <8 x half> %c, i32 7
332*207e5cccSFangrui Song // UNCONSTRAINED: [[FMA:%.*]]  = call half @llvm.fma.f16(half [[SUB]], half [[EXTR]], half %a)
333*207e5cccSFangrui Song // CONSTRAINED:   [[FMA:%.*]]  = call half @llvm.experimental.constrained.fma.f16(half [[SUB]], half [[EXTR]], half %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
334*207e5cccSFangrui Song // CHECK-ASM:     fmla h{{[0-9]+}}, h{{[0-9]+}}, v{{[0-9]+}}.h[{{[0-9]+}}]
335*207e5cccSFangrui Song // COMMONIR:      ret half [[FMA]]
// Calls the scalar vfmsh_laneq_f16 with lane index 7. The expected IR negates
// b by extending it to float, applying fneg and truncating back to half,
// then calls fma.f16 with lane 7 of the 8-lane c. The expected asm is
// fcvt / fneg / fcvt followed by an indexed fmla (not fmls).
336*207e5cccSFangrui Song float16_t test_vfmsh_laneq_f16(float16_t a, float16_t b, float16x8_t c) {
337*207e5cccSFangrui Song   return vfmsh_laneq_f16(a, b, c, 7);
338*207e5cccSFangrui Song }
339