xref: /llvm-project/llvm/test/CodeGen/NVPTX/fexp2.ll (revision 3ba339b5e70231985b2e3f966dd80aa65cfeee1b)
1*3ba339b5SPrinceton Ferro; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2*3ba339b5SPrinceton Ferro; RUN: llc < %s -mcpu=sm_50 -mattr=+ptx32 | FileCheck --check-prefixes=CHECK %s
3*3ba339b5SPrinceton Ferro; RUN: llc < %s -mcpu=sm_75 -mattr=+ptx70 | FileCheck --check-prefixes=CHECK-FP16 %s
4*3ba339b5SPrinceton Ferro; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx78 | FileCheck --check-prefixes=CHECK-BF16 %s
5*3ba339b5SPrinceton Ferro; RUN: %if ptxas-12.0 %{ llc < %s -mcpu=sm_50 -mattr=+ptx32 | %ptxas-verify -arch=sm_50 %}
6*3ba339b5SPrinceton Ferro; RUN: %if ptxas-12.0 %{ llc < %s -mcpu=sm_75 -mattr=+ptx70 | %ptxas-verify -arch=sm_75 %}
7*3ba339b5SPrinceton Ferro; RUN: %if ptxas-12.0 %{ llc < %s -mcpu=sm_90 -mattr=+ptx78 | %ptxas-verify -arch=sm_90 %}
; The module is compiled for three targets (sm_50/ptx32, sm_75/ptx70 and
; sm_90/ptx78), and each output is verified with its own FileCheck prefix.
; When the ptxas-12.0 feature is available, the same three outputs are also
; piped to the ptxas-verify substitution for the matching architecture.
8*3ba339b5SPrinceton Ferrotarget triple = "nvptx64-nvidia-cuda"
9*3ba339b5SPrinceton Ferro
10*3ba339b5SPrinceton Ferro; --- f32 ---
11*3ba339b5SPrinceton Ferro
12*3ba339b5SPrinceton Ferro; CHECK-LABEL: exp2_test
; Scalar f32 llvm.exp2 without any function attributes.
; The expectations are the same for all three targets: load the parameter,
; issue a single ex2.approx.f32, and store the result to func_retval0.
13*3ba339b5SPrinceton Ferrodefine float @exp2_test(float %in) {
14*3ba339b5SPrinceton Ferro; CHECK-LABEL: exp2_test(
15*3ba339b5SPrinceton Ferro; CHECK:       {
16*3ba339b5SPrinceton Ferro; CHECK-NEXT:    .reg .f32 %f<3>;
17*3ba339b5SPrinceton Ferro; CHECK-EMPTY:
18*3ba339b5SPrinceton Ferro; CHECK-NEXT:  // %bb.0: // %entry
19*3ba339b5SPrinceton Ferro; CHECK-NEXT:    ld.param.f32 %f1, [exp2_test_param_0];
20*3ba339b5SPrinceton Ferro; CHECK-NEXT:    ex2.approx.f32 %f2, %f1;
21*3ba339b5SPrinceton Ferro; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
22*3ba339b5SPrinceton Ferro; CHECK-NEXT:    ret;
23*3ba339b5SPrinceton Ferro;
24*3ba339b5SPrinceton Ferro; CHECK-FP16-LABEL: exp2_test(
25*3ba339b5SPrinceton Ferro; CHECK-FP16:       {
26*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    .reg .f32 %f<3>;
27*3ba339b5SPrinceton Ferro; CHECK-FP16-EMPTY:
28*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:  // %bb.0: // %entry
29*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    ld.param.f32 %f1, [exp2_test_param_0];
30*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    ex2.approx.f32 %f2, %f1;
31*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    st.param.f32 [func_retval0], %f2;
32*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    ret;
33*3ba339b5SPrinceton Ferro;
34*3ba339b5SPrinceton Ferro; CHECK-BF16-LABEL: exp2_test(
35*3ba339b5SPrinceton Ferro; CHECK-BF16:       {
36*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    .reg .f32 %f<3>;
37*3ba339b5SPrinceton Ferro; CHECK-BF16-EMPTY:
38*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:  // %bb.0: // %entry
39*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    ld.param.f32 %f1, [exp2_test_param_0];
40*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    ex2.approx.f32 %f2, %f1;
41*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    st.param.f32 [func_retval0], %f2;
42*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    ret;
43*3ba339b5SPrinceton Ferroentry:
44*3ba339b5SPrinceton Ferro  %exp2 = call float @llvm.exp2.f32(float %in)
45*3ba339b5SPrinceton Ferro  ret float %exp2
46*3ba339b5SPrinceton Ferro}
47*3ba339b5SPrinceton Ferro
48*3ba339b5SPrinceton Ferro; CHECK-LABEL: exp2_ftz_test
; Scalar f32 llvm.exp2 in a function carrying attribute group #0
; ("denormal-fp-math"="preserve-sign", defined at the end of this file).
; All three targets are expected to emit ex2.approx.ftz.f32 instead of the
; plain ex2.approx.f32 form.
49*3ba339b5SPrinceton Ferrodefine float @exp2_ftz_test(float %in) #0 {
50*3ba339b5SPrinceton Ferro; CHECK-LABEL: exp2_ftz_test(
51*3ba339b5SPrinceton Ferro; CHECK:       {
52*3ba339b5SPrinceton Ferro; CHECK-NEXT:    .reg .f32 %f<3>;
53*3ba339b5SPrinceton Ferro; CHECK-EMPTY:
54*3ba339b5SPrinceton Ferro; CHECK-NEXT:  // %bb.0: // %entry
55*3ba339b5SPrinceton Ferro; CHECK-NEXT:    ld.param.f32 %f1, [exp2_ftz_test_param_0];
56*3ba339b5SPrinceton Ferro; CHECK-NEXT:    ex2.approx.ftz.f32 %f2, %f1;
57*3ba339b5SPrinceton Ferro; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
58*3ba339b5SPrinceton Ferro; CHECK-NEXT:    ret;
59*3ba339b5SPrinceton Ferro;
60*3ba339b5SPrinceton Ferro; CHECK-FP16-LABEL: exp2_ftz_test(
61*3ba339b5SPrinceton Ferro; CHECK-FP16:       {
62*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    .reg .f32 %f<3>;
63*3ba339b5SPrinceton Ferro; CHECK-FP16-EMPTY:
64*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:  // %bb.0: // %entry
65*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    ld.param.f32 %f1, [exp2_ftz_test_param_0];
66*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    ex2.approx.ftz.f32 %f2, %f1;
67*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    st.param.f32 [func_retval0], %f2;
68*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    ret;
69*3ba339b5SPrinceton Ferro;
70*3ba339b5SPrinceton Ferro; CHECK-BF16-LABEL: exp2_ftz_test(
71*3ba339b5SPrinceton Ferro; CHECK-BF16:       {
72*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    .reg .f32 %f<3>;
73*3ba339b5SPrinceton Ferro; CHECK-BF16-EMPTY:
74*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:  // %bb.0: // %entry
75*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    ld.param.f32 %f1, [exp2_ftz_test_param_0];
76*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    ex2.approx.ftz.f32 %f2, %f1;
77*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    st.param.f32 [func_retval0], %f2;
78*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    ret;
79*3ba339b5SPrinceton Ferroentry:
80*3ba339b5SPrinceton Ferro  %exp2 = call float @llvm.exp2.f32(float %in)
81*3ba339b5SPrinceton Ferro  ret float %exp2
82*3ba339b5SPrinceton Ferro}
83*3ba339b5SPrinceton Ferro
84*3ba339b5SPrinceton Ferro; CHECK-LABEL: exp2_test_v
; <2 x float> llvm.exp2.
; On every target the two lanes are expected to be loaded with one
; ld.param.v2.f32, processed by two separate scalar ex2.approx.f32
; instructions, and stored back with one st.param.v2.f32.
85*3ba339b5SPrinceton Ferrodefine <2 x float> @exp2_test_v(<2 x float> %in) {
86*3ba339b5SPrinceton Ferro; CHECK-LABEL: exp2_test_v(
87*3ba339b5SPrinceton Ferro; CHECK:       {
88*3ba339b5SPrinceton Ferro; CHECK-NEXT:    .reg .f32 %f<5>;
89*3ba339b5SPrinceton Ferro; CHECK-EMPTY:
90*3ba339b5SPrinceton Ferro; CHECK-NEXT:  // %bb.0: // %entry
91*3ba339b5SPrinceton Ferro; CHECK-NEXT:    ld.param.v2.f32 {%f1, %f2}, [exp2_test_v_param_0];
92*3ba339b5SPrinceton Ferro; CHECK-NEXT:    ex2.approx.f32 %f3, %f2;
93*3ba339b5SPrinceton Ferro; CHECK-NEXT:    ex2.approx.f32 %f4, %f1;
94*3ba339b5SPrinceton Ferro; CHECK-NEXT:    st.param.v2.f32 [func_retval0], {%f4, %f3};
95*3ba339b5SPrinceton Ferro; CHECK-NEXT:    ret;
96*3ba339b5SPrinceton Ferro;
97*3ba339b5SPrinceton Ferro; CHECK-FP16-LABEL: exp2_test_v(
98*3ba339b5SPrinceton Ferro; CHECK-FP16:       {
99*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    .reg .f32 %f<5>;
100*3ba339b5SPrinceton Ferro; CHECK-FP16-EMPTY:
101*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:  // %bb.0: // %entry
102*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    ld.param.v2.f32 {%f1, %f2}, [exp2_test_v_param_0];
103*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    ex2.approx.f32 %f3, %f2;
104*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    ex2.approx.f32 %f4, %f1;
105*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    st.param.v2.f32 [func_retval0], {%f4, %f3};
106*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    ret;
107*3ba339b5SPrinceton Ferro;
108*3ba339b5SPrinceton Ferro; CHECK-BF16-LABEL: exp2_test_v(
109*3ba339b5SPrinceton Ferro; CHECK-BF16:       {
110*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    .reg .f32 %f<5>;
111*3ba339b5SPrinceton Ferro; CHECK-BF16-EMPTY:
112*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:  // %bb.0: // %entry
113*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    ld.param.v2.f32 {%f1, %f2}, [exp2_test_v_param_0];
114*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    ex2.approx.f32 %f3, %f2;
115*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    ex2.approx.f32 %f4, %f1;
116*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    st.param.v2.f32 [func_retval0], {%f4, %f3};
117*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    ret;
118*3ba339b5SPrinceton Ferroentry:
119*3ba339b5SPrinceton Ferro  %exp2 = call <2 x float> @llvm.exp2.v2f32(<2 x float> %in)
120*3ba339b5SPrinceton Ferro  ret <2 x float> %exp2
121*3ba339b5SPrinceton Ferro}
122*3ba339b5SPrinceton Ferro
123*3ba339b5SPrinceton Ferro; --- f16 ---
124*3ba339b5SPrinceton Ferro
125*3ba339b5SPrinceton Ferro; CHECK-LABEL: exp2_f16_test
; Scalar f16 llvm.exp2 without function attributes.
; sm_50: the value is expected to be converted to f32 (cvt.f32.f16), run
;        through ex2.approx.f32, and converted back with cvt.rn.f16.f32.
; sm_75 and sm_90: a single ex2.approx.f16 on the 16-bit register is expected,
;        with no f32 registers declared.
126*3ba339b5SPrinceton Ferrodefine half @exp2_f16_test(half %in) {
127*3ba339b5SPrinceton Ferro; CHECK-LABEL: exp2_f16_test(
128*3ba339b5SPrinceton Ferro; CHECK:       {
129*3ba339b5SPrinceton Ferro; CHECK-NEXT:    .reg .b16 %rs<3>;
130*3ba339b5SPrinceton Ferro; CHECK-NEXT:    .reg .f32 %f<3>;
131*3ba339b5SPrinceton Ferro; CHECK-EMPTY:
132*3ba339b5SPrinceton Ferro; CHECK-NEXT:  // %bb.0: // %entry
133*3ba339b5SPrinceton Ferro; CHECK-NEXT:    ld.param.b16 %rs1, [exp2_f16_test_param_0];
134*3ba339b5SPrinceton Ferro; CHECK-NEXT:    cvt.f32.f16 %f1, %rs1;
135*3ba339b5SPrinceton Ferro; CHECK-NEXT:    ex2.approx.f32 %f2, %f1;
136*3ba339b5SPrinceton Ferro; CHECK-NEXT:    cvt.rn.f16.f32 %rs2, %f2;
137*3ba339b5SPrinceton Ferro; CHECK-NEXT:    st.param.b16 [func_retval0], %rs2;
138*3ba339b5SPrinceton Ferro; CHECK-NEXT:    ret;
139*3ba339b5SPrinceton Ferro;
140*3ba339b5SPrinceton Ferro; CHECK-FP16-LABEL: exp2_f16_test(
141*3ba339b5SPrinceton Ferro; CHECK-FP16:       {
142*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    .reg .b16 %rs<3>;
143*3ba339b5SPrinceton Ferro; CHECK-FP16-EMPTY:
144*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:  // %bb.0: // %entry
145*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    ld.param.b16 %rs1, [exp2_f16_test_param_0];
146*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    ex2.approx.f16 %rs2, %rs1;
147*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    st.param.b16 [func_retval0], %rs2;
148*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    ret;
149*3ba339b5SPrinceton Ferro;
150*3ba339b5SPrinceton Ferro; CHECK-BF16-LABEL: exp2_f16_test(
151*3ba339b5SPrinceton Ferro; CHECK-BF16:       {
152*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    .reg .b16 %rs<3>;
153*3ba339b5SPrinceton Ferro; CHECK-BF16-EMPTY:
154*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:  // %bb.0: // %entry
155*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    ld.param.b16 %rs1, [exp2_f16_test_param_0];
156*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    ex2.approx.f16 %rs2, %rs1;
157*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    st.param.b16 [func_retval0], %rs2;
158*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    ret;
159*3ba339b5SPrinceton Ferroentry:
160*3ba339b5SPrinceton Ferro  %exp2 = call half @llvm.exp2.f16(half %in)
161*3ba339b5SPrinceton Ferro  ret half %exp2
162*3ba339b5SPrinceton Ferro}
163*3ba339b5SPrinceton Ferro
164*3ba339b5SPrinceton Ferro; COM: the native f16 ex2 should never carry .ftz; only the sm_50 fallback through f32 uses .ftz
165*3ba339b5SPrinceton Ferro; CHECK-LABEL: exp2_f16_ftz_test
; Scalar f16 llvm.exp2 in a function carrying attribute group #0
; ("denormal-fp-math"="preserve-sign", defined at the end of this file).
; sm_50: the f32 round trip is expected to use the .ftz forms
;        (cvt.ftz.f32.f16 and ex2.approx.ftz.f32) before cvt.rn.f16.f32.
; sm_75 and sm_90: the expected instruction is still plain ex2.approx.f16,
;        i.e. the attribute does not add .ftz to the f16 instruction.
166*3ba339b5SPrinceton Ferrodefine half @exp2_f16_ftz_test(half %in) #0 {
167*3ba339b5SPrinceton Ferro; CHECK-LABEL: exp2_f16_ftz_test(
168*3ba339b5SPrinceton Ferro; CHECK:       {
169*3ba339b5SPrinceton Ferro; CHECK-NEXT:    .reg .b16 %rs<3>;
170*3ba339b5SPrinceton Ferro; CHECK-NEXT:    .reg .f32 %f<3>;
171*3ba339b5SPrinceton Ferro; CHECK-EMPTY:
172*3ba339b5SPrinceton Ferro; CHECK-NEXT:  // %bb.0: // %entry
173*3ba339b5SPrinceton Ferro; CHECK-NEXT:    ld.param.b16 %rs1, [exp2_f16_ftz_test_param_0];
174*3ba339b5SPrinceton Ferro; CHECK-NEXT:    cvt.ftz.f32.f16 %f1, %rs1;
175*3ba339b5SPrinceton Ferro; CHECK-NEXT:    ex2.approx.ftz.f32 %f2, %f1;
176*3ba339b5SPrinceton Ferro; CHECK-NEXT:    cvt.rn.f16.f32 %rs2, %f2;
177*3ba339b5SPrinceton Ferro; CHECK-NEXT:    st.param.b16 [func_retval0], %rs2;
178*3ba339b5SPrinceton Ferro; CHECK-NEXT:    ret;
179*3ba339b5SPrinceton Ferro;
180*3ba339b5SPrinceton Ferro; CHECK-FP16-LABEL: exp2_f16_ftz_test(
181*3ba339b5SPrinceton Ferro; CHECK-FP16:       {
182*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    .reg .b16 %rs<3>;
183*3ba339b5SPrinceton Ferro; CHECK-FP16-EMPTY:
184*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:  // %bb.0: // %entry
185*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    ld.param.b16 %rs1, [exp2_f16_ftz_test_param_0];
186*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    ex2.approx.f16 %rs2, %rs1;
187*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    st.param.b16 [func_retval0], %rs2;
188*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    ret;
189*3ba339b5SPrinceton Ferro;
190*3ba339b5SPrinceton Ferro; CHECK-BF16-LABEL: exp2_f16_ftz_test(
191*3ba339b5SPrinceton Ferro; CHECK-BF16:       {
192*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    .reg .b16 %rs<3>;
193*3ba339b5SPrinceton Ferro; CHECK-BF16-EMPTY:
194*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:  // %bb.0: // %entry
195*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    ld.param.b16 %rs1, [exp2_f16_ftz_test_param_0];
196*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    ex2.approx.f16 %rs2, %rs1;
197*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    st.param.b16 [func_retval0], %rs2;
198*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    ret;
199*3ba339b5SPrinceton Ferroentry:
200*3ba339b5SPrinceton Ferro  %exp2 = call half @llvm.exp2.f16(half %in)
201*3ba339b5SPrinceton Ferro  ret half %exp2
202*3ba339b5SPrinceton Ferro}
203*3ba339b5SPrinceton Ferro
204*3ba339b5SPrinceton Ferro; CHECK-LABEL: exp2_f16_test_v
; <2 x half> llvm.exp2.
; sm_50: the 32-bit parameter is expected to be split into two 16-bit lanes,
;        each lane converted to f32, passed through ex2.approx.f32 and
;        converted back, then the two results repacked into one b32.
; sm_75 and sm_90: a single packed ex2.approx.f16x2 on the b32 register is
;        expected.
205*3ba339b5SPrinceton Ferrodefine <2 x half> @exp2_f16_test_v(<2 x half> %in) {
206*3ba339b5SPrinceton Ferro; CHECK-LABEL: exp2_f16_test_v(
207*3ba339b5SPrinceton Ferro; CHECK:       {
208*3ba339b5SPrinceton Ferro; CHECK-NEXT:    .reg .b16 %rs<5>;
209*3ba339b5SPrinceton Ferro; CHECK-NEXT:    .reg .b32 %r<3>;
210*3ba339b5SPrinceton Ferro; CHECK-NEXT:    .reg .f32 %f<5>;
211*3ba339b5SPrinceton Ferro; CHECK-EMPTY:
212*3ba339b5SPrinceton Ferro; CHECK-NEXT:  // %bb.0: // %entry
213*3ba339b5SPrinceton Ferro; CHECK-NEXT:    ld.param.b32 %r1, [exp2_f16_test_v_param_0];
214*3ba339b5SPrinceton Ferro; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
215*3ba339b5SPrinceton Ferro; CHECK-NEXT:    cvt.f32.f16 %f1, %rs2;
216*3ba339b5SPrinceton Ferro; CHECK-NEXT:    ex2.approx.f32 %f2, %f1;
217*3ba339b5SPrinceton Ferro; CHECK-NEXT:    cvt.rn.f16.f32 %rs3, %f2;
218*3ba339b5SPrinceton Ferro; CHECK-NEXT:    cvt.f32.f16 %f3, %rs1;
219*3ba339b5SPrinceton Ferro; CHECK-NEXT:    ex2.approx.f32 %f4, %f3;
220*3ba339b5SPrinceton Ferro; CHECK-NEXT:    cvt.rn.f16.f32 %rs4, %f4;
221*3ba339b5SPrinceton Ferro; CHECK-NEXT:    mov.b32 %r2, {%rs4, %rs3};
222*3ba339b5SPrinceton Ferro; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
223*3ba339b5SPrinceton Ferro; CHECK-NEXT:    ret;
224*3ba339b5SPrinceton Ferro;
225*3ba339b5SPrinceton Ferro; CHECK-FP16-LABEL: exp2_f16_test_v(
226*3ba339b5SPrinceton Ferro; CHECK-FP16:       {
227*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    .reg .b32 %r<3>;
228*3ba339b5SPrinceton Ferro; CHECK-FP16-EMPTY:
229*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:  // %bb.0: // %entry
230*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    ld.param.b32 %r1, [exp2_f16_test_v_param_0];
231*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    ex2.approx.f16x2 %r2, %r1;
232*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    st.param.b32 [func_retval0], %r2;
233*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    ret;
234*3ba339b5SPrinceton Ferro;
235*3ba339b5SPrinceton Ferro; CHECK-BF16-LABEL: exp2_f16_test_v(
236*3ba339b5SPrinceton Ferro; CHECK-BF16:       {
237*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    .reg .b32 %r<3>;
238*3ba339b5SPrinceton Ferro; CHECK-BF16-EMPTY:
239*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:  // %bb.0: // %entry
240*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    ld.param.b32 %r1, [exp2_f16_test_v_param_0];
241*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    ex2.approx.f16x2 %r2, %r1;
242*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    st.param.b32 [func_retval0], %r2;
243*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    ret;
244*3ba339b5SPrinceton Ferroentry:
245*3ba339b5SPrinceton Ferro  %exp2 = call <2 x half> @llvm.exp2.v2f16(<2 x half> %in)
246*3ba339b5SPrinceton Ferro  ret <2 x half> %exp2
247*3ba339b5SPrinceton Ferro}
248*3ba339b5SPrinceton Ferro
249*3ba339b5SPrinceton Ferro; --- bf16 ---
250*3ba339b5SPrinceton Ferro
251*3ba339b5SPrinceton Ferro; COM: the native bf16 ex2 should always carry .ftz; the f32 fallback (sm_50, sm_75) does not
252*3ba339b5SPrinceton Ferro; CHECK-LABEL: exp2_bf16_test
; Scalar bfloat llvm.exp2 without function attributes.
; sm_50 and sm_75 (identical expectations): the 16-bit input is loaded into a
;        32-bit register, shifted left by 16 and moved into an f32 register,
;        then ex2.approx.f32 is applied. The f32 result is narrowed back to
;        16 bits by an integer sequence (bfe/add/add 32767, with a setp.nan +
;        selp path that ORs in 4194304), and the upper half is stored.
;        NOTE(review): this looks like round-to-nearest-even with NaN
;        quieting - confirm against the NVPTX lowering.
; sm_90: a single ex2.approx.ftz.bf16 on the 16-bit register is expected.
253*3ba339b5SPrinceton Ferrodefine bfloat @exp2_bf16_test(bfloat %in) {
254*3ba339b5SPrinceton Ferro; CHECK-LABEL: exp2_bf16_test(
255*3ba339b5SPrinceton Ferro; CHECK:       {
256*3ba339b5SPrinceton Ferro; CHECK-NEXT:    .reg .pred %p<2>;
257*3ba339b5SPrinceton Ferro; CHECK-NEXT:    .reg .b16 %rs<2>;
258*3ba339b5SPrinceton Ferro; CHECK-NEXT:    .reg .b32 %r<9>;
259*3ba339b5SPrinceton Ferro; CHECK-NEXT:    .reg .f32 %f<3>;
260*3ba339b5SPrinceton Ferro; CHECK-EMPTY:
261*3ba339b5SPrinceton Ferro; CHECK-NEXT:  // %bb.0: // %entry
262*3ba339b5SPrinceton Ferro; CHECK-NEXT:    ld.param.u16 %r1, [exp2_bf16_test_param_0];
263*3ba339b5SPrinceton Ferro; CHECK-NEXT:    shl.b32 %r2, %r1, 16;
264*3ba339b5SPrinceton Ferro; CHECK-NEXT:    mov.b32 %f1, %r2;
265*3ba339b5SPrinceton Ferro; CHECK-NEXT:    ex2.approx.f32 %f2, %f1;
266*3ba339b5SPrinceton Ferro; CHECK-NEXT:    mov.b32 %r3, %f2;
267*3ba339b5SPrinceton Ferro; CHECK-NEXT:    bfe.u32 %r4, %r3, 16, 1;
268*3ba339b5SPrinceton Ferro; CHECK-NEXT:    add.s32 %r5, %r4, %r3;
269*3ba339b5SPrinceton Ferro; CHECK-NEXT:    add.s32 %r6, %r5, 32767;
270*3ba339b5SPrinceton Ferro; CHECK-NEXT:    setp.nan.f32 %p1, %f2, %f2;
271*3ba339b5SPrinceton Ferro; CHECK-NEXT:    or.b32 %r7, %r3, 4194304;
272*3ba339b5SPrinceton Ferro; CHECK-NEXT:    selp.b32 %r8, %r7, %r6, %p1;
273*3ba339b5SPrinceton Ferro; CHECK-NEXT:    { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r8; }
274*3ba339b5SPrinceton Ferro; CHECK-NEXT:    st.param.b16 [func_retval0], %rs1;
275*3ba339b5SPrinceton Ferro; CHECK-NEXT:    ret;
276*3ba339b5SPrinceton Ferro;
277*3ba339b5SPrinceton Ferro; CHECK-FP16-LABEL: exp2_bf16_test(
278*3ba339b5SPrinceton Ferro; CHECK-FP16:       {
279*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    .reg .pred %p<2>;
280*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    .reg .b16 %rs<2>;
281*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    .reg .b32 %r<9>;
282*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    .reg .f32 %f<3>;
283*3ba339b5SPrinceton Ferro; CHECK-FP16-EMPTY:
284*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:  // %bb.0: // %entry
285*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    ld.param.u16 %r1, [exp2_bf16_test_param_0];
286*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    shl.b32 %r2, %r1, 16;
287*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    mov.b32 %f1, %r2;
288*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    ex2.approx.f32 %f2, %f1;
289*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    mov.b32 %r3, %f2;
290*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    bfe.u32 %r4, %r3, 16, 1;
291*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    add.s32 %r5, %r4, %r3;
292*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    add.s32 %r6, %r5, 32767;
293*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    setp.nan.f32 %p1, %f2, %f2;
294*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    or.b32 %r7, %r3, 4194304;
295*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    selp.b32 %r8, %r7, %r6, %p1;
296*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r8; }
297*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    st.param.b16 [func_retval0], %rs1;
298*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    ret;
299*3ba339b5SPrinceton Ferro;
300*3ba339b5SPrinceton Ferro; CHECK-BF16-LABEL: exp2_bf16_test(
301*3ba339b5SPrinceton Ferro; CHECK-BF16:       {
302*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    .reg .b16 %rs<3>;
303*3ba339b5SPrinceton Ferro; CHECK-BF16-EMPTY:
304*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:  // %bb.0: // %entry
305*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    ld.param.b16 %rs1, [exp2_bf16_test_param_0];
306*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    ex2.approx.ftz.bf16 %rs2, %rs1;
307*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    st.param.b16 [func_retval0], %rs2;
308*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    ret;
309*3ba339b5SPrinceton Ferroentry:
310*3ba339b5SPrinceton Ferro  %exp2 = call bfloat @llvm.exp2.bf16(bfloat %in)
311*3ba339b5SPrinceton Ferro  ret bfloat %exp2
312*3ba339b5SPrinceton Ferro}
313*3ba339b5SPrinceton Ferro
314*3ba339b5SPrinceton Ferro; CHECK-LABEL: exp2_bf16_test_v
; <2 x bfloat> llvm.exp2.
; sm_50 and sm_75 (identical expectations): the 32-bit parameter is split into
;        two 16-bit lanes; each lane is widened (cvt.u32.u16, shl by 16,
;        mov.b32 into an f32 register), passed through ex2.approx.f32, and
;        narrowed by the same bfe/add/setp.nan/or/selp integer sequence as the
;        scalar bfloat case. The two 32-bit results are then combined with
;        prmt.b32 using selector 0x7632U.
;        NOTE(review): the selector presumably picks the upper 16 bits of each
;        result - confirm against the PTX prmt definition.
; sm_90: a single packed ex2.approx.ftz.bf16x2 on the b32 register is expected.
315*3ba339b5SPrinceton Ferrodefine <2 x bfloat> @exp2_bf16_test_v(<2 x bfloat> %in) {
316*3ba339b5SPrinceton Ferro; CHECK-LABEL: exp2_bf16_test_v(
317*3ba339b5SPrinceton Ferro; CHECK:       {
318*3ba339b5SPrinceton Ferro; CHECK-NEXT:    .reg .pred %p<3>;
319*3ba339b5SPrinceton Ferro; CHECK-NEXT:    .reg .b16 %rs<3>;
320*3ba339b5SPrinceton Ferro; CHECK-NEXT:    .reg .b32 %r<19>;
321*3ba339b5SPrinceton Ferro; CHECK-NEXT:    .reg .f32 %f<5>;
322*3ba339b5SPrinceton Ferro; CHECK-EMPTY:
323*3ba339b5SPrinceton Ferro; CHECK-NEXT:  // %bb.0: // %entry
324*3ba339b5SPrinceton Ferro; CHECK-NEXT:    ld.param.b32 %r1, [exp2_bf16_test_v_param_0];
325*3ba339b5SPrinceton Ferro; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
326*3ba339b5SPrinceton Ferro; CHECK-NEXT:    cvt.u32.u16 %r2, %rs2;
327*3ba339b5SPrinceton Ferro; CHECK-NEXT:    shl.b32 %r3, %r2, 16;
328*3ba339b5SPrinceton Ferro; CHECK-NEXT:    mov.b32 %f1, %r3;
329*3ba339b5SPrinceton Ferro; CHECK-NEXT:    ex2.approx.f32 %f2, %f1;
330*3ba339b5SPrinceton Ferro; CHECK-NEXT:    mov.b32 %r4, %f2;
331*3ba339b5SPrinceton Ferro; CHECK-NEXT:    bfe.u32 %r5, %r4, 16, 1;
332*3ba339b5SPrinceton Ferro; CHECK-NEXT:    add.s32 %r6, %r5, %r4;
333*3ba339b5SPrinceton Ferro; CHECK-NEXT:    add.s32 %r7, %r6, 32767;
334*3ba339b5SPrinceton Ferro; CHECK-NEXT:    setp.nan.f32 %p1, %f2, %f2;
335*3ba339b5SPrinceton Ferro; CHECK-NEXT:    or.b32 %r8, %r4, 4194304;
336*3ba339b5SPrinceton Ferro; CHECK-NEXT:    selp.b32 %r9, %r8, %r7, %p1;
337*3ba339b5SPrinceton Ferro; CHECK-NEXT:    cvt.u32.u16 %r10, %rs1;
338*3ba339b5SPrinceton Ferro; CHECK-NEXT:    shl.b32 %r11, %r10, 16;
339*3ba339b5SPrinceton Ferro; CHECK-NEXT:    mov.b32 %f3, %r11;
340*3ba339b5SPrinceton Ferro; CHECK-NEXT:    ex2.approx.f32 %f4, %f3;
341*3ba339b5SPrinceton Ferro; CHECK-NEXT:    mov.b32 %r12, %f4;
342*3ba339b5SPrinceton Ferro; CHECK-NEXT:    bfe.u32 %r13, %r12, 16, 1;
343*3ba339b5SPrinceton Ferro; CHECK-NEXT:    add.s32 %r14, %r13, %r12;
344*3ba339b5SPrinceton Ferro; CHECK-NEXT:    add.s32 %r15, %r14, 32767;
345*3ba339b5SPrinceton Ferro; CHECK-NEXT:    setp.nan.f32 %p2, %f4, %f4;
346*3ba339b5SPrinceton Ferro; CHECK-NEXT:    or.b32 %r16, %r12, 4194304;
347*3ba339b5SPrinceton Ferro; CHECK-NEXT:    selp.b32 %r17, %r16, %r15, %p2;
348*3ba339b5SPrinceton Ferro; CHECK-NEXT:    prmt.b32 %r18, %r17, %r9, 0x7632U;
349*3ba339b5SPrinceton Ferro; CHECK-NEXT:    st.param.b32 [func_retval0], %r18;
350*3ba339b5SPrinceton Ferro; CHECK-NEXT:    ret;
351*3ba339b5SPrinceton Ferro;
352*3ba339b5SPrinceton Ferro; CHECK-FP16-LABEL: exp2_bf16_test_v(
353*3ba339b5SPrinceton Ferro; CHECK-FP16:       {
354*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    .reg .pred %p<3>;
355*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    .reg .b16 %rs<3>;
356*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    .reg .b32 %r<19>;
357*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    .reg .f32 %f<5>;
358*3ba339b5SPrinceton Ferro; CHECK-FP16-EMPTY:
359*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:  // %bb.0: // %entry
360*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    ld.param.b32 %r1, [exp2_bf16_test_v_param_0];
361*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
362*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    cvt.u32.u16 %r2, %rs2;
363*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    shl.b32 %r3, %r2, 16;
364*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    mov.b32 %f1, %r3;
365*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    ex2.approx.f32 %f2, %f1;
366*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    mov.b32 %r4, %f2;
367*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    bfe.u32 %r5, %r4, 16, 1;
368*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    add.s32 %r6, %r5, %r4;
369*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    add.s32 %r7, %r6, 32767;
370*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    setp.nan.f32 %p1, %f2, %f2;
371*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    or.b32 %r8, %r4, 4194304;
372*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    selp.b32 %r9, %r8, %r7, %p1;
373*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    cvt.u32.u16 %r10, %rs1;
374*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    shl.b32 %r11, %r10, 16;
375*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    mov.b32 %f3, %r11;
376*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    ex2.approx.f32 %f4, %f3;
377*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    mov.b32 %r12, %f4;
378*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    bfe.u32 %r13, %r12, 16, 1;
379*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    add.s32 %r14, %r13, %r12;
380*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    add.s32 %r15, %r14, 32767;
381*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    setp.nan.f32 %p2, %f4, %f4;
382*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    or.b32 %r16, %r12, 4194304;
383*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    selp.b32 %r17, %r16, %r15, %p2;
384*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    prmt.b32 %r18, %r17, %r9, 0x7632U;
385*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    st.param.b32 [func_retval0], %r18;
386*3ba339b5SPrinceton Ferro; CHECK-FP16-NEXT:    ret;
387*3ba339b5SPrinceton Ferro;
388*3ba339b5SPrinceton Ferro; CHECK-BF16-LABEL: exp2_bf16_test_v(
389*3ba339b5SPrinceton Ferro; CHECK-BF16:       {
390*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    .reg .b32 %r<3>;
391*3ba339b5SPrinceton Ferro; CHECK-BF16-EMPTY:
392*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:  // %bb.0: // %entry
393*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    ld.param.b32 %r1, [exp2_bf16_test_v_param_0];
394*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    ex2.approx.ftz.bf16x2 %r2, %r1;
395*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    st.param.b32 [func_retval0], %r2;
396*3ba339b5SPrinceton Ferro; CHECK-BF16-NEXT:    ret;
397*3ba339b5SPrinceton Ferroentry:
398*3ba339b5SPrinceton Ferro  %exp2 = call <2 x bfloat> @llvm.exp2.v2bf16(<2 x bfloat> %in)
399*3ba339b5SPrinceton Ferro  ret <2 x bfloat> %exp2
400*3ba339b5SPrinceton Ferro}
401*3ba339b5SPrinceton Ferro
; Declarations of the llvm.exp2 intrinsic overloads called by the test
; functions in this file: scalar and 2-element vector forms for float, half
; and bfloat.
402*3ba339b5SPrinceton Ferrodeclare float @llvm.exp2.f32(float %val)
403*3ba339b5SPrinceton Ferro
404*3ba339b5SPrinceton Ferrodeclare <2 x float> @llvm.exp2.v2f32(<2 x float> %val)
405*3ba339b5SPrinceton Ferro
406*3ba339b5SPrinceton Ferrodeclare half @llvm.exp2.f16(half %val)
407*3ba339b5SPrinceton Ferro
408*3ba339b5SPrinceton Ferrodeclare <2 x half> @llvm.exp2.v2f16(<2 x half> %val)
409*3ba339b5SPrinceton Ferro
410*3ba339b5SPrinceton Ferrodeclare bfloat @llvm.exp2.bf16(bfloat %val)
411*3ba339b5SPrinceton Ferro
412*3ba339b5SPrinceton Ferrodeclare <2 x bfloat> @llvm.exp2.v2bf16(<2 x bfloat> %val)
413*3ba339b5SPrinceton Ferro
; Attribute group #0 is attached to exp2_ftz_test and exp2_f16_ftz_test; it
; sets "denormal-fp-math" to "preserve-sign", which is what makes the f32
; expectations in those functions use the .ftz instruction forms.
414*3ba339b5SPrinceton Ferroattributes #0 = {"denormal-fp-math"="preserve-sign"}
415