xref: /llvm-project/llvm/test/CodeGen/NVPTX/fexp2.ll (revision 3ba339b5e70231985b2e3f966dd80aa65cfeee1b)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc < %s -mcpu=sm_50 -mattr=+ptx32 | FileCheck --check-prefixes=CHECK %s
3; RUN: llc < %s -mcpu=sm_75 -mattr=+ptx70 | FileCheck --check-prefixes=CHECK-FP16 %s
4; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx78 | FileCheck --check-prefixes=CHECK-BF16 %s
5; RUN: %if ptxas-12.0 %{ llc < %s -mcpu=sm_50 -mattr=+ptx32 | %ptxas-verify -arch=sm_50 %}
6; RUN: %if ptxas-12.0 %{ llc < %s -mcpu=sm_75 -mattr=+ptx70 | %ptxas-verify -arch=sm_75 %}
7; RUN: %if ptxas-12.0 %{ llc < %s -mcpu=sm_90 -mattr=+ptx78 | %ptxas-verify -arch=sm_90 %}
8target triple = "nvptx64-nvidia-cuda"
9
10; --- f32 ---
11
; COM: Scalar f32 llvm.exp2 with no function attributes. All three RUN
; COM: configurations (sm_50/ptx32, sm_75/ptx70, sm_90/ptx78) expect the call
; COM: to become one ex2.approx.f32 with no .ftz modifier, bracketed only by
; COM: the parameter load and the return-value store.
12; CHECK-LABEL: exp2_test
13define float @exp2_test(float %in) {
14; CHECK-LABEL: exp2_test(
15; CHECK:       {
16; CHECK-NEXT:    .reg .f32 %f<3>;
17; CHECK-EMPTY:
18; CHECK-NEXT:  // %bb.0: // %entry
19; CHECK-NEXT:    ld.param.f32 %f1, [exp2_test_param_0];
20; CHECK-NEXT:    ex2.approx.f32 %f2, %f1;
21; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
22; CHECK-NEXT:    ret;
23;
24; CHECK-FP16-LABEL: exp2_test(
25; CHECK-FP16:       {
26; CHECK-FP16-NEXT:    .reg .f32 %f<3>;
27; CHECK-FP16-EMPTY:
28; CHECK-FP16-NEXT:  // %bb.0: // %entry
29; CHECK-FP16-NEXT:    ld.param.f32 %f1, [exp2_test_param_0];
30; CHECK-FP16-NEXT:    ex2.approx.f32 %f2, %f1;
31; CHECK-FP16-NEXT:    st.param.f32 [func_retval0], %f2;
32; CHECK-FP16-NEXT:    ret;
33;
34; CHECK-BF16-LABEL: exp2_test(
35; CHECK-BF16:       {
36; CHECK-BF16-NEXT:    .reg .f32 %f<3>;
37; CHECK-BF16-EMPTY:
38; CHECK-BF16-NEXT:  // %bb.0: // %entry
39; CHECK-BF16-NEXT:    ld.param.f32 %f1, [exp2_test_param_0];
40; CHECK-BF16-NEXT:    ex2.approx.f32 %f2, %f1;
41; CHECK-BF16-NEXT:    st.param.f32 [func_retval0], %f2;
42; CHECK-BF16-NEXT:    ret;
43entry:
44  %exp2 = call float @llvm.exp2.f32(float %in)
45  ret float %exp2
46}
47
; COM: Same scalar f32 llvm.exp2 call as exp2_test, but the function carries
; COM: attribute group #0 (defined at the end of this file as
; COM: "denormal-fp-math"="preserve-sign"). All three RUN configurations
; COM: expect the flush-to-zero form ex2.approx.ftz.f32.
48; CHECK-LABEL: exp2_ftz_test
49define float @exp2_ftz_test(float %in) #0 {
50; CHECK-LABEL: exp2_ftz_test(
51; CHECK:       {
52; CHECK-NEXT:    .reg .f32 %f<3>;
53; CHECK-EMPTY:
54; CHECK-NEXT:  // %bb.0: // %entry
55; CHECK-NEXT:    ld.param.f32 %f1, [exp2_ftz_test_param_0];
56; CHECK-NEXT:    ex2.approx.ftz.f32 %f2, %f1;
57; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
58; CHECK-NEXT:    ret;
59;
60; CHECK-FP16-LABEL: exp2_ftz_test(
61; CHECK-FP16:       {
62; CHECK-FP16-NEXT:    .reg .f32 %f<3>;
63; CHECK-FP16-EMPTY:
64; CHECK-FP16-NEXT:  // %bb.0: // %entry
65; CHECK-FP16-NEXT:    ld.param.f32 %f1, [exp2_ftz_test_param_0];
66; CHECK-FP16-NEXT:    ex2.approx.ftz.f32 %f2, %f1;
67; CHECK-FP16-NEXT:    st.param.f32 [func_retval0], %f2;
68; CHECK-FP16-NEXT:    ret;
69;
70; CHECK-BF16-LABEL: exp2_ftz_test(
71; CHECK-BF16:       {
72; CHECK-BF16-NEXT:    .reg .f32 %f<3>;
73; CHECK-BF16-EMPTY:
74; CHECK-BF16-NEXT:  // %bb.0: // %entry
75; CHECK-BF16-NEXT:    ld.param.f32 %f1, [exp2_ftz_test_param_0];
76; CHECK-BF16-NEXT:    ex2.approx.ftz.f32 %f2, %f1;
77; CHECK-BF16-NEXT:    st.param.f32 [func_retval0], %f2;
78; CHECK-BF16-NEXT:    ret;
79entry:
80  %exp2 = call float @llvm.exp2.f32(float %in)
81  ret float %exp2
82}
83
; COM: <2 x float> llvm.exp2 with no function attributes. All three RUN
; COM: configurations expect a v2 parameter load, one scalar ex2.approx.f32
; COM: per lane, and a v2 store of the two results; no packed f32 form of
; COM: ex2 is expected.
84; CHECK-LABEL: exp2_test_v
85define <2 x float> @exp2_test_v(<2 x float> %in) {
86; CHECK-LABEL: exp2_test_v(
87; CHECK:       {
88; CHECK-NEXT:    .reg .f32 %f<5>;
89; CHECK-EMPTY:
90; CHECK-NEXT:  // %bb.0: // %entry
91; CHECK-NEXT:    ld.param.v2.f32 {%f1, %f2}, [exp2_test_v_param_0];
92; CHECK-NEXT:    ex2.approx.f32 %f3, %f2;
93; CHECK-NEXT:    ex2.approx.f32 %f4, %f1;
94; CHECK-NEXT:    st.param.v2.f32 [func_retval0], {%f4, %f3};
95; CHECK-NEXT:    ret;
96;
97; CHECK-FP16-LABEL: exp2_test_v(
98; CHECK-FP16:       {
99; CHECK-FP16-NEXT:    .reg .f32 %f<5>;
100; CHECK-FP16-EMPTY:
101; CHECK-FP16-NEXT:  // %bb.0: // %entry
102; CHECK-FP16-NEXT:    ld.param.v2.f32 {%f1, %f2}, [exp2_test_v_param_0];
103; CHECK-FP16-NEXT:    ex2.approx.f32 %f3, %f2;
104; CHECK-FP16-NEXT:    ex2.approx.f32 %f4, %f1;
105; CHECK-FP16-NEXT:    st.param.v2.f32 [func_retval0], {%f4, %f3};
106; CHECK-FP16-NEXT:    ret;
107;
108; CHECK-BF16-LABEL: exp2_test_v(
109; CHECK-BF16:       {
110; CHECK-BF16-NEXT:    .reg .f32 %f<5>;
111; CHECK-BF16-EMPTY:
112; CHECK-BF16-NEXT:  // %bb.0: // %entry
113; CHECK-BF16-NEXT:    ld.param.v2.f32 {%f1, %f2}, [exp2_test_v_param_0];
114; CHECK-BF16-NEXT:    ex2.approx.f32 %f3, %f2;
115; CHECK-BF16-NEXT:    ex2.approx.f32 %f4, %f1;
116; CHECK-BF16-NEXT:    st.param.v2.f32 [func_retval0], {%f4, %f3};
117; CHECK-BF16-NEXT:    ret;
118entry:
119  %exp2 = call <2 x float> @llvm.exp2.v2f32(<2 x float> %in)
120  ret <2 x float> %exp2
121}
122
123; --- f16 ---
124
; COM: Scalar half llvm.exp2 with no function attributes.
; COM:  - sm_50/ptx32: the operation is expected to be carried out in f32
; COM:    (cvt.f32.f16, ex2.approx.f32, cvt.rn.f16.f32).
; COM:  - sm_75/ptx70 and sm_90/ptx78: a single native ex2.approx.f16 is
; COM:    expected, with no conversions.
125; CHECK-LABEL: exp2_f16_test
126define half @exp2_f16_test(half %in) {
127; CHECK-LABEL: exp2_f16_test(
128; CHECK:       {
129; CHECK-NEXT:    .reg .b16 %rs<3>;
130; CHECK-NEXT:    .reg .f32 %f<3>;
131; CHECK-EMPTY:
132; CHECK-NEXT:  // %bb.0: // %entry
133; CHECK-NEXT:    ld.param.b16 %rs1, [exp2_f16_test_param_0];
134; CHECK-NEXT:    cvt.f32.f16 %f1, %rs1;
135; CHECK-NEXT:    ex2.approx.f32 %f2, %f1;
136; CHECK-NEXT:    cvt.rn.f16.f32 %rs2, %f2;
137; CHECK-NEXT:    st.param.b16 [func_retval0], %rs2;
138; CHECK-NEXT:    ret;
139;
140; CHECK-FP16-LABEL: exp2_f16_test(
141; CHECK-FP16:       {
142; CHECK-FP16-NEXT:    .reg .b16 %rs<3>;
143; CHECK-FP16-EMPTY:
144; CHECK-FP16-NEXT:  // %bb.0: // %entry
145; CHECK-FP16-NEXT:    ld.param.b16 %rs1, [exp2_f16_test_param_0];
146; CHECK-FP16-NEXT:    ex2.approx.f16 %rs2, %rs1;
147; CHECK-FP16-NEXT:    st.param.b16 [func_retval0], %rs2;
148; CHECK-FP16-NEXT:    ret;
149;
150; CHECK-BF16-LABEL: exp2_f16_test(
151; CHECK-BF16:       {
152; CHECK-BF16-NEXT:    .reg .b16 %rs<3>;
153; CHECK-BF16-EMPTY:
154; CHECK-BF16-NEXT:  // %bb.0: // %entry
155; CHECK-BF16-NEXT:    ld.param.b16 %rs1, [exp2_f16_test_param_0];
156; CHECK-BF16-NEXT:    ex2.approx.f16 %rs2, %rs1;
157; CHECK-BF16-NEXT:    st.param.b16 [func_retval0], %rs2;
158; CHECK-BF16-NEXT:    ret;
159entry:
160  %exp2 = call half @llvm.exp2.f16(half %in)
161  ret half %exp2
162}
163
164; COM: The native f16 instruction must never carry .ftz, even though this
; COM: function has attribute group #0 ("denormal-fp-math"="preserve-sign").
; COM:  - sm_75/ptx70 and sm_90/ptx78: plain ex2.approx.f16 is expected.
; COM:  - sm_50/ptx32: the operation is expected to be carried out in f32, and
; COM:    there the f32 instructions do carry .ftz (cvt.ftz.f32.f16 and
; COM:    ex2.approx.ftz.f32); the final cvt.rn.f16.f32 has no .ftz.
165; CHECK-LABEL: exp2_f16_ftz_test
166define half @exp2_f16_ftz_test(half %in) #0 {
167; CHECK-LABEL: exp2_f16_ftz_test(
168; CHECK:       {
169; CHECK-NEXT:    .reg .b16 %rs<3>;
170; CHECK-NEXT:    .reg .f32 %f<3>;
171; CHECK-EMPTY:
172; CHECK-NEXT:  // %bb.0: // %entry
173; CHECK-NEXT:    ld.param.b16 %rs1, [exp2_f16_ftz_test_param_0];
174; CHECK-NEXT:    cvt.ftz.f32.f16 %f1, %rs1;
175; CHECK-NEXT:    ex2.approx.ftz.f32 %f2, %f1;
176; CHECK-NEXT:    cvt.rn.f16.f32 %rs2, %f2;
177; CHECK-NEXT:    st.param.b16 [func_retval0], %rs2;
178; CHECK-NEXT:    ret;
179;
180; CHECK-FP16-LABEL: exp2_f16_ftz_test(
181; CHECK-FP16:       {
182; CHECK-FP16-NEXT:    .reg .b16 %rs<3>;
183; CHECK-FP16-EMPTY:
184; CHECK-FP16-NEXT:  // %bb.0: // %entry
185; CHECK-FP16-NEXT:    ld.param.b16 %rs1, [exp2_f16_ftz_test_param_0];
186; CHECK-FP16-NEXT:    ex2.approx.f16 %rs2, %rs1;
187; CHECK-FP16-NEXT:    st.param.b16 [func_retval0], %rs2;
188; CHECK-FP16-NEXT:    ret;
189;
190; CHECK-BF16-LABEL: exp2_f16_ftz_test(
191; CHECK-BF16:       {
192; CHECK-BF16-NEXT:    .reg .b16 %rs<3>;
193; CHECK-BF16-EMPTY:
194; CHECK-BF16-NEXT:  // %bb.0: // %entry
195; CHECK-BF16-NEXT:    ld.param.b16 %rs1, [exp2_f16_ftz_test_param_0];
196; CHECK-BF16-NEXT:    ex2.approx.f16 %rs2, %rs1;
197; CHECK-BF16-NEXT:    st.param.b16 [func_retval0], %rs2;
198; CHECK-BF16-NEXT:    ret;
199entry:
200  %exp2 = call half @llvm.exp2.f16(half %in)
201  ret half %exp2
202}
203
; COM: <2 x half> llvm.exp2 with no function attributes.
; COM:  - sm_50/ptx32: the packed b32 argument is expected to be split into two
; COM:    b16 lanes, each lane converted to f32, passed through ex2.approx.f32,
; COM:    converted back with cvt.rn.f16.f32, and the lanes re-packed.
; COM:  - sm_75/ptx70 and sm_90/ptx78: a single packed ex2.approx.f16x2 on the
; COM:    b32 register is expected.
204; CHECK-LABEL: exp2_f16_test_v
205define <2 x half> @exp2_f16_test_v(<2 x half> %in) {
206; CHECK-LABEL: exp2_f16_test_v(
207; CHECK:       {
208; CHECK-NEXT:    .reg .b16 %rs<5>;
209; CHECK-NEXT:    .reg .b32 %r<3>;
210; CHECK-NEXT:    .reg .f32 %f<5>;
211; CHECK-EMPTY:
212; CHECK-NEXT:  // %bb.0: // %entry
213; CHECK-NEXT:    ld.param.b32 %r1, [exp2_f16_test_v_param_0];
214; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
215; CHECK-NEXT:    cvt.f32.f16 %f1, %rs2;
216; CHECK-NEXT:    ex2.approx.f32 %f2, %f1;
217; CHECK-NEXT:    cvt.rn.f16.f32 %rs3, %f2;
218; CHECK-NEXT:    cvt.f32.f16 %f3, %rs1;
219; CHECK-NEXT:    ex2.approx.f32 %f4, %f3;
220; CHECK-NEXT:    cvt.rn.f16.f32 %rs4, %f4;
221; CHECK-NEXT:    mov.b32 %r2, {%rs4, %rs3};
222; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
223; CHECK-NEXT:    ret;
224;
225; CHECK-FP16-LABEL: exp2_f16_test_v(
226; CHECK-FP16:       {
227; CHECK-FP16-NEXT:    .reg .b32 %r<3>;
228; CHECK-FP16-EMPTY:
229; CHECK-FP16-NEXT:  // %bb.0: // %entry
230; CHECK-FP16-NEXT:    ld.param.b32 %r1, [exp2_f16_test_v_param_0];
231; CHECK-FP16-NEXT:    ex2.approx.f16x2 %r2, %r1;
232; CHECK-FP16-NEXT:    st.param.b32 [func_retval0], %r2;
233; CHECK-FP16-NEXT:    ret;
234;
235; CHECK-BF16-LABEL: exp2_f16_test_v(
236; CHECK-BF16:       {
237; CHECK-BF16-NEXT:    .reg .b32 %r<3>;
238; CHECK-BF16-EMPTY:
239; CHECK-BF16-NEXT:  // %bb.0: // %entry
240; CHECK-BF16-NEXT:    ld.param.b32 %r1, [exp2_f16_test_v_param_0];
241; CHECK-BF16-NEXT:    ex2.approx.f16x2 %r2, %r1;
242; CHECK-BF16-NEXT:    st.param.b32 [func_retval0], %r2;
243; CHECK-BF16-NEXT:    ret;
244entry:
245  %exp2 = call <2 x half> @llvm.exp2.v2f16(<2 x half> %in)
246  ret <2 x half> %exp2
247}
248
249; --- bf16 ---
250
251; COM: The native bf16 instruction must always carry .ftz, even though this
; COM: function has no denormal-mode attribute.
; COM:  - sm_90/ptx78: a single ex2.approx.ftz.bf16 is expected.
; COM:  - sm_50/ptx32 and sm_75/ptx70: no native bf16 form is expected. The
; COM:    input is widened by shifting its 16 bits into the top half of a b32
; COM:    and moving that into an f32 register, plain ex2.approx.f32 (no .ftz)
; COM:    is applied, and the result is narrowed back to bf16 with an integer
; COM:    sequence (bfe/add/add, plus a setp.nan/or/selp path for NaN results)
; COM:    that keeps the upper 16 bits.
252; CHECK-LABEL: exp2_bf16_test
253define bfloat @exp2_bf16_test(bfloat %in) {
254; CHECK-LABEL: exp2_bf16_test(
255; CHECK:       {
256; CHECK-NEXT:    .reg .pred %p<2>;
257; CHECK-NEXT:    .reg .b16 %rs<2>;
258; CHECK-NEXT:    .reg .b32 %r<9>;
259; CHECK-NEXT:    .reg .f32 %f<3>;
260; CHECK-EMPTY:
261; CHECK-NEXT:  // %bb.0: // %entry
262; CHECK-NEXT:    ld.param.u16 %r1, [exp2_bf16_test_param_0];
263; CHECK-NEXT:    shl.b32 %r2, %r1, 16;
264; CHECK-NEXT:    mov.b32 %f1, %r2;
265; CHECK-NEXT:    ex2.approx.f32 %f2, %f1;
266; CHECK-NEXT:    mov.b32 %r3, %f2;
267; CHECK-NEXT:    bfe.u32 %r4, %r3, 16, 1;
268; CHECK-NEXT:    add.s32 %r5, %r4, %r3;
269; CHECK-NEXT:    add.s32 %r6, %r5, 32767;
270; CHECK-NEXT:    setp.nan.f32 %p1, %f2, %f2;
271; CHECK-NEXT:    or.b32 %r7, %r3, 4194304;
272; CHECK-NEXT:    selp.b32 %r8, %r7, %r6, %p1;
273; CHECK-NEXT:    { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r8; }
274; CHECK-NEXT:    st.param.b16 [func_retval0], %rs1;
275; CHECK-NEXT:    ret;
276;
277; CHECK-FP16-LABEL: exp2_bf16_test(
278; CHECK-FP16:       {
279; CHECK-FP16-NEXT:    .reg .pred %p<2>;
280; CHECK-FP16-NEXT:    .reg .b16 %rs<2>;
281; CHECK-FP16-NEXT:    .reg .b32 %r<9>;
282; CHECK-FP16-NEXT:    .reg .f32 %f<3>;
283; CHECK-FP16-EMPTY:
284; CHECK-FP16-NEXT:  // %bb.0: // %entry
285; CHECK-FP16-NEXT:    ld.param.u16 %r1, [exp2_bf16_test_param_0];
286; CHECK-FP16-NEXT:    shl.b32 %r2, %r1, 16;
287; CHECK-FP16-NEXT:    mov.b32 %f1, %r2;
288; CHECK-FP16-NEXT:    ex2.approx.f32 %f2, %f1;
289; CHECK-FP16-NEXT:    mov.b32 %r3, %f2;
290; CHECK-FP16-NEXT:    bfe.u32 %r4, %r3, 16, 1;
291; CHECK-FP16-NEXT:    add.s32 %r5, %r4, %r3;
292; CHECK-FP16-NEXT:    add.s32 %r6, %r5, 32767;
293; CHECK-FP16-NEXT:    setp.nan.f32 %p1, %f2, %f2;
294; CHECK-FP16-NEXT:    or.b32 %r7, %r3, 4194304;
295; CHECK-FP16-NEXT:    selp.b32 %r8, %r7, %r6, %p1;
296; CHECK-FP16-NEXT:    { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r8; }
297; CHECK-FP16-NEXT:    st.param.b16 [func_retval0], %rs1;
298; CHECK-FP16-NEXT:    ret;
299;
300; CHECK-BF16-LABEL: exp2_bf16_test(
301; CHECK-BF16:       {
302; CHECK-BF16-NEXT:    .reg .b16 %rs<3>;
303; CHECK-BF16-EMPTY:
304; CHECK-BF16-NEXT:  // %bb.0: // %entry
305; CHECK-BF16-NEXT:    ld.param.b16 %rs1, [exp2_bf16_test_param_0];
306; CHECK-BF16-NEXT:    ex2.approx.ftz.bf16 %rs2, %rs1;
307; CHECK-BF16-NEXT:    st.param.b16 [func_retval0], %rs2;
308; CHECK-BF16-NEXT:    ret;
309entry:
310  %exp2 = call bfloat @llvm.exp2.bf16(bfloat %in)
311  ret bfloat %exp2
312}
313
; COM: <2 x bfloat> llvm.exp2 with no function attributes.
; COM:  - sm_90/ptx78: a single packed ex2.approx.ftz.bf16x2 on the b32
; COM:    register is expected.
; COM:  - sm_50/ptx32 and sm_75/ptx70: the packed argument is expected to be
; COM:    split into two b16 lanes. Each lane is widened (cvt.u32.u16 then
; COM:    shl by 16, moved into an f32 register), passed through plain
; COM:    ex2.approx.f32, and narrowed with the integer bfe/add/add and
; COM:    setp.nan/or/selp sequence. A final prmt.b32 with selector 0x7632
; COM:    combines the two 32-bit lane results into the packed return value.
314; CHECK-LABEL: exp2_bf16_test_v
315define <2 x bfloat> @exp2_bf16_test_v(<2 x bfloat> %in) {
316; CHECK-LABEL: exp2_bf16_test_v(
317; CHECK:       {
318; CHECK-NEXT:    .reg .pred %p<3>;
319; CHECK-NEXT:    .reg .b16 %rs<3>;
320; CHECK-NEXT:    .reg .b32 %r<19>;
321; CHECK-NEXT:    .reg .f32 %f<5>;
322; CHECK-EMPTY:
323; CHECK-NEXT:  // %bb.0: // %entry
324; CHECK-NEXT:    ld.param.b32 %r1, [exp2_bf16_test_v_param_0];
325; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
326; CHECK-NEXT:    cvt.u32.u16 %r2, %rs2;
327; CHECK-NEXT:    shl.b32 %r3, %r2, 16;
328; CHECK-NEXT:    mov.b32 %f1, %r3;
329; CHECK-NEXT:    ex2.approx.f32 %f2, %f1;
330; CHECK-NEXT:    mov.b32 %r4, %f2;
331; CHECK-NEXT:    bfe.u32 %r5, %r4, 16, 1;
332; CHECK-NEXT:    add.s32 %r6, %r5, %r4;
333; CHECK-NEXT:    add.s32 %r7, %r6, 32767;
334; CHECK-NEXT:    setp.nan.f32 %p1, %f2, %f2;
335; CHECK-NEXT:    or.b32 %r8, %r4, 4194304;
336; CHECK-NEXT:    selp.b32 %r9, %r8, %r7, %p1;
337; CHECK-NEXT:    cvt.u32.u16 %r10, %rs1;
338; CHECK-NEXT:    shl.b32 %r11, %r10, 16;
339; CHECK-NEXT:    mov.b32 %f3, %r11;
340; CHECK-NEXT:    ex2.approx.f32 %f4, %f3;
341; CHECK-NEXT:    mov.b32 %r12, %f4;
342; CHECK-NEXT:    bfe.u32 %r13, %r12, 16, 1;
343; CHECK-NEXT:    add.s32 %r14, %r13, %r12;
344; CHECK-NEXT:    add.s32 %r15, %r14, 32767;
345; CHECK-NEXT:    setp.nan.f32 %p2, %f4, %f4;
346; CHECK-NEXT:    or.b32 %r16, %r12, 4194304;
347; CHECK-NEXT:    selp.b32 %r17, %r16, %r15, %p2;
348; CHECK-NEXT:    prmt.b32 %r18, %r17, %r9, 0x7632U;
349; CHECK-NEXT:    st.param.b32 [func_retval0], %r18;
350; CHECK-NEXT:    ret;
351;
352; CHECK-FP16-LABEL: exp2_bf16_test_v(
353; CHECK-FP16:       {
354; CHECK-FP16-NEXT:    .reg .pred %p<3>;
355; CHECK-FP16-NEXT:    .reg .b16 %rs<3>;
356; CHECK-FP16-NEXT:    .reg .b32 %r<19>;
357; CHECK-FP16-NEXT:    .reg .f32 %f<5>;
358; CHECK-FP16-EMPTY:
359; CHECK-FP16-NEXT:  // %bb.0: // %entry
360; CHECK-FP16-NEXT:    ld.param.b32 %r1, [exp2_bf16_test_v_param_0];
361; CHECK-FP16-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
362; CHECK-FP16-NEXT:    cvt.u32.u16 %r2, %rs2;
363; CHECK-FP16-NEXT:    shl.b32 %r3, %r2, 16;
364; CHECK-FP16-NEXT:    mov.b32 %f1, %r3;
365; CHECK-FP16-NEXT:    ex2.approx.f32 %f2, %f1;
366; CHECK-FP16-NEXT:    mov.b32 %r4, %f2;
367; CHECK-FP16-NEXT:    bfe.u32 %r5, %r4, 16, 1;
368; CHECK-FP16-NEXT:    add.s32 %r6, %r5, %r4;
369; CHECK-FP16-NEXT:    add.s32 %r7, %r6, 32767;
370; CHECK-FP16-NEXT:    setp.nan.f32 %p1, %f2, %f2;
371; CHECK-FP16-NEXT:    or.b32 %r8, %r4, 4194304;
372; CHECK-FP16-NEXT:    selp.b32 %r9, %r8, %r7, %p1;
373; CHECK-FP16-NEXT:    cvt.u32.u16 %r10, %rs1;
374; CHECK-FP16-NEXT:    shl.b32 %r11, %r10, 16;
375; CHECK-FP16-NEXT:    mov.b32 %f3, %r11;
376; CHECK-FP16-NEXT:    ex2.approx.f32 %f4, %f3;
377; CHECK-FP16-NEXT:    mov.b32 %r12, %f4;
378; CHECK-FP16-NEXT:    bfe.u32 %r13, %r12, 16, 1;
379; CHECK-FP16-NEXT:    add.s32 %r14, %r13, %r12;
380; CHECK-FP16-NEXT:    add.s32 %r15, %r14, 32767;
381; CHECK-FP16-NEXT:    setp.nan.f32 %p2, %f4, %f4;
382; CHECK-FP16-NEXT:    or.b32 %r16, %r12, 4194304;
383; CHECK-FP16-NEXT:    selp.b32 %r17, %r16, %r15, %p2;
384; CHECK-FP16-NEXT:    prmt.b32 %r18, %r17, %r9, 0x7632U;
385; CHECK-FP16-NEXT:    st.param.b32 [func_retval0], %r18;
386; CHECK-FP16-NEXT:    ret;
387;
388; CHECK-BF16-LABEL: exp2_bf16_test_v(
389; CHECK-BF16:       {
390; CHECK-BF16-NEXT:    .reg .b32 %r<3>;
391; CHECK-BF16-EMPTY:
392; CHECK-BF16-NEXT:  // %bb.0: // %entry
393; CHECK-BF16-NEXT:    ld.param.b32 %r1, [exp2_bf16_test_v_param_0];
394; CHECK-BF16-NEXT:    ex2.approx.ftz.bf16x2 %r2, %r1;
395; CHECK-BF16-NEXT:    st.param.b32 [func_retval0], %r2;
396; CHECK-BF16-NEXT:    ret;
397entry:
398  %exp2 = call <2 x bfloat> @llvm.exp2.v2bf16(<2 x bfloat> %in)
399  ret <2 x bfloat> %exp2
400}
401
; COM: Declarations of the llvm.exp2 intrinsic overloads called by the test
; COM: functions above: scalar and <2 x ...> forms for float, half and bfloat.
402declare float @llvm.exp2.f32(float %val)
403
404declare <2 x float> @llvm.exp2.v2f32(<2 x float> %val)
405
406declare half @llvm.exp2.f16(half %val)
407
408declare <2 x half> @llvm.exp2.v2f16(<2 x half> %val)
409
410declare bfloat @llvm.exp2.bf16(bfloat %val)
411
412declare <2 x bfloat> @llvm.exp2.v2bf16(<2 x bfloat> %val)
413
; COM: Attribute group #0 is attached to exp2_ftz_test and exp2_f16_ftz_test.
; COM: Those are the functions whose f32 checks expect the .ftz modifier.
414attributes #0 = {"denormal-fp-math"="preserve-sign"}
415