xref: /llvm-project/llvm/test/CodeGen/NVPTX/f16-instructions.ll (revision 0f0a96b8621fcc8e1d6b6a3d047c263bb17a7f39)
1; ## Full FP16 support enabled by default.
2; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
3; RUN:          -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
4; RUN:          -mattr=+ptx60                                                 \
5; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOFTZ,CHECK-F16-NOFTZ %s
6; RUN: %if ptxas %{                                                           \
7; RUN:   llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
8; RUN:          -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
9; RUN:          -mattr=+ptx60                                                 \
10; RUN:   | %ptxas-verify -arch=sm_53                                          \
11; RUN: %}
12; ## Full FP16 with FTZ
13; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
14; RUN:          -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
15; RUN:          -denormal-fp-math-f32=preserve-sign -mattr=+ptx60             \
16; RUN: | FileCheck -check-prefixes CHECK,CHECK-F16-FTZ %s
17; RUN: %if ptxas %{                                                           \
18; RUN:   llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
19; RUN:          -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
20; RUN:          -denormal-fp-math-f32=preserve-sign -mattr=+ptx60             \
21; RUN:   | %ptxas-verify -arch=sm_53                                          \
22; RUN: %}
23; ## FP16 support explicitly disabled.
24; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
25; RUN:          -O0 -disable-post-ra -frame-pointer=all --nvptx-no-f16-math \
26; RUN:          -verify-machineinstrs -mattr=+ptx60                         \
27; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOFTZ,CHECK-NOF16 %s
28; RUN: %if ptxas %{                                                           \
29; RUN:   llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
30; RUN:          -O0 -disable-post-ra -frame-pointer=all --nvptx-no-f16-math   \
31; RUN:   | %ptxas-verify -arch=sm_53                                          \
32; RUN: %}
33; ## FP16 is not supported by hardware.
34; RUN: llc < %s -O0 -mtriple=nvptx64-nvidia-cuda -mcpu=sm_52 -asm-verbose=false \
35; RUN:          -disable-post-ra -frame-pointer=all -verify-machineinstrs \
36; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOFTZ,CHECK-NOF16 %s
37; RUN: %if ptxas %{                                                               \
38; RUN:   llc < %s -O0 -mtriple=nvptx64-nvidia-cuda -mcpu=sm_52 -asm-verbose=false \
39; RUN:          -disable-post-ra -frame-pointer=all -verify-machineinstrs         \
40; RUN:   | %ptxas-verify -arch=sm_52                                              \
41; RUN: %}
42
; Module-wide data layout string shared by every function in this test.
; NOTE(review): per the LLVM LangRef the fields read as little-endian ("e"),
; Mach-O style mangling ("m:o"), 64-bit aligned i64, 128-bit aligned i128,
; native integer widths 32/64 and a 128-bit natural stack alignment - confirm
; this is the layout intended for the nvptx64 triple used by the run lines.
43target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
44
45; CHECK-LABEL: test_ret_const(
46; CHECK:      mov.b16         [[R:%rs[0-9]+]], 0x3C00;
47; CHECK-NEXT: st.param.b16    [func_retval0], [[R]];
48; CHECK-NEXT: ret;
; Returns the half-precision constant 1.0, spelled here by its bit pattern.
define half @test_ret_const() #0 {
  ret half 0xH3C00
}
52
53; CHECK-LABEL: test_fadd(
54; CHECK-DAG:  ld.param.b16    [[A:%rs[0-9]+]], [test_fadd_param_0];
55; CHECK-DAG:  ld.param.b16    [[B:%rs[0-9]+]], [test_fadd_param_1];
56; CHECK-F16-NOFTZ-NEXT:   add.rn.f16     [[R:%rs[0-9]+]], [[A]], [[B]];
57; CHECK-F16-FTZ-NEXT:   add.rn.ftz.f16     [[R:%rs[0-9]+]], [[A]], [[B]];
58; CHECK-NOF16-DAG:  cvt.f32.f16    [[A32:%f[0-9]+]], [[A]]
59; CHECK-NOF16-DAG:  cvt.f32.f16    [[B32:%f[0-9]+]], [[B]]
60; CHECK-NOF16-NEXT: add.rn.f32     [[R32:%f[0-9]+]], [[A32]], [[B32]];
61; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
62; CHECK-NEXT: st.param.b16    [func_retval0], [[R]];
63; CHECK-NEXT: ret;
; Scalar half addition of the two arguments.
define half @test_fadd(half %a, half %b) #0 {
  %sum = fadd half %a, %b
  ret half %sum
}
68
69; CHECK-LABEL: test_fadd_v1f16(
70; CHECK-DAG:  ld.param.b16    [[A:%rs[0-9]+]], [test_fadd_v1f16_param_0];
71; CHECK-DAG:  ld.param.b16    [[B:%rs[0-9]+]], [test_fadd_v1f16_param_1];
72; CHECK-F16-NOFTZ-NEXT:   add.rn.f16     [[R:%rs[0-9]+]], [[A]], [[B]];
73; CHECK-F16-FTZ-NEXT:   add.rn.ftz.f16     [[R:%rs[0-9]+]], [[A]], [[B]];
74; CHECK-NOF16-DAG:  cvt.f32.f16    [[A32:%f[0-9]+]], [[A]]
75; CHECK-NOF16-DAG:  cvt.f32.f16    [[B32:%f[0-9]+]], [[B]]
76; CHECK-NOF16-NEXT: add.rn.f32     [[R32:%f[0-9]+]], [[A32]], [[B32]];
77; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
78; CHECK-NEXT: st.param.b16    [func_retval0], [[R]];
79; CHECK-NEXT: ret;
; Same addition as the scalar case, but on single-element half vectors.
define <1 x half> @test_fadd_v1f16(<1 x half> %a, <1 x half> %b) #0 {
  %vsum = fadd <1 x half> %a, %b
  ret <1 x half> %vsum
}
84
85; Check that we can lower fadd with immediate arguments.
86; CHECK-LABEL: test_fadd_imm_0(
87; CHECK-DAG:  ld.param.b16    [[B:%rs[0-9]+]], [test_fadd_imm_0_param_0];
88; CHECK-F16-NOFTZ-DAG:    mov.b16        [[A:%rs[0-9]+]], 0x3C00;
89; CHECK-F16-NOFTZ-NEXT:   add.rn.f16     [[R:%rs[0-9]+]], [[B]], [[A]];
90; CHECK-F16-FTZ-DAG:    mov.b16        [[A:%rs[0-9]+]], 0x3C00;
91; CHECK-F16-FTZ-NEXT:   add.rn.ftz.f16     [[R:%rs[0-9]+]], [[B]], [[A]];
92; CHECK-NOF16-DAG:  cvt.f32.f16    [[B32:%f[0-9]+]], [[B]]
93; CHECK-NOF16-NEXT: add.rn.f32     [[R32:%f[0-9]+]], [[B32]], 0f3F800000;
94; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
95; CHECK-NEXT: st.param.b16    [func_retval0], [[R]];
96; CHECK-NEXT: ret;
; Adds the constant 1.0 (left operand, written as a half bit pattern) to %b.
define half @test_fadd_imm_0(half %b) #0 {
  %one_plus_b = fadd half 0xH3C00, %b
  ret half %one_plus_b
}
101
102; CHECK-LABEL: test_fadd_imm_1(
103; CHECK-DAG:  ld.param.b16    [[B:%rs[0-9]+]], [test_fadd_imm_1_param_0];
104; CHECK-F16-NOFTZ-DAG:    mov.b16        [[A:%rs[0-9]+]], 0x3C00;
105; CHECK-F16-NOFTZ-NEXT:   add.rn.f16     [[R:%rs[0-9]+]], [[B]], [[A]];
106; CHECK-F16-FTZ-DAG:    mov.b16        [[A:%rs[0-9]+]], 0x3C00;
107; CHECK-F16-FTZ-NEXT:   add.rn.ftz.f16     [[R:%rs[0-9]+]], [[B]], [[A]];
108; CHECK-NOF16-DAG:  cvt.f32.f16    [[B32:%f[0-9]+]], [[B]]
109; CHECK-NOF16-NEXT: add.rn.f32     [[R32:%f[0-9]+]], [[B32]], 0f3F800000;
110; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
111; CHECK-NEXT: st.param.b16    [func_retval0], [[R]];
112; CHECK-NEXT: ret;
; Adds the constant 1.0 (right operand, written as a half bit pattern) to %a.
define half @test_fadd_imm_1(half %a) #0 {
  %a_plus_one = fadd half %a, 0xH3C00
  ret half %a_plus_one
}
117
118; CHECK-LABEL: test_fsub(
119; CHECK-DAG:  ld.param.b16    [[A:%rs[0-9]+]], [test_fsub_param_0];
120; CHECK-DAG:  ld.param.b16    [[B:%rs[0-9]+]], [test_fsub_param_1];
121; CHECK-F16-NOFTZ-NEXT:   sub.rn.f16     [[R:%rs[0-9]+]], [[A]], [[B]];
122; CHECK-F16-FTZ-NEXT:   sub.rn.ftz.f16     [[R:%rs[0-9]+]], [[A]], [[B]];
123; CHECK-NOF16-DAG:  cvt.f32.f16    [[A32:%f[0-9]+]], [[A]]
124; CHECK-NOF16-DAG:  cvt.f32.f16    [[B32:%f[0-9]+]], [[B]]
125; CHECK-NOF16-NEXT: sub.rn.f32     [[R32:%f[0-9]+]], [[A32]], [[B32]];
126; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
127; CHECK-NEXT: st.param.b16    [func_retval0], [[R]];
128; CHECK-NEXT: ret;
; Scalar half subtraction: %a minus %b.
define half @test_fsub(half %a, half %b) #0 {
  %diff = fsub half %a, %b
  ret half %diff
}
133
134; CHECK-LABEL: test_old_fneg(
135; CHECK-DAG:  ld.param.b16    [[A:%rs[0-9]+]], [test_old_fneg_param_0];
136; CHECK-F16-NOFTZ-NEXT:   mov.b16        [[Z:%rs[0-9]+]], 0x0000
137; CHECK-F16-NOFTZ-NEXT:   sub.rn.f16     [[R:%rs[0-9]+]], [[Z]], [[A]];
138; CHECK-F16-FTZ-NEXT:   mov.b16        [[Z:%rs[0-9]+]], 0x0000
139; CHECK-F16-FTZ-NEXT:   sub.rn.ftz.f16     [[R:%rs[0-9]+]], [[Z]], [[A]];
140; CHECK-NOF16-DAG:  cvt.f32.f16    [[A32:%f[0-9]+]], [[A]]
141; CHECK-NOF16-DAG:  mov.f32        [[Z:%f[0-9]+]], 0f00000000;
142; CHECK-NOF16-NEXT: sub.rn.f32     [[R32:%f[0-9]+]], [[Z]], [[A32]];
143; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
144; CHECK-NEXT: st.param.b16    [func_retval0], [[R]];
145; CHECK-NEXT: ret;
; Legacy negation idiom: subtracts %a from positive zero instead of using fneg.
define half @test_old_fneg(half %a) #0 {
  %zero_minus_a = fsub half 0xH0000, %a
  ret half %zero_minus_a
}
150
151; CHECK-LABEL: test_fneg(
152; CHECK:  ld.param.b16    [[A:%rs[0-9]+]], [test_fneg_param_0];
153; CHECK-F16-NOFTZ-NEXT:   neg.f16     [[R:%rs[0-9]+]], [[A]];
154; CHECK-F16-FTZ-NEXT:   neg.ftz.f16     [[R:%rs[0-9]+]], [[A]];
155; CHECK-NOF16-NEXT:  xor.b16    [[R:%rs[0-9]+]], [[A]], -32768;
156; CHECK-NEXT: st.param.b16    [func_retval0], [[R]];
157; CHECK-NEXT: ret;
; Negation through the dedicated fneg instruction.
define half @test_fneg(half %a) #0 {
  %negated = fneg half %a
  ret half %negated
}
162
163; CHECK-LABEL: test_fmul(
164; CHECK-DAG:  ld.param.b16    [[A:%rs[0-9]+]], [test_fmul_param_0];
165; CHECK-DAG:  ld.param.b16    [[B:%rs[0-9]+]], [test_fmul_param_1];
166; CHECK-F16-NOFTZ-NEXT: mul.rn.f16      [[R:%rs[0-9]+]], [[A]], [[B]];
167; CHECK-F16-FTZ-NEXT: mul.rn.ftz.f16      [[R:%rs[0-9]+]], [[A]], [[B]];
168; CHECK-NOF16-DAG:  cvt.f32.f16    [[A32:%f[0-9]+]], [[A]]
169; CHECK-NOF16-DAG:  cvt.f32.f16    [[B32:%f[0-9]+]], [[B]]
170; CHECK-NOF16-NEXT: mul.rn.f32     [[R32:%f[0-9]+]], [[A32]], [[B32]];
171; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
172; CHECK-NEXT: st.param.b16    [func_retval0], [[R]];
173; CHECK-NEXT: ret;
; Scalar half multiplication of the two arguments.
define half @test_fmul(half %a, half %b) #0 {
  %product = fmul half %a, %b
  ret half %product
}
178
179; CHECK-LABEL: test_fdiv(
180; CHECK-DAG:  ld.param.b16    [[A:%rs[0-9]+]], [test_fdiv_param_0];
181; CHECK-DAG:  ld.param.b16    [[B:%rs[0-9]+]], [test_fdiv_param_1];
182; CHECK-NOFTZ-DAG:  cvt.f32.f16     [[F0:%f[0-9]+]], [[A]];
183; CHECK-NOFTZ-DAG:  cvt.f32.f16     [[F1:%f[0-9]+]], [[B]];
184; CHECK-NOFTZ-NEXT: div.rn.f32      [[FR:%f[0-9]+]], [[F0]], [[F1]];
185; CHECK-F16-FTZ-DAG:  cvt.ftz.f32.f16     [[F0:%f[0-9]+]], [[A]];
186; CHECK-F16-FTZ-DAG:  cvt.ftz.f32.f16     [[F1:%f[0-9]+]], [[B]];
187; CHECK-F16-FTZ-NEXT: div.rn.ftz.f32      [[FR:%f[0-9]+]], [[F0]], [[F1]];
188; CHECK-NEXT: cvt.rn.f16.f32  [[R:%rs[0-9]+]], [[FR]];
189; CHECK-NEXT: st.param.b16    [func_retval0], [[R]];
190; CHECK-NEXT: ret;
; Scalar half division: %a divided by %b.
define half @test_fdiv(half %a, half %b) #0 {
  %quotient = fdiv half %a, %b
  ret half %quotient
}
195
196; CHECK-LABEL: test_frem(
197; CHECK-DAG:  ld.param.b16    [[A:%rs[0-9]+]], [test_frem_param_0];
198; CHECK-DAG:  ld.param.b16    [[B:%rs[0-9]+]], [test_frem_param_1];
199; CHECK-NOFTZ-DAG:  cvt.f32.f16     [[FA:%f[0-9]+]], [[A]];
200; CHECK-NOFTZ-DAG:  cvt.f32.f16     [[FB:%f[0-9]+]], [[B]];
201; CHECK-NOFTZ-NEXT: div.rn.f32      [[D:%f[0-9]+]], [[FA]], [[FB]];
202; CHECK-NOFTZ-NEXT: cvt.rzi.f32.f32 [[DI:%f[0-9]+]], [[D]];
203; CHECK-NOFTZ-NEXT: mul.f32         [[RI:%f[0-9]+]], [[DI]], [[FB]];
204; CHECK-NOFTZ-NEXT: sub.f32         [[RF:%f[0-9]+]], [[FA]], [[RI]];
205; CHECK-F16-FTZ-DAG:  cvt.ftz.f32.f16     [[FA:%f[0-9]+]], [[A]];
206; CHECK-F16-FTZ-DAG:  cvt.ftz.f32.f16     [[FB:%f[0-9]+]], [[B]];
207; CHECK-F16-FTZ-NEXT: div.rn.ftz.f32      [[D:%f[0-9]+]], [[FA]], [[FB]];
208; CHECK-F16-FTZ-NEXT: cvt.rzi.ftz.f32.f32 [[DI:%f[0-9]+]], [[D]];
209; CHECK-F16-FTZ-NEXT: mul.ftz.f32         [[RI:%f[0-9]+]], [[DI]], [[FB]];
210; CHECK-F16-FTZ-NEXT: sub.ftz.f32         [[RF:%f[0-9]+]], [[FA]], [[RI]];
211; CHECK-NEXT: testp.infinite.f32 [[ISBINF:%p[0-9]+]], [[FB]];
212; CHECK-NEXT: selp.f32           [[RESULT:%f[0-9]+]], [[FA]], [[RF]], [[ISBINF]];
213; CHECK-NEXT: cvt.rn.f16.f32     [[R:%rs[0-9]+]], [[RESULT]];
214; CHECK-NEXT: st.param.b16       [func_retval0], [[R]];
215; CHECK-NEXT: ret;
; Scalar half remainder of %a divided by %b.
define half @test_frem(half %a, half %b) #0 {
  %remainder = frem half %a, %b
  ret half %remainder
}
220
221; CHECK-LABEL: test_store(
222; CHECK-DAG:  ld.param.b16    [[A:%rs[0-9]+]], [test_store_param_0];
223; CHECK-DAG:  ld.param.u64    %[[PTR:rd[0-9]+]], [test_store_param_1];
224; CHECK-NEXT: st.b16          [%[[PTR]]], [[A]];
225; CHECK-NEXT: ret;
; Writes the half argument %a to the memory that %b points at.
define void @test_store(half %a, ptr %b) #0 {
  store half %a, ptr %b
  ret void
}
230
231; CHECK-LABEL: test_load(
232; CHECK:      ld.param.u64    %[[PTR:rd[0-9]+]], [test_load_param_0];
233; CHECK-NEXT: ld.b16          [[R:%rs[0-9]+]], [%[[PTR]]];
234; CHECK-NEXT: st.param.b16    [func_retval0], [[R]];
235; CHECK-NEXT: ret;
; Reads one half value through %a and returns it.
define half @test_load(ptr %a) #0 {
  %loaded = load half, ptr %a
  ret half %loaded
}
240
241; CHECK-LABEL: .visible .func test_halfp0a1(
242; CHECK-DAG: ld.param.u64 %[[FROM:rd?[0-9]+]], [test_halfp0a1_param_0];
243; CHECK-DAG: ld.param.u64 %[[TO:rd?[0-9]+]], [test_halfp0a1_param_1];
244; CHECK-DAG: ld.u8        [[B0:%r[sd]?[0-9]+]], [%[[FROM]]]
245; CHECK-DAG: st.u8        [%[[TO]]], [[B0]]
246; CHECK-DAG: ld.u8        [[B1:%r[sd]?[0-9]+]], [%[[FROM]]+1]
247; CHECK-DAG: st.u8        [%[[TO]]+1], [[B1]]
248; CHECK: ret
; Copies one half value from %from to %to; both accesses are only 1-byte
; aligned.
define void @test_halfp0a1(ptr noalias readonly %from, ptr %to) {
  %unaligned = load half, ptr %from, align 1
  store half %unaligned, ptr %to, align 1
  ret void
}
254
; External callee used by the call tests: takes two half values, returns half.
declare half @test_callee(half, half) #0
256
257; CHECK-LABEL: test_call(
258; CHECK-DAG:  ld.param.b16    [[A:%rs[0-9]+]], [test_call_param_0];
259; CHECK-DAG:  ld.param.b16    [[B:%rs[0-9]+]], [test_call_param_1];
260; CHECK:      {
261; CHECK-DAG:  .param .align 2 .b8 param0[2];
262; CHECK-DAG:  .param .align 2 .b8 param1[2];
263; CHECK-DAG:  st.param.b16    [param0], [[A]];
264; CHECK-DAG:  st.param.b16    [param1], [[B]];
265; CHECK-DAG:  .param .align 2 .b8 retval0[2];
266; CHECK:      call.uni (retval0),
267; CHECK-NEXT:        test_callee,
268; CHECK-NEXT: (
269; CHECK-NEXT:        param0,
270; CHECK-NEXT:        param1
271; CHECK-NEXT: );
272; CHECK-NEXT: ld.param.b16    [[R:%rs[0-9]+]], [retval0];
273; CHECK-NEXT: }
274; CHECK-NEXT: st.param.b16    [func_retval0], [[R]];
275; CHECK-NEXT: ret;
; Forwards both arguments to @test_callee in their original order.
define half @test_call(half %a, half %b) #0 {
  %callee_result = call half @test_callee(half %a, half %b)
  ret half %callee_result
}
280
281; CHECK-LABEL: test_call_flipped(
282; CHECK-DAG:  ld.param.b16    [[A:%rs[0-9]+]], [test_call_flipped_param_0];
283; CHECK-DAG:  ld.param.b16    [[B:%rs[0-9]+]], [test_call_flipped_param_1];
284; CHECK:      {
285; CHECK-DAG:  .param .align 2 .b8 param0[2];
286; CHECK-DAG:  .param .align 2 .b8 param1[2];
287; CHECK-DAG:  st.param.b16    [param0], [[B]];
288; CHECK-DAG:  st.param.b16    [param1], [[A]];
289; CHECK-DAG:  .param .align 2 .b8 retval0[2];
290; CHECK:      call.uni (retval0),
291; CHECK-NEXT:        test_callee,
292; CHECK-NEXT: (
293; CHECK-NEXT:        param0,
294; CHECK-NEXT:        param1
295; CHECK-NEXT: );
296; CHECK-NEXT: ld.param.b16    [[R:%rs[0-9]+]], [retval0];
297; CHECK-NEXT: }
298; CHECK-NEXT: st.param.b16    [func_retval0], [[R]];
299; CHECK-NEXT: ret;
; Forwards the arguments to @test_callee with their order swapped.
define half @test_call_flipped(half %a, half %b) #0 {
  %swapped_result = call half @test_callee(half %b, half %a)
  ret half %swapped_result
}
304
305; CHECK-LABEL: test_tailcall_flipped(
306; CHECK-DAG:  ld.param.b16    [[A:%rs[0-9]+]], [test_tailcall_flipped_param_0];
307; CHECK-DAG:  ld.param.b16    [[B:%rs[0-9]+]], [test_tailcall_flipped_param_1];
308; CHECK:      {
309; CHECK-DAG:  .param .align 2 .b8 param0[2];
310; CHECK-DAG:  .param .align 2 .b8 param1[2];
311; CHECK-DAG:  st.param.b16    [param0], [[B]];
312; CHECK-DAG:  st.param.b16    [param1], [[A]];
313; CHECK-DAG:  .param .align 2 .b8 retval0[2];
314; CHECK:      call.uni (retval0),
315; CHECK-NEXT:        test_callee,
316; CHECK-NEXT: (
317; CHECK-NEXT:        param0,
318; CHECK-NEXT:        param1
319; CHECK-NEXT: );
320; CHECK-NEXT: ld.param.b16    [[R:%rs[0-9]+]], [retval0];
321; CHECK-NEXT: }
322; CHECK-NEXT: st.param.b16    [func_retval0], [[R]];
323; CHECK-NEXT: ret;
; Same swapped-argument call as above, but marked as a tail call.
define half @test_tailcall_flipped(half %a, half %b) #0 {
  %tail_result = tail call half @test_callee(half %b, half %a)
  ret half %tail_result
}
328
329; CHECK-LABEL: test_select(
330; CHECK-DAG:  ld.param.b16    [[A:%rs[0-9]+]], [test_select_param_0];
331; CHECK-DAG:  ld.param.b16    [[B:%rs[0-9]+]], [test_select_param_1];
332; CHECK-DAG:  setp.eq.b16     [[PRED:%p[0-9]+]], %rs{{.*}}, 1;
333; CHECK-NEXT: selp.b16        [[R:%rs[0-9]+]], [[A]], [[B]], [[PRED]];
334; CHECK-NEXT: st.param.b16    [func_retval0], [[R]];
335; CHECK-NEXT: ret;
; Picks %a when the zero-extended i1 %c is true, otherwise %b.
define half @test_select(half %a, half %b, i1 zeroext %c) #0 {
  %chosen = select i1 %c, half %a, half %b
  ret half %chosen
}
340
; Selects between the half values %a and %b based on an unordered-or-not-equal
; compare of the half values %c and %d. Every run configuration binds PRED on
; its own compare line, so the selp line never depends on a binding left over
; from an earlier function.
; CHECK-LABEL: test_select_cc(
; CHECK-DAG:  ld.param.b16    [[A:%rs[0-9]+]], [test_select_cc_param_0];
; CHECK-DAG:  ld.param.b16    [[B:%rs[0-9]+]], [test_select_cc_param_1];
; CHECK-DAG:  ld.param.b16    [[C:%rs[0-9]+]], [test_select_cc_param_2];
; CHECK-DAG:  ld.param.b16    [[D:%rs[0-9]+]], [test_select_cc_param_3];
; CHECK-F16-NOFTZ:  setp.neu.f16    [[PRED:%p[0-9]+]], [[C]], [[D]]
; CHECK-F16-FTZ:  setp.neu.ftz.f16    [[PRED:%p[0-9]+]], [[C]], [[D]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[DF:%f[0-9]+]], [[D]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[CF:%f[0-9]+]], [[C]];
; CHECK-NOF16: setp.neu.f32    [[PRED:%p[0-9]+]], [[CF]], [[DF]]
; CHECK:      selp.b16        [[R:%rs[0-9]+]], [[A]], [[B]], [[PRED]];
; CHECK-NEXT: st.param.b16    [func_retval0], [[R]];
; CHECK-NEXT: ret;
define half @test_select_cc(half %a, half %b, half %c, half %d) #0 {
  %cc = fcmp une half %c, %d
  %r = select i1 %cc, half %a, half %b
  ret half %r
}
358
359; CHECK-LABEL: test_select_cc_f32_f16(
360; CHECK-DAG:  ld.param.f32    [[A:%f[0-9]+]], [test_select_cc_f32_f16_param_0];
361; CHECK-DAG:  ld.param.f32    [[B:%f[0-9]+]], [test_select_cc_f32_f16_param_1];
362; CHECK-DAG:  ld.param.b16    [[C:%rs[0-9]+]], [test_select_cc_f32_f16_param_2];
363; CHECK-DAG:  ld.param.b16    [[D:%rs[0-9]+]], [test_select_cc_f32_f16_param_3];
364; CHECK-F16-NOFTZ:  setp.neu.f16    [[PRED:%p[0-9]+]], [[C]], [[D]]
365; CHECK-F16-FTZ:  setp.neu.ftz.f16    [[PRED:%p[0-9]+]], [[C]], [[D]]
366; CHECK-NOF16-DAG: cvt.f32.f16 [[DF:%f[0-9]+]], [[D]];
367; CHECK-NOF16-DAG: cvt.f32.f16 [[CF:%f[0-9]+]], [[C]];
368; CHECK-NOF16: setp.neu.f32    [[PRED:%p[0-9]+]], [[CF]], [[DF]]
369; CHECK-NEXT: selp.f32        [[R:%f[0-9]+]], [[A]], [[B]], [[PRED]];
370; CHECK-NEXT: st.param.f32    [func_retval0], [[R]];
371; CHECK-NEXT: ret;
; The compare operates on half values while the selected operands are floats.
define float @test_select_cc_f32_f16(float %a, float %b, half %c, half %d) #0 {
  %differs = fcmp une half %c, %d
  %picked = select i1 %differs, float %a, float %b
  ret float %picked
}
377
378; CHECK-LABEL: test_select_cc_f16_f32(
379; CHECK-DAG:  ld.param.b16    [[A:%rs[0-9]+]], [test_select_cc_f16_f32_param_0];
380; CHECK-DAG:  ld.param.f32    [[C:%f[0-9]+]], [test_select_cc_f16_f32_param_2];
381; CHECK-DAG:  ld.param.f32    [[D:%f[0-9]+]], [test_select_cc_f16_f32_param_3];
382; CHECK-NOFTZ-DAG:  setp.neu.f32    [[PRED:%p[0-9]+]], [[C]], [[D]]
383; CHECK-F16-FTZ-DAG:  setp.neu.ftz.f32    [[PRED:%p[0-9]+]], [[C]], [[D]]
384; CHECK-DAG:  ld.param.b16    [[B:%rs[0-9]+]], [test_select_cc_f16_f32_param_1];
385; CHECK-NEXT: selp.b16        [[R:%rs[0-9]+]], [[A]], [[B]], [[PRED]];
386; CHECK-NEXT: st.param.b16    [func_retval0], [[R]];
387; CHECK-NEXT: ret;
; The compare operates on float values while the selected operands are halves.
define half @test_select_cc_f16_f32(half %a, half %b, float %c, float %d) #0 {
  %differs = fcmp une float %c, %d
  %picked = select i1 %differs, half %a, half %b
  ret half %picked
}
393
394; CHECK-LABEL: test_fcmp_une(
395; CHECK-DAG:  ld.param.b16    [[A:%rs[0-9]+]], [test_fcmp_une_param_0];
396; CHECK-DAG:  ld.param.b16    [[B:%rs[0-9]+]], [test_fcmp_une_param_1];
397; CHECK-F16-NOFTZ:  setp.neu.f16    [[PRED:%p[0-9]+]], [[A]], [[B]]
398; CHECK-F16-FTZ:  setp.neu.ftz.f16    [[PRED:%p[0-9]+]], [[A]], [[B]]
399; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
400; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
401; CHECK-NOF16: setp.neu.f32   [[PRED:%p[0-9]+]], [[AF]], [[BF]]
402; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
403; CHECK-NEXT: st.param.b32    [func_retval0], [[R]];
404; CHECK-NEXT: ret;
; Unordered-or-not-equal compare of two half values, returned as i1.
define i1 @test_fcmp_une(half %a, half %b) #0 {
  %is_une = fcmp une half %a, %b
  ret i1 %is_une
}
409
410; CHECK-LABEL: test_fcmp_ueq(
411; CHECK-DAG:  ld.param.b16    [[A:%rs[0-9]+]], [test_fcmp_ueq_param_0];
412; CHECK-DAG:  ld.param.b16    [[B:%rs[0-9]+]], [test_fcmp_ueq_param_1];
413; CHECK-F16-NOFTZ:  setp.equ.f16    [[PRED:%p[0-9]+]], [[A]], [[B]]
414; CHECK-F16-FTZ:  setp.equ.ftz.f16    [[PRED:%p[0-9]+]], [[A]], [[B]]
415; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
416; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
417; CHECK-NOF16: setp.equ.f32   [[PRED:%p[0-9]+]], [[AF]], [[BF]]
418; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
419; CHECK-NEXT: st.param.b32    [func_retval0], [[R]];
420; CHECK-NEXT: ret;
; Unordered-or-equal compare of two half values, returned as i1.
define i1 @test_fcmp_ueq(half %a, half %b) #0 {
  %is_ueq = fcmp ueq half %a, %b
  ret i1 %is_ueq
}
425
426; CHECK-LABEL: test_fcmp_ugt(
427; CHECK-DAG:  ld.param.b16    [[A:%rs[0-9]+]], [test_fcmp_ugt_param_0];
428; CHECK-DAG:  ld.param.b16    [[B:%rs[0-9]+]], [test_fcmp_ugt_param_1];
429; CHECK-F16-NOFTZ:  setp.gtu.f16    [[PRED:%p[0-9]+]], [[A]], [[B]]
430; CHECK-F16-FTZ:  setp.gtu.ftz.f16    [[PRED:%p[0-9]+]], [[A]], [[B]]
431; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
432; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
433; CHECK-NOF16: setp.gtu.f32   [[PRED:%p[0-9]+]], [[AF]], [[BF]]
434; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
435; CHECK-NEXT: st.param.b32    [func_retval0], [[R]];
436; CHECK-NEXT: ret;
; Unordered-or-greater-than compare of two half values, returned as i1.
define i1 @test_fcmp_ugt(half %a, half %b) #0 {
  %is_ugt = fcmp ugt half %a, %b
  ret i1 %is_ugt
}
441
442; CHECK-LABEL: test_fcmp_uge(
443; CHECK-DAG:  ld.param.b16    [[A:%rs[0-9]+]], [test_fcmp_uge_param_0];
444; CHECK-DAG:  ld.param.b16    [[B:%rs[0-9]+]], [test_fcmp_uge_param_1];
445; CHECK-F16-NOFTZ:  setp.geu.f16    [[PRED:%p[0-9]+]], [[A]], [[B]]
446; CHECK-F16-FTZ:  setp.geu.ftz.f16    [[PRED:%p[0-9]+]], [[A]], [[B]]
447; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
448; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
449; CHECK-NOF16: setp.geu.f32   [[PRED:%p[0-9]+]], [[AF]], [[BF]]
450; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
451; CHECK-NEXT: st.param.b32    [func_retval0], [[R]];
452; CHECK-NEXT: ret;
; Unordered-or-greater-or-equal compare of two half values, returned as i1.
define i1 @test_fcmp_uge(half %a, half %b) #0 {
  %is_uge = fcmp uge half %a, %b
  ret i1 %is_uge
}
457
458; CHECK-LABEL: test_fcmp_ult(
459; CHECK-DAG:  ld.param.b16    [[A:%rs[0-9]+]], [test_fcmp_ult_param_0];
460; CHECK-DAG:  ld.param.b16    [[B:%rs[0-9]+]], [test_fcmp_ult_param_1];
461; CHECK-F16-NOFTZ:  setp.ltu.f16    [[PRED:%p[0-9]+]], [[A]], [[B]]
462; CHECK-F16-FTZ:  setp.ltu.ftz.f16    [[PRED:%p[0-9]+]], [[A]], [[B]]
463; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
464; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
465; CHECK-NOF16: setp.ltu.f32   [[PRED:%p[0-9]+]], [[AF]], [[BF]]
466; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
467; CHECK-NEXT: st.param.b32    [func_retval0], [[R]];
468; CHECK-NEXT: ret;
; Unordered-or-less-than compare of two half values, returned as i1.
define i1 @test_fcmp_ult(half %a, half %b) #0 {
  %is_ult = fcmp ult half %a, %b
  ret i1 %is_ult
}
473
474; CHECK-LABEL: test_fcmp_ule(
475; CHECK-DAG:  ld.param.b16    [[A:%rs[0-9]+]], [test_fcmp_ule_param_0];
476; CHECK-DAG:  ld.param.b16    [[B:%rs[0-9]+]], [test_fcmp_ule_param_1];
477; CHECK-F16-NOFTZ:  setp.leu.f16    [[PRED:%p[0-9]+]], [[A]], [[B]]
478; CHECK-F16-FTZ:  setp.leu.ftz.f16    [[PRED:%p[0-9]+]], [[A]], [[B]]
479; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
480; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
481; CHECK-NOF16: setp.leu.f32   [[PRED:%p[0-9]+]], [[AF]], [[BF]]
482; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
483; CHECK-NEXT: st.param.b32    [func_retval0], [[R]];
484; CHECK-NEXT: ret;
; Unordered-or-less-or-equal compare of two half values, returned as i1.
define i1 @test_fcmp_ule(half %a, half %b) #0 {
  %is_ule = fcmp ule half %a, %b
  ret i1 %is_ule
}
489
490
491; CHECK-LABEL: test_fcmp_uno(
492; CHECK-DAG:  ld.param.b16    [[A:%rs[0-9]+]], [test_fcmp_uno_param_0];
493; CHECK-DAG:  ld.param.b16    [[B:%rs[0-9]+]], [test_fcmp_uno_param_1];
494; CHECK-F16-NOFTZ:  setp.nan.f16    [[PRED:%p[0-9]+]], [[A]], [[B]]
495; CHECK-F16-FTZ:  setp.nan.ftz.f16    [[PRED:%p[0-9]+]], [[A]], [[B]]
496; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
497; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
498; CHECK-NOF16: setp.nan.f32   [[PRED:%p[0-9]+]], [[AF]], [[BF]]
499; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
500; CHECK-NEXT: st.param.b32    [func_retval0], [[R]];
501; CHECK-NEXT: ret;
; "Unordered" predicate on two half values, returned as i1.
define i1 @test_fcmp_uno(half %a, half %b) #0 {
  %is_uno = fcmp uno half %a, %b
  ret i1 %is_uno
}
506
507; CHECK-LABEL: test_fcmp_one(
508; CHECK-DAG:  ld.param.b16    [[A:%rs[0-9]+]], [test_fcmp_one_param_0];
509; CHECK-DAG:  ld.param.b16    [[B:%rs[0-9]+]], [test_fcmp_one_param_1];
510; CHECK-F16-NOFTZ:  setp.ne.f16     [[PRED:%p[0-9]+]], [[A]], [[B]]
511; CHECK-F16-FTZ:  setp.ne.ftz.f16     [[PRED:%p[0-9]+]], [[A]], [[B]]
512; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
513; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
514; CHECK-NOF16: setp.ne.f32    [[PRED:%p[0-9]+]], [[AF]], [[BF]]
515; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
516; CHECK-NEXT: st.param.b32    [func_retval0], [[R]];
517; CHECK-NEXT: ret;
; Ordered-and-not-equal compare of two half values, returned as i1.
define i1 @test_fcmp_one(half %a, half %b) #0 {
  %is_one = fcmp one half %a, %b
  ret i1 %is_one
}
522
523; CHECK-LABEL: test_fcmp_oeq(
524; CHECK-DAG:  ld.param.b16    [[A:%rs[0-9]+]], [test_fcmp_oeq_param_0];
525; CHECK-DAG:  ld.param.b16    [[B:%rs[0-9]+]], [test_fcmp_oeq_param_1];
526; CHECK-F16-NOFTZ:  setp.eq.f16     [[PRED:%p[0-9]+]], [[A]], [[B]]
527; CHECK-F16-FTZ:  setp.eq.ftz.f16     [[PRED:%p[0-9]+]], [[A]], [[B]]
528; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
529; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
530; CHECK-NOF16: setp.eq.f32    [[PRED:%p[0-9]+]], [[AF]], [[BF]]
531; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
532; CHECK-NEXT: st.param.b32    [func_retval0], [[R]];
533; CHECK-NEXT: ret;
; Ordered-and-equal compare of two half values, returned as i1.
define i1 @test_fcmp_oeq(half %a, half %b) #0 {
  %is_oeq = fcmp oeq half %a, %b
  ret i1 %is_oeq
}
538
539; CHECK-LABEL: test_fcmp_ogt(
540; CHECK-DAG:  ld.param.b16    [[A:%rs[0-9]+]], [test_fcmp_ogt_param_0];
541; CHECK-DAG:  ld.param.b16    [[B:%rs[0-9]+]], [test_fcmp_ogt_param_1];
542; CHECK-F16-NOFTZ:  setp.gt.f16     [[PRED:%p[0-9]+]], [[A]], [[B]]
543; CHECK-F16-FTZ:  setp.gt.ftz.f16     [[PRED:%p[0-9]+]], [[A]], [[B]]
544; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
545; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
546; CHECK-NOF16: setp.gt.f32    [[PRED:%p[0-9]+]], [[AF]], [[BF]]
547; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
548; CHECK-NEXT: st.param.b32    [func_retval0], [[R]];
549; CHECK-NEXT: ret;
; Ordered-and-greater-than compare of two half values, returned as i1.
define i1 @test_fcmp_ogt(half %a, half %b) #0 {
  %is_ogt = fcmp ogt half %a, %b
  ret i1 %is_ogt
}
554
555; CHECK-LABEL: test_fcmp_oge(
556; CHECK-DAG:  ld.param.b16    [[A:%rs[0-9]+]], [test_fcmp_oge_param_0];
557; CHECK-DAG:  ld.param.b16    [[B:%rs[0-9]+]], [test_fcmp_oge_param_1];
558; CHECK-F16-NOFTZ:  setp.ge.f16     [[PRED:%p[0-9]+]], [[A]], [[B]]
559; CHECK-F16-FTZ:  setp.ge.ftz.f16     [[PRED:%p[0-9]+]], [[A]], [[B]]
560; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
561; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
562; CHECK-NOF16: setp.ge.f32    [[PRED:%p[0-9]+]], [[AF]], [[BF]]
563; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
564; CHECK-NEXT: st.param.b32    [func_retval0], [[R]];
565; CHECK-NEXT: ret;
; Ordered-and-greater-or-equal compare of two half values, returned as i1.
define i1 @test_fcmp_oge(half %a, half %b) #0 {
  %is_oge = fcmp oge half %a, %b
  ret i1 %is_oge
}
570
; Ordered-and-less-than compare of two half values, returned as i1.
; The label directive is live so that the checks below are scoped to this
; function's output rather than to whatever follows the previous function.
; CHECK-LABEL: test_fcmp_olt(
; CHECK-DAG:  ld.param.b16    [[A:%rs[0-9]+]], [test_fcmp_olt_param_0];
; CHECK-DAG:  ld.param.b16    [[B:%rs[0-9]+]], [test_fcmp_olt_param_1];
; CHECK-F16-NOFTZ:  setp.lt.f16     [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-F16-FTZ:  setp.lt.ftz.f16     [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOF16: setp.lt.f32    [[PRED:%p[0-9]+]], [[AF]], [[BF]]
; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32    [func_retval0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_olt(half %a, half %b) #0 {
  %r = fcmp olt half %a, %b
  ret i1 %r
}
586
; Ordered-and-less-or-equal compare of two half values, returned as i1.
; The label directive is live so that the checks below are scoped to this
; function's output rather than to whatever follows the previous function.
; CHECK-LABEL: test_fcmp_ole(
; CHECK-DAG:  ld.param.b16    [[A:%rs[0-9]+]], [test_fcmp_ole_param_0];
; CHECK-DAG:  ld.param.b16    [[B:%rs[0-9]+]], [test_fcmp_ole_param_1];
; CHECK-F16-NOFTZ:  setp.le.f16     [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-F16-FTZ:  setp.le.ftz.f16     [[PRED:%p[0-9]+]], [[A]], [[B]]
; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
; CHECK-NOF16: setp.le.f32    [[PRED:%p[0-9]+]], [[AF]], [[BF]]
; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
; CHECK-NEXT: st.param.b32    [func_retval0], [[R]];
; CHECK-NEXT: ret;
define i1 @test_fcmp_ole(half %a, half %b) #0 {
  %r = fcmp ole half %a, %b
  ret i1 %r
}
602
603; CHECK-LABEL: test_fcmp_ord(
604; CHECK-DAG:  ld.param.b16    [[A:%rs[0-9]+]], [test_fcmp_ord_param_0];
605; CHECK-DAG:  ld.param.b16    [[B:%rs[0-9]+]], [test_fcmp_ord_param_1];
606; CHECK-F16-NOFTZ:  setp.num.f16    [[PRED:%p[0-9]+]], [[A]], [[B]]
607; CHECK-F16-FTZ:  setp.num.ftz.f16    [[PRED:%p[0-9]+]], [[A]], [[B]]
608; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
609; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
610; CHECK-NOF16: setp.num.f32   [[PRED:%p[0-9]+]], [[AF]], [[BF]]
611; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
612; CHECK-NEXT: st.param.b32    [func_retval0], [[R]];
613; CHECK-NEXT: ret;
; "Ordered" predicate on two half values, returned as i1.
define i1 @test_fcmp_ord(half %a, half %b) #0 {
  %is_ord = fcmp ord half %a, %b
  ret i1 %is_ord
}
618
619; CHECK-LABEL: test_br_cc(
620; CHECK-DAG:  ld.param.b16    [[A:%rs[0-9]+]], [test_br_cc_param_0];
621; CHECK-DAG:  ld.param.b16    [[B:%rs[0-9]+]], [test_br_cc_param_1];
622; CHECK-DAG:  ld.param.u64    %[[C:rd[0-9]+]], [test_br_cc_param_2];
623; CHECK-DAG:  ld.param.u64    %[[D:rd[0-9]+]], [test_br_cc_param_3];
624; CHECK-F16-NOFTZ:  setp.lt.f16     [[PRED:%p[0-9]+]], [[A]], [[B]]
625; CHECK-F16-FTZ:  setp.lt.ftz.f16     [[PRED:%p[0-9]+]], [[A]], [[B]]
626; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
627; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
628; CHECK-NOF16: setp.lt.f32    [[PRED:%p[0-9]+]], [[AF]], [[BF]]
629; CHECK-NEXT: @[[PRED]] bra   [[LABEL:\$L__BB.*]];
630; CHECK:      st.u32  [%[[C]]],
631; CHECK:      [[LABEL]]:
632; CHECK:      st.u32  [%[[D]]],
633; CHECK:      ret;
; Conditional branch on an unordered-or-greater-or-equal compare of %a and %b.
; The true successor stores i32 0 through %p1, the false successor through %p2.
define void @test_br_cc(half %a, half %b, ptr %p1, ptr %p2) #0 {
  %ge_or_unordered = fcmp uge half %a, %b
  br i1 %ge_or_unordered, label %taken, label %not_taken
taken:
  store i32 0, ptr %p1
  ret void
not_taken:
  store i32 0, ptr %p2
  ret void
}
644
645; CHECK-LABEL: test_phi(
646; CHECK:      ld.param.u64    %[[P1:rd[0-9]+]], [test_phi_param_0];
647; CHECK:      ld.b16  {{%rs[0-9]+}}, [%[[P1]]];
648; CHECK: [[LOOP:\$L__BB[0-9_]+]]:
649; CHECK:      mov.u16 [[R:%rs[0-9]+]], [[AB:%rs[0-9]+]];
650; CHECK:      ld.b16  [[AB:%rs[0-9]+]], [%[[P1]]];
651; CHECK:      {
652; CHECK:      st.param.b64    [param0], %[[P1]];
653; CHECK:      call.uni (retval0),
654; CHECK-NEXT: test_dummy
655; CHECK:      }
656; CHECK:      setp.eq.b32     [[PRED:%p[0-9]+]], %r{{[0-9]+}}, 1;
657; CHECK:      @[[PRED]] bra   [[LOOP]];
658; CHECK:      st.param.b16    [func_retval0], [[R]];
659; CHECK:      ret;
; Loop-carried half value. %carried starts as the load done before the loop and
; on later trips takes the value reloaded during the previous trip. The loop
; repeats while @test_dummy returns true; the last %carried is returned.
define half @test_phi(ptr %p1) #0 {
start:
  %first = load half, ptr %p1
  br label %body
body:
  %carried = phi half [ %first, %start ], [ %reload, %body ]
  %reload = load half, ptr %p1
  %again = call i1 @test_dummy(ptr %p1)
  br i1 %again, label %body, label %done
done:
  ret half %carried
}
; External predicate called from the loop in @test_phi: takes a pointer,
; returns i1.
declare i1 @test_dummy(ptr) #0
673
674; CHECK-LABEL: test_fptosi_i32(
675; CHECK:      ld.param.b16    [[A:%rs[0-9]+]], [test_fptosi_i32_param_0];
676; CHECK:      cvt.rzi.s32.f16 [[R:%r[0-9]+]], [[A]];
677; CHECK:      st.param.b32    [func_retval0], [[R]];
678; CHECK:      ret;
; Converts a half value to a signed 32-bit integer.
define i32 @test_fptosi_i32(half %a) #0 {
  %as_s32 = fptosi half %a to i32
  ret i32 %as_s32
}
683
; half -> signed i64: expected to be a single cvt.rzi.s64.f16 into a 64-bit
; register, in every run configuration.
684; CHECK-LABEL: test_fptosi_i64(
685; CHECK:      ld.param.b16    [[A:%rs[0-9]+]], [test_fptosi_i64_param_0];
686; CHECK:      cvt.rzi.s64.f16 [[R:%rd[0-9]+]], [[A]];
687; CHECK:      st.param.b64    [func_retval0], [[R]];
688; CHECK:      ret;
689define i64 @test_fptosi_i64(half %a) #0 {
690  %r = fptosi half %a to i64
691  ret i64 %r
692}
693
; half -> unsigned i32: expected to be a single cvt.rzi.u32.f16, in every run
; configuration.
694; CHECK-LABEL: test_fptoui_i32(
695; CHECK:      ld.param.b16    [[A:%rs[0-9]+]], [test_fptoui_i32_param_0];
696; CHECK:      cvt.rzi.u32.f16 [[R:%r[0-9]+]], [[A]];
697; CHECK:      st.param.b32    [func_retval0], [[R]];
698; CHECK:      ret;
699define i32 @test_fptoui_i32(half %a) #0 {
700  %r = fptoui half %a to i32
701  ret i32 %r
702}
703
; half -> unsigned i64: expected to be a single cvt.rzi.u64.f16, in every run
; configuration.
704; CHECK-LABEL: test_fptoui_i64(
705; CHECK:      ld.param.b16    [[A:%rs[0-9]+]], [test_fptoui_i64_param_0];
706; CHECK:      cvt.rzi.u64.f16 [[R:%rd[0-9]+]], [[A]];
707; CHECK:      st.param.b64    [func_retval0], [[R]];
708; CHECK:      ret;
709define i64 @test_fptoui_i64(half %a) #0 {
710  %r = fptoui half %a to i64
711  ret i64 %r
712}
713
; unsigned i32 -> half: expected to be a single cvt.rn.f16.u32 whose 16-bit
; result is stored as the return value, in every run configuration.
714; CHECK-LABEL: test_uitofp_i32(
715; CHECK:      ld.param.u32    [[A:%r[0-9]+]], [test_uitofp_i32_param_0];
716; CHECK:      cvt.rn.f16.u32  [[R:%rs[0-9]+]], [[A]];
717; CHECK:      st.param.b16    [func_retval0], [[R]];
718; CHECK:      ret;
719define half @test_uitofp_i32(i32 %a) #0 {
720  %r = uitofp i32 %a to half
721  ret half %r
722}
723
; unsigned i64 -> half: expected to be a single cvt.rn.f16.u64, in every run
; configuration.
724; CHECK-LABEL: test_uitofp_i64(
725; CHECK:      ld.param.u64    [[A:%rd[0-9]+]], [test_uitofp_i64_param_0];
726; CHECK:      cvt.rn.f16.u64  [[R:%rs[0-9]+]], [[A]];
727; CHECK:      st.param.b16    [func_retval0], [[R]];
728; CHECK:      ret;
729define half @test_uitofp_i64(i64 %a) #0 {
730  %r = uitofp i64 %a to half
731  ret half %r
732}
733
; signed i32 -> half: expected to be a single cvt.rn.f16.s32, in every run
; configuration.
734; CHECK-LABEL: test_sitofp_i32(
735; CHECK:      ld.param.u32    [[A:%r[0-9]+]], [test_sitofp_i32_param_0];
736; CHECK:      cvt.rn.f16.s32  [[R:%rs[0-9]+]], [[A]];
737; CHECK:      st.param.b16    [func_retval0], [[R]];
738; CHECK:      ret;
739define half @test_sitofp_i32(i32 %a) #0 {
740  %r = sitofp i32 %a to half
741  ret half %r
742}
743
; signed i64 -> half: expected to be a single cvt.rn.f16.s64, in every run
; configuration.
744; CHECK-LABEL: test_sitofp_i64(
745; CHECK:      ld.param.u64    [[A:%rd[0-9]+]], [test_sitofp_i64_param_0];
746; CHECK:      cvt.rn.f16.s64  [[R:%rs[0-9]+]], [[A]];
747; CHECK:      st.param.b16    [func_retval0], [[R]];
748; CHECK:      ret;
749define half @test_sitofp_i64(i64 %a) #0 {
750  %r = sitofp i64 %a to half
751  ret half %r
752}
753
; unsigned i32 -> half conversion feeding an fadd with a half parameter.
; With native f16 math the add is expected as add.rn.f16 (add.rn.ftz.f16 in
; the FTZ run). In the no-f16-math run both operands are expected to be
; extended to f32, added with add.rn.f32, and rounded back to f16.
754; CHECK-LABEL: test_uitofp_i32_fadd(
755; CHECK-DAG:  ld.param.u32    [[A:%r[0-9]+]], [test_uitofp_i32_fadd_param_0];
756; CHECK-DAG:  cvt.rn.f16.u32  [[C:%rs[0-9]+]], [[A]];
757; CHECK-DAG:  ld.param.b16    [[B:%rs[0-9]+]], [test_uitofp_i32_fadd_param_1];
758; CHECK-F16-NOFTZ:       add.rn.f16      [[R:%rs[0-9]+]], [[B]], [[C]];
759; CHECK-F16-FTZ:       add.rn.ftz.f16      [[R:%rs[0-9]+]], [[B]], [[C]];
760; CHECK-NOF16-DAG:  cvt.f32.f16    [[B32:%f[0-9]+]], [[B]]
761; CHECK-NOF16-DAG:  cvt.f32.f16    [[C32:%f[0-9]+]], [[C]]
762; CHECK-NOF16-NEXT: add.rn.f32     [[R32:%f[0-9]+]], [[B32]], [[C32]];
763; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
764; CHECK:      st.param.b16    [func_retval0], [[R]];
765; CHECK:      ret;
766define half @test_uitofp_i32_fadd(i32 %a, half %b) #0 {
767  %c = uitofp i32 %a to half
768  %r = fadd half %b, %c
769  ret half %r
770}
771
; signed i32 -> half conversion feeding an fadd with a half parameter.
; With native f16 math the add is expected as add.rn.f16 (add.rn.ftz.f16 in
; the FTZ run). In the no-f16-math run both operands are expected to be
; extended to f32, added with add.rn.f32, and rounded back to f16.
; The no-f16-math checks are enabled (matching test_uitofp_i32_fadd) so that
; R is bound inside this function for that run instead of being inherited from
; an earlier function's capture.
772; CHECK-LABEL: test_sitofp_i32_fadd(
773; CHECK-DAG:  ld.param.u32    [[A:%r[0-9]+]], [test_sitofp_i32_fadd_param_0];
774; CHECK-DAG:  cvt.rn.f16.s32  [[C:%rs[0-9]+]], [[A]];
775; CHECK-DAG:  ld.param.b16    [[B:%rs[0-9]+]], [test_sitofp_i32_fadd_param_1];
776; CHECK-F16-NOFTZ:         add.rn.f16     [[R:%rs[0-9]+]], [[B]], [[C]];
777; CHECK-F16-FTZ:         add.rn.ftz.f16     [[R:%rs[0-9]+]], [[B]], [[C]];
778; CHECK-NOF16-DAG:  cvt.f32.f16    [[B32:%f[0-9]+]], [[B]]
779; CHECK-NOF16-DAG:  cvt.f32.f16    [[C32:%f[0-9]+]], [[C]]
780; CHECK-NOF16-NEXT: add.rn.f32     [[R32:%f[0-9]+]], [[B32]], [[C32]];
781; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
782; CHECK:      st.param.b16    [func_retval0], [[R]];
783; CHECK:      ret;
784define half @test_sitofp_i32_fadd(i32 %a, half %b) #0 {
785  %c = sitofp i32 %a to half
786  %r = fadd half %b, %c
787  ret half %r
788}
789
; float -> half truncation: expected to be a single cvt.rn.f16.f32 in every
; run configuration.
790; CHECK-LABEL: test_fptrunc_float(
791; CHECK:      ld.param.f32    [[A:%f[0-9]+]], [test_fptrunc_float_param_0];
792; CHECK:      cvt.rn.f16.f32  [[R:%rs[0-9]+]], [[A]];
793; CHECK:      st.param.b16    [func_retval0], [[R]];
794; CHECK:      ret;
795define half @test_fptrunc_float(float %a) #0 {
796  %r = fptrunc float %a to half
797  ret half %r
798}
799
; double -> half truncation: expected to be a single cvt.rn.f16.f64 (no
; intermediate f32 step is checked for) in every run configuration.
800; CHECK-LABEL: test_fptrunc_double(
801; CHECK:      ld.param.f64    [[A:%fd[0-9]+]], [test_fptrunc_double_param_0];
802; CHECK:      cvt.rn.f16.f64  [[R:%rs[0-9]+]], [[A]];
803; CHECK:      st.param.b16    [func_retval0], [[R]];
804; CHECK:      ret;
805define half @test_fptrunc_double(double %a) #0 {
806  %r = fptrunc double %a to half
807  ret half %r
808}
809
; half -> float extension: expected as cvt.f32.f16 in the non-FTZ runs and as
; cvt.ftz.f32.f16 in the FTZ run.
810; CHECK-LABEL: test_fpext_float(
811; CHECK:      ld.param.b16    [[A:%rs[0-9]+]], [test_fpext_float_param_0];
812; CHECK-NOFTZ:      cvt.f32.f16     [[R:%f[0-9]+]], [[A]];
813; CHECK-F16-FTZ:      cvt.ftz.f32.f16     [[R:%f[0-9]+]], [[A]];
814; CHECK:      st.param.f32    [func_retval0], [[R]];
815; CHECK:      ret;
816define float @test_fpext_float(half %a) #0 {
817  %r = fpext half %a to float
818  ret float %r
819}
820
; half -> double extension: expected as cvt.f64.f16 in every run
; configuration (the FTZ run has no separate expectation here).
821; CHECK-LABEL: test_fpext_double(
822; CHECK:      ld.param.b16    [[A:%rs[0-9]+]], [test_fpext_double_param_0];
823; CHECK:      cvt.f64.f16     [[R:%fd[0-9]+]], [[A]];
824; CHECK:      st.param.f64    [func_retval0], [[R]];
825; CHECK:      ret;
826define double @test_fpext_double(half %a) #0 {
827  %r = fpext half %a to double
828  ret double %r
829}
830
831
; half -> i16 bitcast: no floating-point conversion is expected. The checks
; show the 16-bit value being widened with cvt.u32.u16 and the i16 result
; being stored as a 32-bit return value.
832; CHECK-LABEL: test_bitcast_halftoi16(
833; CHECK:      ld.param.b16    [[AH:%rs[0-9]+]], [test_bitcast_halftoi16_param_0];
834; CHECK:      cvt.u32.u16     [[R:%r[0-9]+]], [[AH]]
835; CHECK:      st.param.b32    [func_retval0], [[R]];
836; CHECK:      ret;
837define i16 @test_bitcast_halftoi16(half %a) #0 {
838  %r = bitcast half %a to i16
839  ret i16 %r
840}
841
; i16 -> half bitcast: the loaded 16-bit register is expected to be stored
; directly as the return value.
842; CHECK-LABEL: test_bitcast_i16tohalf(
843; CHECK:      ld.param.u16    [[AS:%rs[0-9]+]], [test_bitcast_i16tohalf_param_0];
844; CHECK:      st.param.b16    [func_retval0], [[AS]];
845; CHECK:      ret;
846define half @test_bitcast_i16tohalf(i16 %a) #0 {
847  %r = bitcast i16 %a to half
848  ret half %r
849}
850
851
; Declarations of the half-precision intrinsics used by the tests below.
; powi, pow, exp, exp2, log, log10 and log2 are declared as well, although
; their tests are currently commented out because they would need a libcall.
852declare half @llvm.sqrt.f16(half %a) #0
853declare half @llvm.powi.f16.i32(half %a, i32 %b) #0
854declare half @llvm.sin.f16(half %a) #0
855declare half @llvm.cos.f16(half %a) #0
856declare half @llvm.pow.f16(half %a, half %b) #0
857declare half @llvm.exp.f16(half %a) #0
858declare half @llvm.exp2.f16(half %a) #0
859declare half @llvm.log.f16(half %a) #0
860declare half @llvm.log10.f16(half %a) #0
861declare half @llvm.log2.f16(half %a) #0
862declare half @llvm.fma.f16(half %a, half %b, half %c) #0
863declare half @llvm.fabs.f16(half %a) #0
864declare half @llvm.minnum.f16(half %a, half %b) #0
865declare half @llvm.maxnum.f16(half %a, half %b) #0
866declare half @llvm.copysign.f16(half %a, half %b) #0
867declare half @llvm.floor.f16(half %a) #0
868declare half @llvm.ceil.f16(half %a) #0
869declare half @llvm.trunc.f16(half %a) #0
870declare half @llvm.rint.f16(half %a) #0
871declare half @llvm.nearbyint.f16(half %a) #0
872declare half @llvm.round.f16(half %a) #0
873declare half @llvm.roundeven.f16(half %a) #0
874declare half @llvm.fmuladd.f16(half %a, half %b, half %c) #0
875
; llvm.sqrt.f16 is expected to be computed in f32 in every run: extend the
; operand, sqrt.rn.f32, then round back with cvt.rn.f16.f32. The FTZ run
; expects the .ftz forms of the extension and of the square root.
876; CHECK-LABEL: test_sqrt(
877; CHECK:      ld.param.b16    [[A:%rs[0-9]+]], [test_sqrt_param_0];
878; CHECK-NOFTZ:      cvt.f32.f16     [[AF:%f[0-9]+]], [[A]];
879; CHECK-NOFTZ:      sqrt.rn.f32     [[RF:%f[0-9]+]], [[AF]];
880; CHECK-F16-FTZ:      cvt.ftz.f32.f16     [[AF:%f[0-9]+]], [[A]];
881; CHECK-F16-FTZ:      sqrt.rn.ftz.f32     [[RF:%f[0-9]+]], [[AF]];
882; CHECK:      cvt.rn.f16.f32  [[R:%rs[0-9]+]], [[RF]];
883; CHECK:      st.param.b16    [func_retval0], [[R]];
884; CHECK:      ret;
885define half @test_sqrt(half %a) #0 {
886  %r = call half @llvm.sqrt.f16(half %a)
887  ret half %r
888}
889
890;;; Can't do this yet: requires libcall.
891; XCHECK-LABEL: test_powi(
892;define half @test_powi(half %a, i32 %b) #0 {
893;  %r = call half @llvm.powi.f16.i32(half %a, i32 %b)
894;  ret half %r
895;}
896
; llvm.sin.f16 is expected to be computed in f32 via sin.approx.f32 and then
; rounded back to f16. Only the half -> float extension differs between the
; FTZ and non-FTZ runs; the sin.approx.f32 spelling is shared by all runs.
; The function carries attribute group #1 in addition to #0.
897; CHECK-LABEL: test_sin(
898; CHECK:      ld.param.b16    [[A:%rs[0-9]+]], [test_sin_param_0];
899; CHECK-NOFTZ:      cvt.f32.f16     [[AF:%f[0-9]+]], [[A]];
900; CHECK-F16-FTZ:      cvt.ftz.f32.f16     [[AF:%f[0-9]+]], [[A]];
901; CHECK:      sin.approx.f32  [[RF:%f[0-9]+]], [[AF]];
902; CHECK:      cvt.rn.f16.f32  [[R:%rs[0-9]+]], [[RF]];
903; CHECK:      st.param.b16    [func_retval0], [[R]];
904; CHECK:      ret;
905define half @test_sin(half %a) #0 #1 {
906  %r = call half @llvm.sin.f16(half %a)
907  ret half %r
908}
909
; llvm.cos.f16 is expected to be computed in f32 via cos.approx.f32 and then
; rounded back to f16. Only the half -> float extension differs between the
; FTZ and non-FTZ runs; the cos.approx.f32 spelling is shared by all runs.
; The function carries attribute group #1 in addition to #0.
910; CHECK-LABEL: test_cos(
911; CHECK:      ld.param.b16    [[A:%rs[0-9]+]], [test_cos_param_0];
912; CHECK-NOFTZ:      cvt.f32.f16     [[AF:%f[0-9]+]], [[A]];
913; CHECK-F16-FTZ:      cvt.ftz.f32.f16     [[AF:%f[0-9]+]], [[A]];
914; CHECK:      cos.approx.f32  [[RF:%f[0-9]+]], [[AF]];
915; CHECK:      cvt.rn.f16.f32  [[R:%rs[0-9]+]], [[RF]];
916; CHECK:      st.param.b16    [func_retval0], [[R]];
917; CHECK:      ret;
918define half @test_cos(half %a) #0 #1 {
919  %r = call half @llvm.cos.f16(half %a)
920  ret half %r
921}
922
923;;; Can't do this yet: requires libcall.
924; XCHECK-LABEL: test_pow(
925;define half @test_pow(half %a, half %b) #0 {
926;  %r = call half @llvm.pow.f16(half %a, half %b)
927;  ret half %r
928;}
929
930;;; Can't do this yet: requires libcall.
931; XCHECK-LABEL: test_exp(
932;define half @test_exp(half %a) #0 {
933;  %r = call half @llvm.exp.f16(half %a)
934;  ret half %r
935;}
936
937;;; Can't do this yet: requires libcall.
938; XCHECK-LABEL: test_exp2(
939;define half @test_exp2(half %a) #0 {
940;  %r = call half @llvm.exp2.f16(half %a)
941;  ret half %r
942;}
943
944;;; Can't do this yet: requires libcall.
945; XCHECK-LABEL: test_log(
946;define half @test_log(half %a) #0 {
947;  %r = call half @llvm.log.f16(half %a)
948;  ret half %r
949;}
950
951;;; Can't do this yet: requires libcall.
952; XCHECK-LABEL: test_log10(
953;define half @test_log10(half %a) #0 {
954;  %r = call half @llvm.log10.f16(half %a)
955;  ret half %r
956;}
957
958;;; Can't do this yet: requires libcall.
959; XCHECK-LABEL: test_log2(
960;define half @test_log2(half %a) #0 {
961;  %r = call half @llvm.log2.f16(half %a)
962;  ret half %r
963;}
964
; llvm.fma.f16 with three half parameters. With native f16 math a single
; fma.rn.f16 is expected (fma.rn.ftz.f16 in the FTZ run). In the no-f16-math
; run all three operands are expected to be extended to f32, combined with
; fma.rn.f32, and rounded back to f16.
965; CHECK-LABEL: test_fma(
966; CHECK-DAG:  ld.param.b16    [[A:%rs[0-9]+]], [test_fma_param_0];
967; CHECK-DAG:  ld.param.b16    [[B:%rs[0-9]+]], [test_fma_param_1];
968; CHECK-DAG:  ld.param.b16    [[C:%rs[0-9]+]], [test_fma_param_2];
969; CHECK-F16-NOFTZ:      fma.rn.f16      [[R:%rs[0-9]+]], [[A]], [[B]], [[C]];
970; CHECK-F16-FTZ:      fma.rn.ftz.f16      [[R:%rs[0-9]+]], [[A]], [[B]], [[C]];
971; CHECK-NOF16-DAG:  cvt.f32.f16    [[A32:%f[0-9]+]], [[A]]
972; CHECK-NOF16-DAG:  cvt.f32.f16    [[B32:%f[0-9]+]], [[B]]
973; CHECK-NOF16-DAG:  cvt.f32.f16    [[C32:%f[0-9]+]], [[C]]
974; CHECK-NOF16-NEXT: fma.rn.f32     [[R32:%f[0-9]+]], [[A32]], [[B32]], [[C32]];
975; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
976; CHECK:      st.param.b16    [func_retval0], [[R]];
977; CHECK:      ret;
978define half @test_fma(half %a, half %b, half %c) #0 {
979  %r = call half @llvm.fma.f16(half %a, half %b, half %c)
980  ret half %r
981}
982
; llvm.fabs.f16 is expected to be computed in f32 in every run: extend,
; abs.f32, then round back with cvt.rn.f16.f32. The FTZ run expects the .ftz
; forms of the extension and of abs.
983; CHECK-LABEL: test_fabs(
984; CHECK:      ld.param.b16    [[A:%rs[0-9]+]], [test_fabs_param_0];
985; CHECK-NOFTZ:      cvt.f32.f16     [[AF:%f[0-9]+]], [[A]];
986; CHECK-NOFTZ:      abs.f32         [[RF:%f[0-9]+]], [[AF]];
987; CHECK-F16-FTZ:      cvt.ftz.f32.f16     [[AF:%f[0-9]+]], [[A]];
988; CHECK-F16-FTZ:      abs.ftz.f32         [[RF:%f[0-9]+]], [[AF]];
989; CHECK:      cvt.rn.f16.f32  [[R:%rs[0-9]+]], [[RF]];
990; CHECK:      st.param.b16    [func_retval0], [[R]];
991; CHECK:      ret;
992define half @test_fabs(half %a) #0 {
993  %r = call half @llvm.fabs.f16(half %a)
994  ret half %r
995}
996
; llvm.minnum.f16 is expected to be computed in f32 in every run: both
; operands are extended, combined with min.f32, and the result is rounded back
; to f16. The FTZ run expects the .ftz forms of the extensions and of min.
997; CHECK-LABEL: test_minnum(
998; CHECK-DAG:  ld.param.b16    [[A:%rs[0-9]+]], [test_minnum_param_0];
999; CHECK-DAG:  ld.param.b16    [[B:%rs[0-9]+]], [test_minnum_param_1];
1000; CHECK-NOFTZ-DAG:  cvt.f32.f16     [[AF:%f[0-9]+]], [[A]];
1001; CHECK-NOFTZ-DAG:  cvt.f32.f16     [[BF:%f[0-9]+]], [[B]];
1002; CHECK-NOFTZ:      min.f32         [[RF:%f[0-9]+]], [[AF]], [[BF]];
1003; CHECK-F16-FTZ-DAG:  cvt.ftz.f32.f16     [[AF:%f[0-9]+]], [[A]];
1004; CHECK-F16-FTZ-DAG:  cvt.ftz.f32.f16     [[BF:%f[0-9]+]], [[B]];
1005; CHECK-F16-FTZ:      min.ftz.f32         [[RF:%f[0-9]+]], [[AF]], [[BF]];
1006; CHECK:      cvt.rn.f16.f32  [[R:%rs[0-9]+]], [[RF]];
1007; CHECK:      st.param.b16    [func_retval0], [[R]];
1008; CHECK:      ret;
1009define half @test_minnum(half %a, half %b) #0 {
1010  %r = call half @llvm.minnum.f16(half %a, half %b)
1011  ret half %r
1012}
1013
; llvm.maxnum.f16 is expected to be computed in f32 in every run: both
; operands are extended, combined with max.f32, and the result is rounded back
; to f16. The FTZ run expects the .ftz forms of the extensions and of max.
1014; CHECK-LABEL: test_maxnum(
1015; CHECK-DAG:  ld.param.b16    [[A:%rs[0-9]+]], [test_maxnum_param_0];
1016; CHECK-DAG:  ld.param.b16    [[B:%rs[0-9]+]], [test_maxnum_param_1];
1017; CHECK-NOFTZ-DAG:  cvt.f32.f16     [[AF:%f[0-9]+]], [[A]];
1018; CHECK-NOFTZ-DAG:  cvt.f32.f16     [[BF:%f[0-9]+]], [[B]];
1019; CHECK-NOFTZ:      max.f32         [[RF:%f[0-9]+]], [[AF]], [[BF]];
1020; CHECK-F16-FTZ-DAG:  cvt.ftz.f32.f16     [[AF:%f[0-9]+]], [[A]];
1021; CHECK-F16-FTZ-DAG:  cvt.ftz.f32.f16     [[BF:%f[0-9]+]], [[B]];
1022; CHECK-F16-FTZ:      max.ftz.f32         [[RF:%f[0-9]+]], [[AF]], [[BF]];
1023; CHECK:      cvt.rn.f16.f32  [[R:%rs[0-9]+]], [[RF]];
1024; CHECK:      st.param.b16    [func_retval0], [[R]];
1025; CHECK:      ret;
1026define half @test_maxnum(half %a, half %b) #0 {
1027  %r = call half @llvm.maxnum.f16(half %a, half %b)
1028  ret half %r
1029}
1030
; llvm.copysign.f16 on two halves is expected to be pure 16-bit integer logic
; in every run: mask the magnitude operand with 32767 (0x7FFF), mask the sign
; operand with -32768 (0x8000), and OR the two results together.
1031; CHECK-LABEL: test_copysign(
1032; CHECK-DAG:  ld.param.b16    [[AH:%rs[0-9]+]], [test_copysign_param_0];
1033; CHECK-DAG:  ld.param.b16    [[BH:%rs[0-9]+]], [test_copysign_param_1];
1034; CHECK-DAG:  and.b16         [[AX:%rs[0-9]+]], [[AH]], 32767;
1035; CHECK-DAG:  and.b16         [[BX:%rs[0-9]+]], [[BH]], -32768;
1036; CHECK:      or.b16          [[RX:%rs[0-9]+]], [[AX]], [[BX]];
1037; CHECK:      st.param.b16    [func_retval0], [[RX]];
1038; CHECK:      ret;
1039define half @test_copysign(half %a, half %b) #0 {
1040  %r = call half @llvm.copysign.f16(half %a, half %b)
1041  ret half %r
1042}
1043
; copysign where the sign operand is a float truncated to half. The checks
; expect the sign to be taken from the f32 bit pattern: the float is moved to
; a 32-bit integer register, masked with -2147483648 (0x80000000), unpacked
; into 16-bit pieces, and the second piece is ORed into the half magnitude.
; NOTE(review): presumably the second piece is the upper 16 bits - confirm
; against the PTX vector-unpack form of mov.
1044; CHECK-LABEL: test_copysign_f32(
1045; CHECK-DAG:  ld.param.b16    [[AH:%rs[0-9]+]], [test_copysign_f32_param_0];
1046; CHECK-DAG:  ld.param.f32    [[BF:%f[0-9]+]], [test_copysign_f32_param_1];
1047; CHECK-DAG:  mov.b32         [[B:%r[0-9]+]], [[BF]];
1048; CHECK-DAG:  and.b16         [[AX:%rs[0-9]+]], [[AH]], 32767;
1049; CHECK-DAG:  and.b32         [[BX0:%r[0-9]+]], [[B]], -2147483648;
1050; CHECK-DAG:  mov.b32         {tmp, [[BX2:%rs[0-9]+]]}, [[BX0]];
1051; CHECK:      or.b16          [[RX:%rs[0-9]+]], [[AX]], [[BX2]];
1052; CHECK:      st.param.b16    [func_retval0], [[RX]];
1053; CHECK:      ret;
1054define half @test_copysign_f32(half %a, float %b) #0 {
1055  %tb = fptrunc float %b to half
1056  %r = call half @llvm.copysign.f16(half %a, half %tb)
1057  ret half %r
1058}
1059
; copysign where the sign operand is a double truncated to half. The checks
; expect the sign to be taken from the f64 bit pattern: the double is moved to
; a 64-bit integer register, masked with the 64-bit sign bit, shifted right by
; 48 so that bit 63 lands in bit 15, narrowed to 16 bits, and ORed into the
; half magnitude.
1060; CHECK-LABEL: test_copysign_f64(
1061; CHECK-DAG:  ld.param.b16    [[AH:%rs[0-9]+]], [test_copysign_f64_param_0];
1062; CHECK-DAG:  ld.param.f64    [[BD:%fd[0-9]+]], [test_copysign_f64_param_1];
1063; CHECK-DAG:  mov.b64         [[B:%rd[0-9]+]], [[BD]];
1064; CHECK-DAG:  and.b16         [[AX:%rs[0-9]+]], [[AH]], 32767;
1065; CHECK-DAG:  and.b64         [[BX0:%rd[0-9]+]], [[B]], -9223372036854775808;
1066; CHECK-DAG:  shr.u64         [[BX1:%rd[0-9]+]], [[BX0]], 48;
1067; CHECK-DAG:  cvt.u16.u64     [[BX2:%rs[0-9]+]], [[BX1]];
1068; CHECK:      or.b16          [[RX:%rs[0-9]+]], [[AX]], [[BX2]];
1069; CHECK:      st.param.b16    [func_retval0], [[RX]];
1070; CHECK:      ret;
1071define half @test_copysign_f64(half %a, double %b) #0 {
1072  %tb = fptrunc double %b to half
1073  %r = call half @llvm.copysign.f16(half %a, half %tb)
1074  ret half %r
1075}
1076
; copysign on two halves whose result is then extended to float. The checks
; expect the same 16-bit and/and/or sequence as test_copysign, followed by a
; half -> float conversion (the .ftz form in the FTZ run).
1077; CHECK-LABEL: test_copysign_extended(
1078; CHECK-DAG:  ld.param.b16    [[AH:%rs[0-9]+]], [test_copysign_extended_param_0];
1079; CHECK-DAG:  ld.param.b16    [[BH:%rs[0-9]+]], [test_copysign_extended_param_1];
1080; CHECK-DAG:  and.b16         [[AX:%rs[0-9]+]], [[AH]], 32767;
1081; CHECK-DAG:  and.b16         [[BX:%rs[0-9]+]], [[BH]], -32768;
1082; CHECK:      or.b16          [[RX:%rs[0-9]+]], [[AX]], [[BX]];
1083; CHECK-NOFTZ: cvt.f32.f16     [[XR:%f[0-9]+]], [[RX]];
1084; CHECK-F16-FTZ:   cvt.ftz.f32.f16 [[XR:%f[0-9]+]], [[RX]];
1085; CHECK:      st.param.f32    [func_retval0], [[XR]];
1086; CHECK:      ret;
1087define float @test_copysign_extended(half %a, half %b) #0 {
1088  %r = call half @llvm.copysign.f16(half %a, half %b)
1089  %xr = fpext half %r to float
1090  ret float %xr
1091}
1092
; llvm.floor.f16 is expected to be a single f16 -> f16 cvt.rmi.f16.f16 in
; every run configuration.
1093; CHECK-LABEL: test_floor(
1094; CHECK:      ld.param.b16    [[A:%rs[0-9]+]], [test_floor_param_0];
1095; CHECK:      cvt.rmi.f16.f16 [[R:%rs[0-9]+]], [[A]];
1096; CHECK:      st.param.b16    [func_retval0], [[R]];
1097; CHECK:      ret;
1098define half @test_floor(half %a) #0 {
1099  %r = call half @llvm.floor.f16(half %a)
1100  ret half %r
1101}
1102
; llvm.ceil.f16 is expected to be a single f16 -> f16 cvt.rpi.f16.f16 in
; every run configuration.
1103; CHECK-LABEL: test_ceil(
1104; CHECK:      ld.param.b16    [[A:%rs[0-9]+]], [test_ceil_param_0];
1105; CHECK:      cvt.rpi.f16.f16 [[R:%rs[0-9]+]], [[A]];
1106; CHECK:      st.param.b16    [func_retval0], [[R]];
1107; CHECK:      ret;
1108define half @test_ceil(half %a) #0 {
1109  %r = call half @llvm.ceil.f16(half %a)
1110  ret half %r
1111}
1112
; llvm.trunc.f16 is expected to be a single f16 -> f16 cvt.rzi.f16.f16 in
; every run configuration.
1113; CHECK-LABEL: test_trunc(
1114; CHECK:      ld.param.b16    [[A:%rs[0-9]+]], [test_trunc_param_0];
1115; CHECK:      cvt.rzi.f16.f16 [[R:%rs[0-9]+]], [[A]];
1116; CHECK:      st.param.b16    [func_retval0], [[R]];
1117; CHECK:      ret;
1118define half @test_trunc(half %a) #0 {
1119  %r = call half @llvm.trunc.f16(half %a)
1120  ret half %r
1121}
1122
; llvm.rint.f16 is expected to be a single f16 -> f16 cvt.rni.f16.f16 in
; every run configuration (the same instruction expected for nearbyint and
; roundeven).
1123; CHECK-LABEL: test_rint(
1124; CHECK:      ld.param.b16    [[A:%rs[0-9]+]], [test_rint_param_0];
1125; CHECK:      cvt.rni.f16.f16 [[R:%rs[0-9]+]], [[A]];
1126; CHECK:      st.param.b16    [func_retval0], [[R]];
1127; CHECK:      ret;
1128define half @test_rint(half %a) #0 {
1129  %r = call half @llvm.rint.f16(half %a)
1130  ret half %r
1131}
1132
; llvm.nearbyint.f16 is expected to be a single f16 -> f16 cvt.rni.f16.f16 in
; every run configuration (the same instruction expected for rint and
; roundeven).
1133; CHECK-LABEL: test_nearbyint(
1134; CHECK:      ld.param.b16    [[A:%rs[0-9]+]], [test_nearbyint_param_0];
1135; CHECK:      cvt.rni.f16.f16 [[R:%rs[0-9]+]], [[A]];
1136; CHECK:      st.param.b16    [func_retval0], [[R]];
1137; CHECK:      ret;
1138define half @test_nearbyint(half %a) #0 {
1139  %r = call half @llvm.nearbyint.f16(half %a)
1140  ret half %r
1141}
1142
; llvm.roundeven.f16 is expected to be a single f16 -> f16 cvt.rni.f16.f16 in
; every run configuration (the same instruction expected for rint and
; nearbyint).
1143; CHECK-LABEL: test_roundeven(
1144; CHECK:      ld.param.b16    [[A:%rs[0-9]+]], [test_roundeven_param_0];
1145; CHECK:      cvt.rni.f16.f16 [[R:%rs[0-9]+]], [[A]];
1146; CHECK:      st.param.b16    [func_retval0], [[R]];
1147; CHECK:      ret;
1148define half @test_roundeven(half %a) #0 {
1149  %r = call half @llvm.roundeven.f16(half %a)
1150  ret half %r
1151}
1152
; llvm.round.f16 has no single-instruction expectation. The checks only pin
; two landmarks of the expanded sequence, using wildcards for everything
; else: a 32-bit AND with -2147483648 (0x80000000, the f32 sign bit) and an OR
; of that result with 1056964608 (0x3F000000, the f32 encoding of 0.5).
1153; CHECK-LABEL: test_round(
1154; CHECK:      ld.param.b16    {{.*}}, [test_round_param_0];
1155; check the use of sign mask and 0.5 to implement round
1156; CHECK:      and.b32 [[R:%r[0-9]+]], {{.*}}, -2147483648;
1157; CHECK:      or.b32 {{.*}}, [[R]], 1056964608;
1158; CHECK:      st.param.b16    [func_retval0], {{.*}};
1159; CHECK:      ret;
1160define half @test_round(half %a) #0 {
1161  %r = call half @llvm.round.f16(half %a)
1162  ret half %r
1163}
1164
; llvm.fmuladd.f16 has the same expectations as llvm.fma.f16: a single
; fma.rn.f16 with native f16 math (fma.rn.ftz.f16 in the FTZ run). In the
; no-f16-math run the operands are extended to f32, combined with fma.rn.f32,
; and rounded back to f16.
1165; CHECK-LABEL: test_fmuladd(
1166; CHECK-DAG:  ld.param.b16    [[A:%rs[0-9]+]], [test_fmuladd_param_0];
1167; CHECK-DAG:  ld.param.b16    [[B:%rs[0-9]+]], [test_fmuladd_param_1];
1168; CHECK-DAG:  ld.param.b16    [[C:%rs[0-9]+]], [test_fmuladd_param_2];
1169; CHECK-F16-NOFTZ:        fma.rn.f16     [[R:%rs[0-9]+]], [[A]], [[B]], [[C]];
1170; CHECK-F16-FTZ:        fma.rn.ftz.f16     [[R:%rs[0-9]+]], [[A]], [[B]], [[C]];
1171; CHECK-NOF16-DAG:  cvt.f32.f16    [[A32:%f[0-9]+]], [[A]]
1172; CHECK-NOF16-DAG:  cvt.f32.f16    [[B32:%f[0-9]+]], [[B]]
1173; CHECK-NOF16-DAG:  cvt.f32.f16    [[C32:%f[0-9]+]], [[C]]
1174; CHECK-NOF16-NEXT: fma.rn.f32     [[R32:%f[0-9]+]], [[A32]], [[B32]], [[C32]];
1175; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
1176; CHECK:      st.param.b16    [func_retval0], [[R]];
1177; CHECK:      ret;
1178define half @test_fmuladd(half %a, half %b, half %c) #0 {
1179  %r = call half @llvm.fmuladd.f16(half %a, half %b, half %c)
1180  ret half %r
1181}
1182
; fneg on a scalar half. With native f16 math a neg.f16 is expected
; (neg.ftz.f16 in the FTZ run). In the no-f16-math run the sign bit is
; expected to be flipped with a 16-bit XOR against -32768 (0x8000).
1183; CHECK-LABEL: test_neg_f16(
1184; CHECK-F16-NOFTZ: neg.f16
1185; CHECK-F16-FTZ: neg.ftz.f16
1186; CHECK-NOF16: xor.b16  	%rs{{.*}}, %rs{{.*}}, -32768
1187define half @test_neg_f16(half noundef %arg) #0 {
1188  %res = fneg half %arg
1189  ret half %res
1190}
1191
; fneg on a <2 x half> vector. With native f16 math a single packed neg.f16x2
; is expected (neg.ftz.f16x2 in the FTZ run). In the no-f16-math run two
; separate 16-bit XORs against -32768 (0x8000) are expected, one per element.
1192; CHECK-LABEL: test_neg_f16x2(
1193; CHECK-F16-NOFTZ: neg.f16x2
1194; CHECK-F16-FTZ: neg.ftz.f16x2
1195; CHECK-NOF16: xor.b16  	%rs{{.*}}, %rs{{.*}}, -32768
1196; CHECK-NOF16: xor.b16  	%rs{{.*}}, %rs{{.*}}, -32768
1197define <2 x half> @test_neg_f16x2(<2 x half> noundef %arg) #0 {
1198  %res = fneg <2 x half> %arg
1199  ret <2 x half> %res
1200}
1201
; #0 is attached to the test functions and intrinsic declarations in this
; file. #1 is added on top of #0 only for test_sin and test_cos.
; NOTE(review): presumably unsafe-fp-math is what permits the sin.approx /
; cos.approx lowering those two tests expect - confirm against the backend.
1202attributes #0 = { nounwind }
1203attributes #1 = { "unsafe-fp-math" = "true" }
1204