xref: /llvm-project/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll (revision 310e79875752886a7713911e2a1ec14bc75bd4b3)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; ## Full FP16 support enabled by default.
3; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 \
4; RUN:          -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
5; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes CHECK,CHECK-F16 %s
6; RUN: %if ptxas %{                                                           \
7; RUN:   llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 \
8; RUN:          -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
9; RUN:   | %ptxas-verify -arch=sm_53                                          \
10; RUN: %}
11; ## FP16 support explicitly disabled.
12; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 \
13; RUN:          -O0 -disable-post-ra -frame-pointer=all --nvptx-no-f16-math \
14; RUN:           -verify-machineinstrs \
15; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes CHECK,CHECK-NOF16 %s
16; RUN: %if ptxas %{                                                           \
17; RUN:   llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 \
18; RUN:          -O0 -disable-post-ra -frame-pointer=all --nvptx-no-f16-math   \
19; RUN:           -verify-machineinstrs                                        \
20; RUN:   | %ptxas-verify -arch=sm_53                                          \
21; RUN: %}
22; ## FP16 is not supported by hardware.
23; RUN: llc < %s -O0 -mtriple=nvptx64-nvidia-cuda -mcpu=sm_52 \
24; RUN:          -disable-post-ra -frame-pointer=all -verify-machineinstrs \
25; RUN: | FileCheck -allow-deprecated-dag-overlap -check-prefixes CHECK,CHECK-NOF16 %s
26; RUN: %if ptxas %{                                                               \
27; RUN:   llc < %s -O0 -mtriple=nvptx64-nvidia-cuda -mcpu=sm_52 \
28; RUN:          -disable-post-ra -frame-pointer=all -verify-machineinstrs         \
29; RUN:   | %ptxas-verify -arch=sm_52                                              \
30; RUN: %}
31
; Module-level data layout string for this test file.
32target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
33
; Returns the constant vector <half 1.0, half 2.0>.
; The expected PTX materializes both lanes as one 32-bit immediate
; (1073757184 == 0x40003C00) and stores it to func_retval0.
34define <2 x half> @test_ret_const() #0 {
35; CHECK-LABEL: test_ret_const(
36; CHECK:       {
37; CHECK-NEXT:    .reg .b32 %r<2>;
38; CHECK-EMPTY:
39; CHECK-NEXT:  // %bb.0:
40; CHECK-NEXT:    mov.b32 %r1, 1073757184;
41; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
42; CHECK-NEXT:    ret;
43  ret <2 x half> <half 1.0, half 2.0>
44}
45
; Extracts element 0 of a <2 x half> argument.
; The expected PTX unpacks the 32-bit parameter with mov.b32, keeps the
; first unpacked 16-bit register and routes the second into a scratch tmp.
46define half @test_extract_0(<2 x half> %a) #0 {
47; CHECK-LABEL: test_extract_0(
48; CHECK:       {
49; CHECK-NEXT:    .reg .b16 %rs<2>;
50; CHECK-NEXT:    .reg .b32 %r<2>;
51; CHECK-EMPTY:
52; CHECK-NEXT:  // %bb.0:
53; CHECK-NEXT:    ld.param.b32 %r1, [test_extract_0_param_0];
54; CHECK-NEXT:    { .reg .b16 tmp; mov.b32 {%rs1, tmp}, %r1; }
55; CHECK-NEXT:    st.param.b16 [func_retval0], %rs1;
56; CHECK-NEXT:    ret;
57  %e = extractelement <2 x half> %a, i32 0
58  ret half %e
59}
60
; Extracts element 1 of a <2 x half> argument.
; Mirror image of test_extract_0: the expected mov.b32 unpack keeps the
; second 16-bit register and routes the first into a scratch tmp.
61define half @test_extract_1(<2 x half> %a) #0 {
62; CHECK-LABEL: test_extract_1(
63; CHECK:       {
64; CHECK-NEXT:    .reg .b16 %rs<2>;
65; CHECK-NEXT:    .reg .b32 %r<2>;
66; CHECK-EMPTY:
67; CHECK-NEXT:  // %bb.0:
68; CHECK-NEXT:    ld.param.b32 %r1, [test_extract_1_param_0];
69; CHECK-NEXT:    { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r1; }
70; CHECK-NEXT:    st.param.b16 [func_retval0], %rs1;
71; CHECK-NEXT:    ret;
72  %e = extractelement <2 x half> %a, i32 1
73  ret half %e
74}
75
; Extracts an element of a <2 x half> argument using a run-time i64 index.
; The expected PTX compares the index against 0 (setp.eq.s64), unpacks both
; halves, and picks one of them with selp.b16.
76define half @test_extract_i(<2 x half> %a, i64 %idx) #0 {
77; CHECK-LABEL: test_extract_i(
78; CHECK:       {
79; CHECK-NEXT:    .reg .pred %p<2>;
80; CHECK-NEXT:    .reg .b16 %rs<4>;
81; CHECK-NEXT:    .reg .b32 %r<2>;
82; CHECK-NEXT:    .reg .b64 %rd<2>;
83; CHECK-EMPTY:
84; CHECK-NEXT:  // %bb.0:
85; CHECK-NEXT:    ld.param.u64 %rd1, [test_extract_i_param_1];
86; CHECK-NEXT:    ld.param.b32 %r1, [test_extract_i_param_0];
87; CHECK-NEXT:    setp.eq.s64 %p1, %rd1, 0;
88; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
89; CHECK-NEXT:    selp.b16 %rs3, %rs1, %rs2, %p1;
90; CHECK-NEXT:    st.param.b16 [func_retval0], %rs3;
91; CHECK-NEXT:    ret;
92  %e = extractelement <2 x half> %a, i64 %idx
93  ret half %e
94}
95
; Vector fadd of two <2 x half> arguments.
; F16 runs expect a single add.rn.f16x2 on the packed 32-bit registers.
; NOF16 runs expect each lane to be unpacked, widened with cvt.f32.f16,
; added with add.rn.f32, narrowed with cvt.rn.f16.f32 and repacked.
96define <2 x half> @test_fadd(<2 x half> %a, <2 x half> %b) #0 {
97; CHECK-F16-LABEL: test_fadd(
98; CHECK-F16:       {
99; CHECK-F16-NEXT:    .reg .b32 %r<4>;
100; CHECK-F16-EMPTY:
101; CHECK-F16-NEXT:  // %bb.0:
102; CHECK-F16-NEXT:    ld.param.b32 %r2, [test_fadd_param_1];
103; CHECK-F16-NEXT:    ld.param.b32 %r1, [test_fadd_param_0];
104; CHECK-F16-NEXT:    add.rn.f16x2 %r3, %r1, %r2;
105; CHECK-F16-NEXT:    st.param.b32 [func_retval0], %r3;
106; CHECK-F16-NEXT:    ret;
107;
108; CHECK-NOF16-LABEL: test_fadd(
109; CHECK-NOF16:       {
110; CHECK-NOF16-NEXT:    .reg .b16 %rs<7>;
111; CHECK-NOF16-NEXT:    .reg .b32 %r<4>;
112; CHECK-NOF16-NEXT:    .reg .f32 %f<7>;
113; CHECK-NOF16-EMPTY:
114; CHECK-NOF16-NEXT:  // %bb.0:
115; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [test_fadd_param_1];
116; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [test_fadd_param_0];
117; CHECK-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
118; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
119; CHECK-NOF16-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
120; CHECK-NOF16-NEXT:    cvt.f32.f16 %f2, %rs4;
121; CHECK-NOF16-NEXT:    add.rn.f32 %f3, %f2, %f1;
122; CHECK-NOF16-NEXT:    cvt.rn.f16.f32 %rs5, %f3;
123; CHECK-NOF16-NEXT:    cvt.f32.f16 %f4, %rs1;
124; CHECK-NOF16-NEXT:    cvt.f32.f16 %f5, %rs3;
125; CHECK-NOF16-NEXT:    add.rn.f32 %f6, %f5, %f4;
126; CHECK-NOF16-NEXT:    cvt.rn.f16.f32 %rs6, %f6;
127; CHECK-NOF16-NEXT:    mov.b32 %r3, {%rs6, %rs5};
128; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r3;
129; CHECK-NOF16-NEXT:    ret;
130  %r = fadd <2 x half> %a, %b
131  ret <2 x half> %r
132}
133
134; Check that we can lower fadd with immediate arguments.
; fadd with the constant <1.0, 2.0> as the FIRST operand.
; F16 runs expect the constant to be moved into a register as the packed
; immediate 1073757184 and then used by add.rn.f16x2.
; NOF16 runs expect the per-lane f32 immediates 0f3F800000 and 0f40000000
; to appear directly as operands of add.rn.f32.
135define <2 x half> @test_fadd_imm_0(<2 x half> %a) #0 {
136; CHECK-F16-LABEL: test_fadd_imm_0(
137; CHECK-F16:       {
138; CHECK-F16-NEXT:    .reg .b32 %r<4>;
139; CHECK-F16-EMPTY:
140; CHECK-F16-NEXT:  // %bb.0:
141; CHECK-F16-NEXT:    ld.param.b32 %r1, [test_fadd_imm_0_param_0];
142; CHECK-F16-NEXT:    mov.b32 %r2, 1073757184;
143; CHECK-F16-NEXT:    add.rn.f16x2 %r3, %r1, %r2;
144; CHECK-F16-NEXT:    st.param.b32 [func_retval0], %r3;
145; CHECK-F16-NEXT:    ret;
146;
147; CHECK-NOF16-LABEL: test_fadd_imm_0(
148; CHECK-NOF16:       {
149; CHECK-NOF16-NEXT:    .reg .b16 %rs<5>;
150; CHECK-NOF16-NEXT:    .reg .b32 %r<3>;
151; CHECK-NOF16-NEXT:    .reg .f32 %f<5>;
152; CHECK-NOF16-EMPTY:
153; CHECK-NOF16-NEXT:  // %bb.0:
154; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [test_fadd_imm_0_param_0];
155; CHECK-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
156; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
157; CHECK-NOF16-NEXT:    add.rn.f32 %f2, %f1, 0f40000000;
158; CHECK-NOF16-NEXT:    cvt.rn.f16.f32 %rs3, %f2;
159; CHECK-NOF16-NEXT:    cvt.f32.f16 %f3, %rs1;
160; CHECK-NOF16-NEXT:    add.rn.f32 %f4, %f3, 0f3F800000;
161; CHECK-NOF16-NEXT:    cvt.rn.f16.f32 %rs4, %f4;
162; CHECK-NOF16-NEXT:    mov.b32 %r2, {%rs4, %rs3};
163; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r2;
164; CHECK-NOF16-NEXT:    ret;
165  %r = fadd <2 x half> <half 1.0, half 2.0>, %a
166  ret <2 x half> %r
167}
168
; fadd with the constant <1.0, 2.0> as the SECOND operand.
; The expected instruction sequences are the same as for test_fadd_imm_0
; (only the parameter symbol name differs), in both F16 and NOF16 runs.
169define <2 x half> @test_fadd_imm_1(<2 x half> %a) #0 {
170; CHECK-F16-LABEL: test_fadd_imm_1(
171; CHECK-F16:       {
172; CHECK-F16-NEXT:    .reg .b32 %r<4>;
173; CHECK-F16-EMPTY:
174; CHECK-F16-NEXT:  // %bb.0:
175; CHECK-F16-NEXT:    ld.param.b32 %r1, [test_fadd_imm_1_param_0];
176; CHECK-F16-NEXT:    mov.b32 %r2, 1073757184;
177; CHECK-F16-NEXT:    add.rn.f16x2 %r3, %r1, %r2;
178; CHECK-F16-NEXT:    st.param.b32 [func_retval0], %r3;
179; CHECK-F16-NEXT:    ret;
180;
181; CHECK-NOF16-LABEL: test_fadd_imm_1(
182; CHECK-NOF16:       {
183; CHECK-NOF16-NEXT:    .reg .b16 %rs<5>;
184; CHECK-NOF16-NEXT:    .reg .b32 %r<3>;
185; CHECK-NOF16-NEXT:    .reg .f32 %f<5>;
186; CHECK-NOF16-EMPTY:
187; CHECK-NOF16-NEXT:  // %bb.0:
188; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [test_fadd_imm_1_param_0];
189; CHECK-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
190; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
191; CHECK-NOF16-NEXT:    add.rn.f32 %f2, %f1, 0f40000000;
192; CHECK-NOF16-NEXT:    cvt.rn.f16.f32 %rs3, %f2;
193; CHECK-NOF16-NEXT:    cvt.f32.f16 %f3, %rs1;
194; CHECK-NOF16-NEXT:    add.rn.f32 %f4, %f3, 0f3F800000;
195; CHECK-NOF16-NEXT:    cvt.rn.f16.f32 %rs4, %f4;
196; CHECK-NOF16-NEXT:    mov.b32 %r2, {%rs4, %rs3};
197; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r2;
198; CHECK-NOF16-NEXT:    ret;
199  %r = fadd <2 x half> %a, <half 1.0, half 2.0>
200  ret <2 x half> %r
201}
202
; Vector fsub of two <2 x half> arguments (%a - %b).
; F16 runs expect a single sub.rn.f16x2.
; NOF16 runs expect the per-lane widen / sub.rn.f32 / narrow / repack
; sequence, with the lane of %a as the first sub operand.
203define <2 x half> @test_fsub(<2 x half> %a, <2 x half> %b) #0 {
204; CHECK-F16-LABEL: test_fsub(
205; CHECK-F16:       {
206; CHECK-F16-NEXT:    .reg .b32 %r<4>;
207; CHECK-F16-EMPTY:
208; CHECK-F16-NEXT:  // %bb.0:
209; CHECK-F16-NEXT:    ld.param.b32 %r2, [test_fsub_param_1];
210; CHECK-F16-NEXT:    ld.param.b32 %r1, [test_fsub_param_0];
211; CHECK-F16-NEXT:    sub.rn.f16x2 %r3, %r1, %r2;
212; CHECK-F16-NEXT:    st.param.b32 [func_retval0], %r3;
213; CHECK-F16-NEXT:    ret;
214;
215; CHECK-NOF16-LABEL: test_fsub(
216; CHECK-NOF16:       {
217; CHECK-NOF16-NEXT:    .reg .b16 %rs<7>;
218; CHECK-NOF16-NEXT:    .reg .b32 %r<4>;
219; CHECK-NOF16-NEXT:    .reg .f32 %f<7>;
220; CHECK-NOF16-EMPTY:
221; CHECK-NOF16-NEXT:  // %bb.0:
222; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [test_fsub_param_1];
223; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [test_fsub_param_0];
224; CHECK-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
225; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
226; CHECK-NOF16-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
227; CHECK-NOF16-NEXT:    cvt.f32.f16 %f2, %rs4;
228; CHECK-NOF16-NEXT:    sub.rn.f32 %f3, %f2, %f1;
229; CHECK-NOF16-NEXT:    cvt.rn.f16.f32 %rs5, %f3;
230; CHECK-NOF16-NEXT:    cvt.f32.f16 %f4, %rs1;
231; CHECK-NOF16-NEXT:    cvt.f32.f16 %f5, %rs3;
232; CHECK-NOF16-NEXT:    sub.rn.f32 %f6, %f5, %f4;
233; CHECK-NOF16-NEXT:    cvt.rn.f16.f32 %rs6, %f6;
234; CHECK-NOF16-NEXT:    mov.b32 %r3, {%rs6, %rs5};
235; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r3;
236; CHECK-NOF16-NEXT:    ret;
237  %r = fsub <2 x half> %a, %b
238  ret <2 x half> %r
239}
240
; Negation written as (<0.0, 0.0> - %a).
; F16 runs expect a zero register (mov.b32 %r2, 0) followed by sub.rn.f16x2
; with that zero as the first operand.
; NOF16 runs expect one f32 zero register (0f00000000) that is reused as the
; first operand of both per-lane sub.rn.f32 instructions.
241define <2 x half> @test_fneg(<2 x half> %a) #0 {
242; CHECK-F16-LABEL: test_fneg(
243; CHECK-F16:       {
244; CHECK-F16-NEXT:    .reg .b32 %r<4>;
245; CHECK-F16-EMPTY:
246; CHECK-F16-NEXT:  // %bb.0:
247; CHECK-F16-NEXT:    ld.param.b32 %r1, [test_fneg_param_0];
248; CHECK-F16-NEXT:    mov.b32 %r2, 0;
249; CHECK-F16-NEXT:    sub.rn.f16x2 %r3, %r2, %r1;
250; CHECK-F16-NEXT:    st.param.b32 [func_retval0], %r3;
251; CHECK-F16-NEXT:    ret;
252;
253; CHECK-NOF16-LABEL: test_fneg(
254; CHECK-NOF16:       {
255; CHECK-NOF16-NEXT:    .reg .b16 %rs<5>;
256; CHECK-NOF16-NEXT:    .reg .b32 %r<3>;
257; CHECK-NOF16-NEXT:    .reg .f32 %f<6>;
258; CHECK-NOF16-EMPTY:
259; CHECK-NOF16-NEXT:  // %bb.0:
260; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [test_fneg_param_0];
261; CHECK-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
262; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
263; CHECK-NOF16-NEXT:    mov.f32 %f2, 0f00000000;
264; CHECK-NOF16-NEXT:    sub.rn.f32 %f3, %f2, %f1;
265; CHECK-NOF16-NEXT:    cvt.rn.f16.f32 %rs3, %f3;
266; CHECK-NOF16-NEXT:    cvt.f32.f16 %f4, %rs1;
267; CHECK-NOF16-NEXT:    sub.rn.f32 %f5, %f2, %f4;
268; CHECK-NOF16-NEXT:    cvt.rn.f16.f32 %rs4, %f5;
269; CHECK-NOF16-NEXT:    mov.b32 %r2, {%rs4, %rs3};
270; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r2;
271; CHECK-NOF16-NEXT:    ret;
272  %r = fsub <2 x half> <half 0.0, half 0.0>, %a
273  ret <2 x half> %r
274}
275
; Vector fmul of two <2 x half> arguments.
; F16 runs expect a single mul.rn.f16x2.
; NOF16 runs expect the per-lane widen / mul.rn.f32 / narrow / repack
; sequence.
276define <2 x half> @test_fmul(<2 x half> %a, <2 x half> %b) #0 {
277; CHECK-F16-LABEL: test_fmul(
278; CHECK-F16:       {
279; CHECK-F16-NEXT:    .reg .b32 %r<4>;
280; CHECK-F16-EMPTY:
281; CHECK-F16-NEXT:  // %bb.0:
282; CHECK-F16-NEXT:    ld.param.b32 %r2, [test_fmul_param_1];
283; CHECK-F16-NEXT:    ld.param.b32 %r1, [test_fmul_param_0];
284; CHECK-F16-NEXT:    mul.rn.f16x2 %r3, %r1, %r2;
285; CHECK-F16-NEXT:    st.param.b32 [func_retval0], %r3;
286; CHECK-F16-NEXT:    ret;
287;
288; CHECK-NOF16-LABEL: test_fmul(
289; CHECK-NOF16:       {
290; CHECK-NOF16-NEXT:    .reg .b16 %rs<7>;
291; CHECK-NOF16-NEXT:    .reg .b32 %r<4>;
292; CHECK-NOF16-NEXT:    .reg .f32 %f<7>;
293; CHECK-NOF16-EMPTY:
294; CHECK-NOF16-NEXT:  // %bb.0:
295; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [test_fmul_param_1];
296; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [test_fmul_param_0];
297; CHECK-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
298; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
299; CHECK-NOF16-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
300; CHECK-NOF16-NEXT:    cvt.f32.f16 %f2, %rs4;
301; CHECK-NOF16-NEXT:    mul.rn.f32 %f3, %f2, %f1;
302; CHECK-NOF16-NEXT:    cvt.rn.f16.f32 %rs5, %f3;
303; CHECK-NOF16-NEXT:    cvt.f32.f16 %f4, %rs1;
304; CHECK-NOF16-NEXT:    cvt.f32.f16 %f5, %rs3;
305; CHECK-NOF16-NEXT:    mul.rn.f32 %f6, %f5, %f4;
306; CHECK-NOF16-NEXT:    cvt.rn.f16.f32 %rs6, %f6;
307; CHECK-NOF16-NEXT:    mov.b32 %r3, {%rs6, %rs5};
308; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r3;
309; CHECK-NOF16-NEXT:    ret;
310  %r = fmul <2 x half> %a, %b
311  ret <2 x half> %r
312}
313
; Vector fdiv of two <2 x half> arguments.
; A single common prefix is used here, so every RUN configuration expects
; the same output: each lane is widened to f32, divided with div.rn.f32,
; narrowed back with cvt.rn.f16.f32, and the two results are repacked.
314define <2 x half> @test_fdiv(<2 x half> %a, <2 x half> %b) #0 {
315; CHECK-LABEL: test_fdiv(
316; CHECK:       {
317; CHECK-NEXT:    .reg .b16 %rs<7>;
318; CHECK-NEXT:    .reg .b32 %r<4>;
319; CHECK-NEXT:    .reg .f32 %f<7>;
320; CHECK-EMPTY:
321; CHECK-NEXT:  // %bb.0:
322; CHECK-NEXT:    ld.param.b32 %r2, [test_fdiv_param_1];
323; CHECK-NEXT:    ld.param.b32 %r1, [test_fdiv_param_0];
324; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
325; CHECK-NEXT:    cvt.f32.f16 %f1, %rs2;
326; CHECK-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
327; CHECK-NEXT:    cvt.f32.f16 %f2, %rs4;
328; CHECK-NEXT:    div.rn.f32 %f3, %f2, %f1;
329; CHECK-NEXT:    cvt.rn.f16.f32 %rs5, %f3;
330; CHECK-NEXT:    cvt.f32.f16 %f4, %rs1;
331; CHECK-NEXT:    cvt.f32.f16 %f5, %rs3;
332; CHECK-NEXT:    div.rn.f32 %f6, %f5, %f4;
333; CHECK-NEXT:    cvt.rn.f16.f32 %rs6, %f6;
334; CHECK-NEXT:    mov.b32 %r3, {%rs6, %rs5};
335; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
336; CHECK-NEXT:    ret;
337  %r = fdiv <2 x half> %a, %b
338  ret <2 x half> %r
339}
340
341; -- Load two 16x2 inputs and split them into f16 elements
342; -- Split into elements
343; -- promote to f32.
344; -- frem(a[0],b[0]).
345; -- frem(a[1],b[1]).
346; -- convert back to f16.
347; -- merge into f16x2 and return it.
; Vector frem of two <2 x half> arguments; same expectations in every RUN
; configuration. Per lane, after widening to f32, the expected expansion is:
;   q = div.rn.f32 a, b ; t = cvt.rzi.f32.f32 q ; r = a - t * b
; followed by testp.infinite.f32 on the divisor and a selp.f32 that yields
; the unmodified dividend when the divisor is infinite.
348define <2 x half> @test_frem(<2 x half> %a, <2 x half> %b) #0 {
349; CHECK-LABEL: test_frem(
350; CHECK:       {
351; CHECK-NEXT:    .reg .pred %p<3>;
352; CHECK-NEXT:    .reg .b16 %rs<7>;
353; CHECK-NEXT:    .reg .b32 %r<4>;
354; CHECK-NEXT:    .reg .f32 %f<15>;
355; CHECK-EMPTY:
356; CHECK-NEXT:  // %bb.0:
357; CHECK-NEXT:    ld.param.b32 %r2, [test_frem_param_1];
358; CHECK-NEXT:    ld.param.b32 %r1, [test_frem_param_0];
359; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
360; CHECK-NEXT:    cvt.f32.f16 %f1, %rs2;
361; CHECK-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
362; CHECK-NEXT:    cvt.f32.f16 %f2, %rs4;
363; CHECK-NEXT:    div.rn.f32 %f3, %f2, %f1;
364; CHECK-NEXT:    cvt.rzi.f32.f32 %f4, %f3;
365; CHECK-NEXT:    mul.f32 %f5, %f4, %f1;
366; CHECK-NEXT:    sub.f32 %f6, %f2, %f5;
367; CHECK-NEXT:    testp.infinite.f32 %p1, %f1;
368; CHECK-NEXT:    selp.f32 %f7, %f2, %f6, %p1;
369; CHECK-NEXT:    cvt.rn.f16.f32 %rs5, %f7;
370; CHECK-NEXT:    cvt.f32.f16 %f8, %rs1;
371; CHECK-NEXT:    cvt.f32.f16 %f9, %rs3;
372; CHECK-NEXT:    div.rn.f32 %f10, %f9, %f8;
373; CHECK-NEXT:    cvt.rzi.f32.f32 %f11, %f10;
374; CHECK-NEXT:    mul.f32 %f12, %f11, %f8;
375; CHECK-NEXT:    sub.f32 %f13, %f9, %f12;
376; CHECK-NEXT:    testp.infinite.f32 %p2, %f8;
377; CHECK-NEXT:    selp.f32 %f14, %f9, %f13, %p2;
378; CHECK-NEXT:    cvt.rn.f16.f32 %rs6, %f14;
379; CHECK-NEXT:    mov.b32 %r3, {%rs6, %rs5};
380; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
381; CHECK-NEXT:    ret;
382  %r = frem <2 x half> %a, %b
383  ret <2 x half> %r
384}
385
; Loads a <2 x half> from %a and stores it to %b (store is align 16; the
; load carries no explicit alignment).
; The expected PTX moves the whole vector with one ld.b32 / st.b32 pair.
386define void @test_ldst_v2f16(ptr %a, ptr %b) {
387; CHECK-LABEL: test_ldst_v2f16(
388; CHECK:       {
389; CHECK-NEXT:    .reg .b32 %r<2>;
390; CHECK-NEXT:    .reg .b64 %rd<3>;
391; CHECK-EMPTY:
392; CHECK-NEXT:  // %bb.0:
393; CHECK-NEXT:    ld.param.u64 %rd2, [test_ldst_v2f16_param_1];
394; CHECK-NEXT:    ld.param.u64 %rd1, [test_ldst_v2f16_param_0];
395; CHECK-NEXT:    ld.b32 %r1, [%rd1];
396; CHECK-NEXT:    st.b32 [%rd2], %r1;
397; CHECK-NEXT:    ret;
398  %t1 = load <2 x half>, ptr %a
399  store <2 x half> %t1, ptr %b, align 16
400  ret void
401}
402
403; -- v3 is inconvenient to capture as it's lowered as ld.b64 + a fair
404;    number of bit-shifting instructions that may change at LLVM's whim.
405;    So we only verify that we issue the correct number of writes using
406;    the correct offsets, but not the values we write.
; Loads a <3 x half> from %a and stores it to %b.
; The expected PTX performs one 64-bit load (ld.u64) and two stores:
; st.u32 at offset 0 and st.b16 at offset 4.
407define void @test_ldst_v3f16(ptr %a, ptr %b) {
408; CHECK-LABEL: test_ldst_v3f16(
409; CHECK:       {
410; CHECK-NEXT:    .reg .b16 %rs<2>;
411; CHECK-NEXT:    .reg .b32 %r<2>;
412; CHECK-NEXT:    .reg .b64 %rd<4>;
413; CHECK-EMPTY:
414; CHECK-NEXT:  // %bb.0:
415; CHECK-NEXT:    ld.param.u64 %rd2, [test_ldst_v3f16_param_1];
416; CHECK-NEXT:    ld.param.u64 %rd1, [test_ldst_v3f16_param_0];
417; CHECK-NEXT:    ld.u64 %rd3, [%rd1];
418; CHECK-NEXT:    { .reg .b32 tmp; mov.b64 {tmp, %r1}, %rd3; }
419; CHECK-NEXT:    st.u32 [%rd2], %rd3;
420; CHECK-NEXT:    { .reg .b16 tmp; mov.b32 {%rs1, tmp}, %r1; }
421; CHECK-NEXT:    st.b16 [%rd2+4], %rs1;
422; CHECK-NEXT:    ret;
423  %t1 = load <3 x half>, ptr %a
424  store <3 x half> %t1, ptr %b, align 16
425  ret void
426}
427
; Loads a <4 x half> from %a and stores it to %b.
; The expected PTX uses one ld.v4.b16 and one st.v4.b16 over four 16-bit
; registers.
428define void @test_ldst_v4f16(ptr %a, ptr %b) {
429; CHECK-LABEL: test_ldst_v4f16(
430; CHECK:       {
431; CHECK-NEXT:    .reg .b16 %rs<5>;
432; CHECK-NEXT:    .reg .b64 %rd<3>;
433; CHECK-EMPTY:
434; CHECK-NEXT:  // %bb.0:
435; CHECK-NEXT:    ld.param.u64 %rd2, [test_ldst_v4f16_param_1];
436; CHECK-NEXT:    ld.param.u64 %rd1, [test_ldst_v4f16_param_0];
437; CHECK-NEXT:    ld.v4.b16 {%rs1, %rs2, %rs3, %rs4}, [%rd1];
438; CHECK-NEXT:    st.v4.b16 [%rd2], {%rs1, %rs2, %rs3, %rs4};
439; CHECK-NEXT:    ret;
440  %t1 = load <4 x half>, ptr %a
441  store <4 x half> %t1, ptr %b, align 16
442  ret void
443}
444
; Loads an <8 x half> from %a and stores it to %b.
; The expected PTX uses one ld.v4.b32 and one st.v4.b32 over four 32-bit
; registers (two halves per register).
445define void @test_ldst_v8f16(ptr %a, ptr %b) {
446; CHECK-LABEL: test_ldst_v8f16(
447; CHECK:       {
448; CHECK-NEXT:    .reg .b32 %r<5>;
449; CHECK-NEXT:    .reg .b64 %rd<3>;
450; CHECK-EMPTY:
451; CHECK-NEXT:  // %bb.0:
452; CHECK-NEXT:    ld.param.u64 %rd2, [test_ldst_v8f16_param_1];
453; CHECK-NEXT:    ld.param.u64 %rd1, [test_ldst_v8f16_param_0];
454; CHECK-NEXT:    ld.v4.b32 {%r1, %r2, %r3, %r4}, [%rd1];
455; CHECK-NEXT:    st.v4.b32 [%rd2], {%r1, %r2, %r3, %r4};
456; CHECK-NEXT:    ret;
457  %t1 = load <8 x half>, ptr %a
458  store <8 x half> %t1, ptr %b, align 16
459  ret void
460}
461
; External callee used as the call target by test_call, test_call_flipped
; and test_tailcall_flipped below.
462declare <2 x half> @test_callee(<2 x half> %a, <2 x half> %b) #0
463
; Calls test_callee(%a, %b) and returns its result.
; The expected PTX passes each <2 x half> as a 4-byte, 4-aligned .param
; (param0 <- %a, param1 <- %b) and reads the result from retval0 with a
; single ld.param.b32.
464define <2 x half> @test_call(<2 x half> %a, <2 x half> %b) #0 {
465; CHECK-LABEL: test_call(
466; CHECK:       {
467; CHECK-NEXT:    .reg .b32 %r<5>;
468; CHECK-EMPTY:
469; CHECK-NEXT:  // %bb.0:
470; CHECK-NEXT:    ld.param.b32 %r2, [test_call_param_1];
471; CHECK-NEXT:    ld.param.b32 %r1, [test_call_param_0];
472; CHECK-NEXT:    { // callseq 0, 0
473; CHECK-NEXT:    .param .align 4 .b8 param0[4];
474; CHECK-NEXT:    st.param.b32 [param0], %r1;
475; CHECK-NEXT:    .param .align 4 .b8 param1[4];
476; CHECK-NEXT:    st.param.b32 [param1], %r2;
477; CHECK-NEXT:    .param .align 4 .b8 retval0[4];
478; CHECK-NEXT:    call.uni (retval0),
479; CHECK-NEXT:    test_callee,
480; CHECK-NEXT:    (
481; CHECK-NEXT:    param0,
482; CHECK-NEXT:    param1
483; CHECK-NEXT:    );
484; CHECK-NEXT:    ld.param.b32 %r3, [retval0];
485; CHECK-NEXT:    } // callseq 0
486; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
487; CHECK-NEXT:    ret;
488  %r = call <2 x half> @test_callee(<2 x half> %a, <2 x half> %b)
489  ret <2 x half> %r
490}
491
; Calls test_callee with the arguments swapped: test_callee(%b, %a).
; Compared with test_call, the expected stores are reversed:
; param0 <- %r2 (loaded from param_1), param1 <- %r1 (loaded from param_0).
492define <2 x half> @test_call_flipped(<2 x half> %a, <2 x half> %b) #0 {
493; CHECK-LABEL: test_call_flipped(
494; CHECK:       {
495; CHECK-NEXT:    .reg .b32 %r<5>;
496; CHECK-EMPTY:
497; CHECK-NEXT:  // %bb.0:
498; CHECK-NEXT:    ld.param.b32 %r2, [test_call_flipped_param_1];
499; CHECK-NEXT:    ld.param.b32 %r1, [test_call_flipped_param_0];
500; CHECK-NEXT:    { // callseq 1, 0
501; CHECK-NEXT:    .param .align 4 .b8 param0[4];
502; CHECK-NEXT:    st.param.b32 [param0], %r2;
503; CHECK-NEXT:    .param .align 4 .b8 param1[4];
504; CHECK-NEXT:    st.param.b32 [param1], %r1;
505; CHECK-NEXT:    .param .align 4 .b8 retval0[4];
506; CHECK-NEXT:    call.uni (retval0),
507; CHECK-NEXT:    test_callee,
508; CHECK-NEXT:    (
509; CHECK-NEXT:    param0,
510; CHECK-NEXT:    param1
511; CHECK-NEXT:    );
512; CHECK-NEXT:    ld.param.b32 %r3, [retval0];
513; CHECK-NEXT:    } // callseq 1
514; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
515; CHECK-NEXT:    ret;
516  %r = call <2 x half> @test_callee(<2 x half> %b, <2 x half> %a)
517  ret <2 x half> %r
518}
519
; Same as test_call_flipped, but the IR call is marked `tail call`.
; The expected PTX is still an ordinary call.uni sequence followed by a
; store to func_retval0 and ret, i.e. the same shape as the non-tail case.
520define <2 x half> @test_tailcall_flipped(<2 x half> %a, <2 x half> %b) #0 {
521; CHECK-LABEL: test_tailcall_flipped(
522; CHECK:       {
523; CHECK-NEXT:    .reg .b32 %r<5>;
524; CHECK-EMPTY:
525; CHECK-NEXT:  // %bb.0:
526; CHECK-NEXT:    ld.param.b32 %r2, [test_tailcall_flipped_param_1];
527; CHECK-NEXT:    ld.param.b32 %r1, [test_tailcall_flipped_param_0];
528; CHECK-NEXT:    { // callseq 2, 0
529; CHECK-NEXT:    .param .align 4 .b8 param0[4];
530; CHECK-NEXT:    st.param.b32 [param0], %r2;
531; CHECK-NEXT:    .param .align 4 .b8 param1[4];
532; CHECK-NEXT:    st.param.b32 [param1], %r1;
533; CHECK-NEXT:    .param .align 4 .b8 retval0[4];
534; CHECK-NEXT:    call.uni (retval0),
535; CHECK-NEXT:    test_callee,
536; CHECK-NEXT:    (
537; CHECK-NEXT:    param0,
538; CHECK-NEXT:    param1
539; CHECK-NEXT:    );
540; CHECK-NEXT:    ld.param.b32 %r3, [retval0];
541; CHECK-NEXT:    } // callseq 2
542; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
543; CHECK-NEXT:    ret;
544  %r = tail call <2 x half> @test_callee(<2 x half> %b, <2 x half> %a)
545  ret <2 x half> %r
546}
547
; Selects between two whole <2 x half> vectors using a scalar i1 condition.
; The expected PTX loads the condition as a byte, masks it with 1, turns it
; into a predicate (setp.eq.b16 ..., 1) and selects the packed 32-bit value
; with a single selp.b32 -- the vector is not unpacked.
548define <2 x half> @test_select(<2 x half> %a, <2 x half> %b, i1 zeroext %c) #0 {
549; CHECK-LABEL: test_select(
550; CHECK:       {
551; CHECK-NEXT:    .reg .pred %p<2>;
552; CHECK-NEXT:    .reg .b16 %rs<3>;
553; CHECK-NEXT:    .reg .b32 %r<4>;
554; CHECK-EMPTY:
555; CHECK-NEXT:  // %bb.0:
556; CHECK-NEXT:    ld.param.u8 %rs1, [test_select_param_2];
557; CHECK-NEXT:    and.b16 %rs2, %rs1, 1;
558; CHECK-NEXT:    setp.eq.b16 %p1, %rs2, 1;
559; CHECK-NEXT:    ld.param.b32 %r2, [test_select_param_1];
560; CHECK-NEXT:    ld.param.b32 %r1, [test_select_param_0];
561; CHECK-NEXT:    selp.b32 %r3, %r1, %r2, %p1;
562; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
563; CHECK-NEXT:    ret;
564  %r = select i1 %c, <2 x half> %a, <2 x half> %b
565  ret <2 x half> %r
566}
567
; Lane-wise select between %a and %b driven by (fcmp une %c, %d).
; F16 runs expect one setp.neu.f16x2 that writes two predicates (%p1|%p2).
; NOF16 runs expect both compare operands to be unpacked and widened to f32,
; with one setp.neu.f32 per lane.
; In both cases the data lanes are then chosen with selp.b16 and repacked.
568define <2 x half> @test_select_cc(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x half> %d) #0 {
569; CHECK-F16-LABEL: test_select_cc(
570; CHECK-F16:       {
571; CHECK-F16-NEXT:    .reg .pred %p<3>;
572; CHECK-F16-NEXT:    .reg .b16 %rs<7>;
573; CHECK-F16-NEXT:    .reg .b32 %r<6>;
574; CHECK-F16-EMPTY:
575; CHECK-F16-NEXT:  // %bb.0:
576; CHECK-F16-NEXT:    ld.param.b32 %r4, [test_select_cc_param_3];
577; CHECK-F16-NEXT:    ld.param.b32 %r3, [test_select_cc_param_2];
578; CHECK-F16-NEXT:    ld.param.b32 %r2, [test_select_cc_param_1];
579; CHECK-F16-NEXT:    ld.param.b32 %r1, [test_select_cc_param_0];
580; CHECK-F16-NEXT:    setp.neu.f16x2 %p1|%p2, %r3, %r4;
581; CHECK-F16-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
582; CHECK-F16-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
583; CHECK-F16-NEXT:    selp.b16 %rs5, %rs4, %rs2, %p2;
584; CHECK-F16-NEXT:    selp.b16 %rs6, %rs3, %rs1, %p1;
585; CHECK-F16-NEXT:    mov.b32 %r5, {%rs6, %rs5};
586; CHECK-F16-NEXT:    st.param.b32 [func_retval0], %r5;
587; CHECK-F16-NEXT:    ret;
588;
589; CHECK-NOF16-LABEL: test_select_cc(
590; CHECK-NOF16:       {
591; CHECK-NOF16-NEXT:    .reg .pred %p<3>;
592; CHECK-NOF16-NEXT:    .reg .b16 %rs<11>;
593; CHECK-NOF16-NEXT:    .reg .b32 %r<6>;
594; CHECK-NOF16-NEXT:    .reg .f32 %f<5>;
595; CHECK-NOF16-EMPTY:
596; CHECK-NOF16-NEXT:  // %bb.0:
597; CHECK-NOF16-NEXT:    ld.param.b32 %r4, [test_select_cc_param_3];
598; CHECK-NOF16-NEXT:    ld.param.b32 %r3, [test_select_cc_param_2];
599; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [test_select_cc_param_1];
600; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [test_select_cc_param_0];
601; CHECK-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r4;
602; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs1;
603; CHECK-NOF16-NEXT:    mov.b32 {%rs3, %rs4}, %r3;
604; CHECK-NOF16-NEXT:    cvt.f32.f16 %f2, %rs3;
605; CHECK-NOF16-NEXT:    setp.neu.f32 %p1, %f2, %f1;
606; CHECK-NOF16-NEXT:    cvt.f32.f16 %f3, %rs2;
607; CHECK-NOF16-NEXT:    cvt.f32.f16 %f4, %rs4;
608; CHECK-NOF16-NEXT:    setp.neu.f32 %p2, %f4, %f3;
609; CHECK-NOF16-NEXT:    mov.b32 {%rs5, %rs6}, %r2;
610; CHECK-NOF16-NEXT:    mov.b32 {%rs7, %rs8}, %r1;
611; CHECK-NOF16-NEXT:    selp.b16 %rs9, %rs8, %rs6, %p2;
612; CHECK-NOF16-NEXT:    selp.b16 %rs10, %rs7, %rs5, %p1;
613; CHECK-NOF16-NEXT:    mov.b32 %r5, {%rs10, %rs9};
614; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r5;
615; CHECK-NOF16-NEXT:    ret;
616  %cc = fcmp une <2 x half> %c, %d
617  %r = select <2 x i1> %cc, <2 x half> %a, <2 x half> %b
618  ret <2 x half> %r
619}
620
; Lane-wise select between two <2 x float> vectors, driven by a compare of
; two <2 x half> vectors (fcmp une %c, %d).
; F16 runs expect one setp.neu.f16x2 producing both predicates.
; NOF16 runs expect the half operands to be widened and compared with one
; setp.neu.f32 per lane.
; Either way the float lanes are chosen with selp.f32 and returned as v2.f32.
; Note: the signature spans two source lines; the generated expectations
; sit between them.
621define <2 x float> @test_select_cc_f32_f16(<2 x float> %a, <2 x float> %b,
622; CHECK-F16-LABEL: test_select_cc_f32_f16(
623; CHECK-F16:       {
624; CHECK-F16-NEXT:    .reg .pred %p<3>;
625; CHECK-F16-NEXT:    .reg .b32 %r<3>;
626; CHECK-F16-NEXT:    .reg .f32 %f<7>;
627; CHECK-F16-EMPTY:
628; CHECK-F16-NEXT:  // %bb.0:
629; CHECK-F16-NEXT:    ld.param.v2.f32 {%f3, %f4}, [test_select_cc_f32_f16_param_1];
630; CHECK-F16-NEXT:    ld.param.v2.f32 {%f1, %f2}, [test_select_cc_f32_f16_param_0];
631; CHECK-F16-NEXT:    ld.param.b32 %r2, [test_select_cc_f32_f16_param_3];
632; CHECK-F16-NEXT:    ld.param.b32 %r1, [test_select_cc_f32_f16_param_2];
633; CHECK-F16-NEXT:    setp.neu.f16x2 %p1|%p2, %r1, %r2;
634; CHECK-F16-NEXT:    selp.f32 %f5, %f2, %f4, %p2;
635; CHECK-F16-NEXT:    selp.f32 %f6, %f1, %f3, %p1;
636; CHECK-F16-NEXT:    st.param.v2.f32 [func_retval0], {%f6, %f5};
637; CHECK-F16-NEXT:    ret;
638;
639; CHECK-NOF16-LABEL: test_select_cc_f32_f16(
640; CHECK-NOF16:       {
641; CHECK-NOF16-NEXT:    .reg .pred %p<3>;
642; CHECK-NOF16-NEXT:    .reg .b16 %rs<5>;
643; CHECK-NOF16-NEXT:    .reg .b32 %r<3>;
644; CHECK-NOF16-NEXT:    .reg .f32 %f<11>;
645; CHECK-NOF16-EMPTY:
646; CHECK-NOF16-NEXT:  // %bb.0:
647; CHECK-NOF16-NEXT:    ld.param.v2.f32 {%f3, %f4}, [test_select_cc_f32_f16_param_1];
648; CHECK-NOF16-NEXT:    ld.param.v2.f32 {%f1, %f2}, [test_select_cc_f32_f16_param_0];
649; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [test_select_cc_f32_f16_param_3];
650; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [test_select_cc_f32_f16_param_2];
651; CHECK-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
652; CHECK-NOF16-NEXT:    cvt.f32.f16 %f5, %rs1;
653; CHECK-NOF16-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
654; CHECK-NOF16-NEXT:    cvt.f32.f16 %f6, %rs3;
655; CHECK-NOF16-NEXT:    setp.neu.f32 %p1, %f6, %f5;
656; CHECK-NOF16-NEXT:    cvt.f32.f16 %f7, %rs2;
657; CHECK-NOF16-NEXT:    cvt.f32.f16 %f8, %rs4;
658; CHECK-NOF16-NEXT:    setp.neu.f32 %p2, %f8, %f7;
659; CHECK-NOF16-NEXT:    selp.f32 %f9, %f2, %f4, %p2;
660; CHECK-NOF16-NEXT:    selp.f32 %f10, %f1, %f3, %p1;
661; CHECK-NOF16-NEXT:    st.param.v2.f32 [func_retval0], {%f10, %f9};
662; CHECK-NOF16-NEXT:    ret;
663                                           <2 x half> %c, <2 x half> %d) #0 {
664  %cc = fcmp une <2 x half> %c, %d
665  %r = select <2 x i1> %cc, <2 x float> %a, <2 x float> %b
666  ret <2 x float> %r
667}
668
; Lane-wise select between two <2 x half> vectors, driven by a compare of
; two <2 x float> vectors (fcmp une %c, %d).
; The compare is already f32, so one common prefix covers every RUN
; configuration: two setp.neu.f32, then the half lanes are unpacked, chosen
; with selp.b16 and repacked.
; Note: the signature spans two source lines; the generated expectations
; sit between them.
669define <2 x half> @test_select_cc_f16_f32(<2 x half> %a, <2 x half> %b,
670; CHECK-LABEL: test_select_cc_f16_f32(
671; CHECK:       {
672; CHECK-NEXT:    .reg .pred %p<3>;
673; CHECK-NEXT:    .reg .b16 %rs<7>;
674; CHECK-NEXT:    .reg .b32 %r<4>;
675; CHECK-NEXT:    .reg .f32 %f<5>;
676; CHECK-EMPTY:
677; CHECK-NEXT:  // %bb.0:
678; CHECK-NEXT:    ld.param.v2.f32 {%f3, %f4}, [test_select_cc_f16_f32_param_3];
679; CHECK-NEXT:    ld.param.v2.f32 {%f1, %f2}, [test_select_cc_f16_f32_param_2];
680; CHECK-NEXT:    ld.param.b32 %r2, [test_select_cc_f16_f32_param_1];
681; CHECK-NEXT:    ld.param.b32 %r1, [test_select_cc_f16_f32_param_0];
682; CHECK-NEXT:    setp.neu.f32 %p1, %f1, %f3;
683; CHECK-NEXT:    setp.neu.f32 %p2, %f2, %f4;
684; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
685; CHECK-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
686; CHECK-NEXT:    selp.b16 %rs5, %rs4, %rs2, %p2;
687; CHECK-NEXT:    selp.b16 %rs6, %rs3, %rs1, %p1;
688; CHECK-NEXT:    mov.b32 %r3, {%rs6, %rs5};
689; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
690; CHECK-NEXT:    ret;
691                                          <2 x float> %c, <2 x float> %d) #0 {
692  %cc = fcmp une <2 x float> %c, %d
693  %r = select <2 x i1> %cc, <2 x half> %a, <2 x half> %b
694  ret <2 x half> %r
695}
696
; fcmp une on two <2 x half> vectors, returning <2 x i1>.
; F16 runs expect one setp.neu.f16x2 writing two predicates.
; NOF16 runs expect the lanes to be widened to f32 and compared with one
; setp.neu.f32 each.
; Each predicate is then turned into a 16-bit -1/0 value with selp.u16 and
; stored as one byte, at func_retval0 and func_retval0+1.
697define <2 x i1> @test_fcmp_une(<2 x half> %a, <2 x half> %b) #0 {
698; CHECK-F16-LABEL: test_fcmp_une(
699; CHECK-F16:       {
700; CHECK-F16-NEXT:    .reg .pred %p<3>;
701; CHECK-F16-NEXT:    .reg .b16 %rs<3>;
702; CHECK-F16-NEXT:    .reg .b32 %r<3>;
703; CHECK-F16-EMPTY:
704; CHECK-F16-NEXT:  // %bb.0:
705; CHECK-F16-NEXT:    ld.param.b32 %r2, [test_fcmp_une_param_1];
706; CHECK-F16-NEXT:    ld.param.b32 %r1, [test_fcmp_une_param_0];
707; CHECK-F16-NEXT:    setp.neu.f16x2 %p1|%p2, %r1, %r2;
708; CHECK-F16-NEXT:    selp.u16 %rs1, -1, 0, %p1;
709; CHECK-F16-NEXT:    st.param.b8 [func_retval0], %rs1;
710; CHECK-F16-NEXT:    selp.u16 %rs2, -1, 0, %p2;
711; CHECK-F16-NEXT:    st.param.b8 [func_retval0+1], %rs2;
712; CHECK-F16-NEXT:    ret;
713;
714; CHECK-NOF16-LABEL: test_fcmp_une(
715; CHECK-NOF16:       {
716; CHECK-NOF16-NEXT:    .reg .pred %p<3>;
717; CHECK-NOF16-NEXT:    .reg .b16 %rs<7>;
718; CHECK-NOF16-NEXT:    .reg .b32 %r<3>;
719; CHECK-NOF16-NEXT:    .reg .f32 %f<5>;
720; CHECK-NOF16-EMPTY:
721; CHECK-NOF16-NEXT:  // %bb.0:
722; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [test_fcmp_une_param_1];
723; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [test_fcmp_une_param_0];
724; CHECK-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
725; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
726; CHECK-NOF16-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
727; CHECK-NOF16-NEXT:    cvt.f32.f16 %f2, %rs4;
728; CHECK-NOF16-NEXT:    setp.neu.f32 %p1, %f2, %f1;
729; CHECK-NOF16-NEXT:    cvt.f32.f16 %f3, %rs1;
730; CHECK-NOF16-NEXT:    cvt.f32.f16 %f4, %rs3;
731; CHECK-NOF16-NEXT:    setp.neu.f32 %p2, %f4, %f3;
732; CHECK-NOF16-NEXT:    selp.u16 %rs5, -1, 0, %p2;
733; CHECK-NOF16-NEXT:    st.param.b8 [func_retval0], %rs5;
734; CHECK-NOF16-NEXT:    selp.u16 %rs6, -1, 0, %p1;
735; CHECK-NOF16-NEXT:    st.param.b8 [func_retval0+1], %rs6;
736; CHECK-NOF16-NEXT:    ret;
737  %r = fcmp une <2 x half> %a, %b
738  ret <2 x i1> %r
739}
740
; fcmp ueq on two <2 x half> vectors, returning <2 x i1>.
; Same shape as test_fcmp_une, with the compare mnemonic changed to
; setp.equ (f16x2 in F16 runs, per-lane f32 in NOF16 runs). The predicates
; are converted with selp.u16 and stored as two bytes of func_retval0.
741define <2 x i1> @test_fcmp_ueq(<2 x half> %a, <2 x half> %b) #0 {
742; CHECK-F16-LABEL: test_fcmp_ueq(
743; CHECK-F16:       {
744; CHECK-F16-NEXT:    .reg .pred %p<3>;
745; CHECK-F16-NEXT:    .reg .b16 %rs<3>;
746; CHECK-F16-NEXT:    .reg .b32 %r<3>;
747; CHECK-F16-EMPTY:
748; CHECK-F16-NEXT:  // %bb.0:
749; CHECK-F16-NEXT:    ld.param.b32 %r2, [test_fcmp_ueq_param_1];
750; CHECK-F16-NEXT:    ld.param.b32 %r1, [test_fcmp_ueq_param_0];
751; CHECK-F16-NEXT:    setp.equ.f16x2 %p1|%p2, %r1, %r2;
752; CHECK-F16-NEXT:    selp.u16 %rs1, -1, 0, %p1;
753; CHECK-F16-NEXT:    st.param.b8 [func_retval0], %rs1;
754; CHECK-F16-NEXT:    selp.u16 %rs2, -1, 0, %p2;
755; CHECK-F16-NEXT:    st.param.b8 [func_retval0+1], %rs2;
756; CHECK-F16-NEXT:    ret;
757;
758; CHECK-NOF16-LABEL: test_fcmp_ueq(
759; CHECK-NOF16:       {
760; CHECK-NOF16-NEXT:    .reg .pred %p<3>;
761; CHECK-NOF16-NEXT:    .reg .b16 %rs<7>;
762; CHECK-NOF16-NEXT:    .reg .b32 %r<3>;
763; CHECK-NOF16-NEXT:    .reg .f32 %f<5>;
764; CHECK-NOF16-EMPTY:
765; CHECK-NOF16-NEXT:  // %bb.0:
766; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [test_fcmp_ueq_param_1];
767; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [test_fcmp_ueq_param_0];
768; CHECK-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
769; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
770; CHECK-NOF16-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
771; CHECK-NOF16-NEXT:    cvt.f32.f16 %f2, %rs4;
772; CHECK-NOF16-NEXT:    setp.equ.f32 %p1, %f2, %f1;
773; CHECK-NOF16-NEXT:    cvt.f32.f16 %f3, %rs1;
774; CHECK-NOF16-NEXT:    cvt.f32.f16 %f4, %rs3;
775; CHECK-NOF16-NEXT:    setp.equ.f32 %p2, %f4, %f3;
776; CHECK-NOF16-NEXT:    selp.u16 %rs5, -1, 0, %p2;
777; CHECK-NOF16-NEXT:    st.param.b8 [func_retval0], %rs5;
778; CHECK-NOF16-NEXT:    selp.u16 %rs6, -1, 0, %p1;
779; CHECK-NOF16-NEXT:    st.param.b8 [func_retval0+1], %rs6;
780; CHECK-NOF16-NEXT:    ret;
781  %r = fcmp ueq <2 x half> %a, %b
782  ret <2 x i1> %r
783}
784
; Lowering of `fcmp ugt` on <2 x half>, returning <2 x i1>.
; f16-enabled run: one packed setp.gtu.f16x2 is expected to write both predicates.
; no-f16 runs: each lane is unpacked with mov.b32, widened with cvt.f32.f16 and
; compared with a scalar setp.gtu.f32.
; In both cases each predicate is turned into a byte via selp.u16 + st.param.b8.
785define <2 x i1> @test_fcmp_ugt(<2 x half> %a, <2 x half> %b) #0 {
786; CHECK-F16-LABEL: test_fcmp_ugt(
787; CHECK-F16:       {
788; CHECK-F16-NEXT:    .reg .pred %p<3>;
789; CHECK-F16-NEXT:    .reg .b16 %rs<3>;
790; CHECK-F16-NEXT:    .reg .b32 %r<3>;
791; CHECK-F16-EMPTY:
792; CHECK-F16-NEXT:  // %bb.0:
793; CHECK-F16-NEXT:    ld.param.b32 %r2, [test_fcmp_ugt_param_1];
794; CHECK-F16-NEXT:    ld.param.b32 %r1, [test_fcmp_ugt_param_0];
795; CHECK-F16-NEXT:    setp.gtu.f16x2 %p1|%p2, %r1, %r2;
796; CHECK-F16-NEXT:    selp.u16 %rs1, -1, 0, %p1;
797; CHECK-F16-NEXT:    st.param.b8 [func_retval0], %rs1;
798; CHECK-F16-NEXT:    selp.u16 %rs2, -1, 0, %p2;
799; CHECK-F16-NEXT:    st.param.b8 [func_retval0+1], %rs2;
800; CHECK-F16-NEXT:    ret;
801;
802; CHECK-NOF16-LABEL: test_fcmp_ugt(
803; CHECK-NOF16:       {
804; CHECK-NOF16-NEXT:    .reg .pred %p<3>;
805; CHECK-NOF16-NEXT:    .reg .b16 %rs<7>;
806; CHECK-NOF16-NEXT:    .reg .b32 %r<3>;
807; CHECK-NOF16-NEXT:    .reg .f32 %f<5>;
808; CHECK-NOF16-EMPTY:
809; CHECK-NOF16-NEXT:  // %bb.0:
810; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [test_fcmp_ugt_param_1];
811; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [test_fcmp_ugt_param_0];
812; CHECK-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
813; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
814; CHECK-NOF16-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
815; CHECK-NOF16-NEXT:    cvt.f32.f16 %f2, %rs4;
816; CHECK-NOF16-NEXT:    setp.gtu.f32 %p1, %f2, %f1;
817; CHECK-NOF16-NEXT:    cvt.f32.f16 %f3, %rs1;
818; CHECK-NOF16-NEXT:    cvt.f32.f16 %f4, %rs3;
819; CHECK-NOF16-NEXT:    setp.gtu.f32 %p2, %f4, %f3;
820; CHECK-NOF16-NEXT:    selp.u16 %rs5, -1, 0, %p2;
821; CHECK-NOF16-NEXT:    st.param.b8 [func_retval0], %rs5;
822; CHECK-NOF16-NEXT:    selp.u16 %rs6, -1, 0, %p1;
823; CHECK-NOF16-NEXT:    st.param.b8 [func_retval0+1], %rs6;
824; CHECK-NOF16-NEXT:    ret;
825  %r = fcmp ugt <2 x half> %a, %b
826  ret <2 x i1> %r
827}
828
; Lowering of `fcmp uge` on <2 x half>, returning <2 x i1>.
; f16-enabled run: one packed setp.geu.f16x2 is expected to write both predicates.
; no-f16 runs: each lane is unpacked with mov.b32, widened with cvt.f32.f16 and
; compared with a scalar setp.geu.f32.
; In both cases each predicate is turned into a byte via selp.u16 + st.param.b8.
829define <2 x i1> @test_fcmp_uge(<2 x half> %a, <2 x half> %b) #0 {
830; CHECK-F16-LABEL: test_fcmp_uge(
831; CHECK-F16:       {
832; CHECK-F16-NEXT:    .reg .pred %p<3>;
833; CHECK-F16-NEXT:    .reg .b16 %rs<3>;
834; CHECK-F16-NEXT:    .reg .b32 %r<3>;
835; CHECK-F16-EMPTY:
836; CHECK-F16-NEXT:  // %bb.0:
837; CHECK-F16-NEXT:    ld.param.b32 %r2, [test_fcmp_uge_param_1];
838; CHECK-F16-NEXT:    ld.param.b32 %r1, [test_fcmp_uge_param_0];
839; CHECK-F16-NEXT:    setp.geu.f16x2 %p1|%p2, %r1, %r2;
840; CHECK-F16-NEXT:    selp.u16 %rs1, -1, 0, %p1;
841; CHECK-F16-NEXT:    st.param.b8 [func_retval0], %rs1;
842; CHECK-F16-NEXT:    selp.u16 %rs2, -1, 0, %p2;
843; CHECK-F16-NEXT:    st.param.b8 [func_retval0+1], %rs2;
844; CHECK-F16-NEXT:    ret;
845;
846; CHECK-NOF16-LABEL: test_fcmp_uge(
847; CHECK-NOF16:       {
848; CHECK-NOF16-NEXT:    .reg .pred %p<3>;
849; CHECK-NOF16-NEXT:    .reg .b16 %rs<7>;
850; CHECK-NOF16-NEXT:    .reg .b32 %r<3>;
851; CHECK-NOF16-NEXT:    .reg .f32 %f<5>;
852; CHECK-NOF16-EMPTY:
853; CHECK-NOF16-NEXT:  // %bb.0:
854; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [test_fcmp_uge_param_1];
855; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [test_fcmp_uge_param_0];
856; CHECK-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
857; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
858; CHECK-NOF16-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
859; CHECK-NOF16-NEXT:    cvt.f32.f16 %f2, %rs4;
860; CHECK-NOF16-NEXT:    setp.geu.f32 %p1, %f2, %f1;
861; CHECK-NOF16-NEXT:    cvt.f32.f16 %f3, %rs1;
862; CHECK-NOF16-NEXT:    cvt.f32.f16 %f4, %rs3;
863; CHECK-NOF16-NEXT:    setp.geu.f32 %p2, %f4, %f3;
864; CHECK-NOF16-NEXT:    selp.u16 %rs5, -1, 0, %p2;
865; CHECK-NOF16-NEXT:    st.param.b8 [func_retval0], %rs5;
866; CHECK-NOF16-NEXT:    selp.u16 %rs6, -1, 0, %p1;
867; CHECK-NOF16-NEXT:    st.param.b8 [func_retval0+1], %rs6;
868; CHECK-NOF16-NEXT:    ret;
869  %r = fcmp uge <2 x half> %a, %b
870  ret <2 x i1> %r
871}
872
; Lowering of `fcmp ult` on <2 x half>, returning <2 x i1>.
; f16-enabled run: one packed setp.ltu.f16x2 is expected to write both predicates.
; no-f16 runs: each lane is unpacked with mov.b32, widened with cvt.f32.f16 and
; compared with a scalar setp.ltu.f32.
; In both cases each predicate is turned into a byte via selp.u16 + st.param.b8.
873define <2 x i1> @test_fcmp_ult(<2 x half> %a, <2 x half> %b) #0 {
874; CHECK-F16-LABEL: test_fcmp_ult(
875; CHECK-F16:       {
876; CHECK-F16-NEXT:    .reg .pred %p<3>;
877; CHECK-F16-NEXT:    .reg .b16 %rs<3>;
878; CHECK-F16-NEXT:    .reg .b32 %r<3>;
879; CHECK-F16-EMPTY:
880; CHECK-F16-NEXT:  // %bb.0:
881; CHECK-F16-NEXT:    ld.param.b32 %r2, [test_fcmp_ult_param_1];
882; CHECK-F16-NEXT:    ld.param.b32 %r1, [test_fcmp_ult_param_0];
883; CHECK-F16-NEXT:    setp.ltu.f16x2 %p1|%p2, %r1, %r2;
884; CHECK-F16-NEXT:    selp.u16 %rs1, -1, 0, %p1;
885; CHECK-F16-NEXT:    st.param.b8 [func_retval0], %rs1;
886; CHECK-F16-NEXT:    selp.u16 %rs2, -1, 0, %p2;
887; CHECK-F16-NEXT:    st.param.b8 [func_retval0+1], %rs2;
888; CHECK-F16-NEXT:    ret;
889;
890; CHECK-NOF16-LABEL: test_fcmp_ult(
891; CHECK-NOF16:       {
892; CHECK-NOF16-NEXT:    .reg .pred %p<3>;
893; CHECK-NOF16-NEXT:    .reg .b16 %rs<7>;
894; CHECK-NOF16-NEXT:    .reg .b32 %r<3>;
895; CHECK-NOF16-NEXT:    .reg .f32 %f<5>;
896; CHECK-NOF16-EMPTY:
897; CHECK-NOF16-NEXT:  // %bb.0:
898; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [test_fcmp_ult_param_1];
899; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [test_fcmp_ult_param_0];
900; CHECK-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
901; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
902; CHECK-NOF16-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
903; CHECK-NOF16-NEXT:    cvt.f32.f16 %f2, %rs4;
904; CHECK-NOF16-NEXT:    setp.ltu.f32 %p1, %f2, %f1;
905; CHECK-NOF16-NEXT:    cvt.f32.f16 %f3, %rs1;
906; CHECK-NOF16-NEXT:    cvt.f32.f16 %f4, %rs3;
907; CHECK-NOF16-NEXT:    setp.ltu.f32 %p2, %f4, %f3;
908; CHECK-NOF16-NEXT:    selp.u16 %rs5, -1, 0, %p2;
909; CHECK-NOF16-NEXT:    st.param.b8 [func_retval0], %rs5;
910; CHECK-NOF16-NEXT:    selp.u16 %rs6, -1, 0, %p1;
911; CHECK-NOF16-NEXT:    st.param.b8 [func_retval0+1], %rs6;
912; CHECK-NOF16-NEXT:    ret;
913  %r = fcmp ult <2 x half> %a, %b
914  ret <2 x i1> %r
915}
916
; Lowering of `fcmp ule` on <2 x half>, returning <2 x i1>.
; f16-enabled run: one packed setp.leu.f16x2 is expected to write both predicates.
; no-f16 runs: each lane is unpacked with mov.b32, widened with cvt.f32.f16 and
; compared with a scalar setp.leu.f32.
; In both cases each predicate is turned into a byte via selp.u16 + st.param.b8.
917define <2 x i1> @test_fcmp_ule(<2 x half> %a, <2 x half> %b) #0 {
918; CHECK-F16-LABEL: test_fcmp_ule(
919; CHECK-F16:       {
920; CHECK-F16-NEXT:    .reg .pred %p<3>;
921; CHECK-F16-NEXT:    .reg .b16 %rs<3>;
922; CHECK-F16-NEXT:    .reg .b32 %r<3>;
923; CHECK-F16-EMPTY:
924; CHECK-F16-NEXT:  // %bb.0:
925; CHECK-F16-NEXT:    ld.param.b32 %r2, [test_fcmp_ule_param_1];
926; CHECK-F16-NEXT:    ld.param.b32 %r1, [test_fcmp_ule_param_0];
927; CHECK-F16-NEXT:    setp.leu.f16x2 %p1|%p2, %r1, %r2;
928; CHECK-F16-NEXT:    selp.u16 %rs1, -1, 0, %p1;
929; CHECK-F16-NEXT:    st.param.b8 [func_retval0], %rs1;
930; CHECK-F16-NEXT:    selp.u16 %rs2, -1, 0, %p2;
931; CHECK-F16-NEXT:    st.param.b8 [func_retval0+1], %rs2;
932; CHECK-F16-NEXT:    ret;
933;
934; CHECK-NOF16-LABEL: test_fcmp_ule(
935; CHECK-NOF16:       {
936; CHECK-NOF16-NEXT:    .reg .pred %p<3>;
937; CHECK-NOF16-NEXT:    .reg .b16 %rs<7>;
938; CHECK-NOF16-NEXT:    .reg .b32 %r<3>;
939; CHECK-NOF16-NEXT:    .reg .f32 %f<5>;
940; CHECK-NOF16-EMPTY:
941; CHECK-NOF16-NEXT:  // %bb.0:
942; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [test_fcmp_ule_param_1];
943; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [test_fcmp_ule_param_0];
944; CHECK-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
945; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
946; CHECK-NOF16-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
947; CHECK-NOF16-NEXT:    cvt.f32.f16 %f2, %rs4;
948; CHECK-NOF16-NEXT:    setp.leu.f32 %p1, %f2, %f1;
949; CHECK-NOF16-NEXT:    cvt.f32.f16 %f3, %rs1;
950; CHECK-NOF16-NEXT:    cvt.f32.f16 %f4, %rs3;
951; CHECK-NOF16-NEXT:    setp.leu.f32 %p2, %f4, %f3;
952; CHECK-NOF16-NEXT:    selp.u16 %rs5, -1, 0, %p2;
953; CHECK-NOF16-NEXT:    st.param.b8 [func_retval0], %rs5;
954; CHECK-NOF16-NEXT:    selp.u16 %rs6, -1, 0, %p1;
955; CHECK-NOF16-NEXT:    st.param.b8 [func_retval0+1], %rs6;
956; CHECK-NOF16-NEXT:    ret;
957  %r = fcmp ule <2 x half> %a, %b
958  ret <2 x i1> %r
959}
960
961
; Lowering of `fcmp uno` on <2 x half>, returning <2 x i1>.
; f16-enabled run: one packed setp.nan.f16x2 is expected to write both predicates.
; no-f16 runs: each lane is unpacked with mov.b32, widened with cvt.f32.f16 and
; tested with a scalar setp.nan.f32.
; In both cases each predicate is turned into a byte via selp.u16 + st.param.b8.
962define <2 x i1> @test_fcmp_uno(<2 x half> %a, <2 x half> %b) #0 {
963; CHECK-F16-LABEL: test_fcmp_uno(
964; CHECK-F16:       {
965; CHECK-F16-NEXT:    .reg .pred %p<3>;
966; CHECK-F16-NEXT:    .reg .b16 %rs<3>;
967; CHECK-F16-NEXT:    .reg .b32 %r<3>;
968; CHECK-F16-EMPTY:
969; CHECK-F16-NEXT:  // %bb.0:
970; CHECK-F16-NEXT:    ld.param.b32 %r2, [test_fcmp_uno_param_1];
971; CHECK-F16-NEXT:    ld.param.b32 %r1, [test_fcmp_uno_param_0];
972; CHECK-F16-NEXT:    setp.nan.f16x2 %p1|%p2, %r1, %r2;
973; CHECK-F16-NEXT:    selp.u16 %rs1, -1, 0, %p1;
974; CHECK-F16-NEXT:    st.param.b8 [func_retval0], %rs1;
975; CHECK-F16-NEXT:    selp.u16 %rs2, -1, 0, %p2;
976; CHECK-F16-NEXT:    st.param.b8 [func_retval0+1], %rs2;
977; CHECK-F16-NEXT:    ret;
978;
979; CHECK-NOF16-LABEL: test_fcmp_uno(
980; CHECK-NOF16:       {
981; CHECK-NOF16-NEXT:    .reg .pred %p<3>;
982; CHECK-NOF16-NEXT:    .reg .b16 %rs<7>;
983; CHECK-NOF16-NEXT:    .reg .b32 %r<3>;
984; CHECK-NOF16-NEXT:    .reg .f32 %f<5>;
985; CHECK-NOF16-EMPTY:
986; CHECK-NOF16-NEXT:  // %bb.0:
987; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [test_fcmp_uno_param_1];
988; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [test_fcmp_uno_param_0];
989; CHECK-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
990; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
991; CHECK-NOF16-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
992; CHECK-NOF16-NEXT:    cvt.f32.f16 %f2, %rs4;
993; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %f2, %f1;
994; CHECK-NOF16-NEXT:    cvt.f32.f16 %f3, %rs1;
995; CHECK-NOF16-NEXT:    cvt.f32.f16 %f4, %rs3;
996; CHECK-NOF16-NEXT:    setp.nan.f32 %p2, %f4, %f3;
997; CHECK-NOF16-NEXT:    selp.u16 %rs5, -1, 0, %p2;
998; CHECK-NOF16-NEXT:    st.param.b8 [func_retval0], %rs5;
999; CHECK-NOF16-NEXT:    selp.u16 %rs6, -1, 0, %p1;
1000; CHECK-NOF16-NEXT:    st.param.b8 [func_retval0+1], %rs6;
1001; CHECK-NOF16-NEXT:    ret;
1002  %r = fcmp uno <2 x half> %a, %b
1003  ret <2 x i1> %r
1004}
1005
; Lowering of `fcmp one` on <2 x half>, returning <2 x i1>.
; f16-enabled run: one packed setp.ne.f16x2 is expected to write both predicates.
; no-f16 runs: each lane is unpacked with mov.b32, widened with cvt.f32.f16 and
; compared with a scalar setp.ne.f32.
; In both cases each predicate is turned into a byte via selp.u16 + st.param.b8.
1006define <2 x i1> @test_fcmp_one(<2 x half> %a, <2 x half> %b) #0 {
1007; CHECK-F16-LABEL: test_fcmp_one(
1008; CHECK-F16:       {
1009; CHECK-F16-NEXT:    .reg .pred %p<3>;
1010; CHECK-F16-NEXT:    .reg .b16 %rs<3>;
1011; CHECK-F16-NEXT:    .reg .b32 %r<3>;
1012; CHECK-F16-EMPTY:
1013; CHECK-F16-NEXT:  // %bb.0:
1014; CHECK-F16-NEXT:    ld.param.b32 %r2, [test_fcmp_one_param_1];
1015; CHECK-F16-NEXT:    ld.param.b32 %r1, [test_fcmp_one_param_0];
1016; CHECK-F16-NEXT:    setp.ne.f16x2 %p1|%p2, %r1, %r2;
1017; CHECK-F16-NEXT:    selp.u16 %rs1, -1, 0, %p1;
1018; CHECK-F16-NEXT:    st.param.b8 [func_retval0], %rs1;
1019; CHECK-F16-NEXT:    selp.u16 %rs2, -1, 0, %p2;
1020; CHECK-F16-NEXT:    st.param.b8 [func_retval0+1], %rs2;
1021; CHECK-F16-NEXT:    ret;
1022;
1023; CHECK-NOF16-LABEL: test_fcmp_one(
1024; CHECK-NOF16:       {
1025; CHECK-NOF16-NEXT:    .reg .pred %p<3>;
1026; CHECK-NOF16-NEXT:    .reg .b16 %rs<7>;
1027; CHECK-NOF16-NEXT:    .reg .b32 %r<3>;
1028; CHECK-NOF16-NEXT:    .reg .f32 %f<5>;
1029; CHECK-NOF16-EMPTY:
1030; CHECK-NOF16-NEXT:  // %bb.0:
1031; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [test_fcmp_one_param_1];
1032; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [test_fcmp_one_param_0];
1033; CHECK-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
1034; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
1035; CHECK-NOF16-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
1036; CHECK-NOF16-NEXT:    cvt.f32.f16 %f2, %rs4;
1037; CHECK-NOF16-NEXT:    setp.ne.f32 %p1, %f2, %f1;
1038; CHECK-NOF16-NEXT:    cvt.f32.f16 %f3, %rs1;
1039; CHECK-NOF16-NEXT:    cvt.f32.f16 %f4, %rs3;
1040; CHECK-NOF16-NEXT:    setp.ne.f32 %p2, %f4, %f3;
1041; CHECK-NOF16-NEXT:    selp.u16 %rs5, -1, 0, %p2;
1042; CHECK-NOF16-NEXT:    st.param.b8 [func_retval0], %rs5;
1043; CHECK-NOF16-NEXT:    selp.u16 %rs6, -1, 0, %p1;
1044; CHECK-NOF16-NEXT:    st.param.b8 [func_retval0+1], %rs6;
1045; CHECK-NOF16-NEXT:    ret;
1046  %r = fcmp one <2 x half> %a, %b
1047  ret <2 x i1> %r
1048}
1049
; Lowering of `fcmp oeq` on <2 x half>, returning <2 x i1>.
; f16-enabled run: one packed setp.eq.f16x2 is expected to write both predicates.
; no-f16 runs: each lane is unpacked with mov.b32, widened with cvt.f32.f16 and
; compared with a scalar setp.eq.f32.
; In both cases each predicate is turned into a byte via selp.u16 + st.param.b8.
1050define <2 x i1> @test_fcmp_oeq(<2 x half> %a, <2 x half> %b) #0 {
1051; CHECK-F16-LABEL: test_fcmp_oeq(
1052; CHECK-F16:       {
1053; CHECK-F16-NEXT:    .reg .pred %p<3>;
1054; CHECK-F16-NEXT:    .reg .b16 %rs<3>;
1055; CHECK-F16-NEXT:    .reg .b32 %r<3>;
1056; CHECK-F16-EMPTY:
1057; CHECK-F16-NEXT:  // %bb.0:
1058; CHECK-F16-NEXT:    ld.param.b32 %r2, [test_fcmp_oeq_param_1];
1059; CHECK-F16-NEXT:    ld.param.b32 %r1, [test_fcmp_oeq_param_0];
1060; CHECK-F16-NEXT:    setp.eq.f16x2 %p1|%p2, %r1, %r2;
1061; CHECK-F16-NEXT:    selp.u16 %rs1, -1, 0, %p1;
1062; CHECK-F16-NEXT:    st.param.b8 [func_retval0], %rs1;
1063; CHECK-F16-NEXT:    selp.u16 %rs2, -1, 0, %p2;
1064; CHECK-F16-NEXT:    st.param.b8 [func_retval0+1], %rs2;
1065; CHECK-F16-NEXT:    ret;
1066;
1067; CHECK-NOF16-LABEL: test_fcmp_oeq(
1068; CHECK-NOF16:       {
1069; CHECK-NOF16-NEXT:    .reg .pred %p<3>;
1070; CHECK-NOF16-NEXT:    .reg .b16 %rs<7>;
1071; CHECK-NOF16-NEXT:    .reg .b32 %r<3>;
1072; CHECK-NOF16-NEXT:    .reg .f32 %f<5>;
1073; CHECK-NOF16-EMPTY:
1074; CHECK-NOF16-NEXT:  // %bb.0:
1075; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [test_fcmp_oeq_param_1];
1076; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [test_fcmp_oeq_param_0];
1077; CHECK-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
1078; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
1079; CHECK-NOF16-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
1080; CHECK-NOF16-NEXT:    cvt.f32.f16 %f2, %rs4;
1081; CHECK-NOF16-NEXT:    setp.eq.f32 %p1, %f2, %f1;
1082; CHECK-NOF16-NEXT:    cvt.f32.f16 %f3, %rs1;
1083; CHECK-NOF16-NEXT:    cvt.f32.f16 %f4, %rs3;
1084; CHECK-NOF16-NEXT:    setp.eq.f32 %p2, %f4, %f3;
1085; CHECK-NOF16-NEXT:    selp.u16 %rs5, -1, 0, %p2;
1086; CHECK-NOF16-NEXT:    st.param.b8 [func_retval0], %rs5;
1087; CHECK-NOF16-NEXT:    selp.u16 %rs6, -1, 0, %p1;
1088; CHECK-NOF16-NEXT:    st.param.b8 [func_retval0+1], %rs6;
1089; CHECK-NOF16-NEXT:    ret;
1090  %r = fcmp oeq <2 x half> %a, %b
1091  ret <2 x i1> %r
1092}
1093
; Lowering of `fcmp ogt` on <2 x half>, returning <2 x i1>.
; f16-enabled run: one packed setp.gt.f16x2 is expected to write both predicates.
; no-f16 runs: each lane is unpacked with mov.b32, widened with cvt.f32.f16 and
; compared with a scalar setp.gt.f32.
; In both cases each predicate is turned into a byte via selp.u16 + st.param.b8.
1094define <2 x i1> @test_fcmp_ogt(<2 x half> %a, <2 x half> %b) #0 {
1095; CHECK-F16-LABEL: test_fcmp_ogt(
1096; CHECK-F16:       {
1097; CHECK-F16-NEXT:    .reg .pred %p<3>;
1098; CHECK-F16-NEXT:    .reg .b16 %rs<3>;
1099; CHECK-F16-NEXT:    .reg .b32 %r<3>;
1100; CHECK-F16-EMPTY:
1101; CHECK-F16-NEXT:  // %bb.0:
1102; CHECK-F16-NEXT:    ld.param.b32 %r2, [test_fcmp_ogt_param_1];
1103; CHECK-F16-NEXT:    ld.param.b32 %r1, [test_fcmp_ogt_param_0];
1104; CHECK-F16-NEXT:    setp.gt.f16x2 %p1|%p2, %r1, %r2;
1105; CHECK-F16-NEXT:    selp.u16 %rs1, -1, 0, %p1;
1106; CHECK-F16-NEXT:    st.param.b8 [func_retval0], %rs1;
1107; CHECK-F16-NEXT:    selp.u16 %rs2, -1, 0, %p2;
1108; CHECK-F16-NEXT:    st.param.b8 [func_retval0+1], %rs2;
1109; CHECK-F16-NEXT:    ret;
1110;
1111; CHECK-NOF16-LABEL: test_fcmp_ogt(
1112; CHECK-NOF16:       {
1113; CHECK-NOF16-NEXT:    .reg .pred %p<3>;
1114; CHECK-NOF16-NEXT:    .reg .b16 %rs<7>;
1115; CHECK-NOF16-NEXT:    .reg .b32 %r<3>;
1116; CHECK-NOF16-NEXT:    .reg .f32 %f<5>;
1117; CHECK-NOF16-EMPTY:
1118; CHECK-NOF16-NEXT:  // %bb.0:
1119; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [test_fcmp_ogt_param_1];
1120; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [test_fcmp_ogt_param_0];
1121; CHECK-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
1122; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
1123; CHECK-NOF16-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
1124; CHECK-NOF16-NEXT:    cvt.f32.f16 %f2, %rs4;
1125; CHECK-NOF16-NEXT:    setp.gt.f32 %p1, %f2, %f1;
1126; CHECK-NOF16-NEXT:    cvt.f32.f16 %f3, %rs1;
1127; CHECK-NOF16-NEXT:    cvt.f32.f16 %f4, %rs3;
1128; CHECK-NOF16-NEXT:    setp.gt.f32 %p2, %f4, %f3;
1129; CHECK-NOF16-NEXT:    selp.u16 %rs5, -1, 0, %p2;
1130; CHECK-NOF16-NEXT:    st.param.b8 [func_retval0], %rs5;
1131; CHECK-NOF16-NEXT:    selp.u16 %rs6, -1, 0, %p1;
1132; CHECK-NOF16-NEXT:    st.param.b8 [func_retval0+1], %rs6;
1133; CHECK-NOF16-NEXT:    ret;
1134  %r = fcmp ogt <2 x half> %a, %b
1135  ret <2 x i1> %r
1136}
1137
; Lowering of `fcmp oge` on <2 x half>, returning <2 x i1>.
; f16-enabled run: one packed setp.ge.f16x2 is expected to write both predicates.
; no-f16 runs: each lane is unpacked with mov.b32, widened with cvt.f32.f16 and
; compared with a scalar setp.ge.f32.
; In both cases each predicate is turned into a byte via selp.u16 + st.param.b8.
1138define <2 x i1> @test_fcmp_oge(<2 x half> %a, <2 x half> %b) #0 {
1139; CHECK-F16-LABEL: test_fcmp_oge(
1140; CHECK-F16:       {
1141; CHECK-F16-NEXT:    .reg .pred %p<3>;
1142; CHECK-F16-NEXT:    .reg .b16 %rs<3>;
1143; CHECK-F16-NEXT:    .reg .b32 %r<3>;
1144; CHECK-F16-EMPTY:
1145; CHECK-F16-NEXT:  // %bb.0:
1146; CHECK-F16-NEXT:    ld.param.b32 %r2, [test_fcmp_oge_param_1];
1147; CHECK-F16-NEXT:    ld.param.b32 %r1, [test_fcmp_oge_param_0];
1148; CHECK-F16-NEXT:    setp.ge.f16x2 %p1|%p2, %r1, %r2;
1149; CHECK-F16-NEXT:    selp.u16 %rs1, -1, 0, %p1;
1150; CHECK-F16-NEXT:    st.param.b8 [func_retval0], %rs1;
1151; CHECK-F16-NEXT:    selp.u16 %rs2, -1, 0, %p2;
1152; CHECK-F16-NEXT:    st.param.b8 [func_retval0+1], %rs2;
1153; CHECK-F16-NEXT:    ret;
1154;
1155; CHECK-NOF16-LABEL: test_fcmp_oge(
1156; CHECK-NOF16:       {
1157; CHECK-NOF16-NEXT:    .reg .pred %p<3>;
1158; CHECK-NOF16-NEXT:    .reg .b16 %rs<7>;
1159; CHECK-NOF16-NEXT:    .reg .b32 %r<3>;
1160; CHECK-NOF16-NEXT:    .reg .f32 %f<5>;
1161; CHECK-NOF16-EMPTY:
1162; CHECK-NOF16-NEXT:  // %bb.0:
1163; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [test_fcmp_oge_param_1];
1164; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [test_fcmp_oge_param_0];
1165; CHECK-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
1166; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
1167; CHECK-NOF16-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
1168; CHECK-NOF16-NEXT:    cvt.f32.f16 %f2, %rs4;
1169; CHECK-NOF16-NEXT:    setp.ge.f32 %p1, %f2, %f1;
1170; CHECK-NOF16-NEXT:    cvt.f32.f16 %f3, %rs1;
1171; CHECK-NOF16-NEXT:    cvt.f32.f16 %f4, %rs3;
1172; CHECK-NOF16-NEXT:    setp.ge.f32 %p2, %f4, %f3;
1173; CHECK-NOF16-NEXT:    selp.u16 %rs5, -1, 0, %p2;
1174; CHECK-NOF16-NEXT:    st.param.b8 [func_retval0], %rs5;
1175; CHECK-NOF16-NEXT:    selp.u16 %rs6, -1, 0, %p1;
1176; CHECK-NOF16-NEXT:    st.param.b8 [func_retval0+1], %rs6;
1177; CHECK-NOF16-NEXT:    ret;
1178  %r = fcmp oge <2 x half> %a, %b
1179  ret <2 x i1> %r
1180}
1181
; Lowering of `fcmp olt` on <2 x half>, returning <2 x i1>.
; f16-enabled run: one packed setp.lt.f16x2 is expected to write both predicates.
; no-f16 runs: each lane is unpacked with mov.b32, widened with cvt.f32.f16 and
; compared with a scalar setp.lt.f32.
; In both cases each predicate is turned into a byte via selp.u16 + st.param.b8.
1182define <2 x i1> @test_fcmp_olt(<2 x half> %a, <2 x half> %b) #0 {
1183; CHECK-F16-LABEL: test_fcmp_olt(
1184; CHECK-F16:       {
1185; CHECK-F16-NEXT:    .reg .pred %p<3>;
1186; CHECK-F16-NEXT:    .reg .b16 %rs<3>;
1187; CHECK-F16-NEXT:    .reg .b32 %r<3>;
1188; CHECK-F16-EMPTY:
1189; CHECK-F16-NEXT:  // %bb.0:
1190; CHECK-F16-NEXT:    ld.param.b32 %r2, [test_fcmp_olt_param_1];
1191; CHECK-F16-NEXT:    ld.param.b32 %r1, [test_fcmp_olt_param_0];
1192; CHECK-F16-NEXT:    setp.lt.f16x2 %p1|%p2, %r1, %r2;
1193; CHECK-F16-NEXT:    selp.u16 %rs1, -1, 0, %p1;
1194; CHECK-F16-NEXT:    st.param.b8 [func_retval0], %rs1;
1195; CHECK-F16-NEXT:    selp.u16 %rs2, -1, 0, %p2;
1196; CHECK-F16-NEXT:    st.param.b8 [func_retval0+1], %rs2;
1197; CHECK-F16-NEXT:    ret;
1198;
1199; CHECK-NOF16-LABEL: test_fcmp_olt(
1200; CHECK-NOF16:       {
1201; CHECK-NOF16-NEXT:    .reg .pred %p<3>;
1202; CHECK-NOF16-NEXT:    .reg .b16 %rs<7>;
1203; CHECK-NOF16-NEXT:    .reg .b32 %r<3>;
1204; CHECK-NOF16-NEXT:    .reg .f32 %f<5>;
1205; CHECK-NOF16-EMPTY:
1206; CHECK-NOF16-NEXT:  // %bb.0:
1207; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [test_fcmp_olt_param_1];
1208; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [test_fcmp_olt_param_0];
1209; CHECK-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
1210; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
1211; CHECK-NOF16-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
1212; CHECK-NOF16-NEXT:    cvt.f32.f16 %f2, %rs4;
1213; CHECK-NOF16-NEXT:    setp.lt.f32 %p1, %f2, %f1;
1214; CHECK-NOF16-NEXT:    cvt.f32.f16 %f3, %rs1;
1215; CHECK-NOF16-NEXT:    cvt.f32.f16 %f4, %rs3;
1216; CHECK-NOF16-NEXT:    setp.lt.f32 %p2, %f4, %f3;
1217; CHECK-NOF16-NEXT:    selp.u16 %rs5, -1, 0, %p2;
1218; CHECK-NOF16-NEXT:    st.param.b8 [func_retval0], %rs5;
1219; CHECK-NOF16-NEXT:    selp.u16 %rs6, -1, 0, %p1;
1220; CHECK-NOF16-NEXT:    st.param.b8 [func_retval0+1], %rs6;
1221; CHECK-NOF16-NEXT:    ret;
1222  %r = fcmp olt <2 x half> %a, %b
1223  ret <2 x i1> %r
1224}
1225
; Lowering of `fcmp ole` on <2 x half>, returning <2 x i1>.
; f16-enabled run: one packed setp.le.f16x2 is expected to write both predicates.
; no-f16 runs: each lane is unpacked with mov.b32, widened with cvt.f32.f16 and
; compared with a scalar setp.le.f32.
; In both cases each predicate is turned into a byte via selp.u16 + st.param.b8.
1226define <2 x i1> @test_fcmp_ole(<2 x half> %a, <2 x half> %b) #0 {
1227; CHECK-F16-LABEL: test_fcmp_ole(
1228; CHECK-F16:       {
1229; CHECK-F16-NEXT:    .reg .pred %p<3>;
1230; CHECK-F16-NEXT:    .reg .b16 %rs<3>;
1231; CHECK-F16-NEXT:    .reg .b32 %r<3>;
1232; CHECK-F16-EMPTY:
1233; CHECK-F16-NEXT:  // %bb.0:
1234; CHECK-F16-NEXT:    ld.param.b32 %r2, [test_fcmp_ole_param_1];
1235; CHECK-F16-NEXT:    ld.param.b32 %r1, [test_fcmp_ole_param_0];
1236; CHECK-F16-NEXT:    setp.le.f16x2 %p1|%p2, %r1, %r2;
1237; CHECK-F16-NEXT:    selp.u16 %rs1, -1, 0, %p1;
1238; CHECK-F16-NEXT:    st.param.b8 [func_retval0], %rs1;
1239; CHECK-F16-NEXT:    selp.u16 %rs2, -1, 0, %p2;
1240; CHECK-F16-NEXT:    st.param.b8 [func_retval0+1], %rs2;
1241; CHECK-F16-NEXT:    ret;
1242;
1243; CHECK-NOF16-LABEL: test_fcmp_ole(
1244; CHECK-NOF16:       {
1245; CHECK-NOF16-NEXT:    .reg .pred %p<3>;
1246; CHECK-NOF16-NEXT:    .reg .b16 %rs<7>;
1247; CHECK-NOF16-NEXT:    .reg .b32 %r<3>;
1248; CHECK-NOF16-NEXT:    .reg .f32 %f<5>;
1249; CHECK-NOF16-EMPTY:
1250; CHECK-NOF16-NEXT:  // %bb.0:
1251; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [test_fcmp_ole_param_1];
1252; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [test_fcmp_ole_param_0];
1253; CHECK-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
1254; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
1255; CHECK-NOF16-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
1256; CHECK-NOF16-NEXT:    cvt.f32.f16 %f2, %rs4;
1257; CHECK-NOF16-NEXT:    setp.le.f32 %p1, %f2, %f1;
1258; CHECK-NOF16-NEXT:    cvt.f32.f16 %f3, %rs1;
1259; CHECK-NOF16-NEXT:    cvt.f32.f16 %f4, %rs3;
1260; CHECK-NOF16-NEXT:    setp.le.f32 %p2, %f4, %f3;
1261; CHECK-NOF16-NEXT:    selp.u16 %rs5, -1, 0, %p2;
1262; CHECK-NOF16-NEXT:    st.param.b8 [func_retval0], %rs5;
1263; CHECK-NOF16-NEXT:    selp.u16 %rs6, -1, 0, %p1;
1264; CHECK-NOF16-NEXT:    st.param.b8 [func_retval0+1], %rs6;
1265; CHECK-NOF16-NEXT:    ret;
1266  %r = fcmp ole <2 x half> %a, %b
1267  ret <2 x i1> %r
1268}
1269
; Lowering of `fcmp ord` on <2 x half>, returning <2 x i1>.
; f16-enabled run: one packed setp.num.f16x2 is expected to write both predicates.
; no-f16 runs: each lane is unpacked with mov.b32, widened with cvt.f32.f16 and
; tested with a scalar setp.num.f32.
; In both cases each predicate is turned into a byte via selp.u16 + st.param.b8.
1270define <2 x i1> @test_fcmp_ord(<2 x half> %a, <2 x half> %b) #0 {
1271; CHECK-F16-LABEL: test_fcmp_ord(
1272; CHECK-F16:       {
1273; CHECK-F16-NEXT:    .reg .pred %p<3>;
1274; CHECK-F16-NEXT:    .reg .b16 %rs<3>;
1275; CHECK-F16-NEXT:    .reg .b32 %r<3>;
1276; CHECK-F16-EMPTY:
1277; CHECK-F16-NEXT:  // %bb.0:
1278; CHECK-F16-NEXT:    ld.param.b32 %r2, [test_fcmp_ord_param_1];
1279; CHECK-F16-NEXT:    ld.param.b32 %r1, [test_fcmp_ord_param_0];
1280; CHECK-F16-NEXT:    setp.num.f16x2 %p1|%p2, %r1, %r2;
1281; CHECK-F16-NEXT:    selp.u16 %rs1, -1, 0, %p1;
1282; CHECK-F16-NEXT:    st.param.b8 [func_retval0], %rs1;
1283; CHECK-F16-NEXT:    selp.u16 %rs2, -1, 0, %p2;
1284; CHECK-F16-NEXT:    st.param.b8 [func_retval0+1], %rs2;
1285; CHECK-F16-NEXT:    ret;
1286;
1287; CHECK-NOF16-LABEL: test_fcmp_ord(
1288; CHECK-NOF16:       {
1289; CHECK-NOF16-NEXT:    .reg .pred %p<3>;
1290; CHECK-NOF16-NEXT:    .reg .b16 %rs<7>;
1291; CHECK-NOF16-NEXT:    .reg .b32 %r<3>;
1292; CHECK-NOF16-NEXT:    .reg .f32 %f<5>;
1293; CHECK-NOF16-EMPTY:
1294; CHECK-NOF16-NEXT:  // %bb.0:
1295; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [test_fcmp_ord_param_1];
1296; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [test_fcmp_ord_param_0];
1297; CHECK-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
1298; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
1299; CHECK-NOF16-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
1300; CHECK-NOF16-NEXT:    cvt.f32.f16 %f2, %rs4;
1301; CHECK-NOF16-NEXT:    setp.num.f32 %p1, %f2, %f1;
1302; CHECK-NOF16-NEXT:    cvt.f32.f16 %f3, %rs1;
1303; CHECK-NOF16-NEXT:    cvt.f32.f16 %f4, %rs3;
1304; CHECK-NOF16-NEXT:    setp.num.f32 %p2, %f4, %f3;
1305; CHECK-NOF16-NEXT:    selp.u16 %rs5, -1, 0, %p2;
1306; CHECK-NOF16-NEXT:    st.param.b8 [func_retval0], %rs5;
1307; CHECK-NOF16-NEXT:    selp.u16 %rs6, -1, 0, %p1;
1308; CHECK-NOF16-NEXT:    st.param.b8 [func_retval0+1], %rs6;
1309; CHECK-NOF16-NEXT:    ret;
1310  %r = fcmp ord <2 x half> %a, %b
1311  ret <2 x i1> %r
1312}
1313
; `fptosi` from <2 x half> to <2 x i32>.
; The same output is expected from every RUN configuration: the packed input is
; split with mov.b32, each half is converted with a scalar cvt.rzi.s32.f16, and
; both results are returned with a single st.param.v2.b32.
1314define <2 x i32> @test_fptosi_i32(<2 x half> %a) #0 {
1315; CHECK-LABEL: test_fptosi_i32(
1316; CHECK:       {
1317; CHECK-NEXT:    .reg .b16 %rs<3>;
1318; CHECK-NEXT:    .reg .b32 %r<4>;
1319; CHECK-EMPTY:
1320; CHECK-NEXT:  // %bb.0:
1321; CHECK-NEXT:    ld.param.b32 %r1, [test_fptosi_i32_param_0];
1322; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
1323; CHECK-NEXT:    cvt.rzi.s32.f16 %r2, %rs2;
1324; CHECK-NEXT:    cvt.rzi.s32.f16 %r3, %rs1;
1325; CHECK-NEXT:    st.param.v2.b32 [func_retval0], {%r3, %r2};
1326; CHECK-NEXT:    ret;
1327  %r = fptosi <2 x half> %a to <2 x i32>
1328  ret <2 x i32> %r
1329}
1330
; `fptosi` from <2 x half> to <2 x i64>.
; The same output is expected from every RUN configuration: the packed input is
; split with mov.b32, each half is converted with a scalar cvt.rzi.s64.f16, and
; both results are returned with a single st.param.v2.b64.
1331define <2 x i64> @test_fptosi_i64(<2 x half> %a) #0 {
1332; CHECK-LABEL: test_fptosi_i64(
1333; CHECK:       {
1334; CHECK-NEXT:    .reg .b16 %rs<3>;
1335; CHECK-NEXT:    .reg .b32 %r<2>;
1336; CHECK-NEXT:    .reg .b64 %rd<3>;
1337; CHECK-EMPTY:
1338; CHECK-NEXT:  // %bb.0:
1339; CHECK-NEXT:    ld.param.b32 %r1, [test_fptosi_i64_param_0];
1340; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
1341; CHECK-NEXT:    cvt.rzi.s64.f16 %rd1, %rs2;
1342; CHECK-NEXT:    cvt.rzi.s64.f16 %rd2, %rs1;
1343; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd2, %rd1};
1344; CHECK-NEXT:    ret;
1345  %r = fptosi <2 x half> %a to <2 x i64>
1346  ret <2 x i64> %r
1347}
1348
; `fptoui` from <2 x half> to <2 x i32>.
; The same output is expected from every RUN configuration: the packed input is
; split with mov.b32, each half is converted with a scalar cvt.rzi.u32.f16, and
; both results are returned with a single st.param.v2.b32.
1349define <2 x i32> @test_fptoui_2xi32(<2 x half> %a) #0 {
1350; CHECK-LABEL: test_fptoui_2xi32(
1351; CHECK:       {
1352; CHECK-NEXT:    .reg .b16 %rs<3>;
1353; CHECK-NEXT:    .reg .b32 %r<4>;
1354; CHECK-EMPTY:
1355; CHECK-NEXT:  // %bb.0:
1356; CHECK-NEXT:    ld.param.b32 %r1, [test_fptoui_2xi32_param_0];
1357; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
1358; CHECK-NEXT:    cvt.rzi.u32.f16 %r2, %rs2;
1359; CHECK-NEXT:    cvt.rzi.u32.f16 %r3, %rs1;
1360; CHECK-NEXT:    st.param.v2.b32 [func_retval0], {%r3, %r2};
1361; CHECK-NEXT:    ret;
1362  %r = fptoui <2 x half> %a to <2 x i32>
1363  ret <2 x i32> %r
1364}
1365
; `fptoui` from <2 x half> to <2 x i64>.
; The same output is expected from every RUN configuration: the packed input is
; split with mov.b32, each half is converted with a scalar cvt.rzi.u64.f16, and
; both results are returned with a single st.param.v2.b64.
1366define <2 x i64> @test_fptoui_2xi64(<2 x half> %a) #0 {
1367; CHECK-LABEL: test_fptoui_2xi64(
1368; CHECK:       {
1369; CHECK-NEXT:    .reg .b16 %rs<3>;
1370; CHECK-NEXT:    .reg .b32 %r<2>;
1371; CHECK-NEXT:    .reg .b64 %rd<3>;
1372; CHECK-EMPTY:
1373; CHECK-NEXT:  // %bb.0:
1374; CHECK-NEXT:    ld.param.b32 %r1, [test_fptoui_2xi64_param_0];
1375; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
1376; CHECK-NEXT:    cvt.rzi.u64.f16 %rd1, %rs2;
1377; CHECK-NEXT:    cvt.rzi.u64.f16 %rd2, %rs1;
1378; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd2, %rd1};
1379; CHECK-NEXT:    ret;
1380  %r = fptoui <2 x half> %a to <2 x i64>
1381  ret <2 x i64> %r
1382}
1383
; `uitofp` from <2 x i32> to <2 x half>.
; The same output is expected from every RUN configuration: both elements are
; loaded with one ld.param.v2.u32, each is converted with a scalar
; cvt.rn.f16.u32, and the two halves are packed into one b32 register with
; mov.b32 before being stored as the return value.
1384define <2 x half> @test_uitofp_2xi32(<2 x i32> %a) #0 {
1385; CHECK-LABEL: test_uitofp_2xi32(
1386; CHECK:       {
1387; CHECK-NEXT:    .reg .b16 %rs<3>;
1388; CHECK-NEXT:    .reg .b32 %r<4>;
1389; CHECK-EMPTY:
1390; CHECK-NEXT:  // %bb.0:
1391; CHECK-NEXT:    ld.param.v2.u32 {%r1, %r2}, [test_uitofp_2xi32_param_0];
1392; CHECK-NEXT:    cvt.rn.f16.u32 %rs1, %r2;
1393; CHECK-NEXT:    cvt.rn.f16.u32 %rs2, %r1;
1394; CHECK-NEXT:    mov.b32 %r3, {%rs2, %rs1};
1395; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
1396; CHECK-NEXT:    ret;
1397  %r = uitofp <2 x i32> %a to <2 x half>
1398  ret <2 x half> %r
1399}
1400
; `uitofp` from <2 x i64> to <2 x half>.
; The same output is expected from every RUN configuration: both elements are
; loaded with one ld.param.v2.u64, each is converted with a scalar
; cvt.rn.f16.u64, and the two halves are packed into one b32 register with
; mov.b32 before being stored as the return value.
1401define <2 x half> @test_uitofp_2xi64(<2 x i64> %a) #0 {
1402; CHECK-LABEL: test_uitofp_2xi64(
1403; CHECK:       {
1404; CHECK-NEXT:    .reg .b16 %rs<3>;
1405; CHECK-NEXT:    .reg .b32 %r<2>;
1406; CHECK-NEXT:    .reg .b64 %rd<3>;
1407; CHECK-EMPTY:
1408; CHECK-NEXT:  // %bb.0:
1409; CHECK-NEXT:    ld.param.v2.u64 {%rd1, %rd2}, [test_uitofp_2xi64_param_0];
1410; CHECK-NEXT:    cvt.rn.f16.u64 %rs1, %rd2;
1411; CHECK-NEXT:    cvt.rn.f16.u64 %rs2, %rd1;
1412; CHECK-NEXT:    mov.b32 %r1, {%rs2, %rs1};
1413; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
1414; CHECK-NEXT:    ret;
1415  %r = uitofp <2 x i64> %a to <2 x half>
1416  ret <2 x half> %r
1417}
1418
; `sitofp` from <2 x i32> to <2 x half>.
; The same output is expected from every RUN configuration: both elements are
; loaded with one ld.param.v2.u32, each is converted with a scalar
; cvt.rn.f16.s32, and the two halves are packed into one b32 register with
; mov.b32 before being stored as the return value.
1419define <2 x half> @test_sitofp_2xi32(<2 x i32> %a) #0 {
1420; CHECK-LABEL: test_sitofp_2xi32(
1421; CHECK:       {
1422; CHECK-NEXT:    .reg .b16 %rs<3>;
1423; CHECK-NEXT:    .reg .b32 %r<4>;
1424; CHECK-EMPTY:
1425; CHECK-NEXT:  // %bb.0:
1426; CHECK-NEXT:    ld.param.v2.u32 {%r1, %r2}, [test_sitofp_2xi32_param_0];
1427; CHECK-NEXT:    cvt.rn.f16.s32 %rs1, %r2;
1428; CHECK-NEXT:    cvt.rn.f16.s32 %rs2, %r1;
1429; CHECK-NEXT:    mov.b32 %r3, {%rs2, %rs1};
1430; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
1431; CHECK-NEXT:    ret;
1432  %r = sitofp <2 x i32> %a to <2 x half>
1433  ret <2 x half> %r
1434}
1435
; `sitofp` from <2 x i64> to <2 x half>.
; The same output is expected from every RUN configuration: both elements are
; loaded with one ld.param.v2.u64, each is converted with a scalar
; cvt.rn.f16.s64, and the two halves are packed into one b32 register with
; mov.b32 before being stored as the return value.
1436define <2 x half> @test_sitofp_2xi64(<2 x i64> %a) #0 {
1437; CHECK-LABEL: test_sitofp_2xi64(
1438; CHECK:       {
1439; CHECK-NEXT:    .reg .b16 %rs<3>;
1440; CHECK-NEXT:    .reg .b32 %r<2>;
1441; CHECK-NEXT:    .reg .b64 %rd<3>;
1442; CHECK-EMPTY:
1443; CHECK-NEXT:  // %bb.0:
1444; CHECK-NEXT:    ld.param.v2.u64 {%rd1, %rd2}, [test_sitofp_2xi64_param_0];
1445; CHECK-NEXT:    cvt.rn.f16.s64 %rs1, %rd2;
1446; CHECK-NEXT:    cvt.rn.f16.s64 %rs2, %rd1;
1447; CHECK-NEXT:    mov.b32 %r1, {%rs2, %rs1};
1448; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
1449; CHECK-NEXT:    ret;
1450  %r = sitofp <2 x i64> %a to <2 x half>
1451  ret <2 x half> %r
1452}
1453
1454
; `uitofp` <2 x i32> -> <2 x half> whose result feeds an `fadd` with %b.
; f16-enabled run: the two cvt.rn.f16.u32 results are packed with mov.b32 and
; added with a single add.rn.f16x2.
; no-f16 runs: the converted halves are never packed; each lane is widened with
; cvt.f32.f16, added with add.rn.f32 and rounded back with cvt.rn.f16.f32, and
; only the two sums are packed for the return value.
1455define <2 x half> @test_uitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 {
1456; CHECK-F16-LABEL: test_uitofp_2xi32_fadd(
1457; CHECK-F16:       {
1458; CHECK-F16-NEXT:    .reg .b16 %rs<3>;
1459; CHECK-F16-NEXT:    .reg .b32 %r<6>;
1460; CHECK-F16-EMPTY:
1461; CHECK-F16-NEXT:  // %bb.0:
1462; CHECK-F16-NEXT:    ld.param.v2.u32 {%r1, %r2}, [test_uitofp_2xi32_fadd_param_0];
1463; CHECK-F16-NEXT:    ld.param.b32 %r3, [test_uitofp_2xi32_fadd_param_1];
1464; CHECK-F16-NEXT:    cvt.rn.f16.u32 %rs1, %r2;
1465; CHECK-F16-NEXT:    cvt.rn.f16.u32 %rs2, %r1;
1466; CHECK-F16-NEXT:    mov.b32 %r4, {%rs2, %rs1};
1467; CHECK-F16-NEXT:    add.rn.f16x2 %r5, %r3, %r4;
1468; CHECK-F16-NEXT:    st.param.b32 [func_retval0], %r5;
1469; CHECK-F16-NEXT:    ret;
1470;
1471; CHECK-NOF16-LABEL: test_uitofp_2xi32_fadd(
1472; CHECK-NOF16:       {
1473; CHECK-NOF16-NEXT:    .reg .b16 %rs<7>;
1474; CHECK-NOF16-NEXT:    .reg .b32 %r<5>;
1475; CHECK-NOF16-NEXT:    .reg .f32 %f<7>;
1476; CHECK-NOF16-EMPTY:
1477; CHECK-NOF16-NEXT:  // %bb.0:
1478; CHECK-NOF16-NEXT:    ld.param.v2.u32 {%r1, %r2}, [test_uitofp_2xi32_fadd_param_0];
1479; CHECK-NOF16-NEXT:    ld.param.b32 %r3, [test_uitofp_2xi32_fadd_param_1];
1480; CHECK-NOF16-NEXT:    cvt.rn.f16.u32 %rs1, %r1;
1481; CHECK-NOF16-NEXT:    cvt.rn.f16.u32 %rs2, %r2;
1482; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
1483; CHECK-NOF16-NEXT:    mov.b32 {%rs3, %rs4}, %r3;
1484; CHECK-NOF16-NEXT:    cvt.f32.f16 %f2, %rs4;
1485; CHECK-NOF16-NEXT:    add.rn.f32 %f3, %f2, %f1;
1486; CHECK-NOF16-NEXT:    cvt.rn.f16.f32 %rs5, %f3;
1487; CHECK-NOF16-NEXT:    cvt.f32.f16 %f4, %rs1;
1488; CHECK-NOF16-NEXT:    cvt.f32.f16 %f5, %rs3;
1489; CHECK-NOF16-NEXT:    add.rn.f32 %f6, %f5, %f4;
1490; CHECK-NOF16-NEXT:    cvt.rn.f16.f32 %rs6, %f6;
1491; CHECK-NOF16-NEXT:    mov.b32 %r4, {%rs6, %rs5};
1492; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r4;
1493; CHECK-NOF16-NEXT:    ret;
1494  %c = uitofp <2 x i32> %a to <2 x half>
1495  %r = fadd <2 x half> %b, %c
1496  ret <2 x half> %r
1497}
1498
; `sitofp` <2 x i32> -> <2 x half> whose result feeds an `fadd` with %b.
; f16-enabled run: the two cvt.rn.f16.s32 results are packed with mov.b32 and
; added with a single add.rn.f16x2.
; no-f16 runs: the converted halves are never packed; each lane is widened with
; cvt.f32.f16, added with add.rn.f32 and rounded back with cvt.rn.f16.f32, and
; only the two sums are packed for the return value.
1499define <2 x half> @test_sitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 {
1500; CHECK-F16-LABEL: test_sitofp_2xi32_fadd(
1501; CHECK-F16:       {
1502; CHECK-F16-NEXT:    .reg .b16 %rs<3>;
1503; CHECK-F16-NEXT:    .reg .b32 %r<6>;
1504; CHECK-F16-EMPTY:
1505; CHECK-F16-NEXT:  // %bb.0:
1506; CHECK-F16-NEXT:    ld.param.v2.u32 {%r1, %r2}, [test_sitofp_2xi32_fadd_param_0];
1507; CHECK-F16-NEXT:    ld.param.b32 %r3, [test_sitofp_2xi32_fadd_param_1];
1508; CHECK-F16-NEXT:    cvt.rn.f16.s32 %rs1, %r2;
1509; CHECK-F16-NEXT:    cvt.rn.f16.s32 %rs2, %r1;
1510; CHECK-F16-NEXT:    mov.b32 %r4, {%rs2, %rs1};
1511; CHECK-F16-NEXT:    add.rn.f16x2 %r5, %r3, %r4;
1512; CHECK-F16-NEXT:    st.param.b32 [func_retval0], %r5;
1513; CHECK-F16-NEXT:    ret;
1514;
1515; CHECK-NOF16-LABEL: test_sitofp_2xi32_fadd(
1516; CHECK-NOF16:       {
1517; CHECK-NOF16-NEXT:    .reg .b16 %rs<7>;
1518; CHECK-NOF16-NEXT:    .reg .b32 %r<5>;
1519; CHECK-NOF16-NEXT:    .reg .f32 %f<7>;
1520; CHECK-NOF16-EMPTY:
1521; CHECK-NOF16-NEXT:  // %bb.0:
1522; CHECK-NOF16-NEXT:    ld.param.v2.u32 {%r1, %r2}, [test_sitofp_2xi32_fadd_param_0];
1523; CHECK-NOF16-NEXT:    ld.param.b32 %r3, [test_sitofp_2xi32_fadd_param_1];
1524; CHECK-NOF16-NEXT:    cvt.rn.f16.s32 %rs1, %r1;
1525; CHECK-NOF16-NEXT:    cvt.rn.f16.s32 %rs2, %r2;
1526; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
1527; CHECK-NOF16-NEXT:    mov.b32 {%rs3, %rs4}, %r3;
1528; CHECK-NOF16-NEXT:    cvt.f32.f16 %f2, %rs4;
1529; CHECK-NOF16-NEXT:    add.rn.f32 %f3, %f2, %f1;
1530; CHECK-NOF16-NEXT:    cvt.rn.f16.f32 %rs5, %f3;
1531; CHECK-NOF16-NEXT:    cvt.f32.f16 %f4, %rs1;
1532; CHECK-NOF16-NEXT:    cvt.f32.f16 %f5, %rs3;
1533; CHECK-NOF16-NEXT:    add.rn.f32 %f6, %f5, %f4;
1534; CHECK-NOF16-NEXT:    cvt.rn.f16.f32 %rs6, %f6;
1535; CHECK-NOF16-NEXT:    mov.b32 %r4, {%rs6, %rs5};
1536; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r4;
1537; CHECK-NOF16-NEXT:    ret;
1538  %c = sitofp <2 x i32> %a to <2 x half>
1539  %r = fadd <2 x half> %b, %c
1540  ret <2 x half> %r
1541}
1542
; fptrunc <2 x float> -> <2 x half>. Every RUN configuration expects one
; cvt.rn.f16.f32 per lane, with the two 16-bit results packed into a single
; 32-bit register for the return value.
1543define <2 x half> @test_fptrunc_2xfloat(<2 x float> %a) #0 {
1544; CHECK-LABEL: test_fptrunc_2xfloat(
1545; CHECK:       {
1546; CHECK-NEXT:    .reg .b16 %rs<3>;
1547; CHECK-NEXT:    .reg .b32 %r<2>;
1548; CHECK-NEXT:    .reg .f32 %f<3>;
1549; CHECK-EMPTY:
1550; CHECK-NEXT:  // %bb.0:
1551; CHECK-NEXT:    ld.param.v2.f32 {%f1, %f2}, [test_fptrunc_2xfloat_param_0];
1552; CHECK-NEXT:    cvt.rn.f16.f32 %rs1, %f2;
1553; CHECK-NEXT:    cvt.rn.f16.f32 %rs2, %f1;
1554; CHECK-NEXT:    mov.b32 %r1, {%rs2, %rs1};
1555; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
1556; CHECK-NEXT:    ret;
1557  %r = fptrunc <2 x float> %a to <2 x half>
1558  ret <2 x half> %r
1559}
1560
1560
; fptrunc <2 x double> -> <2 x half>. Every RUN configuration expects one
; cvt.rn.f16.f64 per lane (a direct f64 -> f16 conversion, no f32 step),
; followed by packing both halves into one 32-bit register.
1561define <2 x half> @test_fptrunc_2xdouble(<2 x double> %a) #0 {
1562; CHECK-LABEL: test_fptrunc_2xdouble(
1563; CHECK:       {
1564; CHECK-NEXT:    .reg .b16 %rs<3>;
1565; CHECK-NEXT:    .reg .b32 %r<2>;
1566; CHECK-NEXT:    .reg .f64 %fd<3>;
1567; CHECK-EMPTY:
1568; CHECK-NEXT:  // %bb.0:
1569; CHECK-NEXT:    ld.param.v2.f64 {%fd1, %fd2}, [test_fptrunc_2xdouble_param_0];
1570; CHECK-NEXT:    cvt.rn.f16.f64 %rs1, %fd2;
1571; CHECK-NEXT:    cvt.rn.f16.f64 %rs2, %fd1;
1572; CHECK-NEXT:    mov.b32 %r1, {%rs2, %rs1};
1573; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
1574; CHECK-NEXT:    ret;
1575  %r = fptrunc <2 x double> %a to <2 x half>
1576  ret <2 x half> %r
1577}
1578
1578
; fpext <2 x half> -> <2 x float>. Every RUN configuration expects the packed
; 32-bit argument to be split into two 16-bit registers, each lane widened
; with cvt.f32.f16, and the pair returned through a v2.f32 store.
1579define <2 x float> @test_fpext_2xfloat(<2 x half> %a) #0 {
1580; CHECK-LABEL: test_fpext_2xfloat(
1581; CHECK:       {
1582; CHECK-NEXT:    .reg .b16 %rs<3>;
1583; CHECK-NEXT:    .reg .b32 %r<2>;
1584; CHECK-NEXT:    .reg .f32 %f<3>;
1585; CHECK-EMPTY:
1586; CHECK-NEXT:  // %bb.0:
1587; CHECK-NEXT:    ld.param.b32 %r1, [test_fpext_2xfloat_param_0];
1588; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
1589; CHECK-NEXT:    cvt.f32.f16 %f1, %rs2;
1590; CHECK-NEXT:    cvt.f32.f16 %f2, %rs1;
1591; CHECK-NEXT:    st.param.v2.f32 [func_retval0], {%f2, %f1};
1592; CHECK-NEXT:    ret;
1593  %r = fpext <2 x half> %a to <2 x float>
1594  ret <2 x float> %r
1595}
1596
1596
; fpext <2 x half> -> <2 x double>. Every RUN configuration expects the packed
; argument to be split, each lane widened directly with cvt.f64.f16, and the
; pair returned through a v2.f64 store.
1597define <2 x double> @test_fpext_2xdouble(<2 x half> %a) #0 {
1598; CHECK-LABEL: test_fpext_2xdouble(
1599; CHECK:       {
1600; CHECK-NEXT:    .reg .b16 %rs<3>;
1601; CHECK-NEXT:    .reg .b32 %r<2>;
1602; CHECK-NEXT:    .reg .f64 %fd<3>;
1603; CHECK-EMPTY:
1604; CHECK-NEXT:  // %bb.0:
1605; CHECK-NEXT:    ld.param.b32 %r1, [test_fpext_2xdouble_param_0];
1606; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
1607; CHECK-NEXT:    cvt.f64.f16 %fd1, %rs2;
1608; CHECK-NEXT:    cvt.f64.f16 %fd2, %rs1;
1609; CHECK-NEXT:    st.param.v2.f64 [func_retval0], {%fd2, %fd1};
1610; CHECK-NEXT:    ret;
1611  %r = fpext <2 x half> %a to <2 x double>
1612  ret <2 x double> %r
1613}
1614
1614
1615
; bitcast <2 x half> -> <2 x i16>. The expected output is only a 32-bit
; parameter load and a 32-bit return store on the same register: the bitcast
; itself should produce no instruction.
1616define <2 x i16> @test_bitcast_2xhalf_to_2xi16(<2 x half> %a) #0 {
1617; CHECK-LABEL: test_bitcast_2xhalf_to_2xi16(
1618; CHECK:       {
1619; CHECK-NEXT:    .reg .b32 %r<2>;
1620; CHECK-EMPTY:
1621; CHECK-NEXT:  // %bb.0:
1622; CHECK-NEXT:    ld.param.u32 %r1, [test_bitcast_2xhalf_to_2xi16_param_0];
1623; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
1624; CHECK-NEXT:    ret;
1625  %r = bitcast <2 x half> %a to <2 x i16>
1626  ret <2 x i16> %r
1627}
1628
1628
; bitcast <2 x i16> -> <2 x half>. Mirror of the previous direction: the
; expected output is a 32-bit parameter load followed directly by the 32-bit
; return store, with no instruction for the bitcast.
1629define <2 x half> @test_bitcast_2xi16_to_2xhalf(<2 x i16> %a) #0 {
1630; CHECK-LABEL: test_bitcast_2xi16_to_2xhalf(
1631; CHECK:       {
1632; CHECK-NEXT:    .reg .b32 %r<2>;
1633; CHECK-EMPTY:
1634; CHECK-NEXT:  // %bb.0:
1635; CHECK-NEXT:    ld.param.u32 %r1, [test_bitcast_2xi16_to_2xhalf_param_0];
1636; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
1637; CHECK-NEXT:    ret;
1638  %r = bitcast <2 x i16> %a to <2 x half>
1639  ret <2 x half> %r
1640}
1641
1641
; bitcast float -> <2 x half>. The argument is loaded into an .f32 register,
; so the only instruction expected for the cast is a mov.b32 that copies the
; bits into a .b32 register before the return store.
1642define <2 x half> @test_bitcast_float_to_2xhalf(float %a) #0 {
1643; CHECK-LABEL: test_bitcast_float_to_2xhalf(
1644; CHECK:       {
1645; CHECK-NEXT:    .reg .b32 %r<2>;
1646; CHECK-NEXT:    .reg .f32 %f<2>;
1647; CHECK-EMPTY:
1648; CHECK-NEXT:  // %bb.0:
1649; CHECK-NEXT:    ld.param.f32 %f1, [test_bitcast_float_to_2xhalf_param_0];
1650; CHECK-NEXT:    mov.b32 %r1, %f1;
1651; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
1652; CHECK-NEXT:    ret;
1653  %r = bitcast float %a to <2 x half>
1654  ret <2 x half> %r
1655}
1656
1656
; bitcast <2 x half> -> float. The packed argument is loaded into a .b32
; register, so the only instruction expected for the cast is a mov.b32 into an
; .f32 register, which is then stored as the f32 return value.
1657define float @test_bitcast_2xhalf_to_float(<2 x half> %a) #0 {
1658; CHECK-LABEL: test_bitcast_2xhalf_to_float(
1659; CHECK:       {
1660; CHECK-NEXT:    .reg .b32 %r<2>;
1661; CHECK-NEXT:    .reg .f32 %f<2>;
1662; CHECK-EMPTY:
1663; CHECK-NEXT:  // %bb.0:
1664; CHECK-NEXT:    ld.param.u32 %r1, [test_bitcast_2xhalf_to_float_param_0];
1665; CHECK-NEXT:    mov.b32 %f1, %r1;
1666; CHECK-NEXT:    st.param.f32 [func_retval0], %f1;
1667; CHECK-NEXT:    ret;
1668  %r = bitcast <2 x half> %a to float
1669  ret float %r
1670}
1671
1671
; Declarations of the math intrinsics exercised by the tests that follow, all
; taking and returning <2 x half>.
; NOTE(review): every name carries the scalar overload suffix ".f16" although
; the operand types are <2 x half>; the conventional suffix for that type
; would be ".v2f16". Presumably the IR reader remangles these on load -
; confirm before renaming, and rename the call sites together with these.
; NOTE(review): llvm.powi is declared here with a <2 x i32> exponent; the
; LangRef examples use a scalar integer exponent. It is only referenced from
; a commented-out test in this file - confirm the signature before enabling.
1672declare <2 x half> @llvm.sqrt.f16(<2 x half> %a) #0
1673declare <2 x half> @llvm.powi.f16.i32(<2 x half> %a, <2 x i32> %b) #0
1674declare <2 x half> @llvm.sin.f16(<2 x half> %a) #0
1675declare <2 x half> @llvm.cos.f16(<2 x half> %a) #0
1676declare <2 x half> @llvm.pow.f16(<2 x half> %a, <2 x half> %b) #0
1677declare <2 x half> @llvm.exp.f16(<2 x half> %a) #0
1678declare <2 x half> @llvm.exp2.f16(<2 x half> %a) #0
1679declare <2 x half> @llvm.log.f16(<2 x half> %a) #0
1680declare <2 x half> @llvm.log10.f16(<2 x half> %a) #0
1681declare <2 x half> @llvm.log2.f16(<2 x half> %a) #0
1682declare <2 x half> @llvm.fma.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0
1683declare <2 x half> @llvm.fabs.f16(<2 x half> %a) #0
1684declare <2 x half> @llvm.minnum.f16(<2 x half> %a, <2 x half> %b) #0
1685declare <2 x half> @llvm.maxnum.f16(<2 x half> %a, <2 x half> %b) #0
1686declare <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %b) #0
1687declare <2 x half> @llvm.floor.f16(<2 x half> %a) #0
1688declare <2 x half> @llvm.ceil.f16(<2 x half> %a) #0
1689declare <2 x half> @llvm.trunc.f16(<2 x half> %a) #0
1690declare <2 x half> @llvm.rint.f16(<2 x half> %a) #0
1691declare <2 x half> @llvm.nearbyint.f16(<2 x half> %a) #0
1692declare <2 x half> @llvm.round.f16(<2 x half> %a) #0
1693declare <2 x half> @llvm.roundeven.f16(<2 x half> %a) #0
1694declare <2 x half> @llvm.fmuladd.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0
1695
1695
; llvm.sqrt on <2 x half>. Every RUN configuration expects the operation to be
; done per lane in f32: unpack, cvt.f32.f16, sqrt.rn.f32, cvt.rn.f16.f32, then
; repack both results into one 32-bit register.
1696define <2 x half> @test_sqrt(<2 x half> %a) #0 {
1697; CHECK-LABEL: test_sqrt(
1698; CHECK:       {
1699; CHECK-NEXT:    .reg .b16 %rs<5>;
1700; CHECK-NEXT:    .reg .b32 %r<3>;
1701; CHECK-NEXT:    .reg .f32 %f<5>;
1702; CHECK-EMPTY:
1703; CHECK-NEXT:  // %bb.0:
1704; CHECK-NEXT:    ld.param.b32 %r1, [test_sqrt_param_0];
1705; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
1706; CHECK-NEXT:    cvt.f32.f16 %f1, %rs2;
1707; CHECK-NEXT:    sqrt.rn.f32 %f2, %f1;
1708; CHECK-NEXT:    cvt.rn.f16.f32 %rs3, %f2;
1709; CHECK-NEXT:    cvt.f32.f16 %f3, %rs1;
1710; CHECK-NEXT:    sqrt.rn.f32 %f4, %f3;
1711; CHECK-NEXT:    cvt.rn.f16.f32 %rs4, %f4;
1712; CHECK-NEXT:    mov.b32 %r2, {%rs4, %rs3};
1713; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
1714; CHECK-NEXT:    ret;
1715  %r = call <2 x half> @llvm.sqrt.f16(<2 x half> %a)
1716  ret <2 x half> %r
1717}
1718
1718
1719;;; Can't do this yet: requires libcall.
1720; XCHECK-LABEL: test_powi(
1721;define <2 x half> @test_powi(<2 x half> %a, <2 x i32> %b) #0 {
1722;  %r = call <2 x half> @llvm.powi.f16.i32(<2 x half> %a, <2 x i32> %b)
1723;  ret <2 x half> %r
1724;}
1725
; llvm.sin on <2 x half>. Every RUN configuration expects a per-lane f32
; computation using sin.approx.f32, bracketed by cvt.f32.f16 and
; cvt.rn.f16.f32.
; NOTE(review): this function carries attribute group #1 in addition to #0;
; #1 is defined outside this part of the file - presumably it is what permits
; the approximate instruction; confirm against the attribute definitions.
1726define <2 x half> @test_sin(<2 x half> %a) #0 #1 {
1727; CHECK-LABEL: test_sin(
1728; CHECK:       {
1729; CHECK-NEXT:    .reg .b16 %rs<5>;
1730; CHECK-NEXT:    .reg .b32 %r<3>;
1731; CHECK-NEXT:    .reg .f32 %f<5>;
1732; CHECK-EMPTY:
1733; CHECK-NEXT:  // %bb.0:
1734; CHECK-NEXT:    ld.param.b32 %r1, [test_sin_param_0];
1735; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
1736; CHECK-NEXT:    cvt.f32.f16 %f1, %rs2;
1737; CHECK-NEXT:    sin.approx.f32 %f2, %f1;
1738; CHECK-NEXT:    cvt.rn.f16.f32 %rs3, %f2;
1739; CHECK-NEXT:    cvt.f32.f16 %f3, %rs1;
1740; CHECK-NEXT:    sin.approx.f32 %f4, %f3;
1741; CHECK-NEXT:    cvt.rn.f16.f32 %rs4, %f4;
1742; CHECK-NEXT:    mov.b32 %r2, {%rs4, %rs3};
1743; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
1744; CHECK-NEXT:    ret;
1745  %r = call <2 x half> @llvm.sin.f16(<2 x half> %a)
1746  ret <2 x half> %r
1747}
1748
1748
; llvm.cos on <2 x half>. Every RUN configuration expects a per-lane f32
; computation using cos.approx.f32, bracketed by cvt.f32.f16 and
; cvt.rn.f16.f32.
; NOTE(review): this function carries attribute group #1 in addition to #0;
; #1 is defined outside this part of the file - presumably it is what permits
; the approximate instruction; confirm against the attribute definitions.
1749define <2 x half> @test_cos(<2 x half> %a) #0 #1 {
1750; CHECK-LABEL: test_cos(
1751; CHECK:       {
1752; CHECK-NEXT:    .reg .b16 %rs<5>;
1753; CHECK-NEXT:    .reg .b32 %r<3>;
1754; CHECK-NEXT:    .reg .f32 %f<5>;
1755; CHECK-EMPTY:
1756; CHECK-NEXT:  // %bb.0:
1757; CHECK-NEXT:    ld.param.b32 %r1, [test_cos_param_0];
1758; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
1759; CHECK-NEXT:    cvt.f32.f16 %f1, %rs2;
1760; CHECK-NEXT:    cos.approx.f32 %f2, %f1;
1761; CHECK-NEXT:    cvt.rn.f16.f32 %rs3, %f2;
1762; CHECK-NEXT:    cvt.f32.f16 %f3, %rs1;
1763; CHECK-NEXT:    cos.approx.f32 %f4, %f3;
1764; CHECK-NEXT:    cvt.rn.f16.f32 %rs4, %f4;
1765; CHECK-NEXT:    mov.b32 %r2, {%rs4, %rs3};
1766; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
1767; CHECK-NEXT:    ret;
1768  %r = call <2 x half> @llvm.cos.f16(<2 x half> %a)
1769  ret <2 x half> %r
1770}
1771
1771
1772;;; Can't do this yet: requires libcall.
1773; XCHECK-LABEL: test_pow(
1774;define <2 x half> @test_pow(<2 x half> %a, <2 x half> %b) #0 {
1775;  %r = call <2 x half> @llvm.pow.f16(<2 x half> %a, <2 x half> %b)
1776;  ret <2 x half> %r
1777;}
1778
1779;;; Can't do this yet: requires libcall.
1780; XCHECK-LABEL: test_exp(
1781;define <2 x half> @test_exp(<2 x half> %a) #0 {
1782;  %r = call <2 x half> @llvm.exp.f16(<2 x half> %a)
1783;  ret <2 x half> %r
1784;}
1785
1786;;; Can't do this yet: requires libcall.
1787; XCHECK-LABEL: test_exp2(
1788;define <2 x half> @test_exp2(<2 x half> %a) #0 {
1789;  %r = call <2 x half> @llvm.exp2.f16(<2 x half> %a)
1790;  ret <2 x half> %r
1791;}
1792
1793;;; Can't do this yet: requires libcall.
1794; XCHECK-LABEL: test_log(
1795;define <2 x half> @test_log(<2 x half> %a) #0 {
1796;  %r = call <2 x half> @llvm.log.f16(<2 x half> %a)
1797;  ret <2 x half> %r
1798;}
1799
1800;;; Can't do this yet: requires libcall.
1801; XCHECK-LABEL: test_log10(
1802;define <2 x half> @test_log10(<2 x half> %a) #0 {
1803;  %r = call <2 x half> @llvm.log10.f16(<2 x half> %a)
1804;  ret <2 x half> %r
1805;}
1806
1807;;; Can't do this yet: requires libcall.
1808; XCHECK-LABEL: test_log2(
1809;define <2 x half> @test_log2(<2 x half> %a) #0 {
1810;  %r = call <2 x half> @llvm.log2.f16(<2 x half> %a)
1811;  ret <2 x half> %r
1812;}
1813
1814
; llvm.fma on <2 x half>. With native f16 math a single fma.rn.f16x2 on the
; three packed operands is expected. Without it, each operand is unpacked,
; every lane is widened to f32, combined with fma.rn.f32 and rounded back to
; f16 before the two results are repacked.
1815define <2 x half> @test_fma(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 {
1816; CHECK-F16-LABEL: test_fma(
1817; CHECK-F16:       {
1818; CHECK-F16-NEXT:    .reg .b32 %r<5>;
1819; CHECK-F16-EMPTY:
1820; CHECK-F16-NEXT:  // %bb.0:
1821; CHECK-F16-NEXT:    ld.param.b32 %r3, [test_fma_param_2];
1822; CHECK-F16-NEXT:    ld.param.b32 %r2, [test_fma_param_1];
1823; CHECK-F16-NEXT:    ld.param.b32 %r1, [test_fma_param_0];
1824; CHECK-F16-NEXT:    fma.rn.f16x2 %r4, %r1, %r2, %r3;
1825; CHECK-F16-NEXT:    st.param.b32 [func_retval0], %r4;
1826; CHECK-F16-NEXT:    ret;
1827;
1828; CHECK-NOF16-LABEL: test_fma(
1829; CHECK-NOF16:       {
1830; CHECK-NOF16-NEXT:    .reg .b16 %rs<9>;
1831; CHECK-NOF16-NEXT:    .reg .b32 %r<5>;
1832; CHECK-NOF16-NEXT:    .reg .f32 %f<9>;
1833; CHECK-NOF16-EMPTY:
1834; CHECK-NOF16-NEXT:  // %bb.0:
1835; CHECK-NOF16-NEXT:    ld.param.b32 %r3, [test_fma_param_2];
1836; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [test_fma_param_1];
1837; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [test_fma_param_0];
1838; CHECK-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r3;
1839; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
1840; CHECK-NOF16-NEXT:    mov.b32 {%rs3, %rs4}, %r2;
1841; CHECK-NOF16-NEXT:    cvt.f32.f16 %f2, %rs4;
1842; CHECK-NOF16-NEXT:    mov.b32 {%rs5, %rs6}, %r1;
1843; CHECK-NOF16-NEXT:    cvt.f32.f16 %f3, %rs6;
1844; CHECK-NOF16-NEXT:    fma.rn.f32 %f4, %f3, %f2, %f1;
1845; CHECK-NOF16-NEXT:    cvt.rn.f16.f32 %rs7, %f4;
1846; CHECK-NOF16-NEXT:    cvt.f32.f16 %f5, %rs1;
1847; CHECK-NOF16-NEXT:    cvt.f32.f16 %f6, %rs3;
1848; CHECK-NOF16-NEXT:    cvt.f32.f16 %f7, %rs5;
1849; CHECK-NOF16-NEXT:    fma.rn.f32 %f8, %f7, %f6, %f5;
1850; CHECK-NOF16-NEXT:    cvt.rn.f16.f32 %rs8, %f8;
1851; CHECK-NOF16-NEXT:    mov.b32 %r4, {%rs8, %rs7};
1852; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r4;
1853; CHECK-NOF16-NEXT:    ret;
1854  %r = call <2 x half> @llvm.fma.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)
1855  ret <2 x half> %r
1856}
1857
1857
; llvm.fabs on <2 x half>. With native f16 math a single and.b32 with
; 2147450879 (0x7FFF7FFF) is expected, clearing the sign bit of both packed
; halves at once. Without it, each lane is widened to f32, passed through
; abs.f32 and rounded back to f16.
1858define <2 x half> @test_fabs(<2 x half> %a) #0 {
1859; CHECK-F16-LABEL: test_fabs(
1860; CHECK-F16:       {
1861; CHECK-F16-NEXT:    .reg .b32 %r<3>;
1862; CHECK-F16-EMPTY:
1863; CHECK-F16-NEXT:  // %bb.0:
1864; CHECK-F16-NEXT:    ld.param.b32 %r1, [test_fabs_param_0];
1865; CHECK-F16-NEXT:    and.b32 %r2, %r1, 2147450879;
1866; CHECK-F16-NEXT:    st.param.b32 [func_retval0], %r2;
1867; CHECK-F16-NEXT:    ret;
1868;
1869; CHECK-NOF16-LABEL: test_fabs(
1870; CHECK-NOF16:       {
1871; CHECK-NOF16-NEXT:    .reg .b16 %rs<5>;
1872; CHECK-NOF16-NEXT:    .reg .b32 %r<3>;
1873; CHECK-NOF16-NEXT:    .reg .f32 %f<5>;
1874; CHECK-NOF16-EMPTY:
1875; CHECK-NOF16-NEXT:  // %bb.0:
1876; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [test_fabs_param_0];
1877; CHECK-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
1878; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
1879; CHECK-NOF16-NEXT:    abs.f32 %f2, %f1;
1880; CHECK-NOF16-NEXT:    cvt.rn.f16.f32 %rs3, %f2;
1881; CHECK-NOF16-NEXT:    cvt.f32.f16 %f3, %rs1;
1882; CHECK-NOF16-NEXT:    abs.f32 %f4, %f3;
1883; CHECK-NOF16-NEXT:    cvt.rn.f16.f32 %rs4, %f4;
1884; CHECK-NOF16-NEXT:    mov.b32 %r2, {%rs4, %rs3};
1885; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r2;
1886; CHECK-NOF16-NEXT:    ret;
1887  %r = call <2 x half> @llvm.fabs.f16(<2 x half> %a)
1888  ret <2 x half> %r
1889}
1890
1890
; llvm.minnum on <2 x half>. Every RUN configuration expects the same code:
; both operands are unpacked, each lane is widened to f32, reduced with
; min.f32 and rounded back to f16 before repacking.
1891define <2 x half> @test_minnum(<2 x half> %a, <2 x half> %b) #0 {
1892; CHECK-LABEL: test_minnum(
1893; CHECK:       {
1894; CHECK-NEXT:    .reg .b16 %rs<7>;
1895; CHECK-NEXT:    .reg .b32 %r<4>;
1896; CHECK-NEXT:    .reg .f32 %f<7>;
1897; CHECK-EMPTY:
1898; CHECK-NEXT:  // %bb.0:
1899; CHECK-NEXT:    ld.param.b32 %r2, [test_minnum_param_1];
1900; CHECK-NEXT:    ld.param.b32 %r1, [test_minnum_param_0];
1901; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
1902; CHECK-NEXT:    cvt.f32.f16 %f1, %rs2;
1903; CHECK-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
1904; CHECK-NEXT:    cvt.f32.f16 %f2, %rs4;
1905; CHECK-NEXT:    min.f32 %f3, %f2, %f1;
1906; CHECK-NEXT:    cvt.rn.f16.f32 %rs5, %f3;
1907; CHECK-NEXT:    cvt.f32.f16 %f4, %rs1;
1908; CHECK-NEXT:    cvt.f32.f16 %f5, %rs3;
1909; CHECK-NEXT:    min.f32 %f6, %f5, %f4;
1910; CHECK-NEXT:    cvt.rn.f16.f32 %rs6, %f6;
1911; CHECK-NEXT:    mov.b32 %r3, {%rs6, %rs5};
1912; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
1913; CHECK-NEXT:    ret;
1914  %r = call <2 x half> @llvm.minnum.f16(<2 x half> %a, <2 x half> %b)
1915  ret <2 x half> %r
1916}
1917
1917
; llvm.maxnum on <2 x half>. Every RUN configuration expects the same code:
; both operands are unpacked, each lane is widened to f32, reduced with
; max.f32 and rounded back to f16 before repacking.
1918define <2 x half> @test_maxnum(<2 x half> %a, <2 x half> %b) #0 {
1919; CHECK-LABEL: test_maxnum(
1920; CHECK:       {
1921; CHECK-NEXT:    .reg .b16 %rs<7>;
1922; CHECK-NEXT:    .reg .b32 %r<4>;
1923; CHECK-NEXT:    .reg .f32 %f<7>;
1924; CHECK-EMPTY:
1925; CHECK-NEXT:  // %bb.0:
1926; CHECK-NEXT:    ld.param.b32 %r2, [test_maxnum_param_1];
1927; CHECK-NEXT:    ld.param.b32 %r1, [test_maxnum_param_0];
1928; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
1929; CHECK-NEXT:    cvt.f32.f16 %f1, %rs2;
1930; CHECK-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
1931; CHECK-NEXT:    cvt.f32.f16 %f2, %rs4;
1932; CHECK-NEXT:    max.f32 %f3, %f2, %f1;
1933; CHECK-NEXT:    cvt.rn.f16.f32 %rs5, %f3;
1934; CHECK-NEXT:    cvt.f32.f16 %f4, %rs1;
1935; CHECK-NEXT:    cvt.f32.f16 %f5, %rs3;
1936; CHECK-NEXT:    max.f32 %f6, %f5, %f4;
1937; CHECK-NEXT:    cvt.rn.f16.f32 %rs6, %f6;
1938; CHECK-NEXT:    mov.b32 %r3, {%rs6, %rs5};
1939; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
1940; CHECK-NEXT:    ret;
1941  %r = call <2 x half> @llvm.maxnum.f16(<2 x half> %a, <2 x half> %b)
1942  ret <2 x half> %r
1943}
1944
1944
; llvm.copysign on <2 x half> with both operands already half vectors.
; No floating-point instruction is expected in either mode; the result is
; assembled with integer bit operations:
;  * native f16 math: on the packed 32-bit values, the signs of %b are kept
;    with mask -2147450880 (0x80008000), the magnitudes of %a with 2147450879
;    (0x7FFF7FFF), and the two are OR-ed together;
;  * no f16 math: the same masking is done per 16-bit lane with -32768 and
;    32767, and the lanes are repacked afterwards.
1945define <2 x half> @test_copysign(<2 x half> %a, <2 x half> %b) #0 {
1946; CHECK-F16-LABEL: test_copysign(
1947; CHECK-F16:       {
1948; CHECK-F16-NEXT:    .reg .b32 %r<6>;
1949; CHECK-F16-EMPTY:
1950; CHECK-F16-NEXT:  // %bb.0:
1951; CHECK-F16-NEXT:    ld.param.b32 %r2, [test_copysign_param_1];
1952; CHECK-F16-NEXT:    ld.param.b32 %r1, [test_copysign_param_0];
1953; CHECK-F16-NEXT:    and.b32 %r3, %r2, -2147450880;
1954; CHECK-F16-NEXT:    and.b32 %r4, %r1, 2147450879;
1955; CHECK-F16-NEXT:    or.b32 %r5, %r4, %r3;
1956; CHECK-F16-NEXT:    st.param.b32 [func_retval0], %r5;
1957; CHECK-F16-NEXT:    ret;
1958;
1959; CHECK-NOF16-LABEL: test_copysign(
1960; CHECK-NOF16:       {
1961; CHECK-NOF16-NEXT:    .reg .b16 %rs<11>;
1962; CHECK-NOF16-NEXT:    .reg .b32 %r<4>;
1963; CHECK-NOF16-EMPTY:
1964; CHECK-NOF16-NEXT:  // %bb.0:
1965; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [test_copysign_param_1];
1966; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [test_copysign_param_0];
1967; CHECK-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
1968; CHECK-NOF16-NEXT:    and.b16 %rs3, %rs2, -32768;
1969; CHECK-NOF16-NEXT:    mov.b32 {%rs4, %rs5}, %r1;
1970; CHECK-NOF16-NEXT:    and.b16 %rs6, %rs5, 32767;
1971; CHECK-NOF16-NEXT:    or.b16 %rs7, %rs6, %rs3;
1972; CHECK-NOF16-NEXT:    and.b16 %rs8, %rs1, -32768;
1973; CHECK-NOF16-NEXT:    and.b16 %rs9, %rs4, 32767;
1974; CHECK-NOF16-NEXT:    or.b16 %rs10, %rs9, %rs8;
1975; CHECK-NOF16-NEXT:    mov.b32 %r3, {%rs10, %rs7};
1976; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r3;
1977; CHECK-NOF16-NEXT:    ret;
1978  %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %b)
1979  ret <2 x half> %r
1980}
1981
1981
; llvm.copysign on <2 x half> where the sign operand is an fptrunc of a
; <2 x float>.
;  * native f16 math: the truncation is really performed (cvt.rn.f16.f32 per
;    lane, then pack) and the packed 0x80008000 / 0x7FFF7FFF masks are applied
;    as in the plain copysign test;
;  * no f16 math: no truncating conversion is expected. The sign bit is taken
;    straight from each f32 bit pattern (and.b32 with -2147483648), the upper
;    16 bits of that word are extracted into a 16-bit register, and OR-ed with
;    the magnitude of the corresponding half lane (and.b16 with 32767).
1982define <2 x half> @test_copysign_f32(<2 x half> %a, <2 x float> %b) #0 {
1983; CHECK-F16-LABEL: test_copysign_f32(
1984; CHECK-F16:       {
1985; CHECK-F16-NEXT:    .reg .b16 %rs<3>;
1986; CHECK-F16-NEXT:    .reg .b32 %r<6>;
1987; CHECK-F16-NEXT:    .reg .f32 %f<3>;
1988; CHECK-F16-EMPTY:
1989; CHECK-F16-NEXT:  // %bb.0:
1990; CHECK-F16-NEXT:    ld.param.v2.f32 {%f1, %f2}, [test_copysign_f32_param_1];
1991; CHECK-F16-NEXT:    ld.param.b32 %r1, [test_copysign_f32_param_0];
1992; CHECK-F16-NEXT:    cvt.rn.f16.f32 %rs1, %f2;
1993; CHECK-F16-NEXT:    cvt.rn.f16.f32 %rs2, %f1;
1994; CHECK-F16-NEXT:    mov.b32 %r2, {%rs2, %rs1};
1995; CHECK-F16-NEXT:    and.b32 %r3, %r2, -2147450880;
1996; CHECK-F16-NEXT:    and.b32 %r4, %r1, 2147450879;
1997; CHECK-F16-NEXT:    or.b32 %r5, %r4, %r3;
1998; CHECK-F16-NEXT:    st.param.b32 [func_retval0], %r5;
1999; CHECK-F16-NEXT:    ret;
2000;
2001; CHECK-NOF16-LABEL: test_copysign_f32(
2002; CHECK-NOF16:       {
2003; CHECK-NOF16-NEXT:    .reg .b16 %rs<9>;
2004; CHECK-NOF16-NEXT:    .reg .b32 %r<7>;
2005; CHECK-NOF16-NEXT:    .reg .f32 %f<3>;
2006; CHECK-NOF16-EMPTY:
2007; CHECK-NOF16-NEXT:  // %bb.0:
2008; CHECK-NOF16-NEXT:    ld.param.v2.f32 {%f1, %f2}, [test_copysign_f32_param_1];
2009; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [test_copysign_f32_param_0];
2010; CHECK-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
2011; CHECK-NOF16-NEXT:    and.b16 %rs3, %rs2, 32767;
2012; CHECK-NOF16-NEXT:    mov.b32 %r2, %f2;
2013; CHECK-NOF16-NEXT:    and.b32 %r3, %r2, -2147483648;
2014; CHECK-NOF16-NEXT:    { .reg .b16 tmp; mov.b32 {tmp, %rs4}, %r3; }
2015; CHECK-NOF16-NEXT:    or.b16 %rs5, %rs3, %rs4;
2016; CHECK-NOF16-NEXT:    and.b16 %rs6, %rs1, 32767;
2017; CHECK-NOF16-NEXT:    mov.b32 %r4, %f1;
2018; CHECK-NOF16-NEXT:    and.b32 %r5, %r4, -2147483648;
2019; CHECK-NOF16-NEXT:    { .reg .b16 tmp; mov.b32 {tmp, %rs7}, %r5; }
2020; CHECK-NOF16-NEXT:    or.b16 %rs8, %rs6, %rs7;
2021; CHECK-NOF16-NEXT:    mov.b32 %r6, {%rs8, %rs5};
2022; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r6;
2023; CHECK-NOF16-NEXT:    ret;
2024  %tb = fptrunc <2 x float> %b to <2 x half>
2025  %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %tb)
2026  ret <2 x half> %r
2027}
2028
2028
; llvm.copysign on <2 x half> where the sign operand is an fptrunc of a
; <2 x double>.
;  * native f16 math: the truncation is really performed (cvt.rn.f16.f64 per
;    lane, then pack) and the packed 0x80008000 / 0x7FFF7FFF masks are applied;
;  * no f16 math: no truncating conversion is expected. The sign bit is taken
;    from each f64 bit pattern (and.b64 with -9223372036854775808), shifted
;    right by 48 so it lands in bit 15, narrowed to 16 bits, and OR-ed with
;    the magnitude of the corresponding half lane (and.b16 with 32767).
2029define <2 x half> @test_copysign_f64(<2 x half> %a, <2 x double> %b) #0 {
2030; CHECK-F16-LABEL: test_copysign_f64(
2031; CHECK-F16:       {
2032; CHECK-F16-NEXT:    .reg .b16 %rs<3>;
2033; CHECK-F16-NEXT:    .reg .b32 %r<6>;
2034; CHECK-F16-NEXT:    .reg .f64 %fd<3>;
2035; CHECK-F16-EMPTY:
2036; CHECK-F16-NEXT:  // %bb.0:
2037; CHECK-F16-NEXT:    ld.param.v2.f64 {%fd1, %fd2}, [test_copysign_f64_param_1];
2038; CHECK-F16-NEXT:    ld.param.b32 %r1, [test_copysign_f64_param_0];
2039; CHECK-F16-NEXT:    cvt.rn.f16.f64 %rs1, %fd2;
2040; CHECK-F16-NEXT:    cvt.rn.f16.f64 %rs2, %fd1;
2041; CHECK-F16-NEXT:    mov.b32 %r2, {%rs2, %rs1};
2042; CHECK-F16-NEXT:    and.b32 %r3, %r2, -2147450880;
2043; CHECK-F16-NEXT:    and.b32 %r4, %r1, 2147450879;
2044; CHECK-F16-NEXT:    or.b32 %r5, %r4, %r3;
2045; CHECK-F16-NEXT:    st.param.b32 [func_retval0], %r5;
2046; CHECK-F16-NEXT:    ret;
2047;
2048; CHECK-NOF16-LABEL: test_copysign_f64(
2049; CHECK-NOF16:       {
2050; CHECK-NOF16-NEXT:    .reg .b16 %rs<9>;
2051; CHECK-NOF16-NEXT:    .reg .b32 %r<3>;
2052; CHECK-NOF16-NEXT:    .reg .b64 %rd<7>;
2053; CHECK-NOF16-NEXT:    .reg .f64 %fd<3>;
2054; CHECK-NOF16-EMPTY:
2055; CHECK-NOF16-NEXT:  // %bb.0:
2056; CHECK-NOF16-NEXT:    ld.param.v2.f64 {%fd1, %fd2}, [test_copysign_f64_param_1];
2057; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [test_copysign_f64_param_0];
2058; CHECK-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
2059; CHECK-NOF16-NEXT:    and.b16 %rs3, %rs2, 32767;
2060; CHECK-NOF16-NEXT:    mov.b64 %rd1, %fd2;
2061; CHECK-NOF16-NEXT:    and.b64 %rd2, %rd1, -9223372036854775808;
2062; CHECK-NOF16-NEXT:    shr.u64 %rd3, %rd2, 48;
2063; CHECK-NOF16-NEXT:    cvt.u16.u64 %rs4, %rd3;
2064; CHECK-NOF16-NEXT:    or.b16 %rs5, %rs3, %rs4;
2065; CHECK-NOF16-NEXT:    and.b16 %rs6, %rs1, 32767;
2066; CHECK-NOF16-NEXT:    mov.b64 %rd4, %fd1;
2067; CHECK-NOF16-NEXT:    and.b64 %rd5, %rd4, -9223372036854775808;
2068; CHECK-NOF16-NEXT:    shr.u64 %rd6, %rd5, 48;
2069; CHECK-NOF16-NEXT:    cvt.u16.u64 %rs7, %rd6;
2070; CHECK-NOF16-NEXT:    or.b16 %rs8, %rs6, %rs7;
2071; CHECK-NOF16-NEXT:    mov.b32 %r2, {%rs8, %rs5};
2072; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r2;
2073; CHECK-NOF16-NEXT:    ret;
2074  %tb = fptrunc <2 x double> %b to <2 x half>
2075  %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %tb)
2076  ret <2 x half> %r
2077}
2078
2078
; llvm.copysign on <2 x half> whose result is then fpext-ed to <2 x float>.
; In both modes the copysign is expected to stay in the half domain as
; integer mask/OR operations (packed 32-bit masks with native f16 math,
; per-lane 16-bit masks without), and only afterwards is each lane widened
; with cvt.f32.f16 and returned through a v2.f32 store.
2079define <2 x float> @test_copysign_extended(<2 x half> %a, <2 x half> %b) #0 {
2080; CHECK-F16-LABEL: test_copysign_extended(
2081; CHECK-F16:       {
2082; CHECK-F16-NEXT:    .reg .b16 %rs<3>;
2083; CHECK-F16-NEXT:    .reg .b32 %r<6>;
2084; CHECK-F16-NEXT:    .reg .f32 %f<3>;
2085; CHECK-F16-EMPTY:
2086; CHECK-F16-NEXT:  // %bb.0:
2087; CHECK-F16-NEXT:    ld.param.b32 %r2, [test_copysign_extended_param_1];
2088; CHECK-F16-NEXT:    ld.param.b32 %r1, [test_copysign_extended_param_0];
2089; CHECK-F16-NEXT:    and.b32 %r3, %r2, -2147450880;
2090; CHECK-F16-NEXT:    and.b32 %r4, %r1, 2147450879;
2091; CHECK-F16-NEXT:    or.b32 %r5, %r4, %r3;
2092; CHECK-F16-NEXT:    mov.b32 {%rs1, %rs2}, %r5;
2093; CHECK-F16-NEXT:    cvt.f32.f16 %f1, %rs2;
2094; CHECK-F16-NEXT:    cvt.f32.f16 %f2, %rs1;
2095; CHECK-F16-NEXT:    st.param.v2.f32 [func_retval0], {%f2, %f1};
2096; CHECK-F16-NEXT:    ret;
2097;
2098; CHECK-NOF16-LABEL: test_copysign_extended(
2099; CHECK-NOF16:       {
2100; CHECK-NOF16-NEXT:    .reg .b16 %rs<11>;
2101; CHECK-NOF16-NEXT:    .reg .b32 %r<3>;
2102; CHECK-NOF16-NEXT:    .reg .f32 %f<3>;
2103; CHECK-NOF16-EMPTY:
2104; CHECK-NOF16-NEXT:  // %bb.0:
2105; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [test_copysign_extended_param_1];
2106; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [test_copysign_extended_param_0];
2107; CHECK-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
2108; CHECK-NOF16-NEXT:    and.b16 %rs3, %rs1, -32768;
2109; CHECK-NOF16-NEXT:    mov.b32 {%rs4, %rs5}, %r1;
2110; CHECK-NOF16-NEXT:    and.b16 %rs6, %rs4, 32767;
2111; CHECK-NOF16-NEXT:    or.b16 %rs7, %rs6, %rs3;
2112; CHECK-NOF16-NEXT:    and.b16 %rs8, %rs2, -32768;
2113; CHECK-NOF16-NEXT:    and.b16 %rs9, %rs5, 32767;
2114; CHECK-NOF16-NEXT:    or.b16 %rs10, %rs9, %rs8;
2115; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs10;
2116; CHECK-NOF16-NEXT:    cvt.f32.f16 %f2, %rs7;
2117; CHECK-NOF16-NEXT:    st.param.v2.f32 [func_retval0], {%f2, %f1};
2118; CHECK-NOF16-NEXT:    ret;
2119  %r = call <2 x half> @llvm.copysign.f16(<2 x half> %a, <2 x half> %b)
2120  %xr = fpext <2 x half> %r to <2 x float>
2121  ret <2 x float> %xr
2122}
2123
2123
; llvm.floor on <2 x half>. Every RUN configuration expects the vector to be
; unpacked and each lane rounded in place with cvt.rmi.f16.f16 (round toward
; minus infinity to an integral value), then repacked.
2124define <2 x half> @test_floor(<2 x half> %a) #0 {
2125; CHECK-LABEL: test_floor(
2126; CHECK:       {
2127; CHECK-NEXT:    .reg .b16 %rs<5>;
2128; CHECK-NEXT:    .reg .b32 %r<3>;
2129; CHECK-EMPTY:
2130; CHECK-NEXT:  // %bb.0:
2131; CHECK-NEXT:    ld.param.b32 %r1, [test_floor_param_0];
2132; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
2133; CHECK-NEXT:    cvt.rmi.f16.f16 %rs3, %rs2;
2134; CHECK-NEXT:    cvt.rmi.f16.f16 %rs4, %rs1;
2135; CHECK-NEXT:    mov.b32 %r2, {%rs4, %rs3};
2136; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
2137; CHECK-NEXT:    ret;
2138  %r = call <2 x half> @llvm.floor.f16(<2 x half> %a)
2139  ret <2 x half> %r
2140}
2141
2141
; llvm.ceil on <2 x half>. Every RUN configuration expects the vector to be
; unpacked and each lane rounded in place with cvt.rpi.f16.f16 (round toward
; plus infinity to an integral value), then repacked.
2142define <2 x half> @test_ceil(<2 x half> %a) #0 {
2143; CHECK-LABEL: test_ceil(
2144; CHECK:       {
2145; CHECK-NEXT:    .reg .b16 %rs<5>;
2146; CHECK-NEXT:    .reg .b32 %r<3>;
2147; CHECK-EMPTY:
2148; CHECK-NEXT:  // %bb.0:
2149; CHECK-NEXT:    ld.param.b32 %r1, [test_ceil_param_0];
2150; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
2151; CHECK-NEXT:    cvt.rpi.f16.f16 %rs3, %rs2;
2152; CHECK-NEXT:    cvt.rpi.f16.f16 %rs4, %rs1;
2153; CHECK-NEXT:    mov.b32 %r2, {%rs4, %rs3};
2154; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
2155; CHECK-NEXT:    ret;
2156  %r = call <2 x half> @llvm.ceil.f16(<2 x half> %a)
2157  ret <2 x half> %r
2158}
2159
2159
; llvm.trunc on <2 x half>. Every RUN configuration expects the vector to be
; unpacked and each lane rounded in place with cvt.rzi.f16.f16 (round toward
; zero to an integral value), then repacked.
2160define <2 x half> @test_trunc(<2 x half> %a) #0 {
2161; CHECK-LABEL: test_trunc(
2162; CHECK:       {
2163; CHECK-NEXT:    .reg .b16 %rs<5>;
2164; CHECK-NEXT:    .reg .b32 %r<3>;
2165; CHECK-EMPTY:
2166; CHECK-NEXT:  // %bb.0:
2167; CHECK-NEXT:    ld.param.b32 %r1, [test_trunc_param_0];
2168; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
2169; CHECK-NEXT:    cvt.rzi.f16.f16 %rs3, %rs2;
2170; CHECK-NEXT:    cvt.rzi.f16.f16 %rs4, %rs1;
2171; CHECK-NEXT:    mov.b32 %r2, {%rs4, %rs3};
2172; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
2173; CHECK-NEXT:    ret;
2174  %r = call <2 x half> @llvm.trunc.f16(<2 x half> %a)
2175  ret <2 x half> %r
2176}
2177
2177
; llvm.rint on <2 x half>. Every RUN configuration expects the vector to be
; unpacked and each lane rounded in place with cvt.rni.f16.f16 (round to
; nearest integral value, ties to even), then repacked.
2178define <2 x half> @test_rint(<2 x half> %a) #0 {
2179; CHECK-LABEL: test_rint(
2180; CHECK:       {
2181; CHECK-NEXT:    .reg .b16 %rs<5>;
2182; CHECK-NEXT:    .reg .b32 %r<3>;
2183; CHECK-EMPTY:
2184; CHECK-NEXT:  // %bb.0:
2185; CHECK-NEXT:    ld.param.b32 %r1, [test_rint_param_0];
2186; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
2187; CHECK-NEXT:    cvt.rni.f16.f16 %rs3, %rs2;
2188; CHECK-NEXT:    cvt.rni.f16.f16 %rs4, %rs1;
2189; CHECK-NEXT:    mov.b32 %r2, {%rs4, %rs3};
2190; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
2191; CHECK-NEXT:    ret;
2192  %r = call <2 x half> @llvm.rint.f16(<2 x half> %a)
2193  ret <2 x half> %r
2194}
2195
2195
; llvm.nearbyint on <2 x half>. The expected code is the same as for
; llvm.rint: each unpacked lane goes through cvt.rni.f16.f16 and the results
; are repacked, in every RUN configuration.
2196define <2 x half> @test_nearbyint(<2 x half> %a) #0 {
2197; CHECK-LABEL: test_nearbyint(
2198; CHECK:       {
2199; CHECK-NEXT:    .reg .b16 %rs<5>;
2200; CHECK-NEXT:    .reg .b32 %r<3>;
2201; CHECK-EMPTY:
2202; CHECK-NEXT:  // %bb.0:
2203; CHECK-NEXT:    ld.param.b32 %r1, [test_nearbyint_param_0];
2204; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
2205; CHECK-NEXT:    cvt.rni.f16.f16 %rs3, %rs2;
2206; CHECK-NEXT:    cvt.rni.f16.f16 %rs4, %rs1;
2207; CHECK-NEXT:    mov.b32 %r2, {%rs4, %rs3};
2208; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
2209; CHECK-NEXT:    ret;
2210  %r = call <2 x half> @llvm.nearbyint.f16(<2 x half> %a)
2211  ret <2 x half> %r
2212}
2213
2213
; llvm.roundeven on <2 x half>. Every RUN configuration expects each unpacked
; lane to go through cvt.rni.f16.f16 (round to nearest integral value, ties
; to even) before the results are repacked.
2214define <2 x half> @test_roundeven(<2 x half> %a) #0 {
2215; CHECK-LABEL: test_roundeven(
2216; CHECK:       {
2217; CHECK-NEXT:    .reg .b16 %rs<5>;
2218; CHECK-NEXT:    .reg .b32 %r<3>;
2219; CHECK-EMPTY:
2220; CHECK-NEXT:  // %bb.0:
2221; CHECK-NEXT:    ld.param.b32 %r1, [test_roundeven_param_0];
2222; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
2223; CHECK-NEXT:    cvt.rni.f16.f16 %rs3, %rs2;
2224; CHECK-NEXT:    cvt.rni.f16.f16 %rs4, %rs1;
2225; CHECK-NEXT:    mov.b32 %r2, {%rs4, %rs3};
2226; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
2227; CHECK-NEXT:    ret;
2228  %r = call <2 x half> @llvm.roundeven.f16(<2 x half> %a)
2229  ret <2 x half> %r
2230}
2231
2231
; llvm.round on <2 x half>: checks that round is implemented with a sign mask
; and the constant 0.5. Each lane is widened to f32 and then, per lane:
;  * the sign bit of x (and.b32 with -2147483648) is OR-ed into 1056964608
;    (0x3F000000, i.e. 0.5f), giving 0.5 with the sign of x;
;  * that value is added to x and the sum is truncated with cvt.rzi.f32.f32;
;  * if |x| > 0f4B000000 (2^23) the original x is selected instead;
;  * if |x| < 0f3F000000 (0.5) the plain truncation of x is selected instead.
; The selected value is rounded back to f16 and the two lanes are repacked.
; The same sequence is expected in every RUN configuration.
2232; check the use of sign mask and 0.5 to implement round
2233define <2 x half> @test_round(<2 x half> %a) #0 {
2234; CHECK-LABEL: test_round(
2235; CHECK:       {
2236; CHECK-NEXT:    .reg .pred %p<5>;
2237; CHECK-NEXT:    .reg .b16 %rs<5>;
2238; CHECK-NEXT:    .reg .b32 %r<9>;
2239; CHECK-NEXT:    .reg .f32 %f<17>;
2240; CHECK-EMPTY:
2241; CHECK-NEXT:  // %bb.0:
2242; CHECK-NEXT:    ld.param.b32 %r1, [test_round_param_0];
2243; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
2244; CHECK-NEXT:    cvt.f32.f16 %f1, %rs2;
2245; CHECK-NEXT:    mov.b32 %r2, %f1;
2246; CHECK-NEXT:    and.b32 %r3, %r2, -2147483648;
2247; CHECK-NEXT:    or.b32 %r4, %r3, 1056964608;
2248; CHECK-NEXT:    mov.b32 %f2, %r4;
2249; CHECK-NEXT:    add.rn.f32 %f3, %f1, %f2;
2250; CHECK-NEXT:    cvt.rzi.f32.f32 %f4, %f3;
2251; CHECK-NEXT:    abs.f32 %f5, %f1;
2252; CHECK-NEXT:    setp.gt.f32 %p1, %f5, 0f4B000000;
2253; CHECK-NEXT:    selp.f32 %f6, %f1, %f4, %p1;
2254; CHECK-NEXT:    cvt.rzi.f32.f32 %f7, %f1;
2255; CHECK-NEXT:    setp.lt.f32 %p2, %f5, 0f3F000000;
2256; CHECK-NEXT:    selp.f32 %f8, %f7, %f6, %p2;
2257; CHECK-NEXT:    cvt.rn.f16.f32 %rs3, %f8;
2258; CHECK-NEXT:    cvt.f32.f16 %f9, %rs1;
2259; CHECK-NEXT:    mov.b32 %r5, %f9;
2260; CHECK-NEXT:    and.b32 %r6, %r5, -2147483648;
2261; CHECK-NEXT:    or.b32 %r7, %r6, 1056964608;
2262; CHECK-NEXT:    mov.b32 %f10, %r7;
2263; CHECK-NEXT:    add.rn.f32 %f11, %f9, %f10;
2264; CHECK-NEXT:    cvt.rzi.f32.f32 %f12, %f11;
2265; CHECK-NEXT:    abs.f32 %f13, %f9;
2266; CHECK-NEXT:    setp.gt.f32 %p3, %f13, 0f4B000000;
2267; CHECK-NEXT:    selp.f32 %f14, %f9, %f12, %p3;
2268; CHECK-NEXT:    cvt.rzi.f32.f32 %f15, %f9;
2269; CHECK-NEXT:    setp.lt.f32 %p4, %f13, 0f3F000000;
2270; CHECK-NEXT:    selp.f32 %f16, %f15, %f14, %p4;
2271; CHECK-NEXT:    cvt.rn.f16.f32 %rs4, %f16;
2272; CHECK-NEXT:    mov.b32 %r8, {%rs4, %rs3};
2273; CHECK-NEXT:    st.param.b32 [func_retval0], %r8;
2274; CHECK-NEXT:    ret;
2275  %r = call <2 x half> @llvm.round.f16(<2 x half> %a)
2276  ret <2 x half> %r
2277}
2278
2278
; llvm.fmuladd on <2 x half>. The expected code matches the llvm.fma test:
; with native f16 math a single fused fma.rn.f16x2 on the packed operands;
; without it, a per-lane fma.rn.f32 on values widened from f16, each result
; rounded back to f16 and the two lanes repacked.
2279define <2 x half> @test_fmuladd(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 {
2280; CHECK-F16-LABEL: test_fmuladd(
2281; CHECK-F16:       {
2282; CHECK-F16-NEXT:    .reg .b32 %r<5>;
2283; CHECK-F16-EMPTY:
2284; CHECK-F16-NEXT:  // %bb.0:
2285; CHECK-F16-NEXT:    ld.param.b32 %r3, [test_fmuladd_param_2];
2286; CHECK-F16-NEXT:    ld.param.b32 %r2, [test_fmuladd_param_1];
2287; CHECK-F16-NEXT:    ld.param.b32 %r1, [test_fmuladd_param_0];
2288; CHECK-F16-NEXT:    fma.rn.f16x2 %r4, %r1, %r2, %r3;
2289; CHECK-F16-NEXT:    st.param.b32 [func_retval0], %r4;
2290; CHECK-F16-NEXT:    ret;
2291;
2292; CHECK-NOF16-LABEL: test_fmuladd(
2293; CHECK-NOF16:       {
2294; CHECK-NOF16-NEXT:    .reg .b16 %rs<9>;
2295; CHECK-NOF16-NEXT:    .reg .b32 %r<5>;
2296; CHECK-NOF16-NEXT:    .reg .f32 %f<9>;
2297; CHECK-NOF16-EMPTY:
2298; CHECK-NOF16-NEXT:  // %bb.0:
2299; CHECK-NOF16-NEXT:    ld.param.b32 %r3, [test_fmuladd_param_2];
2300; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [test_fmuladd_param_1];
2301; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [test_fmuladd_param_0];
2302; CHECK-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r3;
2303; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
2304; CHECK-NOF16-NEXT:    mov.b32 {%rs3, %rs4}, %r2;
2305; CHECK-NOF16-NEXT:    cvt.f32.f16 %f2, %rs4;
2306; CHECK-NOF16-NEXT:    mov.b32 {%rs5, %rs6}, %r1;
2307; CHECK-NOF16-NEXT:    cvt.f32.f16 %f3, %rs6;
2308; CHECK-NOF16-NEXT:    fma.rn.f32 %f4, %f3, %f2, %f1;
2309; CHECK-NOF16-NEXT:    cvt.rn.f16.f32 %rs7, %f4;
2310; CHECK-NOF16-NEXT:    cvt.f32.f16 %f5, %rs1;
2311; CHECK-NOF16-NEXT:    cvt.f32.f16 %f6, %rs3;
2312; CHECK-NOF16-NEXT:    cvt.f32.f16 %f7, %rs5;
2313; CHECK-NOF16-NEXT:    fma.rn.f32 %f8, %f7, %f6, %f5;
2314; CHECK-NOF16-NEXT:    cvt.rn.f16.f32 %rs8, %f8;
2315; CHECK-NOF16-NEXT:    mov.b32 %r4, {%rs8, %rs7};
2316; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r4;
2317; CHECK-NOF16-NEXT:    ret;
2318  %r = call <2 x half> @llvm.fmuladd.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)
2319  ret <2 x half> %r
2320}
2321
2321
; test_shufflevector: swaps the two lanes of a <2 x half> value (mask <1, 0>).
; Both mask indices select from %a, so the second shufflevector operand is
; never read; it is written as `poison` because `undef` is deprecated in
; LLVM IR. The expected PTX unpacks the 32-bit parameter into two 16-bit
; registers and repacks them in the opposite order.
2322define <2 x half> @test_shufflevector(<2 x half> %a) #0 {
2323; CHECK-LABEL: test_shufflevector(
2324; CHECK:       {
2325; CHECK-NEXT:    .reg .b16 %rs<3>;
2326; CHECK-NEXT:    .reg .b32 %r<3>;
2327; CHECK-EMPTY:
2328; CHECK-NEXT:  // %bb.0:
2329; CHECK-NEXT:    ld.param.b32 %r1, [test_shufflevector_param_0];
2330; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
2331; CHECK-NEXT:    mov.b32 %r2, {%rs2, %rs1};
2332; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
2333; CHECK-NEXT:    ret;
2334  %s = shufflevector <2 x half> %a, <2 x half> poison, <2 x i32> <i32 1, i32 0>
2335  ret <2 x half> %s
2336}
2337
; test_insertelement: replaces element 1 of a <2 x half> vector with the
; scalar %x. The expected PTX loads %x as a 16-bit value, unpacks the vector
; parameter keeping only its first half (the second half is written to a
; scratch `tmp` register and dropped), then packs the kept half together with
; %x into the 32-bit return value.
2338define <2 x half> @test_insertelement(<2 x half> %a, half %x) #0 {
2339; CHECK-LABEL: test_insertelement(
2340; CHECK:       {
2341; CHECK-NEXT:    .reg .b16 %rs<3>;
2342; CHECK-NEXT:    .reg .b32 %r<3>;
2343; CHECK-EMPTY:
2344; CHECK-NEXT:  // %bb.0:
2345; CHECK-NEXT:    ld.param.b16 %rs1, [test_insertelement_param_1];
2346; CHECK-NEXT:    ld.param.b32 %r1, [test_insertelement_param_0];
2347; CHECK-NEXT:    { .reg .b16 tmp; mov.b32 {%rs2, tmp}, %r1; }
2348; CHECK-NEXT:    mov.b32 %r2, {%rs2, %rs1};
2349; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
2350; CHECK-NEXT:    ret;
2351  %i = insertelement <2 x half> %a, half %x, i64 1
2352  ret <2 x half> %i
2353}
2354
; test_sitofp_2xi16_to_2xhalf: converts a <2 x i16> vector to <2 x half>,
; interpreting the inputs as signed integers. The expected PTX unpacks the
; 32-bit parameter into two 16-bit registers, converts each one separately
; with cvt.rn.f16.s16, and repacks the two results into the return value.
2355define <2 x half> @test_sitofp_2xi16_to_2xhalf(<2 x i16> %a) #0 {
2356; CHECK-LABEL: test_sitofp_2xi16_to_2xhalf(
2357; CHECK:       {
2358; CHECK-NEXT:    .reg .b16 %rs<5>;
2359; CHECK-NEXT:    .reg .b32 %r<3>;
2360; CHECK-EMPTY:
2361; CHECK-NEXT:  // %bb.0:
2362; CHECK-NEXT:    ld.param.u32 %r1, [test_sitofp_2xi16_to_2xhalf_param_0];
2363; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
2364; CHECK-NEXT:    cvt.rn.f16.s16 %rs3, %rs2;
2365; CHECK-NEXT:    cvt.rn.f16.s16 %rs4, %rs1;
2366; CHECK-NEXT:    mov.b32 %r2, {%rs4, %rs3};
2367; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
2368; CHECK-NEXT:    ret;
2369  %r = sitofp <2 x i16> %a to <2 x half>
2370  ret <2 x half> %r
2371}
2372
; test_uitofp_2xi16_to_2xhalf: converts a <2 x i16> vector to <2 x half>,
; interpreting the inputs as unsigned integers. The expected PTX unpacks the
; 32-bit parameter into two 16-bit registers, converts each one separately
; with cvt.rn.f16.u16, and repacks the two results into the return value.
2373define <2 x half> @test_uitofp_2xi16_to_2xhalf(<2 x i16> %a) #0 {
2374; CHECK-LABEL: test_uitofp_2xi16_to_2xhalf(
2375; CHECK:       {
2376; CHECK-NEXT:    .reg .b16 %rs<5>;
2377; CHECK-NEXT:    .reg .b32 %r<3>;
2378; CHECK-EMPTY:
2379; CHECK-NEXT:  // %bb.0:
2380; CHECK-NEXT:    ld.param.u32 %r1, [test_uitofp_2xi16_to_2xhalf_param_0];
2381; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
2382; CHECK-NEXT:    cvt.rn.f16.u16 %rs3, %rs2;
2383; CHECK-NEXT:    cvt.rn.f16.u16 %rs4, %rs1;
2384; CHECK-NEXT:    mov.b32 %r2, {%rs4, %rs3};
2385; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
2386; CHECK-NEXT:    ret;
2387  %r = uitofp <2 x i16> %a to <2 x half>
2388  ret <2 x half> %r
2389}
2390
; Attribute group #0: marks a function as nounwind. The function definitions
; in this part of the file (test_fmuladd through test_uitofp_2xi16_to_2xhalf)
; all reference it.
2391attributes #0 = { nounwind }
; Attribute group #1: sets the string attribute "unsafe-fp-math" to "true".
; Review remark: no function in this part of the file references #1;
; presumably tests earlier in the file use it - confirm before removing.
2392attributes #1 = { "unsafe-fp-math" = "true" }
2393