xref: /llvm-project/llvm/test/CodeGen/NVPTX/math-intrins.ll (revision 310e79875752886a7713911e2a1ec14bc75bd4b3)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NOF16
3; RUN: llc < %s -mcpu=sm_80 -mattr +ptx70 | FileCheck %s --check-prefixes=CHECK,CHECK-F16
4; RUN: llc < %s -mcpu=sm_80 -mattr +ptx70 --nvptx-no-f16-math | FileCheck %s --check-prefixes=CHECK,CHECK-SM80-NOF16
5; RUN: %if ptxas %{ llc < %s | %ptxas-verify %}
6; RUN: %if ptxas-11.0 %{ llc < %s -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %}
7; RUN: %if ptxas-11.0 %{ llc < %s -mcpu=sm_80 --nvptx-no-f16-math | %ptxas-verify -arch=sm_80 %}
8
9target triple = "nvptx64-nvidia-cuda"
10
11; Checks that llvm intrinsics for math functions are correctly lowered to PTX.
12
13declare float @llvm.ceil.f32(float) #0
14declare double @llvm.ceil.f64(double) #0
15declare float @llvm.floor.f32(float) #0
16declare double @llvm.floor.f64(double) #0
17declare float @llvm.round.f32(float) #0
18declare double @llvm.round.f64(double) #0
19declare float @llvm.nearbyint.f32(float) #0
20declare double @llvm.nearbyint.f64(double) #0
21declare float @llvm.rint.f32(float) #0
22declare double @llvm.rint.f64(double) #0
23declare float @llvm.roundeven.f32(float) #0
24declare double @llvm.roundeven.f64(double) #0
25declare float @llvm.trunc.f32(float) #0
26declare double @llvm.trunc.f64(double) #0
27declare float @llvm.fabs.f32(float) #0
28declare double @llvm.fabs.f64(double) #0
29declare half @llvm.minnum.f16(half, half) #0
30declare float @llvm.minnum.f32(float, float) #0
31declare double @llvm.minnum.f64(double, double) #0
32declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #0
33declare half @llvm.minimum.f16(half, half) #0
34declare float @llvm.minimum.f32(float, float) #0
35declare double @llvm.minimum.f64(double, double) #0
36declare <2 x half> @llvm.minimum.v2f16(<2 x half>, <2 x half>) #0
37declare half @llvm.maxnum.f16(half, half) #0
38declare float @llvm.maxnum.f32(float, float) #0
39declare double @llvm.maxnum.f64(double, double) #0
40declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) #0
41declare half @llvm.maximum.f16(half, half) #0
42declare float @llvm.maximum.f32(float, float) #0
43declare double @llvm.maximum.f64(double, double) #0
44declare <2 x half> @llvm.maximum.v2f16(<2 x half>, <2 x half>) #0
45declare float @llvm.fma.f32(float, float, float) #0
46declare double @llvm.fma.f64(double, double, double) #0
47
48; ---- ceil ----
49
; Lowering test for llvm.ceil.f32. The shared CHECK prefix is used by all
; three FileCheck invocations, so every configuration must emit exactly one
; `cvt.rpi.f32.f32` between the parameter load and the return-value store.
50define float @ceil_float(float %a) {
51; CHECK-LABEL: ceil_float(
52; CHECK:       {
53; CHECK-NEXT:    .reg .f32 %f<3>;
54; CHECK-EMPTY:
55; CHECK-NEXT:  // %bb.0:
56; CHECK-NEXT:    ld.param.f32 %f1, [ceil_float_param_0];
57; CHECK-NEXT:    cvt.rpi.f32.f32 %f2, %f1;
58; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
59; CHECK-NEXT:    ret;
60  %b = call float @llvm.ceil.f32(float %a)
61  ret float %b
62}
63
; Same llvm.ceil.f32 call as ceil_float, but the function carries attribute
; group #1. That group is defined outside this excerpt; presumably it enables
; f32 denormal flushing -- confirm. The expected instruction gains `.ftz`.
64define float @ceil_float_ftz(float %a) #1 {
65; CHECK-LABEL: ceil_float_ftz(
66; CHECK:       {
67; CHECK-NEXT:    .reg .f32 %f<3>;
68; CHECK-EMPTY:
69; CHECK-NEXT:  // %bb.0:
70; CHECK-NEXT:    ld.param.f32 %f1, [ceil_float_ftz_param_0];
71; CHECK-NEXT:    cvt.rpi.ftz.f32.f32 %f2, %f1;
72; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
73; CHECK-NEXT:    ret;
74  %b = call float @llvm.ceil.f32(float %a)
75  ret float %b
76}
77
; Lowering test for llvm.ceil.f64: all configurations are expected to emit a
; single `cvt.rpi.f64.f64` operating on .f64 registers.
78define double @ceil_double(double %a) {
79; CHECK-LABEL: ceil_double(
80; CHECK:       {
81; CHECK-NEXT:    .reg .f64 %fd<3>;
82; CHECK-EMPTY:
83; CHECK-NEXT:  // %bb.0:
84; CHECK-NEXT:    ld.param.f64 %fd1, [ceil_double_param_0];
85; CHECK-NEXT:    cvt.rpi.f64.f64 %fd2, %fd1;
86; CHECK-NEXT:    st.param.f64 [func_retval0], %fd2;
87; CHECK-NEXT:    ret;
88  %b = call double @llvm.ceil.f64(double %a)
89  ret double %b
90}
91
92; ---- floor ----
93
; Lowering test for llvm.floor.f32: every configuration is expected to emit a
; single `cvt.rmi.f32.f32` between the parameter load and the result store.
94define float @floor_float(float %a) {
95; CHECK-LABEL: floor_float(
96; CHECK:       {
97; CHECK-NEXT:    .reg .f32 %f<3>;
98; CHECK-EMPTY:
99; CHECK-NEXT:  // %bb.0:
100; CHECK-NEXT:    ld.param.f32 %f1, [floor_float_param_0];
101; CHECK-NEXT:    cvt.rmi.f32.f32 %f2, %f1;
102; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
103; CHECK-NEXT:    ret;
104  %b = call float @llvm.floor.f32(float %a)
105  ret float %b
106}
107
; llvm.floor.f32 in a function tagged with attribute group #1 (defined outside
; this excerpt; presumably a denormal-flushing attribute -- confirm). The
; expected conversion is `cvt.rmi.ftz.f32.f32`.
108define float @floor_float_ftz(float %a) #1 {
109; CHECK-LABEL: floor_float_ftz(
110; CHECK:       {
111; CHECK-NEXT:    .reg .f32 %f<3>;
112; CHECK-EMPTY:
113; CHECK-NEXT:  // %bb.0:
114; CHECK-NEXT:    ld.param.f32 %f1, [floor_float_ftz_param_0];
115; CHECK-NEXT:    cvt.rmi.ftz.f32.f32 %f2, %f1;
116; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
117; CHECK-NEXT:    ret;
118  %b = call float @llvm.floor.f32(float %a)
119  ret float %b
120}
121
; Lowering test for llvm.floor.f64: all configurations are expected to emit a
; single `cvt.rmi.f64.f64`.
122define double @floor_double(double %a) {
123; CHECK-LABEL: floor_double(
124; CHECK:       {
125; CHECK-NEXT:    .reg .f64 %fd<3>;
126; CHECK-EMPTY:
127; CHECK-NEXT:  // %bb.0:
128; CHECK-NEXT:    ld.param.f64 %fd1, [floor_double_param_0];
129; CHECK-NEXT:    cvt.rmi.f64.f64 %fd2, %fd1;
130; CHECK-NEXT:    st.param.f64 [func_retval0], %fd2;
131; CHECK-NEXT:    ret;
132  %b = call double @llvm.floor.f64(double %a)
133  ret double %b
134}
135
136; ---- round ----
137
; Lowering test for llvm.round.f32. The checks pin a multi-instruction
; expansion rather than a single cvt:
;   * -2147483648 is 0x80000000 (the f32 sign bit) and 1056964608 is
;     0x3F000000 (0.5f); and/or builds 0.5 carrying the sign of the input.
;   * input + signed 0.5 is truncated with cvt.rzi.
;   * if |input| > 0f4B000000 (2^23) the input itself is selected.
;   * if |input| < 0f3F000000 (0.5) the truncated input is selected instead.
138define float @round_float(float %a) {
139; check the use of sign mask and 0.5 to implement round
140; CHECK-LABEL: round_float(
141; CHECK:       {
142; CHECK-NEXT:    .reg .pred %p<3>;
143; CHECK-NEXT:    .reg .b32 %r<4>;
144; CHECK-NEXT:    .reg .f32 %f<9>;
145; CHECK-EMPTY:
146; CHECK-NEXT:  // %bb.0:
147; CHECK-NEXT:    ld.param.f32 %f1, [round_float_param_0];
148; CHECK-NEXT:    mov.b32 %r1, %f1;
149; CHECK-NEXT:    and.b32 %r2, %r1, -2147483648;
150; CHECK-NEXT:    or.b32 %r3, %r2, 1056964608;
151; CHECK-NEXT:    mov.b32 %f2, %r3;
152; CHECK-NEXT:    add.rn.f32 %f3, %f1, %f2;
153; CHECK-NEXT:    cvt.rzi.f32.f32 %f4, %f3;
154; CHECK-NEXT:    abs.f32 %f5, %f1;
155; CHECK-NEXT:    setp.gt.f32 %p1, %f5, 0f4B000000;
156; CHECK-NEXT:    selp.f32 %f6, %f1, %f4, %p1;
157; CHECK-NEXT:    cvt.rzi.f32.f32 %f7, %f1;
158; CHECK-NEXT:    setp.lt.f32 %p2, %f5, 0f3F000000;
159; CHECK-NEXT:    selp.f32 %f8, %f7, %f6, %p2;
160; CHECK-NEXT:    st.param.f32 [func_retval0], %f8;
161; CHECK-NEXT:    ret;
162  %b = call float @llvm.round.f32(float %a)
163  ret float %b
164}
165
; llvm.round.f32 in a function tagged with attribute group #1 (defined outside
; this excerpt; presumably a denormal-flushing attribute -- confirm). The
; expected expansion has the same shape as in round_float, with `.ftz` on the
; add, cvt, abs and setp instructions. The integer and/or and the selp
; instructions carry no `.ftz` modifier in the expected output.
166define float @round_float_ftz(float %a) #1 {
167; check the use of sign mask and 0.5 to implement round
168; CHECK-LABEL: round_float_ftz(
169; CHECK:       {
170; CHECK-NEXT:    .reg .pred %p<3>;
171; CHECK-NEXT:    .reg .b32 %r<4>;
172; CHECK-NEXT:    .reg .f32 %f<9>;
173; CHECK-EMPTY:
174; CHECK-NEXT:  // %bb.0:
175; CHECK-NEXT:    ld.param.f32 %f1, [round_float_ftz_param_0];
176; CHECK-NEXT:    mov.b32 %r1, %f1;
177; CHECK-NEXT:    and.b32 %r2, %r1, -2147483648;
178; CHECK-NEXT:    or.b32 %r3, %r2, 1056964608;
179; CHECK-NEXT:    mov.b32 %f2, %r3;
180; CHECK-NEXT:    add.rn.ftz.f32 %f3, %f1, %f2;
181; CHECK-NEXT:    cvt.rzi.ftz.f32.f32 %f4, %f3;
182; CHECK-NEXT:    abs.ftz.f32 %f5, %f1;
183; CHECK-NEXT:    setp.gt.ftz.f32 %p1, %f5, 0f4B000000;
184; CHECK-NEXT:    selp.f32 %f6, %f1, %f4, %p1;
185; CHECK-NEXT:    cvt.rzi.ftz.f32.f32 %f7, %f1;
186; CHECK-NEXT:    setp.lt.ftz.f32 %p2, %f5, 0f3F000000;
187; CHECK-NEXT:    selp.f32 %f8, %f7, %f6, %p2;
188; CHECK-NEXT:    st.param.f32 [func_retval0], %f8;
189; CHECK-NEXT:    ret;
190  %b = call float @llvm.round.f32(float %a)
191  ret float %b
192}
193
; Lowering test for llvm.round.f64. Unlike the f32 expansion, the expected
; sequence works on |input| and uses no integer sign mask:
;   * |input| + 0d3FE0000000000000 (0.5) is truncated with cvt.rzi.
;   * when |input| < 0.5 the magnitude is replaced by 0.0.
;   * a copysign with the original input follows.
;   * when |input| > 0d4330000000000000 (2^52) the input itself is selected.
194define double @round_double(double %a) {
195; check the use of 0.5 to implement round
196; CHECK-LABEL: round_double(
197; CHECK:       {
198; CHECK-NEXT:    .reg .pred %p<3>;
199; CHECK-NEXT:    .reg .f64 %fd<8>;
200; CHECK-EMPTY:
201; CHECK-NEXT:  // %bb.0:
202; CHECK-NEXT:    ld.param.f64 %fd1, [round_double_param_0];
203; CHECK-NEXT:    abs.f64 %fd2, %fd1;
204; CHECK-NEXT:    setp.lt.f64 %p1, %fd2, 0d3FE0000000000000;
205; CHECK-NEXT:    add.rn.f64 %fd3, %fd2, 0d3FE0000000000000;
206; CHECK-NEXT:    cvt.rzi.f64.f64 %fd4, %fd3;
207; CHECK-NEXT:    selp.f64 %fd5, 0d0000000000000000, %fd4, %p1;
208; CHECK-NEXT:    copysign.f64 %fd6, %fd1, %fd5;
209; CHECK-NEXT:    setp.gt.f64 %p2, %fd2, 0d4330000000000000;
210; CHECK-NEXT:    selp.f64 %fd7, %fd1, %fd6, %p2;
211; CHECK-NEXT:    st.param.f64 [func_retval0], %fd7;
212; CHECK-NEXT:    ret;
213  %b = call double @llvm.round.f64(double %a)
214  ret double %b
215}
216
217; ---- nearbyint ----
218
; Lowering test for llvm.nearbyint.f32: every configuration is expected to
; emit a single `cvt.rni.f32.f32`.
219define float @nearbyint_float(float %a) {
220; CHECK-LABEL: nearbyint_float(
221; CHECK:       {
222; CHECK-NEXT:    .reg .f32 %f<3>;
223; CHECK-EMPTY:
224; CHECK-NEXT:  // %bb.0:
225; CHECK-NEXT:    ld.param.f32 %f1, [nearbyint_float_param_0];
226; CHECK-NEXT:    cvt.rni.f32.f32 %f2, %f1;
227; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
228; CHECK-NEXT:    ret;
229  %b = call float @llvm.nearbyint.f32(float %a)
230  ret float %b
231}
232
; llvm.nearbyint.f32 in a function tagged with attribute group #1 (defined
; outside this excerpt; presumably a denormal-flushing attribute -- confirm).
; The expected conversion is `cvt.rni.ftz.f32.f32`.
233define float @nearbyint_float_ftz(float %a) #1 {
234; CHECK-LABEL: nearbyint_float_ftz(
235; CHECK:       {
236; CHECK-NEXT:    .reg .f32 %f<3>;
237; CHECK-EMPTY:
238; CHECK-NEXT:  // %bb.0:
239; CHECK-NEXT:    ld.param.f32 %f1, [nearbyint_float_ftz_param_0];
240; CHECK-NEXT:    cvt.rni.ftz.f32.f32 %f2, %f1;
241; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
242; CHECK-NEXT:    ret;
243  %b = call float @llvm.nearbyint.f32(float %a)
244  ret float %b
245}
246
; Lowering test for llvm.nearbyint.f64: every configuration is expected to
; emit a single `cvt.rni.f64.f64`.
247define double @nearbyint_double(double %a) {
248; CHECK-LABEL: nearbyint_double(
249; CHECK:       {
250; CHECK-NEXT:    .reg .f64 %fd<3>;
251; CHECK-EMPTY:
252; CHECK-NEXT:  // %bb.0:
253; CHECK-NEXT:    ld.param.f64 %fd1, [nearbyint_double_param_0];
254; CHECK-NEXT:    cvt.rni.f64.f64 %fd2, %fd1;
255; CHECK-NEXT:    st.param.f64 [func_retval0], %fd2;
256; CHECK-NEXT:    ret;
257  %b = call double @llvm.nearbyint.f64(double %a)
258  ret double %b
259}
260
261; ---- rint ----
262
; Lowering test for llvm.rint.f32: the expected output is a single
; `cvt.rni.f32.f32`, the same instruction the nearbyint tests expect.
263define float @rint_float(float %a) {
264; CHECK-LABEL: rint_float(
265; CHECK:       {
266; CHECK-NEXT:    .reg .f32 %f<3>;
267; CHECK-EMPTY:
268; CHECK-NEXT:  // %bb.0:
269; CHECK-NEXT:    ld.param.f32 %f1, [rint_float_param_0];
270; CHECK-NEXT:    cvt.rni.f32.f32 %f2, %f1;
271; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
272; CHECK-NEXT:    ret;
273  %b = call float @llvm.rint.f32(float %a)
274  ret float %b
275}
276
; llvm.rint.f32 in a function tagged with attribute group #1 (defined outside
; this excerpt; presumably a denormal-flushing attribute -- confirm). The
; expected conversion is `cvt.rni.ftz.f32.f32`.
277define float @rint_float_ftz(float %a) #1 {
278; CHECK-LABEL: rint_float_ftz(
279; CHECK:       {
280; CHECK-NEXT:    .reg .f32 %f<3>;
281; CHECK-EMPTY:
282; CHECK-NEXT:  // %bb.0:
283; CHECK-NEXT:    ld.param.f32 %f1, [rint_float_ftz_param_0];
284; CHECK-NEXT:    cvt.rni.ftz.f32.f32 %f2, %f1;
285; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
286; CHECK-NEXT:    ret;
287  %b = call float @llvm.rint.f32(float %a)
288  ret float %b
289}
290
; Lowering test for llvm.rint.f64: every configuration is expected to emit a
; single `cvt.rni.f64.f64`.
291define double @rint_double(double %a) {
292; CHECK-LABEL: rint_double(
293; CHECK:       {
294; CHECK-NEXT:    .reg .f64 %fd<3>;
295; CHECK-EMPTY:
296; CHECK-NEXT:  // %bb.0:
297; CHECK-NEXT:    ld.param.f64 %fd1, [rint_double_param_0];
298; CHECK-NEXT:    cvt.rni.f64.f64 %fd2, %fd1;
299; CHECK-NEXT:    st.param.f64 [func_retval0], %fd2;
300; CHECK-NEXT:    ret;
301  %b = call double @llvm.rint.f64(double %a)
302  ret double %b
303}
304
305; ---- roundeven ----
306
; Lowering test for llvm.roundeven.f32: the expected output is a single
; `cvt.rni.f32.f32`, matching what the rint and nearbyint tests expect.
307define float @roundeven_float(float %a) {
308; CHECK-LABEL: roundeven_float(
309; CHECK:       {
310; CHECK-NEXT:    .reg .f32 %f<3>;
311; CHECK-EMPTY:
312; CHECK-NEXT:  // %bb.0:
313; CHECK-NEXT:    ld.param.f32 %f1, [roundeven_float_param_0];
314; CHECK-NEXT:    cvt.rni.f32.f32 %f2, %f1;
315; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
316; CHECK-NEXT:    ret;
317  %b = call float @llvm.roundeven.f32(float %a)
318  ret float %b
319}
320
; llvm.roundeven.f32 in a function tagged with attribute group #1 (defined
; outside this excerpt; presumably a denormal-flushing attribute -- confirm).
; The expected conversion is `cvt.rni.ftz.f32.f32`.
321define float @roundeven_float_ftz(float %a) #1 {
322; CHECK-LABEL: roundeven_float_ftz(
323; CHECK:       {
324; CHECK-NEXT:    .reg .f32 %f<3>;
325; CHECK-EMPTY:
326; CHECK-NEXT:  // %bb.0:
327; CHECK-NEXT:    ld.param.f32 %f1, [roundeven_float_ftz_param_0];
328; CHECK-NEXT:    cvt.rni.ftz.f32.f32 %f2, %f1;
329; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
330; CHECK-NEXT:    ret;
331  %b = call float @llvm.roundeven.f32(float %a)
332  ret float %b
333}
334
; Lowering test for llvm.roundeven.f64: every configuration is expected to
; emit a single `cvt.rni.f64.f64`.
335define double @roundeven_double(double %a) {
336; CHECK-LABEL: roundeven_double(
337; CHECK:       {
338; CHECK-NEXT:    .reg .f64 %fd<3>;
339; CHECK-EMPTY:
340; CHECK-NEXT:  // %bb.0:
341; CHECK-NEXT:    ld.param.f64 %fd1, [roundeven_double_param_0];
342; CHECK-NEXT:    cvt.rni.f64.f64 %fd2, %fd1;
343; CHECK-NEXT:    st.param.f64 [func_retval0], %fd2;
344; CHECK-NEXT:    ret;
345  %b = call double @llvm.roundeven.f64(double %a)
346  ret double %b
347}
348
349; ---- trunc ----
350
; Lowering test for llvm.trunc.f32: every configuration is expected to emit a
; single `cvt.rzi.f32.f32`.
351define float @trunc_float(float %a) {
352; CHECK-LABEL: trunc_float(
353; CHECK:       {
354; CHECK-NEXT:    .reg .f32 %f<3>;
355; CHECK-EMPTY:
356; CHECK-NEXT:  // %bb.0:
357; CHECK-NEXT:    ld.param.f32 %f1, [trunc_float_param_0];
358; CHECK-NEXT:    cvt.rzi.f32.f32 %f2, %f1;
359; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
360; CHECK-NEXT:    ret;
361  %b = call float @llvm.trunc.f32(float %a)
362  ret float %b
363}
364
; llvm.trunc.f32 in a function tagged with attribute group #1 (defined outside
; this excerpt; presumably a denormal-flushing attribute -- confirm). The
; expected conversion is `cvt.rzi.ftz.f32.f32`.
365define float @trunc_float_ftz(float %a) #1 {
366; CHECK-LABEL: trunc_float_ftz(
367; CHECK:       {
368; CHECK-NEXT:    .reg .f32 %f<3>;
369; CHECK-EMPTY:
370; CHECK-NEXT:  // %bb.0:
371; CHECK-NEXT:    ld.param.f32 %f1, [trunc_float_ftz_param_0];
372; CHECK-NEXT:    cvt.rzi.ftz.f32.f32 %f2, %f1;
373; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
374; CHECK-NEXT:    ret;
375  %b = call float @llvm.trunc.f32(float %a)
376  ret float %b
377}
378
; Lowering test for llvm.trunc.f64: every configuration is expected to emit a
; single `cvt.rzi.f64.f64`.
379define double @trunc_double(double %a) {
380; CHECK-LABEL: trunc_double(
381; CHECK:       {
382; CHECK-NEXT:    .reg .f64 %fd<3>;
383; CHECK-EMPTY:
384; CHECK-NEXT:  // %bb.0:
385; CHECK-NEXT:    ld.param.f64 %fd1, [trunc_double_param_0];
386; CHECK-NEXT:    cvt.rzi.f64.f64 %fd2, %fd1;
387; CHECK-NEXT:    st.param.f64 [func_retval0], %fd2;
388; CHECK-NEXT:    ret;
389  %b = call double @llvm.trunc.f64(double %a)
390  ret double %b
391}
392
393; ---- abs ----
394
; Lowering test for llvm.fabs.f32: every configuration is expected to emit a
; single `abs.f32`.
395define float @abs_float(float %a) {
396; CHECK-LABEL: abs_float(
397; CHECK:       {
398; CHECK-NEXT:    .reg .f32 %f<3>;
399; CHECK-EMPTY:
400; CHECK-NEXT:  // %bb.0:
401; CHECK-NEXT:    ld.param.f32 %f1, [abs_float_param_0];
402; CHECK-NEXT:    abs.f32 %f2, %f1;
403; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
404; CHECK-NEXT:    ret;
405  %b = call float @llvm.fabs.f32(float %a)
406  ret float %b
407}
408
; llvm.fabs.f32 in a function tagged with attribute group #1 (defined outside
; this excerpt; presumably a denormal-flushing attribute -- confirm). The
; expected instruction is `abs.ftz.f32`.
409define float @abs_float_ftz(float %a) #1 {
410; CHECK-LABEL: abs_float_ftz(
411; CHECK:       {
412; CHECK-NEXT:    .reg .f32 %f<3>;
413; CHECK-EMPTY:
414; CHECK-NEXT:  // %bb.0:
415; CHECK-NEXT:    ld.param.f32 %f1, [abs_float_ftz_param_0];
416; CHECK-NEXT:    abs.ftz.f32 %f2, %f1;
417; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
418; CHECK-NEXT:    ret;
419  %b = call float @llvm.fabs.f32(float %a)
420  ret float %b
421}
422
; Lowering test for llvm.fabs.f64: every configuration is expected to emit a
; single `abs.f64`.
423define double @abs_double(double %a) {
424; CHECK-LABEL: abs_double(
425; CHECK:       {
426; CHECK-NEXT:    .reg .f64 %fd<3>;
427; CHECK-EMPTY:
428; CHECK-NEXT:  // %bb.0:
429; CHECK-NEXT:    ld.param.f64 %fd1, [abs_double_param_0];
430; CHECK-NEXT:    abs.f64 %fd2, %fd1;
431; CHECK-NEXT:    st.param.f64 [func_retval0], %fd2;
432; CHECK-NEXT:    ret;
433  %b = call double @llvm.fabs.f64(double %a)
434  ret double %b
435}
436
437; ---- minnum ----
438
; Lowering test for llvm.minnum.f16. The expected output differs by
; configuration, so no shared-prefix checks are used:
;   * default target (NOF16 checks) and sm_80 + ptx70 with
;     --nvptx-no-f16-math (SM80-NOF16 checks): both operands are widened with
;     cvt.f32.f16, combined with min.f32, and narrowed with cvt.rn.f16.f32.
;   * sm_80 + ptx70 (F16 checks): a single native `min.f16`.
439define half @minnum_half(half %a, half %b) {
440; CHECK-NOF16-LABEL: minnum_half(
441; CHECK-NOF16:       {
442; CHECK-NOF16-NEXT:    .reg .b16 %rs<4>;
443; CHECK-NOF16-NEXT:    .reg .f32 %f<4>;
444; CHECK-NOF16-EMPTY:
445; CHECK-NOF16-NEXT:  // %bb.0:
446; CHECK-NOF16-NEXT:    ld.param.b16 %rs1, [minnum_half_param_0];
447; CHECK-NOF16-NEXT:    ld.param.b16 %rs2, [minnum_half_param_1];
448; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
449; CHECK-NOF16-NEXT:    cvt.f32.f16 %f2, %rs1;
450; CHECK-NOF16-NEXT:    min.f32 %f3, %f2, %f1;
451; CHECK-NOF16-NEXT:    cvt.rn.f16.f32 %rs3, %f3;
452; CHECK-NOF16-NEXT:    st.param.b16 [func_retval0], %rs3;
453; CHECK-NOF16-NEXT:    ret;
454;
455; CHECK-F16-LABEL: minnum_half(
456; CHECK-F16:       {
457; CHECK-F16-NEXT:    .reg .b16 %rs<4>;
458; CHECK-F16-EMPTY:
459; CHECK-F16-NEXT:  // %bb.0:
460; CHECK-F16-NEXT:    ld.param.b16 %rs1, [minnum_half_param_0];
461; CHECK-F16-NEXT:    ld.param.b16 %rs2, [minnum_half_param_1];
462; CHECK-F16-NEXT:    min.f16 %rs3, %rs1, %rs2;
463; CHECK-F16-NEXT:    st.param.b16 [func_retval0], %rs3;
464; CHECK-F16-NEXT:    ret;
465;
466; CHECK-SM80-NOF16-LABEL: minnum_half(
467; CHECK-SM80-NOF16:       {
468; CHECK-SM80-NOF16-NEXT:    .reg .b16 %rs<4>;
469; CHECK-SM80-NOF16-NEXT:    .reg .f32 %f<4>;
470; CHECK-SM80-NOF16-EMPTY:
471; CHECK-SM80-NOF16-NEXT:  // %bb.0:
472; CHECK-SM80-NOF16-NEXT:    ld.param.b16 %rs1, [minnum_half_param_0];
473; CHECK-SM80-NOF16-NEXT:    ld.param.b16 %rs2, [minnum_half_param_1];
474; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
475; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f2, %rs1;
476; CHECK-SM80-NOF16-NEXT:    min.f32 %f3, %f2, %f1;
477; CHECK-SM80-NOF16-NEXT:    cvt.rn.f16.f32 %rs3, %f3;
478; CHECK-SM80-NOF16-NEXT:    st.param.b16 [func_retval0], %rs3;
479; CHECK-SM80-NOF16-NEXT:    ret;
480  %x = call half @llvm.minnum.f16(half %a, half %b)
481  ret half %x
482}
483
; Lowering test for llvm.minnum.f32 with two register operands: every
; configuration is expected to emit a single `min.f32`.
484define float @minnum_float(float %a, float %b) {
485; CHECK-LABEL: minnum_float(
486; CHECK:       {
487; CHECK-NEXT:    .reg .f32 %f<4>;
488; CHECK-EMPTY:
489; CHECK-NEXT:  // %bb.0:
490; CHECK-NEXT:    ld.param.f32 %f1, [minnum_float_param_0];
491; CHECK-NEXT:    ld.param.f32 %f2, [minnum_float_param_1];
492; CHECK-NEXT:    min.f32 %f3, %f1, %f2;
493; CHECK-NEXT:    st.param.f32 [func_retval0], %f3;
494; CHECK-NEXT:    ret;
495  %x = call float @llvm.minnum.f32(float %a, float %b)
496  ret float %x
497}
498
; llvm.minnum.f32 with the constant 0.0 as the second IR operand: the expected
; `min.f32` takes the constant as an immediate (0f00000000) instead of loading
; it into a register first.
499define float @minnum_imm1(float %a) {
500; CHECK-LABEL: minnum_imm1(
501; CHECK:       {
502; CHECK-NEXT:    .reg .f32 %f<3>;
503; CHECK-EMPTY:
504; CHECK-NEXT:  // %bb.0:
505; CHECK-NEXT:    ld.param.f32 %f1, [minnum_imm1_param_0];
506; CHECK-NEXT:    min.f32 %f2, %f1, 0f00000000;
507; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
508; CHECK-NEXT:    ret;
509  %x = call float @llvm.minnum.f32(float %a, float 0.0)
510  ret float %x
511}
512
; llvm.minnum.f32 with the constant 0.0 as the FIRST IR operand: the expected
; output is the same as for minnum_imm1, i.e. the immediate still appears as
; the last operand of `min.f32`.
513define float @minnum_imm2(float %a) {
514; CHECK-LABEL: minnum_imm2(
515; CHECK:       {
516; CHECK-NEXT:    .reg .f32 %f<3>;
517; CHECK-EMPTY:
518; CHECK-NEXT:  // %bb.0:
519; CHECK-NEXT:    ld.param.f32 %f1, [minnum_imm2_param_0];
520; CHECK-NEXT:    min.f32 %f2, %f1, 0f00000000;
521; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
522; CHECK-NEXT:    ret;
523  %x = call float @llvm.minnum.f32(float 0.0, float %a)
524  ret float %x
525}
526
; llvm.minnum.f32 in a function tagged with attribute group #1 (defined
; outside this excerpt; presumably a denormal-flushing attribute -- confirm).
; The expected instruction is `min.ftz.f32`.
527define float @minnum_float_ftz(float %a, float %b) #1 {
528; CHECK-LABEL: minnum_float_ftz(
529; CHECK:       {
530; CHECK-NEXT:    .reg .f32 %f<4>;
531; CHECK-EMPTY:
532; CHECK-NEXT:  // %bb.0:
533; CHECK-NEXT:    ld.param.f32 %f1, [minnum_float_ftz_param_0];
534; CHECK-NEXT:    ld.param.f32 %f2, [minnum_float_ftz_param_1];
535; CHECK-NEXT:    min.ftz.f32 %f3, %f1, %f2;
536; CHECK-NEXT:    st.param.f32 [func_retval0], %f3;
537; CHECK-NEXT:    ret;
538  %x = call float @llvm.minnum.f32(float %a, float %b)
539  ret float %x
540}
541
; Lowering test for llvm.minnum.f64: every configuration is expected to emit a
; single `min.f64`.
542define double @minnum_double(double %a, double %b) {
543; CHECK-LABEL: minnum_double(
544; CHECK:       {
545; CHECK-NEXT:    .reg .f64 %fd<4>;
546; CHECK-EMPTY:
547; CHECK-NEXT:  // %bb.0:
548; CHECK-NEXT:    ld.param.f64 %fd1, [minnum_double_param_0];
549; CHECK-NEXT:    ld.param.f64 %fd2, [minnum_double_param_1];
550; CHECK-NEXT:    min.f64 %fd3, %fd1, %fd2;
551; CHECK-NEXT:    st.param.f64 [func_retval0], %fd3;
552; CHECK-NEXT:    ret;
553  %x = call double @llvm.minnum.f64(double %a, double %b)
554  ret double %x
555}
556
; Lowering test for llvm.minnum.v2f16. Each <2 x half> argument is loaded as
; one .b32 value in every configuration; what follows differs:
;   * default target (NOF16 checks) and sm_80 + ptx70 with
;     --nvptx-no-f16-math (SM80-NOF16 checks): each .b32 is unpacked into two
;     .b16 lanes, every lane pair goes through cvt.f32.f16 / min.f32 /
;     cvt.rn.f16.f32, and the two results are repacked into one .b32.
;   * sm_80 + ptx70 (F16 checks): a single packed `min.f16x2`.
557define <2 x half> @minnum_v2half(<2 x half> %a, <2 x half> %b) {
558; CHECK-NOF16-LABEL: minnum_v2half(
559; CHECK-NOF16:       {
560; CHECK-NOF16-NEXT:    .reg .b16 %rs<7>;
561; CHECK-NOF16-NEXT:    .reg .b32 %r<4>;
562; CHECK-NOF16-NEXT:    .reg .f32 %f<7>;
563; CHECK-NOF16-EMPTY:
564; CHECK-NOF16-NEXT:  // %bb.0:
565; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [minnum_v2half_param_0];
566; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [minnum_v2half_param_1];
567; CHECK-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
568; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
569; CHECK-NOF16-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
570; CHECK-NOF16-NEXT:    cvt.f32.f16 %f2, %rs4;
571; CHECK-NOF16-NEXT:    min.f32 %f3, %f2, %f1;
572; CHECK-NOF16-NEXT:    cvt.rn.f16.f32 %rs5, %f3;
573; CHECK-NOF16-NEXT:    cvt.f32.f16 %f4, %rs1;
574; CHECK-NOF16-NEXT:    cvt.f32.f16 %f5, %rs3;
575; CHECK-NOF16-NEXT:    min.f32 %f6, %f5, %f4;
576; CHECK-NOF16-NEXT:    cvt.rn.f16.f32 %rs6, %f6;
577; CHECK-NOF16-NEXT:    mov.b32 %r3, {%rs6, %rs5};
578; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r3;
579; CHECK-NOF16-NEXT:    ret;
580;
581; CHECK-F16-LABEL: minnum_v2half(
582; CHECK-F16:       {
583; CHECK-F16-NEXT:    .reg .b32 %r<4>;
584; CHECK-F16-EMPTY:
585; CHECK-F16-NEXT:  // %bb.0:
586; CHECK-F16-NEXT:    ld.param.b32 %r1, [minnum_v2half_param_1];
587; CHECK-F16-NEXT:    ld.param.b32 %r2, [minnum_v2half_param_0];
588; CHECK-F16-NEXT:    min.f16x2 %r3, %r2, %r1;
589; CHECK-F16-NEXT:    st.param.b32 [func_retval0], %r3;
590; CHECK-F16-NEXT:    ret;
591;
592; CHECK-SM80-NOF16-LABEL: minnum_v2half(
593; CHECK-SM80-NOF16:       {
594; CHECK-SM80-NOF16-NEXT:    .reg .b16 %rs<7>;
595; CHECK-SM80-NOF16-NEXT:    .reg .b32 %r<4>;
596; CHECK-SM80-NOF16-NEXT:    .reg .f32 %f<7>;
597; CHECK-SM80-NOF16-EMPTY:
598; CHECK-SM80-NOF16-NEXT:  // %bb.0:
599; CHECK-SM80-NOF16-NEXT:    ld.param.b32 %r1, [minnum_v2half_param_0];
600; CHECK-SM80-NOF16-NEXT:    ld.param.b32 %r2, [minnum_v2half_param_1];
601; CHECK-SM80-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
602; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
603; CHECK-SM80-NOF16-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
604; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f2, %rs4;
605; CHECK-SM80-NOF16-NEXT:    min.f32 %f3, %f2, %f1;
606; CHECK-SM80-NOF16-NEXT:    cvt.rn.f16.f32 %rs5, %f3;
607; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f4, %rs1;
608; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f5, %rs3;
609; CHECK-SM80-NOF16-NEXT:    min.f32 %f6, %f5, %f4;
610; CHECK-SM80-NOF16-NEXT:    cvt.rn.f16.f32 %rs6, %f6;
611; CHECK-SM80-NOF16-NEXT:    mov.b32 %r3, {%rs6, %rs5};
612; CHECK-SM80-NOF16-NEXT:    st.param.b32 [func_retval0], %r3;
613; CHECK-SM80-NOF16-NEXT:    ret;
614  %x = call <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> %b)
615  ret <2 x half> %x
616}
617
618; ---- minimum ----
619
; Lowering test for llvm.minimum.f16.
;   * sm_80 + ptx70 (F16 checks): a single `min.NaN.f16`.
;   * default target (NOF16 checks) and sm_80 + ptx70 with
;     --nvptx-no-f16-math (SM80-NOF16 checks): an expanded compare/select
;     sequence on the f32-widened operands:
;       - setp.lt picks the smaller operand;
;       - setp.nan replaces the result with 0x7E00 (an f16 quiet-NaN bit
;         pattern) when the operands are unordered;
;       - each operand's raw bits are compared with -32768 (0x8000, the f16
;         bit pattern of -0.0), and that operand is preferred only when the
;         intermediate result compares equal to 0.0.
620define half @minimum_half(half %a, half %b) {
621; CHECK-NOF16-LABEL: minimum_half(
622; CHECK-NOF16:       {
623; CHECK-NOF16-NEXT:    .reg .pred %p<6>;
624; CHECK-NOF16-NEXT:    .reg .b16 %rs<8>;
625; CHECK-NOF16-NEXT:    .reg .f32 %f<4>;
626; CHECK-NOF16-EMPTY:
627; CHECK-NOF16-NEXT:  // %bb.0:
628; CHECK-NOF16-NEXT:    ld.param.b16 %rs1, [minimum_half_param_0];
629; CHECK-NOF16-NEXT:    ld.param.b16 %rs2, [minimum_half_param_1];
630; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
631; CHECK-NOF16-NEXT:    cvt.f32.f16 %f2, %rs1;
632; CHECK-NOF16-NEXT:    setp.lt.f32 %p1, %f2, %f1;
633; CHECK-NOF16-NEXT:    selp.b16 %rs3, %rs1, %rs2, %p1;
634; CHECK-NOF16-NEXT:    setp.nan.f32 %p2, %f2, %f1;
635; CHECK-NOF16-NEXT:    selp.b16 %rs4, 0x7E00, %rs3, %p2;
636; CHECK-NOF16-NEXT:    setp.eq.s16 %p3, %rs1, -32768;
637; CHECK-NOF16-NEXT:    selp.b16 %rs5, %rs1, %rs4, %p3;
638; CHECK-NOF16-NEXT:    setp.eq.s16 %p4, %rs2, -32768;
639; CHECK-NOF16-NEXT:    selp.b16 %rs6, %rs2, %rs5, %p4;
640; CHECK-NOF16-NEXT:    cvt.f32.f16 %f3, %rs4;
641; CHECK-NOF16-NEXT:    setp.eq.f32 %p5, %f3, 0f00000000;
642; CHECK-NOF16-NEXT:    selp.b16 %rs7, %rs6, %rs4, %p5;
643; CHECK-NOF16-NEXT:    st.param.b16 [func_retval0], %rs7;
644; CHECK-NOF16-NEXT:    ret;
645;
646; CHECK-F16-LABEL: minimum_half(
647; CHECK-F16:       {
648; CHECK-F16-NEXT:    .reg .b16 %rs<4>;
649; CHECK-F16-EMPTY:
650; CHECK-F16-NEXT:  // %bb.0:
651; CHECK-F16-NEXT:    ld.param.b16 %rs1, [minimum_half_param_0];
652; CHECK-F16-NEXT:    ld.param.b16 %rs2, [minimum_half_param_1];
653; CHECK-F16-NEXT:    min.NaN.f16 %rs3, %rs1, %rs2;
654; CHECK-F16-NEXT:    st.param.b16 [func_retval0], %rs3;
655; CHECK-F16-NEXT:    ret;
656;
657; CHECK-SM80-NOF16-LABEL: minimum_half(
658; CHECK-SM80-NOF16:       {
659; CHECK-SM80-NOF16-NEXT:    .reg .pred %p<6>;
660; CHECK-SM80-NOF16-NEXT:    .reg .b16 %rs<8>;
661; CHECK-SM80-NOF16-NEXT:    .reg .f32 %f<4>;
662; CHECK-SM80-NOF16-EMPTY:
663; CHECK-SM80-NOF16-NEXT:  // %bb.0:
664; CHECK-SM80-NOF16-NEXT:    ld.param.b16 %rs1, [minimum_half_param_0];
665; CHECK-SM80-NOF16-NEXT:    ld.param.b16 %rs2, [minimum_half_param_1];
666; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
667; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f2, %rs1;
668; CHECK-SM80-NOF16-NEXT:    setp.lt.f32 %p1, %f2, %f1;
669; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs3, %rs1, %rs2, %p1;
670; CHECK-SM80-NOF16-NEXT:    setp.nan.f32 %p2, %f2, %f1;
671; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs4, 0x7E00, %rs3, %p2;
672; CHECK-SM80-NOF16-NEXT:    setp.eq.s16 %p3, %rs1, -32768;
673; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs5, %rs1, %rs4, %p3;
674; CHECK-SM80-NOF16-NEXT:    setp.eq.s16 %p4, %rs2, -32768;
675; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs6, %rs2, %rs5, %p4;
676; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f3, %rs4;
677; CHECK-SM80-NOF16-NEXT:    setp.eq.f32 %p5, %f3, 0f00000000;
678; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs7, %rs6, %rs4, %p5;
679; CHECK-SM80-NOF16-NEXT:    st.param.b16 [func_retval0], %rs7;
680; CHECK-SM80-NOF16-NEXT:    ret;
681  %x = call half @llvm.minimum.f16(half %a, half %b)
682  ret half %x
683}
684
; Lowering test for llvm.minimum.f32.
;   * default target (NOF16 checks): expanded sequence built around min.f32:
;       - setp.nan substitutes 0f7FC00000 (an f32 quiet-NaN bit pattern) when
;         the operands are unordered;
;       - each operand's raw bits are compared with -2147483648 (0x80000000,
;         the f32 bit pattern of -0.0), and that operand is preferred only
;         when the intermediate result compares equal to 0.0.
;   * both sm_80 + ptx70 configurations (F16 and SM80-NOF16 checks): a single
;     `min.NaN.f32`; --nvptx-no-f16-math does not change the f32 output here.
685define float @minimum_float(float %a, float %b) {
686; CHECK-NOF16-LABEL: minimum_float(
687; CHECK-NOF16:       {
688; CHECK-NOF16-NEXT:    .reg .pred %p<5>;
689; CHECK-NOF16-NEXT:    .reg .b32 %r<3>;
690; CHECK-NOF16-NEXT:    .reg .f32 %f<8>;
691; CHECK-NOF16-EMPTY:
692; CHECK-NOF16-NEXT:  // %bb.0:
693; CHECK-NOF16-NEXT:    ld.param.f32 %f1, [minimum_float_param_0];
694; CHECK-NOF16-NEXT:    mov.b32 %r1, %f1;
695; CHECK-NOF16-NEXT:    ld.param.f32 %f2, [minimum_float_param_1];
696; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %f1, %f2;
697; CHECK-NOF16-NEXT:    min.f32 %f3, %f1, %f2;
698; CHECK-NOF16-NEXT:    selp.f32 %f4, 0f7FC00000, %f3, %p1;
699; CHECK-NOF16-NEXT:    setp.eq.s32 %p2, %r1, -2147483648;
700; CHECK-NOF16-NEXT:    selp.f32 %f5, %f1, %f4, %p2;
701; CHECK-NOF16-NEXT:    mov.b32 %r2, %f2;
702; CHECK-NOF16-NEXT:    setp.eq.s32 %p3, %r2, -2147483648;
703; CHECK-NOF16-NEXT:    selp.f32 %f6, %f2, %f5, %p3;
704; CHECK-NOF16-NEXT:    setp.eq.f32 %p4, %f4, 0f00000000;
705; CHECK-NOF16-NEXT:    selp.f32 %f7, %f6, %f4, %p4;
706; CHECK-NOF16-NEXT:    st.param.f32 [func_retval0], %f7;
707; CHECK-NOF16-NEXT:    ret;
708;
709; CHECK-F16-LABEL: minimum_float(
710; CHECK-F16:       {
711; CHECK-F16-NEXT:    .reg .f32 %f<4>;
712; CHECK-F16-EMPTY:
713; CHECK-F16-NEXT:  // %bb.0:
714; CHECK-F16-NEXT:    ld.param.f32 %f1, [minimum_float_param_0];
715; CHECK-F16-NEXT:    ld.param.f32 %f2, [minimum_float_param_1];
716; CHECK-F16-NEXT:    min.NaN.f32 %f3, %f1, %f2;
717; CHECK-F16-NEXT:    st.param.f32 [func_retval0], %f3;
718; CHECK-F16-NEXT:    ret;
719;
720; CHECK-SM80-NOF16-LABEL: minimum_float(
721; CHECK-SM80-NOF16:       {
722; CHECK-SM80-NOF16-NEXT:    .reg .f32 %f<4>;
723; CHECK-SM80-NOF16-EMPTY:
724; CHECK-SM80-NOF16-NEXT:  // %bb.0:
725; CHECK-SM80-NOF16-NEXT:    ld.param.f32 %f1, [minimum_float_param_0];
726; CHECK-SM80-NOF16-NEXT:    ld.param.f32 %f2, [minimum_float_param_1];
727; CHECK-SM80-NOF16-NEXT:    min.NaN.f32 %f3, %f1, %f2;
728; CHECK-SM80-NOF16-NEXT:    st.param.f32 [func_retval0], %f3;
729; CHECK-SM80-NOF16-NEXT:    ret;
730  %x = call float @llvm.minimum.f32(float %a, float %b)
731  ret float %x
732}
733
; llvm.minimum.f32 with the constant 0.0 as the second IR operand.
;   * default target (NOF16 checks): the expansion only tests the variable
;     operand: `setp.nan %f1, %f1` for NaN and one comparison of its raw bits
;     with -2147483648 (0x80000000, the f32 bit pattern of -0.0). The constant
;     is folded into `min.f32` as the immediate 0f00000000.
;   * both sm_80 + ptx70 configurations (F16 and SM80-NOF16 checks): a single
;     `min.NaN.f32` with the immediate operand.
734define float @minimum_imm1(float %a) {
735; CHECK-NOF16-LABEL: minimum_imm1(
736; CHECK-NOF16:       {
737; CHECK-NOF16-NEXT:    .reg .pred %p<4>;
738; CHECK-NOF16-NEXT:    .reg .b32 %r<2>;
739; CHECK-NOF16-NEXT:    .reg .f32 %f<6>;
740; CHECK-NOF16-EMPTY:
741; CHECK-NOF16-NEXT:  // %bb.0:
742; CHECK-NOF16-NEXT:    ld.param.f32 %f1, [minimum_imm1_param_0];
743; CHECK-NOF16-NEXT:    mov.b32 %r1, %f1;
744; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %f1, %f1;
745; CHECK-NOF16-NEXT:    min.f32 %f2, %f1, 0f00000000;
746; CHECK-NOF16-NEXT:    selp.f32 %f3, 0f7FC00000, %f2, %p1;
747; CHECK-NOF16-NEXT:    setp.eq.s32 %p2, %r1, -2147483648;
748; CHECK-NOF16-NEXT:    selp.f32 %f4, %f1, %f3, %p2;
749; CHECK-NOF16-NEXT:    setp.eq.f32 %p3, %f3, 0f00000000;
750; CHECK-NOF16-NEXT:    selp.f32 %f5, %f4, %f3, %p3;
751; CHECK-NOF16-NEXT:    st.param.f32 [func_retval0], %f5;
752; CHECK-NOF16-NEXT:    ret;
753;
754; CHECK-F16-LABEL: minimum_imm1(
755; CHECK-F16:       {
756; CHECK-F16-NEXT:    .reg .f32 %f<3>;
757; CHECK-F16-EMPTY:
758; CHECK-F16-NEXT:  // %bb.0:
759; CHECK-F16-NEXT:    ld.param.f32 %f1, [minimum_imm1_param_0];
760; CHECK-F16-NEXT:    min.NaN.f32 %f2, %f1, 0f00000000;
761; CHECK-F16-NEXT:    st.param.f32 [func_retval0], %f2;
762; CHECK-F16-NEXT:    ret;
763;
764; CHECK-SM80-NOF16-LABEL: minimum_imm1(
765; CHECK-SM80-NOF16:       {
766; CHECK-SM80-NOF16-NEXT:    .reg .f32 %f<3>;
767; CHECK-SM80-NOF16-EMPTY:
768; CHECK-SM80-NOF16-NEXT:  // %bb.0:
769; CHECK-SM80-NOF16-NEXT:    ld.param.f32 %f1, [minimum_imm1_param_0];
770; CHECK-SM80-NOF16-NEXT:    min.NaN.f32 %f2, %f1, 0f00000000;
771; CHECK-SM80-NOF16-NEXT:    st.param.f32 [func_retval0], %f2;
772; CHECK-SM80-NOF16-NEXT:    ret;
773  %x = call float @llvm.minimum.f32(float %a, float 0.0)
774  ret float %x
775}
776
; llvm.minimum.f32 with the constant 0.0 as the FIRST IR operand. The expected
; output for every configuration matches minimum_imm1 apart from the symbol
; names: the immediate 0f00000000 still appears as the last operand of
; `min.f32` / `min.NaN.f32`, and only the variable operand is tested for NaN
; and for the -0.0 bit pattern in the default-target expansion.
777define float @minimum_imm2(float %a) {
778; CHECK-NOF16-LABEL: minimum_imm2(
779; CHECK-NOF16:       {
780; CHECK-NOF16-NEXT:    .reg .pred %p<4>;
781; CHECK-NOF16-NEXT:    .reg .b32 %r<2>;
782; CHECK-NOF16-NEXT:    .reg .f32 %f<6>;
783; CHECK-NOF16-EMPTY:
784; CHECK-NOF16-NEXT:  // %bb.0:
785; CHECK-NOF16-NEXT:    ld.param.f32 %f1, [minimum_imm2_param_0];
786; CHECK-NOF16-NEXT:    mov.b32 %r1, %f1;
787; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %f1, %f1;
788; CHECK-NOF16-NEXT:    min.f32 %f2, %f1, 0f00000000;
789; CHECK-NOF16-NEXT:    selp.f32 %f3, 0f7FC00000, %f2, %p1;
790; CHECK-NOF16-NEXT:    setp.eq.s32 %p2, %r1, -2147483648;
791; CHECK-NOF16-NEXT:    selp.f32 %f4, %f1, %f3, %p2;
792; CHECK-NOF16-NEXT:    setp.eq.f32 %p3, %f3, 0f00000000;
793; CHECK-NOF16-NEXT:    selp.f32 %f5, %f4, %f3, %p3;
794; CHECK-NOF16-NEXT:    st.param.f32 [func_retval0], %f5;
795; CHECK-NOF16-NEXT:    ret;
796;
797; CHECK-F16-LABEL: minimum_imm2(
798; CHECK-F16:       {
799; CHECK-F16-NEXT:    .reg .f32 %f<3>;
800; CHECK-F16-EMPTY:
801; CHECK-F16-NEXT:  // %bb.0:
802; CHECK-F16-NEXT:    ld.param.f32 %f1, [minimum_imm2_param_0];
803; CHECK-F16-NEXT:    min.NaN.f32 %f2, %f1, 0f00000000;
804; CHECK-F16-NEXT:    st.param.f32 [func_retval0], %f2;
805; CHECK-F16-NEXT:    ret;
806;
807; CHECK-SM80-NOF16-LABEL: minimum_imm2(
808; CHECK-SM80-NOF16:       {
809; CHECK-SM80-NOF16-NEXT:    .reg .f32 %f<3>;
810; CHECK-SM80-NOF16-EMPTY:
811; CHECK-SM80-NOF16-NEXT:  // %bb.0:
812; CHECK-SM80-NOF16-NEXT:    ld.param.f32 %f1, [minimum_imm2_param_0];
813; CHECK-SM80-NOF16-NEXT:    min.NaN.f32 %f2, %f1, 0f00000000;
814; CHECK-SM80-NOF16-NEXT:    st.param.f32 [func_retval0], %f2;
815; CHECK-SM80-NOF16-NEXT:    ret;
816  %x = call float @llvm.minimum.f32(float 0.0, float %a)
817  ret float %x
818}
819
; llvm.minimum.f32 in a function tagged with attribute group #1 (defined
; outside this excerpt; presumably a denormal-flushing attribute -- confirm).
;   * default target (NOF16 checks): same expansion shape as minimum_float,
;     with `.ftz` on setp.nan, min and the floating-point setp.eq. The integer
;     setp.eq.s32 bit-pattern tests and the selp instructions have no `.ftz`.
;   * both sm_80 + ptx70 configurations (F16 and SM80-NOF16 checks): a single
;     `min.NaN.ftz.f32`.
820define float @minimum_float_ftz(float %a, float %b) #1 {
821; CHECK-NOF16-LABEL: minimum_float_ftz(
822; CHECK-NOF16:       {
823; CHECK-NOF16-NEXT:    .reg .pred %p<5>;
824; CHECK-NOF16-NEXT:    .reg .b32 %r<3>;
825; CHECK-NOF16-NEXT:    .reg .f32 %f<8>;
826; CHECK-NOF16-EMPTY:
827; CHECK-NOF16-NEXT:  // %bb.0:
828; CHECK-NOF16-NEXT:    ld.param.f32 %f1, [minimum_float_ftz_param_0];
829; CHECK-NOF16-NEXT:    mov.b32 %r1, %f1;
830; CHECK-NOF16-NEXT:    ld.param.f32 %f2, [minimum_float_ftz_param_1];
831; CHECK-NOF16-NEXT:    setp.nan.ftz.f32 %p1, %f1, %f2;
832; CHECK-NOF16-NEXT:    min.ftz.f32 %f3, %f1, %f2;
833; CHECK-NOF16-NEXT:    selp.f32 %f4, 0f7FC00000, %f3, %p1;
834; CHECK-NOF16-NEXT:    setp.eq.s32 %p2, %r1, -2147483648;
835; CHECK-NOF16-NEXT:    selp.f32 %f5, %f1, %f4, %p2;
836; CHECK-NOF16-NEXT:    mov.b32 %r2, %f2;
837; CHECK-NOF16-NEXT:    setp.eq.s32 %p3, %r2, -2147483648;
838; CHECK-NOF16-NEXT:    selp.f32 %f6, %f2, %f5, %p3;
839; CHECK-NOF16-NEXT:    setp.eq.ftz.f32 %p4, %f4, 0f00000000;
840; CHECK-NOF16-NEXT:    selp.f32 %f7, %f6, %f4, %p4;
841; CHECK-NOF16-NEXT:    st.param.f32 [func_retval0], %f7;
842; CHECK-NOF16-NEXT:    ret;
843;
844; CHECK-F16-LABEL: minimum_float_ftz(
845; CHECK-F16:       {
846; CHECK-F16-NEXT:    .reg .f32 %f<4>;
847; CHECK-F16-EMPTY:
848; CHECK-F16-NEXT:  // %bb.0:
849; CHECK-F16-NEXT:    ld.param.f32 %f1, [minimum_float_ftz_param_0];
850; CHECK-F16-NEXT:    ld.param.f32 %f2, [minimum_float_ftz_param_1];
851; CHECK-F16-NEXT:    min.NaN.ftz.f32 %f3, %f1, %f2;
852; CHECK-F16-NEXT:    st.param.f32 [func_retval0], %f3;
853; CHECK-F16-NEXT:    ret;
854;
855; CHECK-SM80-NOF16-LABEL: minimum_float_ftz(
856; CHECK-SM80-NOF16:       {
857; CHECK-SM80-NOF16-NEXT:    .reg .f32 %f<4>;
858; CHECK-SM80-NOF16-EMPTY:
859; CHECK-SM80-NOF16-NEXT:  // %bb.0:
860; CHECK-SM80-NOF16-NEXT:    ld.param.f32 %f1, [minimum_float_ftz_param_0];
861; CHECK-SM80-NOF16-NEXT:    ld.param.f32 %f2, [minimum_float_ftz_param_1];
862; CHECK-SM80-NOF16-NEXT:    min.NaN.ftz.f32 %f3, %f1, %f2;
863; CHECK-SM80-NOF16-NEXT:    st.param.f32 [func_retval0], %f3;
864; CHECK-SM80-NOF16-NEXT:    ret;
865  %x = call float @llvm.minimum.f32(float %a, float %b)
866  ret float %x
867}
868
; Lowering of llvm.minimum.f64(%a, %b). All three llc invocations share one
; expected sequence: min.f64 followed by selp fix-ups that substitute a quiet
; NaN (0d7FF8000000000000) when either input is NaN and that pick an input whose
; raw bits equal -9223372036854775808 (0x8000000000000000, i.e. -0.0) when the
; result compares equal to zero. No min.NaN form is expected for f64 here.
869define double @minimum_double(double %a, double %b) {
870; CHECK-LABEL: minimum_double(
871; CHECK:       {
872; CHECK-NEXT:    .reg .pred %p<5>;
873; CHECK-NEXT:    .reg .b64 %rd<3>;
874; CHECK-NEXT:    .reg .f64 %fd<8>;
875; CHECK-EMPTY:
876; CHECK-NEXT:  // %bb.0:
877; CHECK-NEXT:    ld.param.f64 %fd1, [minimum_double_param_0];
878; CHECK-NEXT:    mov.b64 %rd1, %fd1;
879; CHECK-NEXT:    ld.param.f64 %fd2, [minimum_double_param_1];
880; CHECK-NEXT:    setp.nan.f64 %p1, %fd1, %fd2;
881; CHECK-NEXT:    min.f64 %fd3, %fd1, %fd2;
882; CHECK-NEXT:    selp.f64 %fd4, 0d7FF8000000000000, %fd3, %p1;
883; CHECK-NEXT:    setp.eq.s64 %p2, %rd1, -9223372036854775808;
884; CHECK-NEXT:    selp.f64 %fd5, %fd1, %fd4, %p2;
885; CHECK-NEXT:    mov.b64 %rd2, %fd2;
886; CHECK-NEXT:    setp.eq.s64 %p3, %rd2, -9223372036854775808;
887; CHECK-NEXT:    selp.f64 %fd6, %fd2, %fd5, %p3;
888; CHECK-NEXT:    setp.eq.f64 %p4, %fd4, 0d0000000000000000;
889; CHECK-NEXT:    selp.f64 %fd7, %fd6, %fd4, %p4;
890; CHECK-NEXT:    st.param.f64 [func_retval0], %fd7;
891; CHECK-NEXT:    ret;
892  %x = call double @llvm.minimum.f64(double %a, double %b)
893  ret double %x
894}
895
; Lowering of llvm.minimum.v2f16(%a, %b).
; Default llc invocation and the sm_80 invocation with --nvptx-no-f16-math:
; the vector is unpacked into two b16 lanes; each lane is compared in f32
; (setp.lt / setp.nan after cvt.f32.f16), a quiet f16 NaN (0x7E00) is selected
; when either input is NaN, and an input whose raw bits equal -32768 (0x8000,
; i.e. -0.0) is selected when the lane result is zero; lanes are then repacked.
; sm_80/ptx70 invocation with f16 math: expects a single min.NaN.f16x2.
896define <2 x half> @minimum_v2half(<2 x half> %a, <2 x half> %b) {
897; CHECK-NOF16-LABEL: minimum_v2half(
898; CHECK-NOF16:       {
899; CHECK-NOF16-NEXT:    .reg .pred %p<11>;
900; CHECK-NOF16-NEXT:    .reg .b16 %rs<15>;
901; CHECK-NOF16-NEXT:    .reg .b32 %r<4>;
902; CHECK-NOF16-NEXT:    .reg .f32 %f<7>;
903; CHECK-NOF16-EMPTY:
904; CHECK-NOF16-NEXT:  // %bb.0:
905; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [minimum_v2half_param_0];
906; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [minimum_v2half_param_1];
907; CHECK-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
908; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
909; CHECK-NOF16-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
910; CHECK-NOF16-NEXT:    cvt.f32.f16 %f2, %rs4;
911; CHECK-NOF16-NEXT:    setp.lt.f32 %p1, %f2, %f1;
912; CHECK-NOF16-NEXT:    selp.b16 %rs5, %rs4, %rs2, %p1;
913; CHECK-NOF16-NEXT:    setp.nan.f32 %p2, %f2, %f1;
914; CHECK-NOF16-NEXT:    selp.b16 %rs6, 0x7E00, %rs5, %p2;
915; CHECK-NOF16-NEXT:    setp.eq.s16 %p3, %rs4, -32768;
916; CHECK-NOF16-NEXT:    selp.b16 %rs7, %rs4, %rs6, %p3;
917; CHECK-NOF16-NEXT:    setp.eq.s16 %p4, %rs2, -32768;
918; CHECK-NOF16-NEXT:    selp.b16 %rs8, %rs2, %rs7, %p4;
919; CHECK-NOF16-NEXT:    cvt.f32.f16 %f3, %rs6;
920; CHECK-NOF16-NEXT:    setp.eq.f32 %p5, %f3, 0f00000000;
921; CHECK-NOF16-NEXT:    selp.b16 %rs9, %rs8, %rs6, %p5;
922; CHECK-NOF16-NEXT:    cvt.f32.f16 %f4, %rs1;
923; CHECK-NOF16-NEXT:    cvt.f32.f16 %f5, %rs3;
924; CHECK-NOF16-NEXT:    setp.lt.f32 %p6, %f5, %f4;
925; CHECK-NOF16-NEXT:    selp.b16 %rs10, %rs3, %rs1, %p6;
926; CHECK-NOF16-NEXT:    setp.nan.f32 %p7, %f5, %f4;
927; CHECK-NOF16-NEXT:    selp.b16 %rs11, 0x7E00, %rs10, %p7;
928; CHECK-NOF16-NEXT:    setp.eq.s16 %p8, %rs3, -32768;
929; CHECK-NOF16-NEXT:    selp.b16 %rs12, %rs3, %rs11, %p8;
930; CHECK-NOF16-NEXT:    setp.eq.s16 %p9, %rs1, -32768;
931; CHECK-NOF16-NEXT:    selp.b16 %rs13, %rs1, %rs12, %p9;
932; CHECK-NOF16-NEXT:    cvt.f32.f16 %f6, %rs11;
933; CHECK-NOF16-NEXT:    setp.eq.f32 %p10, %f6, 0f00000000;
934; CHECK-NOF16-NEXT:    selp.b16 %rs14, %rs13, %rs11, %p10;
935; CHECK-NOF16-NEXT:    mov.b32 %r3, {%rs14, %rs9};
936; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r3;
937; CHECK-NOF16-NEXT:    ret;
938;
939; CHECK-F16-LABEL: minimum_v2half(
940; CHECK-F16:       {
941; CHECK-F16-NEXT:    .reg .b32 %r<4>;
942; CHECK-F16-EMPTY:
943; CHECK-F16-NEXT:  // %bb.0:
944; CHECK-F16-NEXT:    ld.param.b32 %r1, [minimum_v2half_param_1];
945; CHECK-F16-NEXT:    ld.param.b32 %r2, [minimum_v2half_param_0];
946; CHECK-F16-NEXT:    min.NaN.f16x2 %r3, %r2, %r1;
947; CHECK-F16-NEXT:    st.param.b32 [func_retval0], %r3;
948; CHECK-F16-NEXT:    ret;
949;
950; CHECK-SM80-NOF16-LABEL: minimum_v2half(
951; CHECK-SM80-NOF16:       {
952; CHECK-SM80-NOF16-NEXT:    .reg .pred %p<11>;
953; CHECK-SM80-NOF16-NEXT:    .reg .b16 %rs<15>;
954; CHECK-SM80-NOF16-NEXT:    .reg .b32 %r<4>;
955; CHECK-SM80-NOF16-NEXT:    .reg .f32 %f<7>;
956; CHECK-SM80-NOF16-EMPTY:
957; CHECK-SM80-NOF16-NEXT:  // %bb.0:
958; CHECK-SM80-NOF16-NEXT:    ld.param.b32 %r1, [minimum_v2half_param_0];
959; CHECK-SM80-NOF16-NEXT:    ld.param.b32 %r2, [minimum_v2half_param_1];
960; CHECK-SM80-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
961; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
962; CHECK-SM80-NOF16-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
963; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f2, %rs4;
964; CHECK-SM80-NOF16-NEXT:    setp.lt.f32 %p1, %f2, %f1;
965; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs5, %rs4, %rs2, %p1;
966; CHECK-SM80-NOF16-NEXT:    setp.nan.f32 %p2, %f2, %f1;
967; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs6, 0x7E00, %rs5, %p2;
968; CHECK-SM80-NOF16-NEXT:    setp.eq.s16 %p3, %rs4, -32768;
969; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs7, %rs4, %rs6, %p3;
970; CHECK-SM80-NOF16-NEXT:    setp.eq.s16 %p4, %rs2, -32768;
971; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs8, %rs2, %rs7, %p4;
972; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f3, %rs6;
973; CHECK-SM80-NOF16-NEXT:    setp.eq.f32 %p5, %f3, 0f00000000;
974; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs9, %rs8, %rs6, %p5;
975; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f4, %rs1;
976; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f5, %rs3;
977; CHECK-SM80-NOF16-NEXT:    setp.lt.f32 %p6, %f5, %f4;
978; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs10, %rs3, %rs1, %p6;
979; CHECK-SM80-NOF16-NEXT:    setp.nan.f32 %p7, %f5, %f4;
980; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs11, 0x7E00, %rs10, %p7;
981; CHECK-SM80-NOF16-NEXT:    setp.eq.s16 %p8, %rs3, -32768;
982; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs12, %rs3, %rs11, %p8;
983; CHECK-SM80-NOF16-NEXT:    setp.eq.s16 %p9, %rs1, -32768;
984; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs13, %rs1, %rs12, %p9;
985; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f6, %rs11;
986; CHECK-SM80-NOF16-NEXT:    setp.eq.f32 %p10, %f6, 0f00000000;
987; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs14, %rs13, %rs11, %p10;
988; CHECK-SM80-NOF16-NEXT:    mov.b32 %r3, {%rs14, %rs9};
989; CHECK-SM80-NOF16-NEXT:    st.param.b32 [func_retval0], %r3;
990; CHECK-SM80-NOF16-NEXT:    ret;
991  %x = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b)
992  ret <2 x half> %x
993}
994
995; ---- maxnum ----
996
; Lowering of llvm.maxnum.f16(%a, %b).
; Default llc invocation and the sm_80 invocation with --nvptx-no-f16-math:
; both operands are widened with cvt.f32.f16, combined with max.f32, and the
; result is narrowed back with cvt.rn.f16.f32.
; sm_80/ptx70 invocation with f16 math: expects a native max.f16.
997define half @maxnum_half(half %a, half %b) {
998; CHECK-NOF16-LABEL: maxnum_half(
999; CHECK-NOF16:       {
1000; CHECK-NOF16-NEXT:    .reg .b16 %rs<4>;
1001; CHECK-NOF16-NEXT:    .reg .f32 %f<4>;
1002; CHECK-NOF16-EMPTY:
1003; CHECK-NOF16-NEXT:  // %bb.0:
1004; CHECK-NOF16-NEXT:    ld.param.b16 %rs1, [maxnum_half_param_0];
1005; CHECK-NOF16-NEXT:    ld.param.b16 %rs2, [maxnum_half_param_1];
1006; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
1007; CHECK-NOF16-NEXT:    cvt.f32.f16 %f2, %rs1;
1008; CHECK-NOF16-NEXT:    max.f32 %f3, %f2, %f1;
1009; CHECK-NOF16-NEXT:    cvt.rn.f16.f32 %rs3, %f3;
1010; CHECK-NOF16-NEXT:    st.param.b16 [func_retval0], %rs3;
1011; CHECK-NOF16-NEXT:    ret;
1012;
1013; CHECK-F16-LABEL: maxnum_half(
1014; CHECK-F16:       {
1015; CHECK-F16-NEXT:    .reg .b16 %rs<4>;
1016; CHECK-F16-EMPTY:
1017; CHECK-F16-NEXT:  // %bb.0:
1018; CHECK-F16-NEXT:    ld.param.b16 %rs1, [maxnum_half_param_0];
1019; CHECK-F16-NEXT:    ld.param.b16 %rs2, [maxnum_half_param_1];
1020; CHECK-F16-NEXT:    max.f16 %rs3, %rs1, %rs2;
1021; CHECK-F16-NEXT:    st.param.b16 [func_retval0], %rs3;
1022; CHECK-F16-NEXT:    ret;
1023;
1024; CHECK-SM80-NOF16-LABEL: maxnum_half(
1025; CHECK-SM80-NOF16:       {
1026; CHECK-SM80-NOF16-NEXT:    .reg .b16 %rs<4>;
1027; CHECK-SM80-NOF16-NEXT:    .reg .f32 %f<4>;
1028; CHECK-SM80-NOF16-EMPTY:
1029; CHECK-SM80-NOF16-NEXT:  // %bb.0:
1030; CHECK-SM80-NOF16-NEXT:    ld.param.b16 %rs1, [maxnum_half_param_0];
1031; CHECK-SM80-NOF16-NEXT:    ld.param.b16 %rs2, [maxnum_half_param_1];
1032; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
1033; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f2, %rs1;
1034; CHECK-SM80-NOF16-NEXT:    max.f32 %f3, %f2, %f1;
1035; CHECK-SM80-NOF16-NEXT:    cvt.rn.f16.f32 %rs3, %f3;
1036; CHECK-SM80-NOF16-NEXT:    st.param.b16 [func_retval0], %rs3;
1037; CHECK-SM80-NOF16-NEXT:    ret;
1038  %x = call half @llvm.maxnum.f16(half %a, half %b)
1039  ret half %x
1040}
1041
; Lowering of llvm.maxnum.f32 with the constant 0.0 as the SECOND operand.
; All three llc invocations share one expected sequence: a single max.f32 with
; the zero folded in as the immediate 0f00000000.
1042define float @maxnum_imm1(float %a) {
1043; CHECK-LABEL: maxnum_imm1(
1044; CHECK:       {
1045; CHECK-NEXT:    .reg .f32 %f<3>;
1046; CHECK-EMPTY:
1047; CHECK-NEXT:  // %bb.0:
1048; CHECK-NEXT:    ld.param.f32 %f1, [maxnum_imm1_param_0];
1049; CHECK-NEXT:    max.f32 %f2, %f1, 0f00000000;
1050; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
1051; CHECK-NEXT:    ret;
1052  %x = call float @llvm.maxnum.f32(float %a, float 0.0)
1053  ret float %x
1054}
1055
; Lowering of llvm.maxnum.f32 with the constant 0.0 as the FIRST operand.
; All three llc invocations share one expected sequence: a single max.f32 in
; which the register operand comes first and the zero appears as the trailing
; immediate 0f00000000, i.e. the operands are emitted in swapped order
; relative to the IR call.
1056define float @maxnum_imm2(float %a) {
1057; CHECK-LABEL: maxnum_imm2(
1058; CHECK:       {
1059; CHECK-NEXT:    .reg .f32 %f<3>;
1060; CHECK-EMPTY:
1061; CHECK-NEXT:  // %bb.0:
1062; CHECK-NEXT:    ld.param.f32 %f1, [maxnum_imm2_param_0];
1063; CHECK-NEXT:    max.f32 %f2, %f1, 0f00000000;
1064; CHECK-NEXT:    st.param.f32 [func_retval0], %f2;
1065; CHECK-NEXT:    ret;
1066  %x = call float @llvm.maxnum.f32(float 0.0, float %a)
1067  ret float %x
1068}
1069
; Lowering of llvm.maxnum.f32(%a, %b) on two register operands.
; All three llc invocations share one expected sequence: a single max.f32.
1070define float @maxnum_float(float %a, float %b) {
1071; CHECK-LABEL: maxnum_float(
1072; CHECK:       {
1073; CHECK-NEXT:    .reg .f32 %f<4>;
1074; CHECK-EMPTY:
1075; CHECK-NEXT:  // %bb.0:
1076; CHECK-NEXT:    ld.param.f32 %f1, [maxnum_float_param_0];
1077; CHECK-NEXT:    ld.param.f32 %f2, [maxnum_float_param_1];
1078; CHECK-NEXT:    max.f32 %f3, %f1, %f2;
1079; CHECK-NEXT:    st.param.f32 [func_retval0], %f3;
1080; CHECK-NEXT:    ret;
1081  %x = call float @llvm.maxnum.f32(float %a, float %b)
1082  ret float %x
1083}
1084
; Lowering of llvm.maxnum.f32(%a, %b) in a function tagged with attribute
; group #1 (defined outside this chunk; presumably the flush-to-zero setting,
; given the .ftz suffix expected below -- confirm against the attribute list).
; All three llc invocations share one expected sequence: a single max.ftz.f32.
1085define float @maxnum_float_ftz(float %a, float %b) #1 {
1086; CHECK-LABEL: maxnum_float_ftz(
1087; CHECK:       {
1088; CHECK-NEXT:    .reg .f32 %f<4>;
1089; CHECK-EMPTY:
1090; CHECK-NEXT:  // %bb.0:
1091; CHECK-NEXT:    ld.param.f32 %f1, [maxnum_float_ftz_param_0];
1092; CHECK-NEXT:    ld.param.f32 %f2, [maxnum_float_ftz_param_1];
1093; CHECK-NEXT:    max.ftz.f32 %f3, %f1, %f2;
1094; CHECK-NEXT:    st.param.f32 [func_retval0], %f3;
1095; CHECK-NEXT:    ret;
1096  %x = call float @llvm.maxnum.f32(float %a, float %b)
1097  ret float %x
1098}
1099
; Lowering of llvm.maxnum.f64(%a, %b).
; All three llc invocations share one expected sequence: a single max.f64.
1100define double @maxnum_double(double %a, double %b) {
1101; CHECK-LABEL: maxnum_double(
1102; CHECK:       {
1103; CHECK-NEXT:    .reg .f64 %fd<4>;
1104; CHECK-EMPTY:
1105; CHECK-NEXT:  // %bb.0:
1106; CHECK-NEXT:    ld.param.f64 %fd1, [maxnum_double_param_0];
1107; CHECK-NEXT:    ld.param.f64 %fd2, [maxnum_double_param_1];
1108; CHECK-NEXT:    max.f64 %fd3, %fd1, %fd2;
1109; CHECK-NEXT:    st.param.f64 [func_retval0], %fd3;
1110; CHECK-NEXT:    ret;
1111  %x = call double @llvm.maxnum.f64(double %a, double %b)
1112  ret double %x
1113}
1114
; Lowering of llvm.maxnum.v2f16(%a, %b).
; Default llc invocation and the sm_80 invocation with --nvptx-no-f16-math:
; the vector is unpacked into two b16 lanes; each lane is widened with
; cvt.f32.f16, combined with max.f32, narrowed with cvt.rn.f16.f32, and the
; two results are repacked into one b32.
; sm_80/ptx70 invocation with f16 math: expects a single max.f16x2.
1115define <2 x half> @maxnum_v2half(<2 x half> %a, <2 x half> %b) {
1116; CHECK-NOF16-LABEL: maxnum_v2half(
1117; CHECK-NOF16:       {
1118; CHECK-NOF16-NEXT:    .reg .b16 %rs<7>;
1119; CHECK-NOF16-NEXT:    .reg .b32 %r<4>;
1120; CHECK-NOF16-NEXT:    .reg .f32 %f<7>;
1121; CHECK-NOF16-EMPTY:
1122; CHECK-NOF16-NEXT:  // %bb.0:
1123; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [maxnum_v2half_param_0];
1124; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [maxnum_v2half_param_1];
1125; CHECK-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
1126; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
1127; CHECK-NOF16-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
1128; CHECK-NOF16-NEXT:    cvt.f32.f16 %f2, %rs4;
1129; CHECK-NOF16-NEXT:    max.f32 %f3, %f2, %f1;
1130; CHECK-NOF16-NEXT:    cvt.rn.f16.f32 %rs5, %f3;
1131; CHECK-NOF16-NEXT:    cvt.f32.f16 %f4, %rs1;
1132; CHECK-NOF16-NEXT:    cvt.f32.f16 %f5, %rs3;
1133; CHECK-NOF16-NEXT:    max.f32 %f6, %f5, %f4;
1134; CHECK-NOF16-NEXT:    cvt.rn.f16.f32 %rs6, %f6;
1135; CHECK-NOF16-NEXT:    mov.b32 %r3, {%rs6, %rs5};
1136; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r3;
1137; CHECK-NOF16-NEXT:    ret;
1138;
1139; CHECK-F16-LABEL: maxnum_v2half(
1140; CHECK-F16:       {
1141; CHECK-F16-NEXT:    .reg .b32 %r<4>;
1142; CHECK-F16-EMPTY:
1143; CHECK-F16-NEXT:  // %bb.0:
1144; CHECK-F16-NEXT:    ld.param.b32 %r1, [maxnum_v2half_param_1];
1145; CHECK-F16-NEXT:    ld.param.b32 %r2, [maxnum_v2half_param_0];
1146; CHECK-F16-NEXT:    max.f16x2 %r3, %r2, %r1;
1147; CHECK-F16-NEXT:    st.param.b32 [func_retval0], %r3;
1148; CHECK-F16-NEXT:    ret;
1149;
1150; CHECK-SM80-NOF16-LABEL: maxnum_v2half(
1151; CHECK-SM80-NOF16:       {
1152; CHECK-SM80-NOF16-NEXT:    .reg .b16 %rs<7>;
1153; CHECK-SM80-NOF16-NEXT:    .reg .b32 %r<4>;
1154; CHECK-SM80-NOF16-NEXT:    .reg .f32 %f<7>;
1155; CHECK-SM80-NOF16-EMPTY:
1156; CHECK-SM80-NOF16-NEXT:  // %bb.0:
1157; CHECK-SM80-NOF16-NEXT:    ld.param.b32 %r1, [maxnum_v2half_param_0];
1158; CHECK-SM80-NOF16-NEXT:    ld.param.b32 %r2, [maxnum_v2half_param_1];
1159; CHECK-SM80-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
1160; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
1161; CHECK-SM80-NOF16-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
1162; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f2, %rs4;
1163; CHECK-SM80-NOF16-NEXT:    max.f32 %f3, %f2, %f1;
1164; CHECK-SM80-NOF16-NEXT:    cvt.rn.f16.f32 %rs5, %f3;
1165; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f4, %rs1;
1166; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f5, %rs3;
1167; CHECK-SM80-NOF16-NEXT:    max.f32 %f6, %f5, %f4;
1168; CHECK-SM80-NOF16-NEXT:    cvt.rn.f16.f32 %rs6, %f6;
1169; CHECK-SM80-NOF16-NEXT:    mov.b32 %r3, {%rs6, %rs5};
1170; CHECK-SM80-NOF16-NEXT:    st.param.b32 [func_retval0], %r3;
1171; CHECK-SM80-NOF16-NEXT:    ret;
1172  %x = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> %b)
1173  ret <2 x half> %x
1174}
1175
1176; ---- maximum ----
1177
; Lowering of llvm.maximum.f16(%a, %b).
; Default llc invocation and the sm_80 invocation with --nvptx-no-f16-math:
; operands are compared in f32 (setp.gt / setp.nan after cvt.f32.f16), a quiet
; f16 NaN (0x7E00) is selected when either input is NaN, and an input whose raw
; bits equal 0 (+0.0) is selected when the result compares equal to zero.
; sm_80/ptx70 invocation with f16 math: expects a single max.NaN.f16.
1178define half @maximum_half(half %a, half %b) {
1179; CHECK-NOF16-LABEL: maximum_half(
1180; CHECK-NOF16:       {
1181; CHECK-NOF16-NEXT:    .reg .pred %p<6>;
1182; CHECK-NOF16-NEXT:    .reg .b16 %rs<8>;
1183; CHECK-NOF16-NEXT:    .reg .f32 %f<4>;
1184; CHECK-NOF16-EMPTY:
1185; CHECK-NOF16-NEXT:  // %bb.0:
1186; CHECK-NOF16-NEXT:    ld.param.b16 %rs1, [maximum_half_param_0];
1187; CHECK-NOF16-NEXT:    ld.param.b16 %rs2, [maximum_half_param_1];
1188; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
1189; CHECK-NOF16-NEXT:    cvt.f32.f16 %f2, %rs1;
1190; CHECK-NOF16-NEXT:    setp.gt.f32 %p1, %f2, %f1;
1191; CHECK-NOF16-NEXT:    selp.b16 %rs3, %rs1, %rs2, %p1;
1192; CHECK-NOF16-NEXT:    setp.nan.f32 %p2, %f2, %f1;
1193; CHECK-NOF16-NEXT:    selp.b16 %rs4, 0x7E00, %rs3, %p2;
1194; CHECK-NOF16-NEXT:    setp.eq.s16 %p3, %rs1, 0;
1195; CHECK-NOF16-NEXT:    selp.b16 %rs5, %rs1, %rs4, %p3;
1196; CHECK-NOF16-NEXT:    setp.eq.s16 %p4, %rs2, 0;
1197; CHECK-NOF16-NEXT:    selp.b16 %rs6, %rs2, %rs5, %p4;
1198; CHECK-NOF16-NEXT:    cvt.f32.f16 %f3, %rs4;
1199; CHECK-NOF16-NEXT:    setp.eq.f32 %p5, %f3, 0f00000000;
1200; CHECK-NOF16-NEXT:    selp.b16 %rs7, %rs6, %rs4, %p5;
1201; CHECK-NOF16-NEXT:    st.param.b16 [func_retval0], %rs7;
1202; CHECK-NOF16-NEXT:    ret;
1203;
1204; CHECK-F16-LABEL: maximum_half(
1205; CHECK-F16:       {
1206; CHECK-F16-NEXT:    .reg .b16 %rs<4>;
1207; CHECK-F16-EMPTY:
1208; CHECK-F16-NEXT:  // %bb.0:
1209; CHECK-F16-NEXT:    ld.param.b16 %rs1, [maximum_half_param_0];
1210; CHECK-F16-NEXT:    ld.param.b16 %rs2, [maximum_half_param_1];
1211; CHECK-F16-NEXT:    max.NaN.f16 %rs3, %rs1, %rs2;
1212; CHECK-F16-NEXT:    st.param.b16 [func_retval0], %rs3;
1213; CHECK-F16-NEXT:    ret;
1214;
1215; CHECK-SM80-NOF16-LABEL: maximum_half(
1216; CHECK-SM80-NOF16:       {
1217; CHECK-SM80-NOF16-NEXT:    .reg .pred %p<6>;
1218; CHECK-SM80-NOF16-NEXT:    .reg .b16 %rs<8>;
1219; CHECK-SM80-NOF16-NEXT:    .reg .f32 %f<4>;
1220; CHECK-SM80-NOF16-EMPTY:
1221; CHECK-SM80-NOF16-NEXT:  // %bb.0:
1222; CHECK-SM80-NOF16-NEXT:    ld.param.b16 %rs1, [maximum_half_param_0];
1223; CHECK-SM80-NOF16-NEXT:    ld.param.b16 %rs2, [maximum_half_param_1];
1224; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
1225; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f2, %rs1;
1226; CHECK-SM80-NOF16-NEXT:    setp.gt.f32 %p1, %f2, %f1;
1227; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs3, %rs1, %rs2, %p1;
1228; CHECK-SM80-NOF16-NEXT:    setp.nan.f32 %p2, %f2, %f1;
1229; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs4, 0x7E00, %rs3, %p2;
1230; CHECK-SM80-NOF16-NEXT:    setp.eq.s16 %p3, %rs1, 0;
1231; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs5, %rs1, %rs4, %p3;
1232; CHECK-SM80-NOF16-NEXT:    setp.eq.s16 %p4, %rs2, 0;
1233; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs6, %rs2, %rs5, %p4;
1234; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f3, %rs4;
1235; CHECK-SM80-NOF16-NEXT:    setp.eq.f32 %p5, %f3, 0f00000000;
1236; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs7, %rs6, %rs4, %p5;
1237; CHECK-SM80-NOF16-NEXT:    st.param.b16 [func_retval0], %rs7;
1238; CHECK-SM80-NOF16-NEXT:    ret;
1239  %x = call half @llvm.maximum.f16(half %a, half %b)
1240  ret half %x
1241}
1242
; Lowering of llvm.maximum.f32 with the constant 0.0 as the SECOND operand.
; Default llc invocation: expects max.f32 against the immediate 0f00000000,
; a selp that substitutes a quiet NaN (0f7FC00000) when %a is NaN (setp.nan of
; the input with itself), and a final selp that yields 0f00000000 whenever the
; result compares equal to zero.
; Both sm_80/ptx70 invocations: expect a single max.NaN.f32 with the immediate.
1243define float @maximum_imm1(float %a) {
1244; CHECK-NOF16-LABEL: maximum_imm1(
1245; CHECK-NOF16:       {
1246; CHECK-NOF16-NEXT:    .reg .pred %p<3>;
1247; CHECK-NOF16-NEXT:    .reg .f32 %f<5>;
1248; CHECK-NOF16-EMPTY:
1249; CHECK-NOF16-NEXT:  // %bb.0:
1250; CHECK-NOF16-NEXT:    ld.param.f32 %f1, [maximum_imm1_param_0];
1251; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %f1, %f1;
1252; CHECK-NOF16-NEXT:    max.f32 %f2, %f1, 0f00000000;
1253; CHECK-NOF16-NEXT:    selp.f32 %f3, 0f7FC00000, %f2, %p1;
1254; CHECK-NOF16-NEXT:    setp.eq.f32 %p2, %f3, 0f00000000;
1255; CHECK-NOF16-NEXT:    selp.f32 %f4, 0f00000000, %f3, %p2;
1256; CHECK-NOF16-NEXT:    st.param.f32 [func_retval0], %f4;
1257; CHECK-NOF16-NEXT:    ret;
1258;
1259; CHECK-F16-LABEL: maximum_imm1(
1260; CHECK-F16:       {
1261; CHECK-F16-NEXT:    .reg .f32 %f<3>;
1262; CHECK-F16-EMPTY:
1263; CHECK-F16-NEXT:  // %bb.0:
1264; CHECK-F16-NEXT:    ld.param.f32 %f1, [maximum_imm1_param_0];
1265; CHECK-F16-NEXT:    max.NaN.f32 %f2, %f1, 0f00000000;
1266; CHECK-F16-NEXT:    st.param.f32 [func_retval0], %f2;
1267; CHECK-F16-NEXT:    ret;
1268;
1269; CHECK-SM80-NOF16-LABEL: maximum_imm1(
1270; CHECK-SM80-NOF16:       {
1271; CHECK-SM80-NOF16-NEXT:    .reg .f32 %f<3>;
1272; CHECK-SM80-NOF16-EMPTY:
1273; CHECK-SM80-NOF16-NEXT:  // %bb.0:
1274; CHECK-SM80-NOF16-NEXT:    ld.param.f32 %f1, [maximum_imm1_param_0];
1275; CHECK-SM80-NOF16-NEXT:    max.NaN.f32 %f2, %f1, 0f00000000;
1276; CHECK-SM80-NOF16-NEXT:    st.param.f32 [func_retval0], %f2;
1277; CHECK-SM80-NOF16-NEXT:    ret;
1278  %x = call float @llvm.maximum.f32(float %a, float 0.0)
1279  ret float %x
1280}
1281
; Lowering of llvm.maximum.f32 with the constant 0.0 as the FIRST operand.
; The expected PTX matches the constant-second variant: in every configuration
; the register operand is emitted first and the zero is the trailing immediate.
; Default llc invocation: max.f32 plus selp fix-ups for NaN (0f7FC00000) and
; for a zero result (0f00000000).
; Both sm_80/ptx70 invocations: expect a single max.NaN.f32 with the immediate.
1282define float @maximum_imm2(float %a) {
1283; CHECK-NOF16-LABEL: maximum_imm2(
1284; CHECK-NOF16:       {
1285; CHECK-NOF16-NEXT:    .reg .pred %p<3>;
1286; CHECK-NOF16-NEXT:    .reg .f32 %f<5>;
1287; CHECK-NOF16-EMPTY:
1288; CHECK-NOF16-NEXT:  // %bb.0:
1289; CHECK-NOF16-NEXT:    ld.param.f32 %f1, [maximum_imm2_param_0];
1290; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %f1, %f1;
1291; CHECK-NOF16-NEXT:    max.f32 %f2, %f1, 0f00000000;
1292; CHECK-NOF16-NEXT:    selp.f32 %f3, 0f7FC00000, %f2, %p1;
1293; CHECK-NOF16-NEXT:    setp.eq.f32 %p2, %f3, 0f00000000;
1294; CHECK-NOF16-NEXT:    selp.f32 %f4, 0f00000000, %f3, %p2;
1295; CHECK-NOF16-NEXT:    st.param.f32 [func_retval0], %f4;
1296; CHECK-NOF16-NEXT:    ret;
1297;
1298; CHECK-F16-LABEL: maximum_imm2(
1299; CHECK-F16:       {
1300; CHECK-F16-NEXT:    .reg .f32 %f<3>;
1301; CHECK-F16-EMPTY:
1302; CHECK-F16-NEXT:  // %bb.0:
1303; CHECK-F16-NEXT:    ld.param.f32 %f1, [maximum_imm2_param_0];
1304; CHECK-F16-NEXT:    max.NaN.f32 %f2, %f1, 0f00000000;
1305; CHECK-F16-NEXT:    st.param.f32 [func_retval0], %f2;
1306; CHECK-F16-NEXT:    ret;
1307;
1308; CHECK-SM80-NOF16-LABEL: maximum_imm2(
1309; CHECK-SM80-NOF16:       {
1310; CHECK-SM80-NOF16-NEXT:    .reg .f32 %f<3>;
1311; CHECK-SM80-NOF16-EMPTY:
1312; CHECK-SM80-NOF16-NEXT:  // %bb.0:
1313; CHECK-SM80-NOF16-NEXT:    ld.param.f32 %f1, [maximum_imm2_param_0];
1314; CHECK-SM80-NOF16-NEXT:    max.NaN.f32 %f2, %f1, 0f00000000;
1315; CHECK-SM80-NOF16-NEXT:    st.param.f32 [func_retval0], %f2;
1316; CHECK-SM80-NOF16-NEXT:    ret;
1317  %x = call float @llvm.maximum.f32(float 0.0, float %a)
1318  ret float %x
1319}
1320
; Lowering of llvm.maximum.f32(%a, %b) on two register operands.
; Default llc invocation: expects max.f32 plus selp fix-ups that substitute a
; quiet NaN (0f7FC00000) when either input is NaN and that pick an input whose
; raw bits equal 0 (+0.0) when the result compares equal to zero.
; Both sm_80/ptx70 invocations: expect a single max.NaN.f32.
1321define float @maximum_float(float %a, float %b) {
1322; CHECK-NOF16-LABEL: maximum_float(
1323; CHECK-NOF16:       {
1324; CHECK-NOF16-NEXT:    .reg .pred %p<5>;
1325; CHECK-NOF16-NEXT:    .reg .b32 %r<3>;
1326; CHECK-NOF16-NEXT:    .reg .f32 %f<8>;
1327; CHECK-NOF16-EMPTY:
1328; CHECK-NOF16-NEXT:  // %bb.0:
1329; CHECK-NOF16-NEXT:    ld.param.f32 %f1, [maximum_float_param_0];
1330; CHECK-NOF16-NEXT:    mov.b32 %r1, %f1;
1331; CHECK-NOF16-NEXT:    ld.param.f32 %f2, [maximum_float_param_1];
1332; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %f1, %f2;
1333; CHECK-NOF16-NEXT:    max.f32 %f3, %f1, %f2;
1334; CHECK-NOF16-NEXT:    selp.f32 %f4, 0f7FC00000, %f3, %p1;
1335; CHECK-NOF16-NEXT:    setp.eq.s32 %p2, %r1, 0;
1336; CHECK-NOF16-NEXT:    selp.f32 %f5, %f1, %f4, %p2;
1337; CHECK-NOF16-NEXT:    mov.b32 %r2, %f2;
1338; CHECK-NOF16-NEXT:    setp.eq.s32 %p3, %r2, 0;
1339; CHECK-NOF16-NEXT:    selp.f32 %f6, %f2, %f5, %p3;
1340; CHECK-NOF16-NEXT:    setp.eq.f32 %p4, %f4, 0f00000000;
1341; CHECK-NOF16-NEXT:    selp.f32 %f7, %f6, %f4, %p4;
1342; CHECK-NOF16-NEXT:    st.param.f32 [func_retval0], %f7;
1343; CHECK-NOF16-NEXT:    ret;
1344;
1345; CHECK-F16-LABEL: maximum_float(
1346; CHECK-F16:       {
1347; CHECK-F16-NEXT:    .reg .f32 %f<4>;
1348; CHECK-F16-EMPTY:
1349; CHECK-F16-NEXT:  // %bb.0:
1350; CHECK-F16-NEXT:    ld.param.f32 %f1, [maximum_float_param_0];
1351; CHECK-F16-NEXT:    ld.param.f32 %f2, [maximum_float_param_1];
1352; CHECK-F16-NEXT:    max.NaN.f32 %f3, %f1, %f2;
1353; CHECK-F16-NEXT:    st.param.f32 [func_retval0], %f3;
1354; CHECK-F16-NEXT:    ret;
1355;
1356; CHECK-SM80-NOF16-LABEL: maximum_float(
1357; CHECK-SM80-NOF16:       {
1358; CHECK-SM80-NOF16-NEXT:    .reg .f32 %f<4>;
1359; CHECK-SM80-NOF16-EMPTY:
1360; CHECK-SM80-NOF16-NEXT:  // %bb.0:
1361; CHECK-SM80-NOF16-NEXT:    ld.param.f32 %f1, [maximum_float_param_0];
1362; CHECK-SM80-NOF16-NEXT:    ld.param.f32 %f2, [maximum_float_param_1];
1363; CHECK-SM80-NOF16-NEXT:    max.NaN.f32 %f3, %f1, %f2;
1364; CHECK-SM80-NOF16-NEXT:    st.param.f32 [func_retval0], %f3;
1365; CHECK-SM80-NOF16-NEXT:    ret;
1366  %x = call float @llvm.maximum.f32(float %a, float %b)
1367  ret float %x
1368}
1369
; Lowering of llvm.maximum.f32(%a, %b) in a function tagged with attribute
; group #1 (defined outside this chunk; presumably the flush-to-zero setting,
; given the .ftz suffixes expected below -- confirm against the attribute list).
; Default llc invocation: expects max.ftz.f32 plus selp fix-ups that substitute
; a quiet NaN (0f7FC00000) when either input is NaN and that pick an input whose
; raw bits equal 0 (+0.0) when the result compares equal to zero.
; Both sm_80/ptx70 invocations: expect a single max.NaN.ftz.f32.
1370define float @maximum_float_ftz(float %a, float %b) #1 {
1371; CHECK-NOF16-LABEL: maximum_float_ftz(
1372; CHECK-NOF16:       {
1373; CHECK-NOF16-NEXT:    .reg .pred %p<5>;
1374; CHECK-NOF16-NEXT:    .reg .b32 %r<3>;
1375; CHECK-NOF16-NEXT:    .reg .f32 %f<8>;
1376; CHECK-NOF16-EMPTY:
1377; CHECK-NOF16-NEXT:  // %bb.0:
1378; CHECK-NOF16-NEXT:    ld.param.f32 %f1, [maximum_float_ftz_param_0];
1379; CHECK-NOF16-NEXT:    mov.b32 %r1, %f1;
1380; CHECK-NOF16-NEXT:    ld.param.f32 %f2, [maximum_float_ftz_param_1];
1381; CHECK-NOF16-NEXT:    setp.nan.ftz.f32 %p1, %f1, %f2;
1382; CHECK-NOF16-NEXT:    max.ftz.f32 %f3, %f1, %f2;
1383; CHECK-NOF16-NEXT:    selp.f32 %f4, 0f7FC00000, %f3, %p1;
1384; CHECK-NOF16-NEXT:    setp.eq.s32 %p2, %r1, 0;
1385; CHECK-NOF16-NEXT:    selp.f32 %f5, %f1, %f4, %p2;
1386; CHECK-NOF16-NEXT:    mov.b32 %r2, %f2;
1387; CHECK-NOF16-NEXT:    setp.eq.s32 %p3, %r2, 0;
1388; CHECK-NOF16-NEXT:    selp.f32 %f6, %f2, %f5, %p3;
1389; CHECK-NOF16-NEXT:    setp.eq.ftz.f32 %p4, %f4, 0f00000000;
1390; CHECK-NOF16-NEXT:    selp.f32 %f7, %f6, %f4, %p4;
1391; CHECK-NOF16-NEXT:    st.param.f32 [func_retval0], %f7;
1392; CHECK-NOF16-NEXT:    ret;
1393;
1394; CHECK-F16-LABEL: maximum_float_ftz(
1395; CHECK-F16:       {
1396; CHECK-F16-NEXT:    .reg .f32 %f<4>;
1397; CHECK-F16-EMPTY:
1398; CHECK-F16-NEXT:  // %bb.0:
1399; CHECK-F16-NEXT:    ld.param.f32 %f1, [maximum_float_ftz_param_0];
1400; CHECK-F16-NEXT:    ld.param.f32 %f2, [maximum_float_ftz_param_1];
1401; CHECK-F16-NEXT:    max.NaN.ftz.f32 %f3, %f1, %f2;
1402; CHECK-F16-NEXT:    st.param.f32 [func_retval0], %f3;
1403; CHECK-F16-NEXT:    ret;
1404;
1405; CHECK-SM80-NOF16-LABEL: maximum_float_ftz(
1406; CHECK-SM80-NOF16:       {
1407; CHECK-SM80-NOF16-NEXT:    .reg .f32 %f<4>;
1408; CHECK-SM80-NOF16-EMPTY:
1409; CHECK-SM80-NOF16-NEXT:  // %bb.0:
1410; CHECK-SM80-NOF16-NEXT:    ld.param.f32 %f1, [maximum_float_ftz_param_0];
1411; CHECK-SM80-NOF16-NEXT:    ld.param.f32 %f2, [maximum_float_ftz_param_1];
1412; CHECK-SM80-NOF16-NEXT:    max.NaN.ftz.f32 %f3, %f1, %f2;
1413; CHECK-SM80-NOF16-NEXT:    st.param.f32 [func_retval0], %f3;
1414; CHECK-SM80-NOF16-NEXT:    ret;
1415  %x = call float @llvm.maximum.f32(float %a, float %b)
1416  ret float %x
1417}
1418
; Lowering of llvm.maximum.f64(%a, %b). All three llc invocations share one
; expected sequence: max.f64 followed by selp fix-ups that substitute a quiet
; NaN (0d7FF8000000000000) when either input is NaN and that pick an input whose
; raw bits equal 0 (+0.0) when the result compares equal to zero.
; No max.NaN form is expected for f64 here.
1419define double @maximum_double(double %a, double %b) {
1420; CHECK-LABEL: maximum_double(
1421; CHECK:       {
1422; CHECK-NEXT:    .reg .pred %p<5>;
1423; CHECK-NEXT:    .reg .b64 %rd<3>;
1424; CHECK-NEXT:    .reg .f64 %fd<8>;
1425; CHECK-EMPTY:
1426; CHECK-NEXT:  // %bb.0:
1427; CHECK-NEXT:    ld.param.f64 %fd1, [maximum_double_param_0];
1428; CHECK-NEXT:    mov.b64 %rd1, %fd1;
1429; CHECK-NEXT:    ld.param.f64 %fd2, [maximum_double_param_1];
1430; CHECK-NEXT:    setp.nan.f64 %p1, %fd1, %fd2;
1431; CHECK-NEXT:    max.f64 %fd3, %fd1, %fd2;
1432; CHECK-NEXT:    selp.f64 %fd4, 0d7FF8000000000000, %fd3, %p1;
1433; CHECK-NEXT:    setp.eq.s64 %p2, %rd1, 0;
1434; CHECK-NEXT:    selp.f64 %fd5, %fd1, %fd4, %p2;
1435; CHECK-NEXT:    mov.b64 %rd2, %fd2;
1436; CHECK-NEXT:    setp.eq.s64 %p3, %rd2, 0;
1437; CHECK-NEXT:    selp.f64 %fd6, %fd2, %fd5, %p3;
1438; CHECK-NEXT:    setp.eq.f64 %p4, %fd4, 0d0000000000000000;
1439; CHECK-NEXT:    selp.f64 %fd7, %fd6, %fd4, %p4;
1440; CHECK-NEXT:    st.param.f64 [func_retval0], %fd7;
1441; CHECK-NEXT:    ret;
1442  %x = call double @llvm.maximum.f64(double %a, double %b)
1443  ret double %x
1444}
1445
; Lowering of llvm.maximum.v2f16(%a, %b).
; Default llc invocation and the sm_80 invocation with --nvptx-no-f16-math:
; the vector is unpacked into two b16 lanes; each lane is compared in f32
; (setp.gt / setp.nan after cvt.f32.f16), a quiet f16 NaN (0x7E00) is selected
; when either input is NaN, and an input whose raw bits equal 0 (+0.0) is
; selected when the lane result is zero; lanes are then repacked.
; sm_80/ptx70 invocation with f16 math: expects a single max.NaN.f16x2.
1446define <2 x half> @maximum_v2half(<2 x half> %a, <2 x half> %b) {
1447; CHECK-NOF16-LABEL: maximum_v2half(
1448; CHECK-NOF16:       {
1449; CHECK-NOF16-NEXT:    .reg .pred %p<11>;
1450; CHECK-NOF16-NEXT:    .reg .b16 %rs<15>;
1451; CHECK-NOF16-NEXT:    .reg .b32 %r<4>;
1452; CHECK-NOF16-NEXT:    .reg .f32 %f<7>;
1453; CHECK-NOF16-EMPTY:
1454; CHECK-NOF16-NEXT:  // %bb.0:
1455; CHECK-NOF16-NEXT:    ld.param.b32 %r1, [maximum_v2half_param_0];
1456; CHECK-NOF16-NEXT:    ld.param.b32 %r2, [maximum_v2half_param_1];
1457; CHECK-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
1458; CHECK-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
1459; CHECK-NOF16-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
1460; CHECK-NOF16-NEXT:    cvt.f32.f16 %f2, %rs4;
1461; CHECK-NOF16-NEXT:    setp.gt.f32 %p1, %f2, %f1;
1462; CHECK-NOF16-NEXT:    selp.b16 %rs5, %rs4, %rs2, %p1;
1463; CHECK-NOF16-NEXT:    setp.nan.f32 %p2, %f2, %f1;
1464; CHECK-NOF16-NEXT:    selp.b16 %rs6, 0x7E00, %rs5, %p2;
1465; CHECK-NOF16-NEXT:    setp.eq.s16 %p3, %rs4, 0;
1466; CHECK-NOF16-NEXT:    selp.b16 %rs7, %rs4, %rs6, %p3;
1467; CHECK-NOF16-NEXT:    setp.eq.s16 %p4, %rs2, 0;
1468; CHECK-NOF16-NEXT:    selp.b16 %rs8, %rs2, %rs7, %p4;
1469; CHECK-NOF16-NEXT:    cvt.f32.f16 %f3, %rs6;
1470; CHECK-NOF16-NEXT:    setp.eq.f32 %p5, %f3, 0f00000000;
1471; CHECK-NOF16-NEXT:    selp.b16 %rs9, %rs8, %rs6, %p5;
1472; CHECK-NOF16-NEXT:    cvt.f32.f16 %f4, %rs1;
1473; CHECK-NOF16-NEXT:    cvt.f32.f16 %f5, %rs3;
1474; CHECK-NOF16-NEXT:    setp.gt.f32 %p6, %f5, %f4;
1475; CHECK-NOF16-NEXT:    selp.b16 %rs10, %rs3, %rs1, %p6;
1476; CHECK-NOF16-NEXT:    setp.nan.f32 %p7, %f5, %f4;
1477; CHECK-NOF16-NEXT:    selp.b16 %rs11, 0x7E00, %rs10, %p7;
1478; CHECK-NOF16-NEXT:    setp.eq.s16 %p8, %rs3, 0;
1479; CHECK-NOF16-NEXT:    selp.b16 %rs12, %rs3, %rs11, %p8;
1480; CHECK-NOF16-NEXT:    setp.eq.s16 %p9, %rs1, 0;
1481; CHECK-NOF16-NEXT:    selp.b16 %rs13, %rs1, %rs12, %p9;
1482; CHECK-NOF16-NEXT:    cvt.f32.f16 %f6, %rs11;
1483; CHECK-NOF16-NEXT:    setp.eq.f32 %p10, %f6, 0f00000000;
1484; CHECK-NOF16-NEXT:    selp.b16 %rs14, %rs13, %rs11, %p10;
1485; CHECK-NOF16-NEXT:    mov.b32 %r3, {%rs14, %rs9};
1486; CHECK-NOF16-NEXT:    st.param.b32 [func_retval0], %r3;
1487; CHECK-NOF16-NEXT:    ret;
1488;
1489; CHECK-F16-LABEL: maximum_v2half(
1490; CHECK-F16:       {
1491; CHECK-F16-NEXT:    .reg .b32 %r<4>;
1492; CHECK-F16-EMPTY:
1493; CHECK-F16-NEXT:  // %bb.0:
1494; CHECK-F16-NEXT:    ld.param.b32 %r1, [maximum_v2half_param_1];
1495; CHECK-F16-NEXT:    ld.param.b32 %r2, [maximum_v2half_param_0];
1496; CHECK-F16-NEXT:    max.NaN.f16x2 %r3, %r2, %r1;
1497; CHECK-F16-NEXT:    st.param.b32 [func_retval0], %r3;
1498; CHECK-F16-NEXT:    ret;
1499;
1500; CHECK-SM80-NOF16-LABEL: maximum_v2half(
1501; CHECK-SM80-NOF16:       {
1502; CHECK-SM80-NOF16-NEXT:    .reg .pred %p<11>;
1503; CHECK-SM80-NOF16-NEXT:    .reg .b16 %rs<15>;
1504; CHECK-SM80-NOF16-NEXT:    .reg .b32 %r<4>;
1505; CHECK-SM80-NOF16-NEXT:    .reg .f32 %f<7>;
1506; CHECK-SM80-NOF16-EMPTY:
1507; CHECK-SM80-NOF16-NEXT:  // %bb.0:
1508; CHECK-SM80-NOF16-NEXT:    ld.param.b32 %r1, [maximum_v2half_param_0];
1509; CHECK-SM80-NOF16-NEXT:    ld.param.b32 %r2, [maximum_v2half_param_1];
1510; CHECK-SM80-NOF16-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
1511; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f1, %rs2;
1512; CHECK-SM80-NOF16-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
1513; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f2, %rs4;
1514; CHECK-SM80-NOF16-NEXT:    setp.gt.f32 %p1, %f2, %f1;
1515; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs5, %rs4, %rs2, %p1;
1516; CHECK-SM80-NOF16-NEXT:    setp.nan.f32 %p2, %f2, %f1;
1517; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs6, 0x7E00, %rs5, %p2;
1518; CHECK-SM80-NOF16-NEXT:    setp.eq.s16 %p3, %rs4, 0;
1519; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs7, %rs4, %rs6, %p3;
1520; CHECK-SM80-NOF16-NEXT:    setp.eq.s16 %p4, %rs2, 0;
1521; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs8, %rs2, %rs7, %p4;
1522; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f3, %rs6;
1523; CHECK-SM80-NOF16-NEXT:    setp.eq.f32 %p5, %f3, 0f00000000;
1524; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs9, %rs8, %rs6, %p5;
1525; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f4, %rs1;
1526; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f5, %rs3;
1527; CHECK-SM80-NOF16-NEXT:    setp.gt.f32 %p6, %f5, %f4;
1528; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs10, %rs3, %rs1, %p6;
1529; CHECK-SM80-NOF16-NEXT:    setp.nan.f32 %p7, %f5, %f4;
1530; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs11, 0x7E00, %rs10, %p7;
1531; CHECK-SM80-NOF16-NEXT:    setp.eq.s16 %p8, %rs3, 0;
1532; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs12, %rs3, %rs11, %p8;
1533; CHECK-SM80-NOF16-NEXT:    setp.eq.s16 %p9, %rs1, 0;
1534; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs13, %rs1, %rs12, %p9;
1535; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %f6, %rs11;
1536; CHECK-SM80-NOF16-NEXT:    setp.eq.f32 %p10, %f6, 0f00000000;
1537; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs14, %rs13, %rs11, %p10;
1538; CHECK-SM80-NOF16-NEXT:    mov.b32 %r3, {%rs14, %rs9};
1539; CHECK-SM80-NOF16-NEXT:    st.param.b32 [func_retval0], %r3;
1540; CHECK-SM80-NOF16-NEXT:    ret;
1541  %x = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b)
1542  ret <2 x half> %x
1543}
1544
1545; ---- fma ----
1546
; Lowering test for the f32 llvm.fma intrinsic with default function
; attributes. The assertions below use the check prefix that is common to
; every RUN line, so all tested configurations are expected to emit the same
; PTX: three parameter loads, a single fma.rn.f32, and a store of the result
; to the return slot.
1547define float @fma_float(float %a, float %b, float %c) {
1548; CHECK-LABEL: fma_float(
1549; CHECK:       {
1550; CHECK-NEXT:    .reg .f32 %f<5>;
1551; CHECK-EMPTY:
1552; CHECK-NEXT:  // %bb.0:
1553; CHECK-NEXT:    ld.param.f32 %f1, [fma_float_param_0];
1554; CHECK-NEXT:    ld.param.f32 %f2, [fma_float_param_1];
1555; CHECK-NEXT:    ld.param.f32 %f3, [fma_float_param_2];
1556; CHECK-NEXT:    fma.rn.f32 %f4, %f1, %f2, %f3;
1557; CHECK-NEXT:    st.param.f32 [func_retval0], %f4;
1558; CHECK-NEXT:    ret;
1559  %x = call float @llvm.fma.f32(float %a, float %b, float %c)
1560  ret float %x
1561}
1562
; Same f32 llvm.fma call as fma_float, but the function carries attribute
; group #1 ("denormal-fp-math-f32" = "preserve-sign"). The only difference in
; the expected PTX is the .ftz modifier on the fma instruction
; (fma.rn.ftz.f32 instead of fma.rn.f32).
1563define float @fma_float_ftz(float %a, float %b, float %c) #1 {
1564; CHECK-LABEL: fma_float_ftz(
1565; CHECK:       {
1566; CHECK-NEXT:    .reg .f32 %f<5>;
1567; CHECK-EMPTY:
1568; CHECK-NEXT:  // %bb.0:
1569; CHECK-NEXT:    ld.param.f32 %f1, [fma_float_ftz_param_0];
1570; CHECK-NEXT:    ld.param.f32 %f2, [fma_float_ftz_param_1];
1571; CHECK-NEXT:    ld.param.f32 %f3, [fma_float_ftz_param_2];
1572; CHECK-NEXT:    fma.rn.ftz.f32 %f4, %f1, %f2, %f3;
1573; CHECK-NEXT:    st.param.f32 [func_retval0], %f4;
1574; CHECK-NEXT:    ret;
1575  %x = call float @llvm.fma.f32(float %a, float %b, float %c)
1576  ret float %x
1577}
1578
; Lowering test for the f64 llvm.fma intrinsic. All RUN configurations are
; expected to load the three double parameters into %fd registers, emit a
; single fma.rn.f64, and store the result to the return slot.
1579define double @fma_double(double %a, double %b, double %c) {
1580; CHECK-LABEL: fma_double(
1581; CHECK:       {
1582; CHECK-NEXT:    .reg .f64 %fd<5>;
1583; CHECK-EMPTY:
1584; CHECK-NEXT:  // %bb.0:
1585; CHECK-NEXT:    ld.param.f64 %fd1, [fma_double_param_0];
1586; CHECK-NEXT:    ld.param.f64 %fd2, [fma_double_param_1];
1587; CHECK-NEXT:    ld.param.f64 %fd3, [fma_double_param_2];
1588; CHECK-NEXT:    fma.rn.f64 %fd4, %fd1, %fd2, %fd3;
1589; CHECK-NEXT:    st.param.f64 [func_retval0], %fd4;
1590; CHECK-NEXT:    ret;
1591  %x = call double @llvm.fma.f64(double %a, double %b, double %c)
1592  ret double %x
1593}
1594
; Attribute group #0 is attached to the math intrinsic declarations at the top
; of this file.
1595attributes #0 = { nounwind readnone }
; Attribute group #1 is attached to the *_ftz test functions (for example
; fma_float_ftz), whose expected PTX uses the .ftz instruction variants.
1596attributes #1 = { "denormal-fp-math-f32" = "preserve-sign" }
1597