; xref: /llvm-project/llvm/test/Transforms/InstCombine/NVPTX/nvvm-intrins.ll (revision 4c1b1f6d219d66feb4795fdbe80e8b380b53eade)
; Check that nvvm intrinsics get simplified to target-generic intrinsics where
; possible.
;
; We run this test twice; once with ftz on, and again with ftz off.  Behold the
; hackery:

; First configuration (FileCheck prefixes CHECK and FTZ): copy this file,
; append a definition of attribute group #0 that sets "denormal-fp-math-f32"
; to "preserve-sign", and run instcombine on the copy.
; RUN: cat %s > %t.ftz
; RUN: echo 'attributes #0 = { "denormal-fp-math-f32" = "preserve-sign" }' >> %t.ftz
; RUN: opt < %t.ftz -passes=instcombine -mtriple=nvptx64-nvidia-cuda -S | FileCheck %s --check-prefixes=CHECK,FTZ

; Second configuration (FileCheck prefixes CHECK and NOFTZ): same procedure,
; but attribute group #0 sets "denormal-fp-math-f32" to "ieee".
; RUN: cat %s > %t.noftz
; RUN: echo 'attributes #0 = { "denormal-fp-math-f32" = "ieee" }' >> %t.noftz
; RUN: opt < %t.noftz -passes=instcombine -mtriple=nvptx64-nvidia-cuda -S | FileCheck %s --check-prefixes=CHECK,NOFTZ

; We handle nvvm intrinsics with ftz variants as follows:
;  - If the module is in ftz mode, the ftz variant is transformed into the
;    regular llvm intrinsic, and the non-ftz variant is left alone.
;  - If the module is not in ftz mode, it's the reverse: Only the non-ftz
;    variant is transformed, and the ftz variant is left alone.

; Check NVVM intrinsics that map directly to LLVM target-generic intrinsics.

; Double-precision ceil: both configurations expect the @llvm.nvvm.ceil.d call
; to be replaced by a call to @llvm.ceil.f64.
; CHECK-LABEL: @ceil_double
define double @ceil_double(double %a) #0 {
; CHECK: call double @llvm.ceil.f64
  %ret = call double @llvm.nvvm.ceil.d(double %a)
  ret double %ret
}
; Non-ftz float ceil: expected to become @llvm.ceil.f32 only in the ieee
; (NOFTZ) configuration; the preserve-sign (FTZ) configuration must keep the
; call. The trailing "(" keeps the pattern from also matching the
; @llvm.nvvm.ceil.ftz.f variant, whose name starts with the same characters.
; CHECK-LABEL: @ceil_float
define float @ceil_float(float %a) #0 {
; NOFTZ: call float @llvm.ceil.f32
; FTZ: call float @llvm.nvvm.ceil.f(
  %ret = call float @llvm.nvvm.ceil.f(float %a)
  ret float %ret
}
; Ftz float ceil: expected to become @llvm.ceil.f32 only in the preserve-sign
; (FTZ) configuration; the ieee (NOFTZ) configuration must keep the call.
; CHECK-LABEL: @ceil_float_ftz
define float @ceil_float_ftz(float %a) #0 {
; NOFTZ: call float @llvm.nvvm.ceil.ftz.f
; FTZ: call float @llvm.ceil.f32
  %ret = call float @llvm.nvvm.ceil.ftz.f(float %a)
  ret float %ret
}

; Double-precision fabs: both configurations expect the @llvm.nvvm.fabs.d call
; to be replaced by a call to @llvm.fabs.f64.
; CHECK-LABEL: @fabs_double
define double @fabs_double(double %a) #0 {
; CHECK: call double @llvm.fabs.f64
  %ret = call double @llvm.nvvm.fabs.d(double %a)
  ret double %ret
}
; Non-ftz float fabs: both configurations expect the @llvm.nvvm.fabs.f call to
; be left in place. The trailing "(" keeps the pattern from also matching the
; @llvm.nvvm.fabs.ftz.f variant, whose name starts with the same characters.
; CHECK-LABEL: @fabs_float
define float @fabs_float(float %a) #0 {
; CHECK: call float @llvm.nvvm.fabs.f(
  %ret = call float @llvm.nvvm.fabs.f(float %a)
  ret float %ret
}
; Ftz float fabs: both configurations expect the @llvm.nvvm.fabs.ftz.f call to
; be left in place.
; CHECK-LABEL: @fabs_float_ftz
define float @fabs_float_ftz(float %a) #0 {
; CHECK: call float @llvm.nvvm.fabs.ftz.f
  %ret = call float @llvm.nvvm.fabs.ftz.f(float %a)
  ret float %ret
}

; Double-precision floor: both configurations expect the @llvm.nvvm.floor.d
; call to be replaced by a call to @llvm.floor.f64.
; CHECK-LABEL: @floor_double
define double @floor_double(double %a) #0 {
; CHECK: call double @llvm.floor.f64
  %ret = call double @llvm.nvvm.floor.d(double %a)
  ret double %ret
}
; Non-ftz float floor: expected to become @llvm.floor.f32 only in the ieee
; (NOFTZ) configuration; the preserve-sign (FTZ) configuration must keep the
; call. The trailing "(" keeps the pattern from also matching the
; @llvm.nvvm.floor.ftz.f variant, whose name starts with the same characters.
; CHECK-LABEL: @floor_float
define float @floor_float(float %a) #0 {
; NOFTZ: call float @llvm.floor.f32
; FTZ: call float @llvm.nvvm.floor.f(
  %ret = call float @llvm.nvvm.floor.f(float %a)
  ret float %ret
}
; Ftz float floor: expected to become @llvm.floor.f32 only in the
; preserve-sign (FTZ) configuration; the ieee (NOFTZ) configuration must keep
; the call.
; CHECK-LABEL: @floor_float_ftz
define float @floor_float_ftz(float %a) #0 {
; NOFTZ: call float @llvm.nvvm.floor.ftz.f
; FTZ: call float @llvm.floor.f32
  %ret = call float @llvm.nvvm.floor.ftz.f(float %a)
  ret float %ret
}

; Double-precision fma: both configurations expect the @llvm.nvvm.fma.rn.d
; call to be replaced by a call to @llvm.fma.f64.
; CHECK-LABEL: @fma_double
define double @fma_double(double %a, double %b, double %c) #0 {
; CHECK: call double @llvm.fma.f64
  %ret = call double @llvm.nvvm.fma.rn.d(double %a, double %b, double %c)
  ret double %ret
}
; Non-ftz float fma: expected to become @llvm.fma.f32 only in the ieee (NOFTZ)
; configuration; the preserve-sign (FTZ) configuration must keep the call. The
; trailing "(" keeps the pattern from also matching the
; @llvm.nvvm.fma.rn.ftz.f variant, whose name starts with the same characters.
; CHECK-LABEL: @fma_float
define float @fma_float(float %a, float %b, float %c) #0 {
; NOFTZ: call float @llvm.fma.f32
; FTZ: call float @llvm.nvvm.fma.rn.f(
  %ret = call float @llvm.nvvm.fma.rn.f(float %a, float %b, float %c)
  ret float %ret
}
; Ftz float fma: expected to become @llvm.fma.f32 only in the preserve-sign
; (FTZ) configuration; the ieee (NOFTZ) configuration must keep the call.
; CHECK-LABEL: @fma_float_ftz
define float @fma_float_ftz(float %a, float %b, float %c) #0 {
; NOFTZ: call float @llvm.nvvm.fma.rn.ftz.f
; FTZ: call float @llvm.fma.f32
  %ret = call float @llvm.nvvm.fma.rn.ftz.f(float %a, float %b, float %c)
  ret float %ret
}

; Double-precision fmax: both configurations expect the @llvm.nvvm.fmax.d call
; to be replaced by a call to @llvm.maxnum.f64.
; CHECK-LABEL: @fmax_double
define double @fmax_double(double %a, double %b) #0 {
; CHECK: call double @llvm.maxnum.f64
  %ret = call double @llvm.nvvm.fmax.d(double %a, double %b)
  ret double %ret
}
; Non-ftz float fmax: expected to become @llvm.maxnum.f32 only in the ieee
; (NOFTZ) configuration; the preserve-sign (FTZ) configuration must keep the
; call. The trailing "(" keeps the pattern from also matching the
; @llvm.nvvm.fmax.ftz.f variant, whose name starts with the same characters.
; CHECK-LABEL: @fmax_float
define float @fmax_float(float %a, float %b) #0 {
; NOFTZ: call float @llvm.maxnum.f32
; FTZ: call float @llvm.nvvm.fmax.f(
  %ret = call float @llvm.nvvm.fmax.f(float %a, float %b)
  ret float %ret
}
; Ftz float fmax: expected to become @llvm.maxnum.f32 only in the
; preserve-sign (FTZ) configuration; the ieee (NOFTZ) configuration must keep
; the call.
; CHECK-LABEL: @fmax_float_ftz
define float @fmax_float_ftz(float %a, float %b) #0 {
; NOFTZ: call float @llvm.nvvm.fmax.ftz.f
; FTZ: call float @llvm.maxnum.f32
  %ret = call float @llvm.nvvm.fmax.ftz.f(float %a, float %b)
  ret float %ret
}

; Double-precision fmin: both configurations expect the @llvm.nvvm.fmin.d call
; to be replaced by a call to @llvm.minnum.f64.
; CHECK-LABEL: @fmin_double
define double @fmin_double(double %a, double %b) #0 {
; CHECK: call double @llvm.minnum.f64
  %ret = call double @llvm.nvvm.fmin.d(double %a, double %b)
  ret double %ret
}
; Non-ftz float fmin: expected to become @llvm.minnum.f32 only in the ieee
; (NOFTZ) configuration; the preserve-sign (FTZ) configuration must keep the
; call. The trailing "(" keeps the pattern from also matching the
; @llvm.nvvm.fmin.ftz.f variant, whose name starts with the same characters.
; CHECK-LABEL: @fmin_float
define float @fmin_float(float %a, float %b) #0 {
; NOFTZ: call float @llvm.minnum.f32
; FTZ: call float @llvm.nvvm.fmin.f(
  %ret = call float @llvm.nvvm.fmin.f(float %a, float %b)
  ret float %ret
}
; Ftz float fmin: expected to become @llvm.minnum.f32 only in the
; preserve-sign (FTZ) configuration; the ieee (NOFTZ) configuration must keep
; the call.
; CHECK-LABEL: @fmin_float_ftz
define float @fmin_float_ftz(float %a, float %b) #0 {
; NOFTZ: call float @llvm.nvvm.fmin.ftz.f
; FTZ: call float @llvm.minnum.f32
  %ret = call float @llvm.nvvm.fmin.ftz.f(float %a, float %b)
  ret float %ret
}

; Double-precision round: both configurations expect the @llvm.nvvm.round.d
; call to be left in place.
; CHECK-LABEL: @round_double
define double @round_double(double %a) #0 {
; CHECK: call double @llvm.nvvm.round.d
  %ret = call double @llvm.nvvm.round.d(double %a)
  ret double %ret
}
; Non-ftz float round: both configurations expect the @llvm.nvvm.round.f call
; to be left in place. The trailing "(" keeps the pattern from also matching
; the @llvm.nvvm.round.ftz.f variant, whose name starts with the same
; characters.
; CHECK-LABEL: @round_float
define float @round_float(float %a) #0 {
; CHECK: call float @llvm.nvvm.round.f(
  %ret = call float @llvm.nvvm.round.f(float %a)
  ret float %ret
}
; Ftz float round: both configurations expect the @llvm.nvvm.round.ftz.f call
; to be left in place.
; CHECK-LABEL: @round_float_ftz
define float @round_float_ftz(float %a) #0 {
; CHECK: call float @llvm.nvvm.round.ftz.f
  %ret = call float @llvm.nvvm.round.ftz.f(float %a)
  ret float %ret
}

; Double-precision trunc: both configurations expect the @llvm.nvvm.trunc.d
; call to be replaced by a call to @llvm.trunc.f64.
; CHECK-LABEL: @trunc_double
define double @trunc_double(double %a) #0 {
; CHECK: call double @llvm.trunc.f64
  %ret = call double @llvm.nvvm.trunc.d(double %a)
  ret double %ret
}
; Non-ftz float trunc: expected to become @llvm.trunc.f32 only in the ieee
; (NOFTZ) configuration; the preserve-sign (FTZ) configuration must keep the
; call. The trailing "(" keeps the pattern from also matching the
; @llvm.nvvm.trunc.ftz.f variant, whose name starts with the same characters.
; CHECK-LABEL: @trunc_float
define float @trunc_float(float %a) #0 {
; NOFTZ: call float @llvm.trunc.f32
; FTZ: call float @llvm.nvvm.trunc.f(
  %ret = call float @llvm.nvvm.trunc.f(float %a)
  ret float %ret
}
; Ftz float trunc: expected to become @llvm.trunc.f32 only in the
; preserve-sign (FTZ) configuration; the ieee (NOFTZ) configuration must keep
; the call.
; CHECK-LABEL: @trunc_float_ftz
define float @trunc_float_ftz(float %a) #0 {
; NOFTZ: call float @llvm.nvvm.trunc.ftz.f
; FTZ: call float @llvm.trunc.f32
  %ret = call float @llvm.nvvm.trunc.ftz.f(float %a)
  ret float %ret
}

; Check NVVM intrinsics that correspond to LLVM cast operations.

; double -> i32 via @llvm.nvvm.d2i.rz: both configurations expect a plain
; fptosi instruction.
; CHECK-LABEL: @test_d2i
define i32 @test_d2i(double %a) #0 {
; CHECK: fptosi double %a to i32
  %ret = call i32 @llvm.nvvm.d2i.rz(double %a)
  ret i32 %ret
}
; float -> i32 via @llvm.nvvm.f2i.rz: both configurations expect a plain
; fptosi instruction.
; CHECK-LABEL: @test_f2i
define i32 @test_f2i(float %a) #0 {
; CHECK: fptosi float %a to i32
  %ret = call i32 @llvm.nvvm.f2i.rz(float %a)
  ret i32 %ret
}
; double -> i64 via @llvm.nvvm.d2ll.rz: both configurations expect a plain
; fptosi instruction.
; CHECK-LABEL: @test_d2ll
define i64 @test_d2ll(double %a) #0 {
; CHECK: fptosi double %a to i64
  %ret = call i64 @llvm.nvvm.d2ll.rz(double %a)
  ret i64 %ret
}
; float -> i64 via @llvm.nvvm.f2ll.rz: both configurations expect a plain
; fptosi instruction.
; CHECK-LABEL: @test_f2ll
define i64 @test_f2ll(float %a) #0 {
; CHECK: fptosi float %a to i64
  %ret = call i64 @llvm.nvvm.f2ll.rz(float %a)
  ret i64 %ret
}
; double -> unsigned i32 via @llvm.nvvm.d2ui.rz: both configurations expect a
; plain fptoui instruction.
; CHECK-LABEL: @test_d2ui
define i32 @test_d2ui(double %a) #0 {
; CHECK: fptoui double %a to i32
  %ret = call i32 @llvm.nvvm.d2ui.rz(double %a)
  ret i32 %ret
}
; float -> unsigned i32 via @llvm.nvvm.f2ui.rz: both configurations expect a
; plain fptoui instruction.
; CHECK-LABEL: @test_f2ui
define i32 @test_f2ui(float %a) #0 {
; CHECK: fptoui float %a to i32
  %ret = call i32 @llvm.nvvm.f2ui.rz(float %a)
  ret i32 %ret
}
; double -> unsigned i64 via @llvm.nvvm.d2ull.rz: both configurations expect a
; plain fptoui instruction.
; CHECK-LABEL: @test_d2ull
define i64 @test_d2ull(double %a) #0 {
; CHECK: fptoui double %a to i64
  %ret = call i64 @llvm.nvvm.d2ull.rz(double %a)
  ret i64 %ret
}
; float -> unsigned i64 via @llvm.nvvm.f2ull.rz: both configurations expect a
; plain fptoui instruction.
; CHECK-LABEL: @test_f2ull
define i64 @test_f2ull(float %a) #0 {
; CHECK: fptoui float %a to i64
  %ret = call i64 @llvm.nvvm.f2ull.rz(float %a)
  ret i64 %ret
}

; i32 -> double via @llvm.nvvm.i2d.rn: both configurations expect a plain
; sitofp instruction.
; CHECK-LABEL: @test_i2d
define double @test_i2d(i32 %a) #0 {
; CHECK: sitofp i32 %a to double
  %ret = call double @llvm.nvvm.i2d.rn(i32 %a)
  ret double %ret
}
; i32 -> float via @llvm.nvvm.i2f.rn: both configurations expect a plain
; sitofp instruction.
; CHECK-LABEL: @test_i2f
define float @test_i2f(i32 %a) #0 {
; CHECK: sitofp i32 %a to float
  %ret = call float @llvm.nvvm.i2f.rn(i32 %a)
  ret float %ret
}
; i64 -> double via @llvm.nvvm.ll2d.rn: both configurations expect a plain
; sitofp instruction.
; CHECK-LABEL: @test_ll2d
define double @test_ll2d(i64 %a) #0 {
; CHECK: sitofp i64 %a to double
  %ret = call double @llvm.nvvm.ll2d.rn(i64 %a)
  ret double %ret
}
; i64 -> float via @llvm.nvvm.ll2f.rn: both configurations expect a plain
; sitofp instruction.
; CHECK-LABEL: @test_ll2f
define float @test_ll2f(i64 %a) #0 {
; CHECK: sitofp i64 %a to float
  %ret = call float @llvm.nvvm.ll2f.rn(i64 %a)
  ret float %ret
}
; unsigned i32 -> double via @llvm.nvvm.ui2d.rn: both configurations expect a
; plain uitofp instruction.
; CHECK-LABEL: @test_ui2d
define double @test_ui2d(i32 %a) #0 {
; CHECK: uitofp i32 %a to double
  %ret = call double @llvm.nvvm.ui2d.rn(i32 %a)
  ret double %ret
}
; unsigned i32 -> float via @llvm.nvvm.ui2f.rn: both configurations expect a
; plain uitofp instruction.
; CHECK-LABEL: @test_ui2f
define float @test_ui2f(i32 %a) #0 {
; CHECK: uitofp i32 %a to float
  %ret = call float @llvm.nvvm.ui2f.rn(i32 %a)
  ret float %ret
}
; unsigned i64 -> double via @llvm.nvvm.ull2d.rn: both configurations expect a
; plain uitofp instruction.
; CHECK-LABEL: @test_ull2d
define double @test_ull2d(i64 %a) #0 {
; CHECK: uitofp i64 %a to double
  %ret = call double @llvm.nvvm.ull2d.rn(i64 %a)
  ret double %ret
}
; unsigned i64 -> float via @llvm.nvvm.ull2f.rn: both configurations expect a
; plain uitofp instruction.
; CHECK-LABEL: @test_ull2f
define float @test_ull2f(i64 %a) #0 {
; CHECK: uitofp i64 %a to float
  %ret = call float @llvm.nvvm.ull2f.rn(i64 %a)
  ret float %ret
}

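; Check NVVM intrinsics that map to LLVM binary operations.
; Note: of the tests in this section, only the double-precision div.rn case is
; expected to become a plain instruction (fdiv); the add.rn and mul.rn calls
; and the float div.rn calls are expected to remain intrinsic calls.
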
; Double-precision add.rn: both configurations expect the @llvm.nvvm.add.rn.d
; call to be left in place.
; CHECK-LABEL: @test_add_rn_d
define double @test_add_rn_d(double %a, double %b) #0 {
; CHECK: call double @llvm.nvvm.add.rn.d
  %ret = call double @llvm.nvvm.add.rn.d(double %a, double %b)
  ret double %ret
}
; Non-ftz float add.rn: both configurations expect the @llvm.nvvm.add.rn.f
; call to be left in place. The trailing "(" keeps the pattern from also
; matching the @llvm.nvvm.add.rn.ftz.f variant, whose name starts with the
; same characters.
; CHECK-LABEL: @test_add_rn_f
define float @test_add_rn_f(float %a, float %b) #0 {
; CHECK: call float @llvm.nvvm.add.rn.f(
  %ret = call float @llvm.nvvm.add.rn.f(float %a, float %b)
  ret float %ret
}
; Ftz float add.rn: both configurations expect the @llvm.nvvm.add.rn.ftz.f
; call, with its original operands, to be left in place.
; CHECK-LABEL: @test_add_rn_f_ftz
define float @test_add_rn_f_ftz(float %a, float %b) #0 {
; CHECK: call float @llvm.nvvm.add.rn.ftz.f(float %a, float %b)
  %ret = call float @llvm.nvvm.add.rn.ftz.f(float %a, float %b)
  ret float %ret
}

; Double-precision mul.rn: both configurations expect the @llvm.nvvm.mul.rn.d
; call to be left in place.
; CHECK-LABEL: @test_mul_rn_d
define double @test_mul_rn_d(double %a, double %b) #0 {
; CHECK: call double @llvm.nvvm.mul.rn.d
  %ret = call double @llvm.nvvm.mul.rn.d(double %a, double %b)
  ret double %ret
}
; Non-ftz float mul.rn: both configurations expect the @llvm.nvvm.mul.rn.f
; call to be left in place. The trailing "(" keeps the pattern from also
; matching the @llvm.nvvm.mul.rn.ftz.f variant, whose name starts with the
; same characters.
; CHECK-LABEL: @test_mul_rn_f
define float @test_mul_rn_f(float %a, float %b) #0 {
; CHECK: call float @llvm.nvvm.mul.rn.f(
  %ret = call float @llvm.nvvm.mul.rn.f(float %a, float %b)
  ret float %ret
}
; Ftz float mul.rn: both configurations expect the @llvm.nvvm.mul.rn.ftz.f
; call, with its original operands, to be left in place.
; CHECK-LABEL: @test_mul_rn_f_ftz
define float @test_mul_rn_f_ftz(float %a, float %b) #0 {
; CHECK: call float @llvm.nvvm.mul.rn.ftz.f(float %a, float %b)
  %ret = call float @llvm.nvvm.mul.rn.ftz.f(float %a, float %b)
  ret float %ret
}

; Double-precision div.rn: both configurations expect the @llvm.nvvm.div.rn.d
; call to be replaced by an fdiv instruction.
; CHECK-LABEL: @test_div_rn_d
define double @test_div_rn_d(double %a, double %b) #0 {
; CHECK: fdiv
  %ret = call double @llvm.nvvm.div.rn.d(double %a, double %b)
  ret double %ret
}
; Non-ftz float div.rn: both configurations expect the @llvm.nvvm.div.rn.f
; call to be left in place. The trailing "(" keeps the pattern from also
; matching the @llvm.nvvm.div.rn.ftz.f variant, whose name starts with the
; same characters.
; CHECK-LABEL: @test_div_rn_f
define float @test_div_rn_f(float %a, float %b) #0 {
; CHECK: call float @llvm.nvvm.div.rn.f(
  %ret = call float @llvm.nvvm.div.rn.f(float %a, float %b)
  ret float %ret
}
; Ftz float div.rn: both configurations expect the @llvm.nvvm.div.rn.ftz.f
; call, with its original operands, to be left in place.
; CHECK-LABEL: @test_div_rn_f_ftz
define float @test_div_rn_f_ftz(float %a, float %b) #0 {
; CHECK: call float @llvm.nvvm.div.rn.ftz.f(float %a, float %b)
  %ret = call float @llvm.nvvm.div.rn.ftz.f(float %a, float %b)
  ret float %ret
}

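; Check NVVM intrinsics that require us to emit custom IR.
; Note: in this section the rcp.rn and float sqrt.rn calls are expected to
; remain intrinsic calls, while sqrt.rn.d and sqrt.f are expected to become
; calls to llvm.sqrt.
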
; Non-ftz float rcp.rn: both configurations expect the @llvm.nvvm.rcp.rn.f
; call to be left in place. The trailing "(" keeps the pattern from also
; matching the @llvm.nvvm.rcp.rn.ftz.f variant, whose name starts with the
; same characters.
; CHECK-LABEL: @test_rcp_rn_f
define float @test_rcp_rn_f(float %a) #0 {
; CHECK: call float @llvm.nvvm.rcp.rn.f(
  %ret = call float @llvm.nvvm.rcp.rn.f(float %a)
  ret float %ret
}
; Ftz float rcp.rn: both configurations expect the @llvm.nvvm.rcp.rn.ftz.f
; call, with its original operand, to be left in place.
; CHECK-LABEL: @test_rcp_rn_f_ftz
define float @test_rcp_rn_f_ftz(float %a) #0 {
; CHECK: call float @llvm.nvvm.rcp.rn.ftz.f(float %a)
  %ret = call float @llvm.nvvm.rcp.rn.ftz.f(float %a)
  ret float %ret
}

; Double-precision sqrt.rn: both configurations expect the
; @llvm.nvvm.sqrt.rn.d call to be replaced by a call to @llvm.sqrt.f64 on the
; same operand.
; CHECK-LABEL: @test_sqrt_rn_d
define double @test_sqrt_rn_d(double %a) #0 {
; CHECK: call double @llvm.sqrt.f64(double %a)
  %ret = call double @llvm.nvvm.sqrt.rn.d(double %a)
  ret double %ret
}
; nvvm.sqrt.f is a special case: both configurations expect it to be replaced
; by a call to @llvm.sqrt.f32 on the same operand.
; CHECK-LABEL: @test_sqrt_f
define float @test_sqrt_f(float %a) #0 {
; CHECK: call float @llvm.sqrt.f32(float %a)
  %ret = call float @llvm.nvvm.sqrt.f(float %a)
  ret float %ret
}
; Non-ftz float sqrt.rn: both configurations expect the @llvm.nvvm.sqrt.rn.f
; call to be left in place. The trailing "(" keeps the pattern from also
; matching the @llvm.nvvm.sqrt.rn.ftz.f variant, whose name starts with the
; same characters.
; CHECK-LABEL: @test_sqrt_rn_f
define float @test_sqrt_rn_f(float %a) #0 {
; CHECK: call float @llvm.nvvm.sqrt.rn.f(
  %ret = call float @llvm.nvvm.sqrt.rn.f(float %a)
  ret float %ret
}
; Ftz float sqrt.rn: both configurations expect the @llvm.nvvm.sqrt.rn.ftz.f
; call, with its original operand, to be left in place.
; CHECK-LABEL: @test_sqrt_rn_f_ftz
define float @test_sqrt_rn_f_ftz(float %a) #0 {
; CHECK: call float @llvm.nvvm.sqrt.rn.ftz.f(float %a)
  %ret = call float @llvm.nvvm.sqrt.rn.ftz.f(float %a)
  ret float %ret
}

; fshl.clamp with a small constant shift (3): both configurations expect a
; call to the generic @llvm.fshl.i32 with the same operands and shift amount.
; This function carries no #0 attribute group.
; CHECK-LABEL: @test_fshl_clamp_1
define i32 @test_fshl_clamp_1(i32 %a, i32 %b) {
; CHECK: call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 3)
  %call = call i32 @llvm.nvvm.fshl.clamp.i32(i32 %a, i32 %b, i32 3)
  ret i32 %call
}

; fshl.clamp with a constant shift (300) larger than the 32-bit operand width:
; both configurations expect the call to fold away so that the function
; returns %b directly.
; CHECK-LABEL: @test_fshl_clamp_2
define i32 @test_fshl_clamp_2(i32 %a, i32 %b) {
; CHECK: ret i32 %b
  %call = call i32 @llvm.nvvm.fshl.clamp.i32(i32 %a, i32 %b, i32 300)
  ret i32 %call
}

; fshl.clamp with a non-constant shift (%c): both configurations expect the
; intrinsic call to be left in place.
; CHECK-LABEL: @test_fshl_clamp_3
define i32 @test_fshl_clamp_3(i32 %a, i32 %b, i32 %c) {
; CHECK: call i32 @llvm.nvvm.fshl.clamp.i32(i32 %a, i32 %b, i32 %c)
  %call = call i32 @llvm.nvvm.fshl.clamp.i32(i32 %a, i32 %b, i32 %c)
  ret i32 %call
}

; fshr.clamp with a small constant shift (3): both configurations expect a
; call to the generic @llvm.fshl.i32 with shift amount 29 (= 32 - 3), i.e. the
; right funnel shift written as the equivalent left funnel shift.
; CHECK-LABEL: @test_fshr_clamp_1
define i32 @test_fshr_clamp_1(i32 %a, i32 %b) {
; CHECK: call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 29)
  %call = call i32 @llvm.nvvm.fshr.clamp.i32(i32 %a, i32 %b, i32 3)
  ret i32 %call
}

; fshr.clamp with a constant shift (300) larger than the 32-bit operand width:
; both configurations expect the call to fold away so that the function
; returns %a directly.
; CHECK-LABEL: @test_fshr_clamp_2
define i32 @test_fshr_clamp_2(i32 %a, i32 %b) {
; CHECK: ret i32 %a
  %call = call i32 @llvm.nvvm.fshr.clamp.i32(i32 %a, i32 %b, i32 300)
  ret i32 %call
}

; fshr.clamp with a non-constant shift (%c): both configurations expect the
; intrinsic call to be left in place.
; CHECK-LABEL: @test_fshr_clamp_3
define i32 @test_fshr_clamp_3(i32 %a, i32 %b, i32 %c) {
; CHECK: call i32 @llvm.nvvm.fshr.clamp.i32(i32 %a, i32 %b, i32 %c)
  %call = call i32 @llvm.nvvm.fshr.clamp.i32(i32 %a, i32 %b, i32 %c)
  ret i32 %call
}

; Declarations of the NVVM intrinsics used by the test functions.
; NOTE(review): several of these (the d2f.*, f2h.*, f2*.rz.ftz, lohi.i2d,
; rcp.rm.d and rcp.rn.d declarations) are not called by any function visible
; in this file; they are kept as-is.
declare double @llvm.nvvm.add.rn.d(double, double)
declare float @llvm.nvvm.add.rn.f(float, float)
declare float @llvm.nvvm.add.rn.ftz.f(float, float)
declare double @llvm.nvvm.ceil.d(double)
declare float @llvm.nvvm.ceil.f(float)
declare float @llvm.nvvm.ceil.ftz.f(float)
declare float @llvm.nvvm.d2f.rm(double)
declare float @llvm.nvvm.d2f.rm.ftz(double)
declare float @llvm.nvvm.d2f.rp(double)
declare float @llvm.nvvm.d2f.rp.ftz(double)
declare float @llvm.nvvm.d2f.rz(double)
declare float @llvm.nvvm.d2f.rz.ftz(double)
declare i32 @llvm.nvvm.d2i.rz(double)
declare i64 @llvm.nvvm.d2ll.rz(double)
declare i32 @llvm.nvvm.d2ui.rz(double)
declare i64 @llvm.nvvm.d2ull.rz(double)
declare double @llvm.nvvm.div.rn.d(double, double)
declare float @llvm.nvvm.div.rn.f(float, float)
declare float @llvm.nvvm.div.rn.ftz.f(float, float)
declare i16 @llvm.nvvm.f2h.rz(float)
declare i16 @llvm.nvvm.f2h.rz.ftz(float)
declare i32 @llvm.nvvm.f2i.rz(float)
declare i32 @llvm.nvvm.f2i.rz.ftz(float)
declare i64 @llvm.nvvm.f2ll.rz(float)
declare i64 @llvm.nvvm.f2ll.rz.ftz(float)
declare i32 @llvm.nvvm.f2ui.rz(float)
declare i32 @llvm.nvvm.f2ui.rz.ftz(float)
declare i64 @llvm.nvvm.f2ull.rz(float)
declare i64 @llvm.nvvm.f2ull.rz.ftz(float)
declare double @llvm.nvvm.fabs.d(double)
declare float @llvm.nvvm.fabs.f(float)
declare float @llvm.nvvm.fabs.ftz.f(float)
declare double @llvm.nvvm.floor.d(double)
declare float @llvm.nvvm.floor.f(float)
declare float @llvm.nvvm.floor.ftz.f(float)
declare double @llvm.nvvm.fma.rn.d(double, double, double)
declare float @llvm.nvvm.fma.rn.f(float, float, float)
declare float @llvm.nvvm.fma.rn.ftz.f(float, float, float)
declare double @llvm.nvvm.fmax.d(double, double)
declare float @llvm.nvvm.fmax.f(float, float)
declare float @llvm.nvvm.fmax.ftz.f(float, float)
declare double @llvm.nvvm.fmin.d(double, double)
declare float @llvm.nvvm.fmin.f(float, float)
declare float @llvm.nvvm.fmin.ftz.f(float, float)
declare double @llvm.nvvm.i2d.rn(i32)
declare float @llvm.nvvm.i2f.rn(i32)
declare double @llvm.nvvm.ll2d.rn(i64)
declare float @llvm.nvvm.ll2f.rn(i64)
declare double @llvm.nvvm.lohi.i2d(i32, i32)
declare double @llvm.nvvm.mul.rn.d(double, double)
declare float @llvm.nvvm.mul.rn.f(float, float)
declare float @llvm.nvvm.mul.rn.ftz.f(float, float)
declare double @llvm.nvvm.rcp.rm.d(double)
declare double @llvm.nvvm.rcp.rn.d(double)
declare float @llvm.nvvm.rcp.rn.f(float)
declare float @llvm.nvvm.rcp.rn.ftz.f(float)
declare double @llvm.nvvm.round.d(double)
declare float @llvm.nvvm.round.f(float)
declare float @llvm.nvvm.round.ftz.f(float)
declare float @llvm.nvvm.sqrt.f(float)
declare double @llvm.nvvm.sqrt.rn.d(double)
declare float @llvm.nvvm.sqrt.rn.f(float)
declare float @llvm.nvvm.sqrt.rn.ftz.f(float)
declare double @llvm.nvvm.trunc.d(double)
declare float @llvm.nvvm.trunc.f(float)
declare float @llvm.nvvm.trunc.ftz.f(float)
declare double @llvm.nvvm.ui2d.rn(i32)
declare float @llvm.nvvm.ui2f.rn(i32)
declare double @llvm.nvvm.ull2d.rn(i64)
declare float @llvm.nvvm.ull2f.rn(i64)
declare i32 @llvm.nvvm.fshr.clamp.i32(i32, i32, i32)
declare i32 @llvm.nvvm.fshl.clamp.i32(i32, i32, i32)