; xref: /llvm-project/llvm/test/CodeGen/NVPTX/divrem-combine.ll (revision b279f6b098d3849f7f1c1f539b108307d5f8ae2d)
; RUN: llc -O2 < %s -mtriple=nvptx64 -mcpu=sm_35 | FileCheck %s --check-prefix=O2 --check-prefix=CHECK
; RUN: llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_35 | FileCheck %s --check-prefix=O0 --check-prefix=CHECK
; RUN: %if ptxas %{ llc -O2 < %s -mtriple=nvptx64 -mcpu=sm_35 | %ptxas-verify %}
; RUN: %if ptxas %{ llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_35 | %ptxas-verify %}

; The following IR
;
;   quot = n / d
;   rem  = n % d
;
; should be transformed into
;
;   quot = n / d
;   rem  = n - (n / d) * d
;
; during NVPTX isel, at -O2.  At -O0, we should leave it alone.

; Signed case: sdiv and srem use the same numerator and denominator.  With
; optimization enabled the remainder is expected to be rebuilt from the
; quotient (mul.lo + sub); without optimization a separate rem.s32 is expected.
; CHECK-LABEL: sdiv32(
define void @sdiv32(i32 %n, i32 %d, ptr %quot_ret, ptr %rem_ret) {
  ; The division is emitted at both optimization levels.  Its destination and
  ; operand registers are captured for the checks that follow.
  ; CHECK: div.s32 [[quot:%r[0-9]+]], [[num:%r[0-9]+]], [[den:%r[0-9]+]];
  %quot = sdiv i32 %n, %d

  ; O0: rem.s32
  ; (This is unfortunately order-sensitive, even though mul is commutative.)
  ; O2: mul.lo.s32 [[mul:%r[0-9]+]], [[quot]], [[den]];
  ; O2: sub.s32 [[rem:%r[0-9]+]], [[num]], [[mul]]
  %rem = srem i32 %n, %d

  ; The optimized run also verifies that the captured quotient and remainder
  ; registers are the values being stored.
  ; O2: st{{.*}}[[quot]]
  store i32 %quot, ptr %quot_ret
  ; O2: st{{.*}}[[rem]]
  store i32 %rem, ptr %rem_ret
  ret void
}

; Unsigned case: udiv and urem use the same numerator and denominator.  With
; optimization enabled the remainder is expected to be rebuilt from the
; quotient (mul.lo + sub); without optimization a separate rem.u32 is expected.
; CHECK-LABEL: udiv32(
define void @udiv32(i32 %n, i32 %d, ptr %quot_ret, ptr %rem_ret) {
  ; The division is emitted at both optimization levels.  Its destination and
  ; operand registers are captured for the checks that follow.
  ; CHECK: div.u32 [[quot:%r[0-9]+]], [[num:%r[0-9]+]], [[den:%r[0-9]+]];
  %quot = udiv i32 %n, %d

  ; O0: rem.u32

  ; Selection DAG doesn't know whether this is signed or unsigned
  ; multiplication and subtraction, but it doesn't make a difference either
  ; way.
  ; O2: mul.lo.{{u|s}}32 [[mul:%r[0-9]+]], [[quot]], [[den]];
  ; O2: sub.{{u|s}}32 [[rem:%r[0-9]+]], [[num]], [[mul]]
  %rem = urem i32 %n, %d

  ; The optimized run also verifies that the captured quotient and remainder
  ; registers are the values being stored.
  ; O2: st{{.*}}[[quot]]
  store i32 %quot, ptr %quot_ret
  ; O2: st{{.*}}[[rem]]
  store i32 %rem, ptr %rem_ret
  ret void
}

; Check that we don't perform this optimization if one operation is signed and
; the other isn't.
;
; Here an unsigned division is paired with a signed remainder of the same
; operands.  Both instructions must still appear, at either optimization level.
; CHECK-LABEL: mismatched_types1(
define void @mismatched_types1(i32 %n, i32 %d, ptr %quot_ret, ptr %rem_ret) {
  ; CHECK: div.u32
  ; CHECK: rem.s32
  %quot = udiv i32 %n, %d
  %rem = srem i32 %n, %d
  store i32 %quot, ptr %quot_ret
  store i32 %rem, ptr %rem_ret
  ret void
}

; Signedness mismatch in the other direction: a signed division is paired with
; an unsigned remainder of the same operands.  Both instructions must still
; appear, at either optimization level.
; CHECK-LABEL: mismatched_types2(
define void @mismatched_types2(i32 %n, i32 %d, ptr %quot_ret, ptr %rem_ret) {
  ; CHECK: div.s32
  ; CHECK: rem.u32
  %quot = sdiv i32 %n, %d
  %rem = urem i32 %n, %d
  store i32 %quot, ptr %quot_ret
  store i32 %rem, ptr %rem_ret
  ret void
}

; Check that we don't perform this optimization if the inputs to the div don't
; match the inputs to the rem.
;
; Here the operands are swapped: the division computes n / d while the
; remainder computes d % n.  Both instructions must still appear, at either
; optimization level.
; CHECK-LABEL: mismatched_inputs1(
define void @mismatched_inputs1(i32 %n, i32 %d, ptr %quot_ret, ptr %rem_ret) {
  ; CHECK: div.s32
  ; CHECK: rem.s32
  %quot = sdiv i32 %n, %d
  %rem = srem i32 %d, %n
  store i32 %quot, ptr %quot_ret
  store i32 %rem, ptr %rem_ret
  ret void
}

; Different numerators: the division uses n1 and the remainder uses n2, with a
; shared denominator.  Both instructions must still appear, at either
; optimization level.
; CHECK-LABEL: mismatched_inputs2(
define void @mismatched_inputs2(i32 %n1, i32 %n2, i32 %d, ptr %quot_ret, ptr %rem_ret) {
  ; CHECK: div.s32
  ; CHECK: rem.s32
  %quot = sdiv i32 %n1, %d
  %rem = srem i32 %n2, %d
  store i32 %quot, ptr %quot_ret
  store i32 %rem, ptr %rem_ret
  ret void
}

; Different denominators: the division uses d1 and the remainder uses d2, with
; a shared numerator.  Both instructions must still appear, at either
; optimization level.
; CHECK-LABEL: mismatched_inputs3(
define void @mismatched_inputs3(i32 %n, i32 %d1, i32 %d2, ptr %quot_ret, ptr %rem_ret) {
  ; CHECK: div.s32
  ; CHECK: rem.s32
  %quot = sdiv i32 %n, %d1
  %rem = srem i32 %n, %d2
  store i32 %quot, ptr %quot_ret
  store i32 %rem, ptr %rem_ret
  ret void
}