xref: /llvm-project/llvm/test/CodeGen/NVPTX/combine-mad.ll (revision 0068078dca60b41ad1c7bdd4448e7de718b82a5d)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 -O1 | FileCheck %s
3; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -O1 | FileCheck %s
4; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 -O1 | %ptxas-verify %}
5; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -O1 | %ptxas-verify %}
6
;; test1: computes (%n + 1) * %m, with the add result as the first mul operand.
;; The expected PTX is a single mad.lo.s32 (m * n + m) with no separate add or
;; mul instruction.
7define i32 @test1(i32 %n, i32 %m) {
8;
9; CHECK-LABEL: test1(
10; CHECK:       {
11; CHECK-NEXT:    .reg .b32 %r<4>;
12; CHECK-EMPTY:
13; CHECK-NEXT:  // %bb.0:
14; CHECK-NEXT:    ld.param.u32 %r1, [test1_param_0];
15; CHECK-NEXT:    ld.param.u32 %r2, [test1_param_1];
16; CHECK-NEXT:    mad.lo.s32 %r3, %r2, %r1, %r2;
17; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
18; CHECK-NEXT:    ret;
19  %add = add i32 %n, 1
20  %mul = mul i32 %add, %m
21  ret i32 %mul
22}
23
;; test1_rev: computes %m * (%n + 1), i.e. the same product as test1 but with
;; the mul operands in the opposite order. The expected PTX is the same single
;; mad.lo.s32 (m * n + m).
24define i32 @test1_rev(i32 %n, i32 %m) {
25;
26; CHECK-LABEL: test1_rev(
27; CHECK:       {
28; CHECK-NEXT:    .reg .b32 %r<4>;
29; CHECK-EMPTY:
30; CHECK-NEXT:  // %bb.0:
31; CHECK-NEXT:    ld.param.u32 %r1, [test1_rev_param_0];
32; CHECK-NEXT:    ld.param.u32 %r2, [test1_rev_param_1];
33; CHECK-NEXT:    mad.lo.s32 %r3, %r2, %r1, %r2;
34; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
35; CHECK-NEXT:    ret;
36  %add = add i32 %n, 1
37  %mul = mul i32 %m, %add
38  ret i32 %mul
39}
40
41;; Transpose (mul (select)) if it can then be folded to mad
;; Here the select yields the constant 1 when %s < 1 and (%n + 1) otherwise,
;; and its result is multiplied by %m. The expected PTX computes m * n + m with
;; one mad.lo.s32 and then uses selp to pick %m (predicate set) or the mad
;; result (predicate clear); no standalone mul is expected.
42define i32 @test2(i32 %n, i32 %m, i32 %s) {
43;
44; CHECK-LABEL: test2(
45; CHECK:       {
46; CHECK-NEXT:    .reg .pred %p<2>;
47; CHECK-NEXT:    .reg .b32 %r<6>;
48; CHECK-EMPTY:
49; CHECK-NEXT:  // %bb.0:
50; CHECK-NEXT:    ld.param.u32 %r1, [test2_param_0];
51; CHECK-NEXT:    ld.param.u32 %r2, [test2_param_1];
52; CHECK-NEXT:    ld.param.u32 %r3, [test2_param_2];
53; CHECK-NEXT:    setp.lt.s32 %p1, %r3, 1;
54; CHECK-NEXT:    mad.lo.s32 %r4, %r2, %r1, %r2;
55; CHECK-NEXT:    selp.b32 %r5, %r2, %r4, %p1;
56; CHECK-NEXT:    st.param.b32 [func_retval0], %r5;
57; CHECK-NEXT:    ret;
58  %add = add i32 %n, 1
59  %cond = icmp slt i32 %s, 1
60  %sel = select i1 %cond, i32 1, i32 %add
61  %mul = mul i32 %sel, %m
62  ret i32 %mul
63}
64
65;; Transpose (mul (select)) if it can then be folded to mad
;; Variant with the select arms swapped relative to test2: the select yields
;; (%n + 1) when %s < 1 and the constant 1 otherwise. The expected selp
;; therefore picks the mad result when the predicate is set and %m otherwise.
66define i32 @test2_rev1(i32 %n, i32 %m, i32 %s) {
67;
68; CHECK-LABEL: test2_rev1(
69; CHECK:       {
70; CHECK-NEXT:    .reg .pred %p<2>;
71; CHECK-NEXT:    .reg .b32 %r<6>;
72; CHECK-EMPTY:
73; CHECK-NEXT:  // %bb.0:
74; CHECK-NEXT:    ld.param.u32 %r1, [test2_rev1_param_0];
75; CHECK-NEXT:    ld.param.u32 %r2, [test2_rev1_param_1];
76; CHECK-NEXT:    ld.param.u32 %r3, [test2_rev1_param_2];
77; CHECK-NEXT:    setp.lt.s32 %p1, %r3, 1;
78; CHECK-NEXT:    mad.lo.s32 %r4, %r2, %r1, %r2;
79; CHECK-NEXT:    selp.b32 %r5, %r4, %r2, %p1;
80; CHECK-NEXT:    st.param.b32 [func_retval0], %r5;
81; CHECK-NEXT:    ret;
82  %add = add i32 %n, 1
83  %cond = icmp slt i32 %s, 1
84  %sel = select i1 %cond, i32 %add, i32 1
85  %mul = mul i32 %sel, %m
86  ret i32 %mul
87}
88
89;; Transpose (mul (select)) if it can then be folded to mad
;; Variant of test2_rev1 with the mul operands swapped, so the select result is
;; the second mul operand (%m * %sel). The expected PTX is the same mad.lo.s32
;; followed by a selp between the mad result and %m.
90define i32 @test2_rev2(i32 %n, i32 %m, i32 %s) {
91;
92; CHECK-LABEL: test2_rev2(
93; CHECK:       {
94; CHECK-NEXT:    .reg .pred %p<2>;
95; CHECK-NEXT:    .reg .b32 %r<6>;
96; CHECK-EMPTY:
97; CHECK-NEXT:  // %bb.0:
98; CHECK-NEXT:    ld.param.u32 %r1, [test2_rev2_param_0];
99; CHECK-NEXT:    ld.param.u32 %r2, [test2_rev2_param_1];
100; CHECK-NEXT:    ld.param.u32 %r3, [test2_rev2_param_2];
101; CHECK-NEXT:    setp.lt.s32 %p1, %r3, 1;
102; CHECK-NEXT:    mad.lo.s32 %r4, %r2, %r1, %r2;
103; CHECK-NEXT:    selp.b32 %r5, %r4, %r2, %p1;
104; CHECK-NEXT:    st.param.b32 [func_retval0], %r5;
105; CHECK-NEXT:    ret;
106  %add = add i32 %n, 1
107  %cond = icmp slt i32 %s, 1
108  %sel = select i1 %cond, i32 %add, i32 1
109  %mul = mul i32  %m, %sel
110  ret i32 %mul
111}
112
113;; Leave (mul (select)) intact if transposing it is not profitable
;; Same shape as test2, but the add constant is 3 instead of 1. The expected
;; PTX keeps separate add.s32, selp.b32 and mul.lo.s32 instructions and
;; contains no mad.
114define i32 @test3(i32 %n, i32 %m, i32 %s) {
115;
116; CHECK-LABEL: test3(
117; CHECK:       {
118; CHECK-NEXT:    .reg .pred %p<2>;
119; CHECK-NEXT:    .reg .b32 %r<7>;
120; CHECK-EMPTY:
121; CHECK-NEXT:  // %bb.0:
122; CHECK-NEXT:    ld.param.u32 %r1, [test3_param_0];
123; CHECK-NEXT:    add.s32 %r2, %r1, 3;
124; CHECK-NEXT:    ld.param.u32 %r3, [test3_param_1];
125; CHECK-NEXT:    ld.param.u32 %r4, [test3_param_2];
126; CHECK-NEXT:    setp.lt.s32 %p1, %r4, 1;
127; CHECK-NEXT:    selp.b32 %r5, 1, %r2, %p1;
128; CHECK-NEXT:    mul.lo.s32 %r6, %r5, %r3;
129; CHECK-NEXT:    st.param.b32 [func_retval0], %r6;
130; CHECK-NEXT:    ret;
131  %add = add i32 %n, 3
132  %cond = icmp slt i32 %s, 1
133  %sel = select i1 %cond, i32 1, i32 %add
134  %mul = mul i32 %sel, %m
135  ret i32 %mul
136}
137
138;; (add (select 0, (mul a, b)), c) -> (select (mad a, b, c), c)
;; In this function the select yields %mul when %p is true and 0 otherwise, and
;; the result is added to %c. The expected PTX derives the predicate from the
;; i1 parameter (byte load, mask with 1, compare against 1), computes
;; a * b + c with one mad.lo.s32, and uses selp to return the mad result when
;; the predicate is set and %c otherwise.
139define i32 @test4(i32 %a, i32 %b, i32 %c, i1 %p) {
140; CHECK-LABEL: test4(
141; CHECK:       {
142; CHECK-NEXT:    .reg .pred %p<2>;
143; CHECK-NEXT:    .reg .b16 %rs<3>;
144; CHECK-NEXT:    .reg .b32 %r<6>;
145; CHECK-EMPTY:
146; CHECK-NEXT:  // %bb.0:
147; CHECK-NEXT:    ld.param.u8 %rs1, [test4_param_3];
148; CHECK-NEXT:    and.b16 %rs2, %rs1, 1;
149; CHECK-NEXT:    setp.eq.b16 %p1, %rs2, 1;
150; CHECK-NEXT:    ld.param.u32 %r1, [test4_param_0];
151; CHECK-NEXT:    ld.param.u32 %r2, [test4_param_1];
152; CHECK-NEXT:    ld.param.u32 %r3, [test4_param_2];
153; CHECK-NEXT:    mad.lo.s32 %r4, %r1, %r2, %r3;
154; CHECK-NEXT:    selp.b32 %r5, %r4, %r3, %p1;
155; CHECK-NEXT:    st.param.b32 [func_retval0], %r5;
156; CHECK-NEXT:    ret;
157  %mul = mul i32 %a, %b
158  %sel = select i1 %p, i32 %mul, i32 0
159  %add = add i32 %c, %sel
160  ret i32 %add
161}
162
;; Variant of test4 with the select arms swapped: the select yields 0 when %p
;; is true and %mul otherwise, and the result is added to %c. The expected PTX
;; still computes a * b + c with one mad.lo.s32, but the selp operands are
;; swapped so that %c is returned when the predicate is set and the mad result
;; otherwise.
163define i32 @test4_rev(i32 %a, i32 %b, i32 %c, i1 %p) {
164; CHECK-LABEL: test4_rev(
165; CHECK:       {
166; CHECK-NEXT:    .reg .pred %p<2>;
167; CHECK-NEXT:    .reg .b16 %rs<3>;
168; CHECK-NEXT:    .reg .b32 %r<6>;
169; CHECK-EMPTY:
170; CHECK-NEXT:  // %bb.0:
171; CHECK-NEXT:    ld.param.u8 %rs1, [test4_rev_param_3];
172; CHECK-NEXT:    and.b16 %rs2, %rs1, 1;
173; CHECK-NEXT:    setp.eq.b16 %p1, %rs2, 1;
174; CHECK-NEXT:    ld.param.u32 %r1, [test4_rev_param_0];
175; CHECK-NEXT:    ld.param.u32 %r2, [test4_rev_param_1];
176; CHECK-NEXT:    ld.param.u32 %r3, [test4_rev_param_2];
177; CHECK-NEXT:    mad.lo.s32 %r4, %r1, %r2, %r3;
178; CHECK-NEXT:    selp.b32 %r5, %r3, %r4, %p1;
179; CHECK-NEXT:    st.param.b32 [func_retval0], %r5;
180; CHECK-NEXT:    ret;
181  %mul = mul i32 %a, %b
182  %sel = select i1 %p, i32 0, i32 %mul
183  %add = add i32 %c, %sel
184  ret i32 %add
185}
186
;; External two-argument function; test_mad_multi_use passes it both the
;; product and the sum so that the product has a use besides the add.
187declare i32 @use(i32 %0, i32 %1)
188
;; %mul has two uses here: it feeds %add and is also passed directly to @use.
;; The expected PTX keeps a separate mul.lo.s32 and add.s32 (no mad) and
;; passes both results as call parameters.
189define i32 @test_mad_multi_use(i32 %a, i32 %b, i32 %c) {
190; CHECK-LABEL: test_mad_multi_use(
191; CHECK:       {
192; CHECK-NEXT:    .reg .b32 %r<8>;
193; CHECK-EMPTY:
194; CHECK-NEXT:  // %bb.0:
195; CHECK-NEXT:    ld.param.u32 %r1, [test_mad_multi_use_param_0];
196; CHECK-NEXT:    ld.param.u32 %r2, [test_mad_multi_use_param_1];
197; CHECK-NEXT:    mul.lo.s32 %r3, %r1, %r2;
198; CHECK-NEXT:    ld.param.u32 %r4, [test_mad_multi_use_param_2];
199; CHECK-NEXT:    add.s32 %r5, %r3, %r4;
200; CHECK-NEXT:    { // callseq 0, 0
201; CHECK-NEXT:    .param .b32 param0;
202; CHECK-NEXT:    st.param.b32 [param0], %r3;
203; CHECK-NEXT:    .param .b32 param1;
204; CHECK-NEXT:    st.param.b32 [param1], %r5;
205; CHECK-NEXT:    .param .b32 retval0;
206; CHECK-NEXT:    call.uni (retval0),
207; CHECK-NEXT:    use,
208; CHECK-NEXT:    (
209; CHECK-NEXT:    param0,
210; CHECK-NEXT:    param1
211; CHECK-NEXT:    );
212; CHECK-NEXT:    ld.param.b32 %r6, [retval0];
213; CHECK-NEXT:    } // callseq 0
214; CHECK-NEXT:    st.param.b32 [func_retval0], %r6;
215; CHECK-NEXT:    ret;
216  %mul = mul i32 %a, %b
217  %add = add i32 %mul, %c
218  %res = call i32 @use(i32 %mul, i32 %add)
219  ret i32 %res
220}
221
222;; This case relies on mad x 1 y => add x y; previously we emitted:
223;;     mad.lo.s32      %r3, %r1, 1, %r2;
;; The IR is a signed division of %x by the constant 8191. The expected PTX
;; contains a mul.hi.s32 by a constant followed by add.s32 and shift
;; instructions, with a plain add.s32 where the mad-by-1 above used to appear.
224define i32 @test_mad_fold(i32 %x) {
225; CHECK-LABEL: test_mad_fold(
226; CHECK:       {
227; CHECK-NEXT:    .reg .b32 %r<7>;
228; CHECK-EMPTY:
229; CHECK-NEXT:  // %bb.0:
230; CHECK-NEXT:    ld.param.u32 %r1, [test_mad_fold_param_0];
231; CHECK-NEXT:    mul.hi.s32 %r2, %r1, -2147221471;
232; CHECK-NEXT:    add.s32 %r3, %r2, %r1;
233; CHECK-NEXT:    shr.u32 %r4, %r3, 31;
234; CHECK-NEXT:    shr.s32 %r5, %r3, 12;
235; CHECK-NEXT:    add.s32 %r6, %r5, %r4;
236; CHECK-NEXT:    st.param.b32 [func_retval0], %r6;
237; CHECK-NEXT:    ret;
238  %div = sdiv i32 %x, 8191
239  ret i32 %div
240}
241