xref: /llvm-project/llvm/test/CodeGen/SystemZ/int-ssub-04.ll (revision a1710eb3cd5823c5d14899112ca3086acbdbe9cb)
1; Test subtractions between an i64 and a sign-extended i32.
2;
3; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
4
; External function with no arguments returning i64; it is the callee in the
; branch tests (f2, f3) and in the spill test (f11) in this file.
5declare i64 @foo()
6
7; Check SGFR.
; %b is sign-extended to i64 and subtracted from %a with the signed
; subtract-with-overflow intrinsic.  The difference is stored through %res and
; the overflow bit is the zero-extended return value.  %dummy is not used by
; the body; the expected output has %a in %r3, %b in %r4 and %res in %r5, and
; builds the returned bit in %r2 with an ipm/afi/risbg sequence.
8define zeroext i1 @f1(i64 %dummy, i64 %a, i32 %b, ptr %res) {
9; CHECK-LABEL: f1:
10; CHECK: sgfr %r3, %r4
11; CHECK-DAG: stg %r3, 0(%r5)
12; CHECK-DAG: ipm [[REG:%r[0-5]]]
13; CHECK-DAG: afi [[REG]], 1342177280
14; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
15; CHECK: br %r14
16  %bext = sext i32 %b to i64 ; widen the i32 operand to i64
17  %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %bext)
18  %val = extractvalue {i64, i1} %t, 0 ; the i64 difference
19  %obit = extractvalue {i64, i1} %t, 1 ; the i1 overflow indicator
20  store i64 %val, ptr %res
21  ret i1 %obit
22}
23
24; Check using the overflow result for a branch.
; Same subtraction as the SGFR test, but the overflow bit selects between
; tail-calling @foo (overflow set) and returning directly (overflow clear).
; The expected output performs the call as a conditional jump (jgo) to
; foo@PLT, with a plain return on the other path.
25define void @f2(i64 %dummy, i64 %a, i32 %b, ptr %res) {
26; CHECK-LABEL: f2:
27; CHECK: sgfr %r3, %r4
28; CHECK: stg %r3, 0(%r5)
29; CHECK: jgo foo@PLT
30; CHECK: br %r14
31  %bext = sext i32 %b to i64
32  %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %bext)
33  %val = extractvalue {i64, i1} %t, 0
34  %obit = extractvalue {i64, i1} %t, 1
35  store i64 %val, ptr %res
36  br i1 %obit, label %call, label %exit ; overflow -> %call
37
38call:
39  tail call i64 @foo() ; result of @foo is discarded
40  br label %exit
41
42exit:
43  ret void
44}
45
46; ... and the same with the inverted direction.
; Here the branch targets are swapped: @foo is tail-called when the overflow
; bit is clear, and the function returns directly when it is set.  The
; expected conditional jump is therefore jgno instead of jgo.
47define void @f3(i64 %dummy, i64 %a, i32 %b, ptr %res) {
48; CHECK-LABEL: f3:
49; CHECK: sgfr %r3, %r4
50; CHECK: stg %r3, 0(%r5)
51; CHECK: jgno foo@PLT
52; CHECK: br %r14
53  %bext = sext i32 %b to i64
54  %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %bext)
55  %val = extractvalue {i64, i1} %t, 0
56  %obit = extractvalue {i64, i1} %t, 1
57  store i64 %val, ptr %res
58  br i1 %obit, label %exit, label %call ; no overflow -> %call
59
60call:
61  tail call i64 @foo() ; result of @foo is discarded
62  br label %exit
63
64exit:
65  ret void
66}
67
68; Check SGF with no displacement.
; The i32 operand is loaded directly from %src, sign-extended and subtracted
; from %a.  The expected output folds the load and extension into a single
; sgf with displacement 0 off %r4; the overflow bit is returned as in the
; register-operand test.
69define zeroext i1 @f4(i64 %dummy, i64 %a, ptr %src, ptr %res) {
70; CHECK-LABEL: f4:
71; CHECK: sgf %r3, 0(%r4)
72; CHECK-DAG: stg %r3, 0(%r5)
73; CHECK-DAG: ipm [[REG:%r[0-5]]]
74; CHECK-DAG: afi [[REG]], 1342177280
75; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
76; CHECK: br %r14
77  %b = load i32, ptr %src ; memory operand at offset 0
78  %bext = sext i32 %b to i64
79  %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %bext)
80  %val = extractvalue {i64, i1} %t, 0
81  %obit = extractvalue {i64, i1} %t, 1
82  store i64 %val, ptr %res
83  ret i1 %obit
84}
85
86; Check the high end of the aligned SGF range.
; The operand is the i32 element at index 131071 from %src, i.e. byte offset
; 131071 * 4 = 524284, which the expected sgf encodes directly as its
; displacement.
87define zeroext i1 @f5(i64 %dummy, i64 %a, ptr %src, ptr %res) {
88; CHECK-LABEL: f5:
89; CHECK: sgf %r3, 524284(%r4)
90; CHECK-DAG: stg %r3, 0(%r5)
91; CHECK-DAG: ipm [[REG:%r[0-5]]]
92; CHECK-DAG: afi [[REG]], 1342177280
93; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
94; CHECK: br %r14
95  %ptr = getelementptr i32, ptr %src, i64 131071 ; byte offset 524284
96  %b = load i32, ptr %ptr
97  %bext = sext i32 %b to i64
98  %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %bext)
99  %val = extractvalue {i64, i1} %t, 0
100  %obit = extractvalue {i64, i1} %t, 1
101  store i64 %val, ptr %res
102  ret i1 %obit
103}
104
105; Check the next word up, which needs separate address logic.
106; Other sequences besides this one would be OK.
; The operand is the i32 element at index 131072 from %src, i.e. byte offset
; 131072 * 4 = 524288.  The expected output first adds that offset to the
; base register with agfi and then uses sgf with displacement 0.
107define zeroext i1 @f6(i64 %dummy, i64 %a, ptr %src, ptr %res) {
108; CHECK-LABEL: f6:
109; CHECK: agfi %r4, 524288
110; CHECK: sgf %r3, 0(%r4)
111; CHECK-DAG: stg %r3, 0(%r5)
112; CHECK-DAG: ipm [[REG:%r[0-5]]]
113; CHECK-DAG: afi [[REG]], 1342177280
114; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
115; CHECK: br %r14
116  %ptr = getelementptr i32, ptr %src, i64 131072 ; byte offset 524288
117  %b = load i32, ptr %ptr
118  %bext = sext i32 %b to i64
119  %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %bext)
120  %val = extractvalue {i64, i1} %t, 0
121  %obit = extractvalue {i64, i1} %t, 1
122  store i64 %val, ptr %res
123  ret i1 %obit
124}
125
126; Check the high end of the negative aligned SGF range.
; The operand is the i32 element at index -1 from %src, i.e. byte offset -4,
; which the expected sgf encodes directly as its displacement.
127define zeroext i1 @f7(i64 %dummy, i64 %a, ptr %src, ptr %res) {
128; CHECK-LABEL: f7:
129; CHECK: sgf %r3, -4(%r4)
130; CHECK-DAG: stg %r3, 0(%r5)
131; CHECK-DAG: ipm [[REG:%r[0-5]]]
132; CHECK-DAG: afi [[REG]], 1342177280
133; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
134; CHECK: br %r14
135  %ptr = getelementptr i32, ptr %src, i64 -1 ; byte offset -4
136  %b = load i32, ptr %ptr
137  %bext = sext i32 %b to i64
138  %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %bext)
139  %val = extractvalue {i64, i1} %t, 0
140  %obit = extractvalue {i64, i1} %t, 1
141  store i64 %val, ptr %res
142  ret i1 %obit
143}
144
145; Check the low end of the SGF range.
; The operand is the i32 element at index -131072 from %src, i.e. byte offset
; -131072 * 4 = -524288, which the expected sgf encodes directly as its
; displacement.
146define zeroext i1 @f8(i64 %dummy, i64 %a, ptr %src, ptr %res) {
147; CHECK-LABEL: f8:
148; CHECK: sgf %r3, -524288(%r4)
149; CHECK-DAG: stg %r3, 0(%r5)
150; CHECK-DAG: ipm [[REG:%r[0-5]]]
151; CHECK-DAG: afi [[REG]], 1342177280
152; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
153; CHECK: br %r14
154  %ptr = getelementptr i32, ptr %src, i64 -131072 ; byte offset -524288
155  %b = load i32, ptr %ptr
156  %bext = sext i32 %b to i64
157  %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %bext)
158  %val = extractvalue {i64, i1} %t, 0
159  %obit = extractvalue {i64, i1} %t, 1
160  store i64 %val, ptr %res
161  ret i1 %obit
162}
163
164; Check the next word down, which needs separate address logic.
165; Other sequences besides this one would be OK.
; The operand is the i32 element at index -131073 from %src, i.e. byte offset
; -131073 * 4 = -524292.  The expected output first adds that offset to the
; base register with agfi and then uses sgf with displacement 0.
166define zeroext i1 @f9(i64 %dummy, i64 %a, ptr %src, ptr %res) {
167; CHECK-LABEL: f9:
168; CHECK: agfi %r4, -524292
169; CHECK: sgf %r3, 0(%r4)
170; CHECK-DAG: stg %r3, 0(%r5)
171; CHECK-DAG: ipm [[REG:%r[0-5]]]
172; CHECK-DAG: afi [[REG]], 1342177280
173; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
174; CHECK: br %r14
175  %ptr = getelementptr i32, ptr %src, i64 -131073 ; byte offset -524292
176  %b = load i32, ptr %ptr
177  %bext = sext i32 %b to i64
178  %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %bext)
179  %val = extractvalue {i64, i1} %t, 0
180  %obit = extractvalue {i64, i1} %t, 1
181  store i64 %val, ptr %res
182  ret i1 %obit
183}
184
185; Check that SGF allows an index.
; The load address is computed as %src + %index + 524284 in integer
; arithmetic and converted to a pointer.  The expected sgf uses displacement
; 524284 with both %r2 and %r3 as address registers, accepted in either
; order.  Unlike the earlier tests there is no %dummy argument, and the
; expected output has %a in %r4.
186define zeroext i1 @f10(i64 %src, i64 %index, i64 %a, ptr %res) {
187; CHECK-LABEL: f10:
188; CHECK: sgf %r4, 524284({{%r3,%r2|%r2,%r3}})
189; CHECK-DAG: stg %r4, 0(%r5)
190; CHECK-DAG: ipm [[REG:%r[0-5]]]
191; CHECK-DAG: afi [[REG]], 1342177280
192; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33
193; CHECK: br %r14
194  %add1 = add i64 %src, %index ; base + index
195  %add2 = add i64 %add1, 524284 ; plus the constant displacement
196  %ptr = inttoptr i64 %add2 to ptr
197  %b = load i32, ptr %ptr
198  %bext = sext i32 %b to i64
199  %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %bext)
200  %val = extractvalue {i64, i1} %t, 0
201  %obit = extractvalue {i64, i1} %t, 1
202  store i64 %val, ptr %res
203  ret i1 %obit
204}
205
206; Check that subtractions of spilled values can use SGF rather than SGFR.
; Ten i32 values are loaded from every second element of %ptr0 (indices
; 0, 2, ..., 18), incremented by 100 and stored back.  After a call to @foo,
; each incremented value is sign-extended and subtracted in turn from the
; running i64 result, which starts as @foo's return value.  The function
; returns the OR of all ten overflow bits.  The incremented values are
; defined before the call and used after it, presumably so that some of them
; must be spilled.  The expected output contains, after the call, an sgf
; whose operand is addressed off %r15 at displacement 160 or 164.
207define zeroext i1 @f11(ptr %ptr0) {
208; CHECK-LABEL: f11:
209; CHECK: brasl %r14, foo@PLT
210; CHECK: sgf %r2, 16{{[04]}}(%r15)
211; CHECK: br %r14
212  %ptr1 = getelementptr i32, ptr %ptr0, i64 2
213  %ptr2 = getelementptr i32, ptr %ptr0, i64 4
214  %ptr3 = getelementptr i32, ptr %ptr0, i64 6
215  %ptr4 = getelementptr i32, ptr %ptr0, i64 8
216  %ptr5 = getelementptr i32, ptr %ptr0, i64 10
217  %ptr6 = getelementptr i32, ptr %ptr0, i64 12
218  %ptr7 = getelementptr i32, ptr %ptr0, i64 14
219  %ptr8 = getelementptr i32, ptr %ptr0, i64 16
220  %ptr9 = getelementptr i32, ptr %ptr0, i64 18
221
; Load the ten i32 inputs.
222  %val0 = load i32, ptr %ptr0
223  %val1 = load i32, ptr %ptr1
224  %val2 = load i32, ptr %ptr2
225  %val3 = load i32, ptr %ptr3
226  %val4 = load i32, ptr %ptr4
227  %val5 = load i32, ptr %ptr5
228  %val6 = load i32, ptr %ptr6
229  %val7 = load i32, ptr %ptr7
230  %val8 = load i32, ptr %ptr8
231  %val9 = load i32, ptr %ptr9
232
; Add 100 to each input, giving ten values that differ from the loaded ones.
233  %frob0 = add i32 %val0, 100
234  %frob1 = add i32 %val1, 100
235  %frob2 = add i32 %val2, 100
236  %frob3 = add i32 %val3, 100
237  %frob4 = add i32 %val4, 100
238  %frob5 = add i32 %val5, 100
239  %frob6 = add i32 %val6, 100
240  %frob7 = add i32 %val7, 100
241  %frob8 = add i32 %val8, 100
242  %frob9 = add i32 %val9, 100
243
; Write the modified values back to the locations they were loaded from.
244  store i32 %frob0, ptr %ptr0
245  store i32 %frob1, ptr %ptr1
246  store i32 %frob2, ptr %ptr2
247  store i32 %frob3, ptr %ptr3
248  store i32 %frob4, ptr %ptr4
249  store i32 %frob5, ptr %ptr5
250  store i32 %frob6, ptr %ptr6
251  store i32 %frob7, ptr %ptr7
252  store i32 %frob8, ptr %ptr8
253  store i32 %frob9, ptr %ptr9
254
; All ten %frob values are still needed after this call.
255  %ret = call i64 @foo()
256
; Sign-extend each modified value to i64 for use as a subtrahend.
257  %ext0 = sext i32 %frob0 to i64
258  %ext1 = sext i32 %frob1 to i64
259  %ext2 = sext i32 %frob2 to i64
260  %ext3 = sext i32 %frob3 to i64
261  %ext4 = sext i32 %frob4 to i64
262  %ext5 = sext i32 %frob5 to i64
263  %ext6 = sext i32 %frob6 to i64
264  %ext7 = sext i32 %frob7 to i64
265  %ext8 = sext i32 %frob8 to i64
266  %ext9 = sext i32 %frob9 to i64
267
; Chain of ten overflow-checked subtractions, starting from %ret.  Note that
; the %addN values hold differences despite their names; %resN accumulates
; the OR of the overflow bits seen so far.
268  %t0 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %ret, i64 %ext0)
269  %add0 = extractvalue {i64, i1} %t0, 0
270  %obit0 = extractvalue {i64, i1} %t0, 1
271  %t1 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add0, i64 %ext1)
272  %add1 = extractvalue {i64, i1} %t1, 0
273  %obit1 = extractvalue {i64, i1} %t1, 1
274  %res1 = or i1 %obit0, %obit1
275  %t2 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add1, i64 %ext2)
276  %add2 = extractvalue {i64, i1} %t2, 0
277  %obit2 = extractvalue {i64, i1} %t2, 1
278  %res2 = or i1 %res1, %obit2
279  %t3 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add2, i64 %ext3)
280  %add3 = extractvalue {i64, i1} %t3, 0
281  %obit3 = extractvalue {i64, i1} %t3, 1
282  %res3 = or i1 %res2, %obit3
283  %t4 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add3, i64 %ext4)
284  %add4 = extractvalue {i64, i1} %t4, 0
285  %obit4 = extractvalue {i64, i1} %t4, 1
286  %res4 = or i1 %res3, %obit4
287  %t5 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add4, i64 %ext5)
288  %add5 = extractvalue {i64, i1} %t5, 0
289  %obit5 = extractvalue {i64, i1} %t5, 1
290  %res5 = or i1 %res4, %obit5
291  %t6 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add5, i64 %ext6)
292  %add6 = extractvalue {i64, i1} %t6, 0
293  %obit6 = extractvalue {i64, i1} %t6, 1
294  %res6 = or i1 %res5, %obit6
295  %t7 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add6, i64 %ext7)
296  %add7 = extractvalue {i64, i1} %t7, 0
297  %obit7 = extractvalue {i64, i1} %t7, 1
298  %res7 = or i1 %res6, %obit7
299  %t8 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add7, i64 %ext8)
300  %add8 = extractvalue {i64, i1} %t8, 0
301  %obit8 = extractvalue {i64, i1} %t8, 1
302  %res8 = or i1 %res7, %obit8
303  %t9 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add8, i64 %ext9)
304  %add9 = extractvalue {i64, i1} %t9, 0
305  %obit9 = extractvalue {i64, i1} %t9, 1
306  %res9 = or i1 %res8, %obit9
307
; Only the combined overflow bit is returned; the final difference %add9 is
; not used.
308  ret i1 %res9
309}
310
; Signed subtract-with-overflow intrinsic on i64, called by every test
; function in this file.  It returns the pair {difference, overflow bit}.
311declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone
312
313