; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; Test 64-bit addition in which the second operand is variable.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

declare i64 @foo()

; Check ALGR.
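; The i1 result is recovered from the condition code: ALGR sets CC 2 or 3 on
; carry, IPM copies the CC into bits 34-35 of the scratch register, and the
; RISBG rotate by 35 moves the carry bit down to bit 63 with all other bits
; cleared, giving the zero-extended boolean.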
define zeroext i1 @f1(i64 %dummy, i64 %a, i64 %b, ptr %res) {
; CHECK-LABEL: f1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    algr %r3, %r4
; CHECK-NEXT:    ipm %r0
; CHECK-NEXT:    risbg %r2, %r0, 63, 191, 35
; CHECK-NEXT:    stg %r3, 0(%r5)
; CHECK-NEXT:    br %r14
  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, ptr %res
  ret i1 %obit
}

; Check using the overflow result for a branch.
define void @f2(i64 %dummy, i64 %a, i64 %b, ptr %res) {
; CHECK-LABEL: f2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    algr %r3, %r4
; CHECK-NEXT:    stg %r3, 0(%r5)
; CHECK-NEXT:    jgnle foo@PLT
; CHECK-NEXT:  .LBB1_1: # %exit
; CHECK-NEXT:    br %r14
  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, ptr %res
  br i1 %obit, label %call, label %exit

call:
  tail call i64 @foo()
  br label %exit

exit:
  ret void
}

; ... and the same with the inverted direction.
define void @f3(i64 %dummy, i64 %a, i64 %b, ptr %res) {
; CHECK-LABEL: f3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    algr %r3, %r4
; CHECK-NEXT:    stg %r3, 0(%r5)
; CHECK-NEXT:    jgle foo@PLT
; CHECK-NEXT:  .LBB2_1: # %exit
; CHECK-NEXT:    br %r14
  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, ptr %res
  br i1 %obit, label %exit, label %call

call:
  tail call i64 @foo()
  br label %exit

exit:
  ret void
}

; Check ALG with no displacement.
define zeroext i1 @f4(i64 %dummy, i64 %a, ptr %src, ptr %res) {
; CHECK-LABEL: f4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    alg %r3, 0(%r4)
; CHECK-NEXT:    ipm %r0
; CHECK-NEXT:    risbg %r2, %r0, 63, 191, 35
; CHECK-NEXT:    stg %r3, 0(%r5)
; CHECK-NEXT:    br %r14
  %b = load i64, ptr %src
  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, ptr %res
  ret i1 %obit
}

; Check the high end of the aligned ALG range.
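; Element 65535 is at byte offset 65535 * 8 = 524280, the largest
; doubleword-aligned offset that still fits ALG's signed 20-bit displacement.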
define zeroext i1 @f5(i64 %dummy, i64 %a, ptr %src, ptr %res) {
; CHECK-LABEL: f5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    alg %r3, 524280(%r4)
; CHECK-NEXT:    ipm %r0
; CHECK-NEXT:    risbg %r2, %r0, 63, 191, 35
; CHECK-NEXT:    stg %r3, 0(%r5)
; CHECK-NEXT:    br %r14
  %ptr = getelementptr i64, ptr %src, i64 65535
  %b = load i64, ptr %ptr
  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, ptr %res
  ret i1 %obit
}

; Check the next doubleword up, which needs separate address logic.
; Other sequences besides this one would be OK.
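; Element 65536 is at byte offset 524288, one past the displacement limit,
; so the base register has to be adjusted first.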
define zeroext i1 @f6(i64 %dummy, i64 %a, ptr %src, ptr %res) {
; CHECK-LABEL: f6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    agfi %r4, 524288
; CHECK-NEXT:    alg %r3, 0(%r4)
; CHECK-NEXT:    ipm %r0
; CHECK-NEXT:    risbg %r2, %r0, 63, 191, 35
; CHECK-NEXT:    stg %r3, 0(%r5)
; CHECK-NEXT:    br %r14
  %ptr = getelementptr i64, ptr %src, i64 65536
  %b = load i64, ptr %ptr
  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, ptr %res
  ret i1 %obit
}

; Check the high end of the negative aligned ALG range.
define zeroext i1 @f7(i64 %dummy, i64 %a, ptr %src, ptr %res) {
; CHECK-LABEL: f7:
; CHECK:       # %bb.0:
; CHECK-NEXT:    alg %r3, -8(%r4)
; CHECK-NEXT:    ipm %r0
; CHECK-NEXT:    risbg %r2, %r0, 63, 191, 35
; CHECK-NEXT:    stg %r3, 0(%r5)
; CHECK-NEXT:    br %r14
  %ptr = getelementptr i64, ptr %src, i64 -1
  %b = load i64, ptr %ptr
  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, ptr %res
  ret i1 %obit
}

; Check the low end of the ALG range.
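; Element -65536 is at byte offset -524288, the most negative displacement
; that ALG can encode.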
define zeroext i1 @f8(i64 %dummy, i64 %a, ptr %src, ptr %res) {
; CHECK-LABEL: f8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    alg %r3, -524288(%r4)
; CHECK-NEXT:    ipm %r0
; CHECK-NEXT:    risbg %r2, %r0, 63, 191, 35
; CHECK-NEXT:    stg %r3, 0(%r5)
; CHECK-NEXT:    br %r14
  %ptr = getelementptr i64, ptr %src, i64 -65536
  %b = load i64, ptr %ptr
  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, ptr %res
  ret i1 %obit
}

; Check the next doubleword down, which needs separate address logic.
; Other sequences besides this one would be OK.
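; Element -65537 is at byte offset -524296, one doubleword below the
; displacement range, so again the base register is adjusted first.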
define zeroext i1 @f9(i64 %dummy, i64 %a, ptr %src, ptr %res) {
; CHECK-LABEL: f9:
; CHECK:       # %bb.0:
; CHECK-NEXT:    agfi %r4, -524296
; CHECK-NEXT:    alg %r3, 0(%r4)
; CHECK-NEXT:    ipm %r0
; CHECK-NEXT:    risbg %r2, %r0, 63, 191, 35
; CHECK-NEXT:    stg %r3, 0(%r5)
; CHECK-NEXT:    br %r14
  %ptr = getelementptr i64, ptr %src, i64 -65537
  %b = load i64, ptr %ptr
  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, ptr %res
  ret i1 %obit
}

; Check that ALG allows an index.
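; The address is computed as %src + %index + 524280, which maps directly onto
; ALG's base + index + displacement addressing form.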
define zeroext i1 @f10(i64 %src, i64 %index, i64 %a, ptr %res) {
; CHECK-LABEL: f10:
; CHECK:       # %bb.0:
; CHECK-NEXT:    alg %r4, 524280(%r3,%r2)
; CHECK-NEXT:    ipm %r0
; CHECK-NEXT:    risbg %r2, %r0, 63, 191, 35
; CHECK-NEXT:    stg %r4, 0(%r5)
; CHECK-NEXT:    br %r14
  %add1 = add i64 %src, %index
  %add2 = add i64 %add1, 524280
  %ptr = inttoptr i64 %add2 to ptr
  %b = load i64, ptr %ptr
  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
  %val = extractvalue {i64, i1} %t, 0
  %obit = extractvalue {i64, i1} %t, 1
  store i64 %val, ptr %res
  ret i1 %obit
}

; Check that additions of spilled values can use ALG rather than ALGR.
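; Only eight of the ten loaded values can stay in call-saved registers across
; the call to @foo; the remaining two are spilled with MVC and later added
; directly from their stack slots with ALG.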
define zeroext i1 @f11(ptr %ptr0) {
; CHECK-LABEL: f11:
; CHECK:       # %bb.0:
; CHECK-NEXT:    stmg %r6, %r15, 48(%r15)
; CHECK-NEXT:    .cfi_offset %r6, -112
; CHECK-NEXT:    .cfi_offset %r7, -104
; CHECK-NEXT:    .cfi_offset %r8, -96
; CHECK-NEXT:    .cfi_offset %r9, -88
; CHECK-NEXT:    .cfi_offset %r10, -80
; CHECK-NEXT:    .cfi_offset %r11, -72
; CHECK-NEXT:    .cfi_offset %r12, -64
; CHECK-NEXT:    .cfi_offset %r13, -56
; CHECK-NEXT:    .cfi_offset %r14, -48
; CHECK-NEXT:    .cfi_offset %r15, -40
; CHECK-NEXT:    aghi %r15, -176
; CHECK-NEXT:    .cfi_def_cfa_offset 336
; CHECK-NEXT:    lg %r6, 0(%r2)
; CHECK-NEXT:    lg %r13, 16(%r2)
; CHECK-NEXT:    lg %r12, 32(%r2)
; CHECK-NEXT:    lg %r7, 48(%r2)
; CHECK-NEXT:    lg %r8, 64(%r2)
; CHECK-NEXT:    lg %r9, 80(%r2)
; CHECK-NEXT:    lg %r10, 96(%r2)
; CHECK-NEXT:    lg %r11, 112(%r2)
; CHECK-NEXT:    mvc 160(8,%r15), 128(%r2) # 8-byte Folded Spill
; CHECK-NEXT:    mvc 168(8,%r15), 144(%r2) # 8-byte Folded Spill
; CHECK-NEXT:    brasl %r14, foo@PLT
; CHECK-NEXT:    algr %r2, %r6
; CHECK-NEXT:    ipm %r0
; CHECK-NEXT:    risbg %r0, %r0, 63, 191, 35
; CHECK-NEXT:    algr %r2, %r13
; CHECK-NEXT:    ipm %r1
; CHECK-NEXT:    rosbg %r0, %r1, 63, 63, 35
; CHECK-NEXT:    algr %r2, %r12
; CHECK-NEXT:    ipm %r1
; CHECK-NEXT:    rosbg %r0, %r1, 63, 63, 35
; CHECK-NEXT:    algr %r2, %r7
; CHECK-NEXT:    ipm %r1
; CHECK-NEXT:    rosbg %r0, %r1, 63, 63, 35
; CHECK-NEXT:    algr %r2, %r8
; CHECK-NEXT:    ipm %r1
; CHECK-NEXT:    rosbg %r0, %r1, 63, 63, 35
; CHECK-NEXT:    algr %r2, %r9
; CHECK-NEXT:    ipm %r1
; CHECK-NEXT:    rosbg %r0, %r1, 63, 63, 35
; CHECK-NEXT:    algr %r2, %r10
; CHECK-NEXT:    ipm %r1
; CHECK-NEXT:    rosbg %r0, %r1, 63, 63, 35
; CHECK-NEXT:    algr %r2, %r11
; CHECK-NEXT:    ipm %r1
; CHECK-NEXT:    rosbg %r0, %r1, 63, 63, 35
; CHECK-NEXT:    alg %r2, 160(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ipm %r1
; CHECK-NEXT:    rosbg %r0, %r1, 63, 63, 35
; CHECK-NEXT:    alg %r2, 168(%r15) # 8-byte Folded Reload
; CHECK-NEXT:    ipm %r1
; CHECK-NEXT:    rosbg %r0, %r1, 63, 63, 35
; CHECK-NEXT:    risbg %r2, %r0, 63, 191, 0
; CHECK-NEXT:    lmg %r6, %r15, 224(%r15)
; CHECK-NEXT:    br %r14
  %ptr1 = getelementptr i64, ptr %ptr0, i64 2
  %ptr2 = getelementptr i64, ptr %ptr0, i64 4
  %ptr3 = getelementptr i64, ptr %ptr0, i64 6
  %ptr4 = getelementptr i64, ptr %ptr0, i64 8
  %ptr5 = getelementptr i64, ptr %ptr0, i64 10
  %ptr6 = getelementptr i64, ptr %ptr0, i64 12
  %ptr7 = getelementptr i64, ptr %ptr0, i64 14
  %ptr8 = getelementptr i64, ptr %ptr0, i64 16
  %ptr9 = getelementptr i64, ptr %ptr0, i64 18

  %val0 = load i64, ptr %ptr0
  %val1 = load i64, ptr %ptr1
  %val2 = load i64, ptr %ptr2
  %val3 = load i64, ptr %ptr3
  %val4 = load i64, ptr %ptr4
  %val5 = load i64, ptr %ptr5
  %val6 = load i64, ptr %ptr6
  %val7 = load i64, ptr %ptr7
  %val8 = load i64, ptr %ptr8
  %val9 = load i64, ptr %ptr9

  %ret = call i64 @foo()

  %t0 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %ret, i64 %val0)
  %add0 = extractvalue {i64, i1} %t0, 0
  %obit0 = extractvalue {i64, i1} %t0, 1
  %t1 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add0, i64 %val1)
  %add1 = extractvalue {i64, i1} %t1, 0
  %obit1 = extractvalue {i64, i1} %t1, 1
  %res1 = or i1 %obit0, %obit1
  %t2 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add1, i64 %val2)
  %add2 = extractvalue {i64, i1} %t2, 0
  %obit2 = extractvalue {i64, i1} %t2, 1
  %res2 = or i1 %res1, %obit2
  %t3 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add2, i64 %val3)
  %add3 = extractvalue {i64, i1} %t3, 0
  %obit3 = extractvalue {i64, i1} %t3, 1
  %res3 = or i1 %res2, %obit3
  %t4 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add3, i64 %val4)
  %add4 = extractvalue {i64, i1} %t4, 0
  %obit4 = extractvalue {i64, i1} %t4, 1
  %res4 = or i1 %res3, %obit4
  %t5 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add4, i64 %val5)
  %add5 = extractvalue {i64, i1} %t5, 0
  %obit5 = extractvalue {i64, i1} %t5, 1
  %res5 = or i1 %res4, %obit5
  %t6 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add5, i64 %val6)
  %add6 = extractvalue {i64, i1} %t6, 0
  %obit6 = extractvalue {i64, i1} %t6, 1
  %res6 = or i1 %res5, %obit6
  %t7 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add6, i64 %val7)
  %add7 = extractvalue {i64, i1} %t7, 0
  %obit7 = extractvalue {i64, i1} %t7, 1
  %res7 = or i1 %res6, %obit7
  %t8 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add7, i64 %val8)
  %add8 = extractvalue {i64, i1} %t8, 0
  %obit8 = extractvalue {i64, i1} %t8, 1
  %res8 = or i1 %res7, %obit8
  %t9 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add8, i64 %val9)
  %add9 = extractvalue {i64, i1} %t9, 0
  %obit9 = extractvalue {i64, i1} %t9, 1
  %res9 = or i1 %res8, %obit9

  ret i1 %res9
}

declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone