xref: /llvm-project/llvm/test/CodeGen/SystemZ/int-uadd-03.ll (revision 872276de4b8c5f13f106b79c53a27e4a6ff8ce35)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2; Test additions between an i64 and a zero-extended i32.
3;
4; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
5
6declare i64 @foo()
7
8; Check ALGFR.
9define zeroext i1 @f1(i64 %dummy, i64 %a, i32 %b, ptr %res) {
10; CHECK-LABEL: f1:
11; CHECK:       # %bb.0:
12; CHECK-NEXT:    algfr %r3, %r4
13; CHECK-NEXT:    ipm %r0
14; CHECK-NEXT:    risbg %r2, %r0, 63, 191, 35
15; CHECK-NEXT:    stg %r3, 0(%r5)
16; CHECK-NEXT:    br %r14
17  %bext = zext i32 %b to i64     ; zero-extended i32 addend: expect register-form ALGFR
18  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %bext)
19  %val = extractvalue {i64, i1} %t, 0
20  %obit = extractvalue {i64, i1} %t, 1     ; carry bit materialized via IPM + RISBG (see CHECK lines)
21  store i64 %val, ptr %res
22  ret i1 %obit
23}
24
25; Check using the overflow result for a branch.
26define void @f2(i64 %dummy, i64 %a, i32 %b, ptr %res) {
27; CHECK-LABEL: f2:
28; CHECK:       # %bb.0:
29; CHECK-NEXT:    algfr %r3, %r4
30; CHECK-NEXT:    stg %r3, 0(%r5)
31; CHECK-NEXT:    jgnle foo@PLT
32; CHECK-NEXT:  .LBB1_1: # %exit
33; CHECK-NEXT:    br %r14
34  %bext = zext i32 %b to i64
35  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %bext)
36  %val = extractvalue {i64, i1} %t, 0
37  %obit = extractvalue {i64, i1} %t, 1
38  store i64 %val, ptr %res
39  br i1 %obit, label %call, label %exit     ; overflow taken: expect fused conditional sibling call (jgnle)
40
41call:
42  tail call i64 @foo()
43  br label %exit
44
45exit:
46  ret void
47}
48
49; ... and the same with the inverted direction.
50define void @f3(i64 %dummy, i64 %a, i32 %b, ptr %res) {
51; CHECK-LABEL: f3:
52; CHECK:       # %bb.0:
53; CHECK-NEXT:    algfr %r3, %r4
54; CHECK-NEXT:    stg %r3, 0(%r5)
55; CHECK-NEXT:    jgle foo@PLT
56; CHECK-NEXT:  .LBB2_1: # %exit
57; CHECK-NEXT:    br %r14
58  %bext = zext i32 %b to i64
59  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %bext)
60  %val = extractvalue {i64, i1} %t, 0
61  %obit = extractvalue {i64, i1} %t, 1
62  store i64 %val, ptr %res
63  br i1 %obit, label %exit, label %call     ; inverted sense of f2: no-overflow path calls foo (jgle)
64
65call:
66  tail call i64 @foo()
67  br label %exit
68
69exit:
70  ret void
71}
72
73; Check ALGF with no displacement.
74define zeroext i1 @f4(i64 %dummy, i64 %a, ptr %src, ptr %res) {
75; CHECK-LABEL: f4:
76; CHECK:       # %bb.0:
77; CHECK-NEXT:    algf %r3, 0(%r4)
78; CHECK-NEXT:    ipm %r0
79; CHECK-NEXT:    risbg %r2, %r0, 63, 191, 35
80; CHECK-NEXT:    stg %r3, 0(%r5)
81; CHECK-NEXT:    br %r14
82  %b = load i32, ptr %src     ; load folds into the memory-form ALGF with displacement 0
83  %bext = zext i32 %b to i64
84  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %bext)
85  %val = extractvalue {i64, i1} %t, 0
86  %obit = extractvalue {i64, i1} %t, 1
87  store i64 %val, ptr %res
88  ret i1 %obit
89}
90
91; Check the high end of the aligned ALGF range.
92define zeroext i1 @f5(i64 %dummy, i64 %a, ptr %src, ptr %res) {
93; CHECK-LABEL: f5:
94; CHECK:       # %bb.0:
95; CHECK-NEXT:    algf %r3, 524284(%r4)
96; CHECK-NEXT:    ipm %r0
97; CHECK-NEXT:    risbg %r2, %r0, 63, 191, 35
98; CHECK-NEXT:    stg %r3, 0(%r5)
99; CHECK-NEXT:    br %r14
100  %ptr = getelementptr i32, ptr %src, i64 131071     ; 131071 * 4 = 524284, highest word-aligned ALGF displacement
101  %b = load i32, ptr %ptr
102  %bext = zext i32 %b to i64
103  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %bext)
104  %val = extractvalue {i64, i1} %t, 0
105  %obit = extractvalue {i64, i1} %t, 1
106  store i64 %val, ptr %res
107  ret i1 %obit
108}
109
110; Check the next doubleword up, which needs separate address logic.
111; Other sequences besides this one would be OK.
112define zeroext i1 @f6(i64 %dummy, i64 %a, ptr %src, ptr %res) {
113; CHECK-LABEL: f6:
114; CHECK:       # %bb.0:
115; CHECK-NEXT:    agfi %r4, 524288
116; CHECK-NEXT:    algf %r3, 0(%r4)
117; CHECK-NEXT:    ipm %r0
118; CHECK-NEXT:    risbg %r2, %r0, 63, 191, 35
119; CHECK-NEXT:    stg %r3, 0(%r5)
120; CHECK-NEXT:    br %r14
121  %ptr = getelementptr i32, ptr %src, i64 131072     ; 131072 * 4 = 524288, just past the range: expect AGFI base adjust
122  %b = load i32, ptr %ptr
123  %bext = zext i32 %b to i64
124  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %bext)
125  %val = extractvalue {i64, i1} %t, 0
126  %obit = extractvalue {i64, i1} %t, 1
127  store i64 %val, ptr %res
128  ret i1 %obit
129}
130
131; Check the high end of the negative aligned ALGF range.
132define zeroext i1 @f7(i64 %dummy, i64 %a, ptr %src, ptr %res) {
133; CHECK-LABEL: f7:
134; CHECK:       # %bb.0:
135; CHECK-NEXT:    algf %r3, -4(%r4)
136; CHECK-NEXT:    ipm %r0
137; CHECK-NEXT:    risbg %r2, %r0, 63, 191, 35
138; CHECK-NEXT:    stg %r3, 0(%r5)
139; CHECK-NEXT:    br %r14
140  %ptr = getelementptr i32, ptr %src, i64 -1     ; one word below %src: displacement -4
141  %b = load i32, ptr %ptr
142  %bext = zext i32 %b to i64
143  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %bext)
144  %val = extractvalue {i64, i1} %t, 0
145  %obit = extractvalue {i64, i1} %t, 1
146  store i64 %val, ptr %res
147  ret i1 %obit
148}
149
150; Check the low end of the ALGF range.
151define zeroext i1 @f8(i64 %dummy, i64 %a, ptr %src, ptr %res) {
152; CHECK-LABEL: f8:
153; CHECK:       # %bb.0:
154; CHECK-NEXT:    algf %r3, -524288(%r4)
155; CHECK-NEXT:    ipm %r0
156; CHECK-NEXT:    risbg %r2, %r0, 63, 191, 35
157; CHECK-NEXT:    stg %r3, 0(%r5)
158; CHECK-NEXT:    br %r14
159  %ptr = getelementptr i32, ptr %src, i64 -131072     ; -131072 * 4 = -524288, lowest ALGF displacement
160  %b = load i32, ptr %ptr
161  %bext = zext i32 %b to i64
162  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %bext)
163  %val = extractvalue {i64, i1} %t, 0
164  %obit = extractvalue {i64, i1} %t, 1
165  store i64 %val, ptr %res
166  ret i1 %obit
167}
168
169; Check the next doubleword down, which needs separate address logic.
170; Other sequences besides this one would be OK.
171define zeroext i1 @f9(i64 %dummy, i64 %a, ptr %src, ptr %res) {
172; CHECK-LABEL: f9:
173; CHECK:       # %bb.0:
174; CHECK-NEXT:    agfi %r4, -524292
175; CHECK-NEXT:    algf %r3, 0(%r4)
176; CHECK-NEXT:    ipm %r0
177; CHECK-NEXT:    risbg %r2, %r0, 63, 191, 35
178; CHECK-NEXT:    stg %r3, 0(%r5)
179; CHECK-NEXT:    br %r14
180  %ptr = getelementptr i32, ptr %src, i64 -131073     ; -131073 * 4 = -524292, below the range: expect AGFI base adjust
181  %b = load i32, ptr %ptr
182  %bext = zext i32 %b to i64
183  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %bext)
184  %val = extractvalue {i64, i1} %t, 0
185  %obit = extractvalue {i64, i1} %t, 1
186  store i64 %val, ptr %res
187  ret i1 %obit
188}
189
190; Check that ALGF allows an index.
191define zeroext i1 @f10(i64 %src, i64 %index, i64 %a, ptr %res) {
192; CHECK-LABEL: f10:
193; CHECK:       # %bb.0:
194; CHECK-NEXT:    algf %r4, 524284(%r3,%r2)
195; CHECK-NEXT:    ipm %r0
196; CHECK-NEXT:    risbg %r2, %r0, 63, 191, 35
197; CHECK-NEXT:    stg %r4, 0(%r5)
198; CHECK-NEXT:    br %r14
199  %add1 = add i64 %src, %index     ; base + index built explicitly via inttoptr ...
200  %add2 = add i64 %add1, 524284     ; ... plus displacement: expect one ALGF in base+index+disp form
201  %ptr = inttoptr i64 %add2 to ptr
202  %b = load i32, ptr %ptr
203  %bext = zext i32 %b to i64
204  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %bext)
205  %val = extractvalue {i64, i1} %t, 0
206  %obit = extractvalue {i64, i1} %t, 1
207  store i64 %val, ptr %res
208  ret i1 %obit
209}
210
211; Check that additions of spilled values can use ALGF rather than ALGFR.
212define zeroext i1 @f11(ptr %ptr0) {
213; CHECK-LABEL: f11:
214; CHECK:       # %bb.0:
215; CHECK-NEXT:    stmg %r6, %r15, 48(%r15)
216; CHECK-NEXT:    .cfi_offset %r6, -112
217; CHECK-NEXT:    .cfi_offset %r7, -104
218; CHECK-NEXT:    .cfi_offset %r8, -96
219; CHECK-NEXT:    .cfi_offset %r9, -88
220; CHECK-NEXT:    .cfi_offset %r10, -80
221; CHECK-NEXT:    .cfi_offset %r11, -72
222; CHECK-NEXT:    .cfi_offset %r12, -64
223; CHECK-NEXT:    .cfi_offset %r13, -56
224; CHECK-NEXT:    .cfi_offset %r14, -48
225; CHECK-NEXT:    .cfi_offset %r15, -40
226; CHECK-NEXT:    aghi %r15, -168
227; CHECK-NEXT:    .cfi_def_cfa_offset 328
228; CHECK-NEXT:    lhi %r0, 100
229; CHECK-NEXT:    lhi %r12, 100
230; CHECK-NEXT:    a %r12, 0(%r2)
231; CHECK-NEXT:    lhi %r13, 100
232; CHECK-NEXT:    a %r13, 8(%r2)
233; CHECK-NEXT:    lhi %r6, 100
234; CHECK-NEXT:    a %r6, 16(%r2)
235; CHECK-NEXT:    lhi %r7, 100
236; CHECK-NEXT:    a %r7, 24(%r2)
237; CHECK-NEXT:    lhi %r8, 100
238; CHECK-NEXT:    a %r8, 32(%r2)
239; CHECK-NEXT:    lhi %r9, 100
240; CHECK-NEXT:    a %r9, 40(%r2)
241; CHECK-NEXT:    lhi %r10, 100
242; CHECK-NEXT:    a %r10, 48(%r2)
243; CHECK-NEXT:    lhi %r11, 100
244; CHECK-NEXT:    a %r11, 56(%r2)
245; CHECK-NEXT:    lhi %r1, 100
246; CHECK-NEXT:    a %r1, 64(%r2)
247; CHECK-NEXT:    st %r1, 160(%r15) # 4-byte Folded Spill
248; CHECK-NEXT:    a %r0, 72(%r2)
249; CHECK-NEXT:    st %r0, 164(%r15) # 4-byte Folded Spill
250; CHECK-NEXT:    st %r12, 0(%r2)
251; CHECK-NEXT:    st %r13, 8(%r2)
252; CHECK-NEXT:    st %r6, 16(%r2)
253; CHECK-NEXT:    st %r7, 24(%r2)
254; CHECK-NEXT:    st %r8, 32(%r2)
255; CHECK-NEXT:    st %r9, 40(%r2)
256; CHECK-NEXT:    st %r10, 48(%r2)
257; CHECK-NEXT:    st %r11, 56(%r2)
258; CHECK-NEXT:    st %r1, 64(%r2)
259; CHECK-NEXT:    st %r0, 72(%r2)
260; CHECK-NEXT:    brasl %r14, foo@PLT
261; CHECK-NEXT:    algfr %r2, %r12
262; CHECK-NEXT:    ipm %r0
263; CHECK-NEXT:    risbg %r0, %r0, 63, 191, 35
264; CHECK-NEXT:    algfr %r2, %r13
265; CHECK-NEXT:    ipm %r1
266; CHECK-NEXT:    rosbg %r0, %r1, 63, 63, 35
267; CHECK-NEXT:    algfr %r2, %r6
268; CHECK-NEXT:    ipm %r1
269; CHECK-NEXT:    rosbg %r0, %r1, 63, 63, 35
270; CHECK-NEXT:    algfr %r2, %r7
271; CHECK-NEXT:    ipm %r1
272; CHECK-NEXT:    rosbg %r0, %r1, 63, 63, 35
273; CHECK-NEXT:    algfr %r2, %r8
274; CHECK-NEXT:    ipm %r1
275; CHECK-NEXT:    rosbg %r0, %r1, 63, 63, 35
276; CHECK-NEXT:    algfr %r2, %r9
277; CHECK-NEXT:    ipm %r1
278; CHECK-NEXT:    rosbg %r0, %r1, 63, 63, 35
279; CHECK-NEXT:    algfr %r2, %r10
280; CHECK-NEXT:    ipm %r1
281; CHECK-NEXT:    rosbg %r0, %r1, 63, 63, 35
282; CHECK-NEXT:    algfr %r2, %r11
283; CHECK-NEXT:    ipm %r1
284; CHECK-NEXT:    rosbg %r0, %r1, 63, 63, 35
285; CHECK-NEXT:    algf %r2, 160(%r15) # 4-byte Folded Reload
286; CHECK-NEXT:    ipm %r1
287; CHECK-NEXT:    rosbg %r0, %r1, 63, 63, 35
288; CHECK-NEXT:    algf %r2, 164(%r15) # 4-byte Folded Reload
289; CHECK-NEXT:    ipm %r1
290; CHECK-NEXT:    rosbg %r0, %r1, 63, 63, 35
291; CHECK-NEXT:    risbg %r2, %r0, 63, 191, 0
292; CHECK-NEXT:    lmg %r6, %r15, 216(%r15)
293; CHECK-NEXT:    br %r14
; Ten values derived before the call; more than fit in callee-saved GPRs,
; so the register allocator must spill some across the call to @foo.
294  %ptr1 = getelementptr i32, ptr %ptr0, i64 2
295  %ptr2 = getelementptr i32, ptr %ptr0, i64 4
296  %ptr3 = getelementptr i32, ptr %ptr0, i64 6
297  %ptr4 = getelementptr i32, ptr %ptr0, i64 8
298  %ptr5 = getelementptr i32, ptr %ptr0, i64 10
299  %ptr6 = getelementptr i32, ptr %ptr0, i64 12
300  %ptr7 = getelementptr i32, ptr %ptr0, i64 14
301  %ptr8 = getelementptr i32, ptr %ptr0, i64 16
302  %ptr9 = getelementptr i32, ptr %ptr0, i64 18
303
304  %val0 = load i32, ptr %ptr0
305  %val1 = load i32, ptr %ptr1
306  %val2 = load i32, ptr %ptr2
307  %val3 = load i32, ptr %ptr3
308  %val4 = load i32, ptr %ptr4
309  %val5 = load i32, ptr %ptr5
310  %val6 = load i32, ptr %ptr6
311  %val7 = load i32, ptr %ptr7
312  %val8 = load i32, ptr %ptr8
313  %val9 = load i32, ptr %ptr9
314
315  %frob0 = add i32 %val0, 100
316  %frob1 = add i32 %val1, 100
317  %frob2 = add i32 %val2, 100
318  %frob3 = add i32 %val3, 100
319  %frob4 = add i32 %val4, 100
320  %frob5 = add i32 %val5, 100
321  %frob6 = add i32 %val6, 100
322  %frob7 = add i32 %val7, 100
323  %frob8 = add i32 %val8, 100
324  %frob9 = add i32 %val9, 100
325
326  store i32 %frob0, ptr %ptr0
327  store i32 %frob1, ptr %ptr1
328  store i32 %frob2, ptr %ptr2
329  store i32 %frob3, ptr %ptr3
330  store i32 %frob4, ptr %ptr4
331  store i32 %frob5, ptr %ptr5
332  store i32 %frob6, ptr %ptr6
333  store i32 %frob7, ptr %ptr7
334  store i32 %frob8, ptr %ptr8
335  store i32 %frob9, ptr %ptr9
336
337  %ret = call i64 @foo()     ; two frobbed values end up spilled; per CHECK they reload via memory-form ALGF
338
339  %ext0 = zext i32 %frob0 to i64
340  %ext1 = zext i32 %frob1 to i64
341  %ext2 = zext i32 %frob2 to i64
342  %ext3 = zext i32 %frob3 to i64
343  %ext4 = zext i32 %frob4 to i64
344  %ext5 = zext i32 %frob5 to i64
345  %ext6 = zext i32 %frob6 to i64
346  %ext7 = zext i32 %frob7 to i64
347  %ext8 = zext i32 %frob8 to i64
348  %ext9 = zext i32 %frob9 to i64
349
; Chain of overflowing adds; individual overflow bits are OR'd into one result.
350  %t0 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %ret, i64 %ext0)
351  %add0 = extractvalue {i64, i1} %t0, 0
352  %obit0 = extractvalue {i64, i1} %t0, 1
353  %t1 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add0, i64 %ext1)
354  %add1 = extractvalue {i64, i1} %t1, 0
355  %obit1 = extractvalue {i64, i1} %t1, 1
356  %res1 = or i1 %obit0, %obit1
357  %t2 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add1, i64 %ext2)
358  %add2 = extractvalue {i64, i1} %t2, 0
359  %obit2 = extractvalue {i64, i1} %t2, 1
360  %res2 = or i1 %res1, %obit2
361  %t3 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add2, i64 %ext3)
362  %add3 = extractvalue {i64, i1} %t3, 0
363  %obit3 = extractvalue {i64, i1} %t3, 1
364  %res3 = or i1 %res2, %obit3
365  %t4 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add3, i64 %ext4)
366  %add4 = extractvalue {i64, i1} %t4, 0
367  %obit4 = extractvalue {i64, i1} %t4, 1
368  %res4 = or i1 %res3, %obit4
369  %t5 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add4, i64 %ext5)
370  %add5 = extractvalue {i64, i1} %t5, 0
371  %obit5 = extractvalue {i64, i1} %t5, 1
372  %res5 = or i1 %res4, %obit5
373  %t6 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add5, i64 %ext6)
374  %add6 = extractvalue {i64, i1} %t6, 0
375  %obit6 = extractvalue {i64, i1} %t6, 1
376  %res6 = or i1 %res5, %obit6
377  %t7 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add6, i64 %ext7)
378  %add7 = extractvalue {i64, i1} %t7, 0
379  %obit7 = extractvalue {i64, i1} %t7, 1
380  %res7 = or i1 %res6, %obit7
381  %t8 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add7, i64 %ext8)
382  %add8 = extractvalue {i64, i1} %t8, 0
383  %obit8 = extractvalue {i64, i1} %t8, 1
384  %res8 = or i1 %res7, %obit8
385  %t9 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add8, i64 %ext9)
386  %add9 = extractvalue {i64, i1} %t9, 0
387  %obit9 = extractvalue {i64, i1} %t9, 1
388  %res9 = or i1 %res8, %obit9
389
390  ret i1 %res9
391}
392
393declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
394
395