xref: /llvm-project/llvm/test/CodeGen/AArch64/arm64-ldp.ll (revision 1ee315ae7964c8433b772e0b5d667834994ba753)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=arm64-eabi -verify-machineinstrs | FileCheck %s
3
; Two i32 loads: one from %p and one from the next i32 element (byte offset 4).
; The CHECK lines expect them to be combined into a single `ldp w8, w9, [x0]`.
4define i32 @ldp_int(ptr %p) nounwind {
5; CHECK-LABEL: ldp_int:
6; CHECK:       // %bb.0:
7; CHECK-NEXT:    ldp w8, w9, [x0]
8; CHECK-NEXT:    add w0, w9, w8
9; CHECK-NEXT:    ret
10  %tmp = load i32, ptr %p, align 4
11  %add.ptr = getelementptr inbounds i32, ptr %p, i64 1
12  %tmp1 = load i32, ptr %add.ptr, align 4
13  %add = add nsw i32 %tmp1, %tmp
14  ret i32 %add
15}
16
; Same adjacent i32 loads as ldp_int, but both results are sign-extended to
; i64. The CHECK lines expect the loads and extensions to fold into one ldpsw.
17define i64 @ldp_sext_int(ptr %p) nounwind {
18; CHECK-LABEL: ldp_sext_int:
19; CHECK:       // %bb.0:
20; CHECK-NEXT:    ldpsw x8, x9, [x0]
21; CHECK-NEXT:    add x0, x9, x8
22; CHECK-NEXT:    ret
23  %tmp = load i32, ptr %p, align 4
24  %add.ptr = getelementptr inbounds i32, ptr %p, i64 1
25  %tmp1 = load i32, ptr %add.ptr, align 4
26  %sexttmp = sext i32 %tmp to i64
27  %sexttmp1 = sext i32 %tmp1 to i64
28  %add = add nsw i64 %sexttmp1, %sexttmp
29  ret i64 %add
30}
31
; Adjacent i32 loads where only the first (offset 0) is sign-extended; the
; second is zero-extended (note: %sexttmp1 is a zext despite its name).
; The CHECK lines expect a plain ldp plus an explicit sxtw on the first
; result (w8) rather than an ldpsw.
32define i64 @ldp_half_sext_res0_int(ptr %p) nounwind {
33; CHECK-LABEL: ldp_half_sext_res0_int:
34; CHECK:       // %bb.0:
35; CHECK-NEXT:    ldp w8, w9, [x0]
36; CHECK-NEXT:    // kill: def $w8 killed $w8 def $x8
37; CHECK-NEXT:    sxtw x8, w8
38; CHECK-NEXT:    add x0, x9, x8
39; CHECK-NEXT:    ret
40  %tmp = load i32, ptr %p, align 4
41  %add.ptr = getelementptr inbounds i32, ptr %p, i64 1
42  %tmp1 = load i32, ptr %add.ptr, align 4
43  %sexttmp = sext i32 %tmp to i64
44  %sexttmp1 = zext i32 %tmp1 to i64
45  %add = add nsw i64 %sexttmp1, %sexttmp
46  ret i64 %add
47}
48
; Mirror of ldp_half_sext_res0_int: the first load is zero-extended
; (%sexttmp is a zext despite its name) and only the second (offset 4) is
; sign-extended. The CHECK lines expect a plain ldp plus sxtw on the second
; result (w9).
49define i64 @ldp_half_sext_res1_int(ptr %p) nounwind {
50; CHECK-LABEL: ldp_half_sext_res1_int:
51; CHECK:       // %bb.0:
52; CHECK-NEXT:    ldp w8, w9, [x0]
53; CHECK-NEXT:    // kill: def $w9 killed $w9 def $x9
54; CHECK-NEXT:    sxtw x9, w9
55; CHECK-NEXT:    add x0, x9, x8
56; CHECK-NEXT:    ret
57  %tmp = load i32, ptr %p, align 4
58  %add.ptr = getelementptr inbounds i32, ptr %p, i64 1
59  %tmp1 = load i32, ptr %add.ptr, align 4
60  %sexttmp = zext i32 %tmp to i64
61  %sexttmp1 = sext i32 %tmp1 to i64
62  %add = add nsw i64 %sexttmp1, %sexttmp
63  ret i64 %add
64}
65
66
; Two i64 loads from %p and the next i64 element (byte offset 8). The CHECK
; lines expect a single 64-bit `ldp x8, x9, [x0]`.
67define i64 @ldp_long(ptr %p) nounwind {
68; CHECK-LABEL: ldp_long:
69; CHECK:       // %bb.0:
70; CHECK-NEXT:    ldp x8, x9, [x0]
71; CHECK-NEXT:    add x0, x9, x8
72; CHECK-NEXT:    ret
73  %tmp = load i64, ptr %p, align 8
74  %add.ptr = getelementptr inbounds i64, ptr %p, i64 1
75  %tmp1 = load i64, ptr %add.ptr, align 8
76  %add = add nsw i64 %tmp1, %tmp
77  ret i64 %add
78}
79
; Two float loads from %p and the next float element (byte offset 4). The
; CHECK lines expect a single `ldp s0, s1, [x0]` feeding the fadd.
80define float @ldp_float(ptr %p) nounwind {
81; CHECK-LABEL: ldp_float:
82; CHECK:       // %bb.0:
83; CHECK-NEXT:    ldp s0, s1, [x0]
84; CHECK-NEXT:    fadd s0, s0, s1
85; CHECK-NEXT:    ret
86  %tmp = load float, ptr %p, align 4
87  %add.ptr = getelementptr inbounds float, ptr %p, i64 1
88  %tmp1 = load float, ptr %add.ptr, align 4
89  %add = fadd float %tmp, %tmp1
90  ret float %add
91}
92
; Two double loads from %p and the next double element (byte offset 8). The
; CHECK lines expect a single `ldp d0, d1, [x0]` feeding the fadd.
93define double @ldp_double(ptr %p) nounwind {
94; CHECK-LABEL: ldp_double:
95; CHECK:       // %bb.0:
96; CHECK-NEXT:    ldp d0, d1, [x0]
97; CHECK-NEXT:    fadd d0, d0, d1
98; CHECK-NEXT:    ret
99  %tmp = load double, ptr %p, align 8
100  %add.ptr = getelementptr inbounds double, ptr %p, i64 1
101  %tmp1 = load double, ptr %add.ptr, align 8
102  %add = fadd double %tmp, %tmp1
103  ret double %add
104}
105
; Two <2 x double> loads from %p and the next vector element (byte offset 16).
; The CHECK lines expect a single `ldp q0, q1, [x0]` feeding a vector fadd.
106define <2 x double> @ldp_doublex2(ptr %p) nounwind {
107; CHECK-LABEL: ldp_doublex2:
108; CHECK:       // %bb.0:
109; CHECK-NEXT:    ldp q0, q1, [x0]
110; CHECK-NEXT:    fadd v0.2d, v0.2d, v1.2d
111; CHECK-NEXT:    ret
112  %tmp = load <2 x double>, ptr %p, align 16
113  %add.ptr = getelementptr inbounds <2 x double>, ptr %p, i64 1
114  %tmp1 = load <2 x double>, ptr %add.ptr, align 16
115  %add = fadd <2 x double> %tmp, %tmp1
116  ret <2 x double> %add
117}
118
119; Test the load/store optimizer---combine ldurs into a ldp, if appropriate
; Two i32 loads at negative element indices -1 and -2 (byte offsets -4 and
; -8), each declared with align 2. The CHECK lines expect one ldp based at
; #-8, with the lower-address value in w9 and the higher-address value in w8.
120define i32 @ldur_int(ptr %a) nounwind {
121; CHECK-LABEL: ldur_int:
122; CHECK:       // %bb.0:
123; CHECK-NEXT:    ldp w9, w8, [x0, #-8]
124; CHECK-NEXT:    add w0, w8, w9
125; CHECK-NEXT:    ret
126  %p1 = getelementptr inbounds i32, ptr %a, i32 -1
127  %tmp1 = load i32, ptr %p1, align 2
128  %p2 = getelementptr inbounds i32, ptr %a, i32 -2
129  %tmp2 = load i32, ptr %p2, align 2
130  %tmp3 = add i32 %tmp1, %tmp2
131  ret i32 %tmp3
132}
133
; Same negative-offset i32 loads as ldur_int (byte offsets -4 and -8), with
; both results sign-extended to i64. The CHECK lines expect a single ldpsw
; based at #-8.
134define i64 @ldur_sext_int(ptr %a) nounwind {
135; CHECK-LABEL: ldur_sext_int:
136; CHECK:       // %bb.0:
137; CHECK-NEXT:    ldpsw x9, x8, [x0, #-8]
138; CHECK-NEXT:    add x0, x8, x9
139; CHECK-NEXT:    ret
140  %p1 = getelementptr inbounds i32, ptr %a, i32 -1
141  %tmp1 = load i32, ptr %p1, align 2
142  %p2 = getelementptr inbounds i32, ptr %a, i32 -2
143  %tmp2 = load i32, ptr %p2, align 2
144  %sexttmp1 = sext i32 %tmp1 to i64
145  %sexttmp2 = sext i32 %tmp2 to i64
146  %tmp3 = add i64 %sexttmp1, %sexttmp2
147  ret i64 %tmp3
148}
149
; Negative-offset i32 loads where only %tmp2 (byte offset -8, the lower
; address) is sign-extended; %tmp1 (offset -4) is zero-extended even though
; its extension is named %sexttmp1. The CHECK lines expect a plain ldp at #-8
; followed by sxtw on its first result register (w9).
150define i64 @ldur_half_sext_int_res0(ptr %a) nounwind {
151; CHECK-LABEL: ldur_half_sext_int_res0:
152; CHECK:       // %bb.0:
153; CHECK-NEXT:    ldp w9, w8, [x0, #-8]
154; CHECK-NEXT:    // kill: def $w9 killed $w9 def $x9
155; CHECK-NEXT:    sxtw x9, w9
156; CHECK-NEXT:    add x0, x8, x9
157; CHECK-NEXT:    ret
158  %p1 = getelementptr inbounds i32, ptr %a, i32 -1
159  %tmp1 = load i32, ptr %p1, align 2
160  %p2 = getelementptr inbounds i32, ptr %a, i32 -2
161  %tmp2 = load i32, ptr %p2, align 2
162  %sexttmp1 = zext i32 %tmp1 to i64
163  %sexttmp2 = sext i32 %tmp2 to i64
164  %tmp3 = add i64 %sexttmp1, %sexttmp2
165  ret i64 %tmp3
166}
167
; Mirror of ldur_half_sext_int_res0: only %tmp1 (byte offset -4, the higher
; address) is sign-extended; %tmp2 (offset -8) is zero-extended even though
; its extension is named %sexttmp2. The CHECK lines expect a plain ldp at #-8
; followed by sxtw on its second result register (w8).
168define i64 @ldur_half_sext_int_res1(ptr %a) nounwind {
169; CHECK-LABEL: ldur_half_sext_int_res1:
170; CHECK:       // %bb.0:
171; CHECK-NEXT:    ldp w9, w8, [x0, #-8]
172; CHECK-NEXT:    // kill: def $w8 killed $w8 def $x8
173; CHECK-NEXT:    sxtw x8, w8
174; CHECK-NEXT:    add x0, x8, x9
175; CHECK-NEXT:    ret
176  %p1 = getelementptr inbounds i32, ptr %a, i32 -1
177  %tmp1 = load i32, ptr %p1, align 2
178  %p2 = getelementptr inbounds i32, ptr %a, i32 -2
179  %tmp2 = load i32, ptr %p2, align 2
180  %sexttmp1 = sext i32 %tmp1 to i64
181  %sexttmp2 = zext i32 %tmp2 to i64
182  %tmp3 = add i64 %sexttmp1, %sexttmp2
183  ret i64 %tmp3
184}
185
186
; Two i64 loads at element indices -1 and -2 (byte offsets -8 and -16), each
; declared with align 2. The CHECK lines expect a single ldp based at #-16.
187define i64 @ldur_long(ptr %a) nounwind ssp {
188; CHECK-LABEL: ldur_long:
189; CHECK:       // %bb.0:
190; CHECK-NEXT:    ldp x9, x8, [x0, #-16]
191; CHECK-NEXT:    add x0, x8, x9
192; CHECK-NEXT:    ret
193  %p1 = getelementptr inbounds i64, ptr %a, i64 -1
194  %tmp1 = load i64, ptr %p1, align 2
195  %p2 = getelementptr inbounds i64, ptr %a, i64 -2
196  %tmp2 = load i64, ptr %p2, align 2
197  %tmp3 = add i64 %tmp1, %tmp2
198  ret i64 %tmp3
199}
200
; Two float loads at element indices -1 and -2 (byte offsets -4 and -8), each
; declared with align 2. The CHECK lines expect a single `ldp s1, s0` based
; at #-8.
201define float @ldur_float(ptr %a) {
202; CHECK-LABEL: ldur_float:
203; CHECK:       // %bb.0:
204; CHECK-NEXT:    ldp s1, s0, [x0, #-8]
205; CHECK-NEXT:    fadd s0, s0, s1
206; CHECK-NEXT:    ret
207  %p1 = getelementptr inbounds float, ptr %a, i64 -1
208  %tmp1 = load float, ptr %p1, align 2
209  %p2 = getelementptr inbounds float, ptr %a, i64 -2
210  %tmp2 = load float, ptr %p2, align 2
211  %tmp3 = fadd float %tmp1, %tmp2
212  ret float %tmp3
213}
214
; Two double loads at element indices -1 and -2 (byte offsets -8 and -16),
; each declared with align 2. The CHECK lines expect a single `ldp d1, d0`
; based at #-16.
215define double @ldur_double(ptr %a) {
216; CHECK-LABEL: ldur_double:
217; CHECK:       // %bb.0:
218; CHECK-NEXT:    ldp d1, d0, [x0, #-16]
219; CHECK-NEXT:    fadd d0, d0, d1
220; CHECK-NEXT:    ret
221  %p1 = getelementptr inbounds double, ptr %a, i64 -1
222  %tmp1 = load double, ptr %p1, align 2
223  %p2 = getelementptr inbounds double, ptr %a, i64 -2
224  %tmp2 = load double, ptr %p2, align 2
225  %tmp3 = fadd double %tmp1, %tmp2
226  ret double %tmp3
227}
228
; Two <2 x double> loads at element indices -1 and -2 (byte offsets -16 and
; -32), each declared with align 2. The CHECK lines expect a single
; `ldp q1, q0` based at #-32.
229define <2 x double> @ldur_doublex2(ptr %a) {
230; CHECK-LABEL: ldur_doublex2:
231; CHECK:       // %bb.0:
232; CHECK-NEXT:    ldp q1, q0, [x0, #-32]
233; CHECK-NEXT:    fadd v0.2d, v0.2d, v1.2d
234; CHECK-NEXT:    ret
235  %p1 = getelementptr inbounds <2 x double>, ptr %a, i64 -1
236  %tmp1 = load <2 x double>, ptr %p1, align 2
237  %p2 = getelementptr inbounds <2 x double>, ptr %a, i64 -2
238  %tmp2 = load <2 x double>, ptr %p2, align 2
239  %tmp3 = fadd <2 x double> %tmp1, %tmp2
240  ret <2 x double> %tmp3
241}
242
243; Now check some boundary conditions
; i64 loads at element indices -31 and -32 (byte offsets -248 and -256). The
; CHECK lines expect them to still pair into one ldp based at #-256.
; NOTE(review): -256 is presumably the lowest offset an unscaled ldur can
; encode, which would make this the "barely in range" case -- confirm against
; the AArch64 ISA.
244define i64 @pairUpBarelyIn(ptr %a) nounwind ssp {
245; CHECK-LABEL: pairUpBarelyIn:
246; CHECK:       // %bb.0:
247; CHECK-NEXT:    ldp x9, x8, [x0, #-256]
248; CHECK-NEXT:    add x0, x8, x9
249; CHECK-NEXT:    ret
250  %p1 = getelementptr inbounds i64, ptr %a, i64 -31
251  %tmp1 = load i64, ptr %p1, align 2
252  %p2 = getelementptr inbounds i64, ptr %a, i64 -32
253  %tmp2 = load i64, ptr %p2, align 2
254  %tmp3 = add i64 %tmp1, %tmp2
255  ret i64 %tmp3
256}
257
; i32 loads at element indices -63 and -64 (byte offsets -252 and -256), both
; sign-extended to i64. The CHECK lines expect a single ldpsw based at #-256.
258define i64 @pairUpBarelyInSext(ptr %a) nounwind ssp {
259; CHECK-LABEL: pairUpBarelyInSext:
260; CHECK:       // %bb.0:
261; CHECK-NEXT:    ldpsw x9, x8, [x0, #-256]
262; CHECK-NEXT:    add x0, x8, x9
263; CHECK-NEXT:    ret
264  %p1 = getelementptr inbounds i32, ptr %a, i64 -63
265  %tmp1 = load i32, ptr %p1, align 2
266  %p2 = getelementptr inbounds i32, ptr %a, i64 -64
267  %tmp2 = load i32, ptr %p2, align 2
268  %sexttmp1 = sext i32 %tmp1 to i64
269  %sexttmp2 = sext i32 %tmp2 to i64
270  %tmp3 = add i64 %sexttmp1, %sexttmp2
271  ret i64 %tmp3
272}
273
; i32 loads at byte offsets -252 (%tmp1) and -256 (%tmp2) where only %tmp2,
; the lower address, is sign-extended; %tmp1 is zero-extended even though its
; extension is named %sexttmp1. The CHECK lines expect a plain ldp at #-256
; followed by sxtw on its first result register (w9).
274define i64 @pairUpBarelyInHalfSextRes0(ptr %a) nounwind ssp {
275; CHECK-LABEL: pairUpBarelyInHalfSextRes0:
276; CHECK:       // %bb.0:
277; CHECK-NEXT:    ldp w9, w8, [x0, #-256]
278; CHECK-NEXT:    // kill: def $w9 killed $w9 def $x9
279; CHECK-NEXT:    sxtw x9, w9
280; CHECK-NEXT:    add x0, x8, x9
281; CHECK-NEXT:    ret
282  %p1 = getelementptr inbounds i32, ptr %a, i64 -63
283  %tmp1 = load i32, ptr %p1, align 2
284  %p2 = getelementptr inbounds i32, ptr %a, i64 -64
285  %tmp2 = load i32, ptr %p2, align 2
286  %sexttmp1 = zext i32 %tmp1 to i64
287  %sexttmp2 = sext i32 %tmp2 to i64
288  %tmp3 = add i64 %sexttmp1, %sexttmp2
289  ret i64 %tmp3
290}
291
; Mirror of pairUpBarelyInHalfSextRes0: only %tmp1 (byte offset -252, the
; higher address) is sign-extended; %tmp2 (offset -256) is zero-extended even
; though its extension is named %sexttmp2. The CHECK lines expect a plain ldp
; at #-256 followed by sxtw on its second result register (w8).
292define i64 @pairUpBarelyInHalfSextRes1(ptr %a) nounwind ssp {
293; CHECK-LABEL: pairUpBarelyInHalfSextRes1:
294; CHECK:       // %bb.0:
295; CHECK-NEXT:    ldp w9, w8, [x0, #-256]
296; CHECK-NEXT:    // kill: def $w8 killed $w8 def $x8
297; CHECK-NEXT:    sxtw x8, w8
298; CHECK-NEXT:    add x0, x8, x9
299; CHECK-NEXT:    ret
300  %p1 = getelementptr inbounds i32, ptr %a, i64 -63
301  %tmp1 = load i32, ptr %p1, align 2
302  %p2 = getelementptr inbounds i32, ptr %a, i64 -64
303  %tmp2 = load i32, ptr %p2, align 2
304  %sexttmp1 = sext i32 %tmp1 to i64
305  %sexttmp2 = zext i32 %tmp2 to i64
306  %tmp3 = add i64 %sexttmp1, %sexttmp2
307  ret i64 %tmp3
308}
309
; i64 loads at element indices -32 and -33 (byte offsets -256 and -264). The
; CHECK lines expect NO ldp: the -256 load stays an ldur, and the -264 address
; is formed with a sub and loaded with a plain ldr.
; NOTE(review): the comment inside the function says the checks should not be
; fragile about which loads or base-register manipulations are used, but the
; autogenerated CHECK-NEXT lines now pin the exact instruction sequence.
310define i64 @pairUpBarelyOut(ptr %a) nounwind ssp {
311; Don't be fragile about which loads or manipulations of the base register
312; are used---just check that there isn't an ldp before the add
313; CHECK-LABEL: pairUpBarelyOut:
314; CHECK:       // %bb.0:
315; CHECK-NEXT:    sub x8, x0, #264
316; CHECK-NEXT:    ldur x9, [x0, #-256]
317; CHECK-NEXT:    ldr x8, [x8]
318; CHECK-NEXT:    add x0, x9, x8
319; CHECK-NEXT:    ret
320  %p1 = getelementptr inbounds i64, ptr %a, i64 -32
321  %tmp1 = load i64, ptr %p1, align 2
322  %p2 = getelementptr inbounds i64, ptr %a, i64 -33
323  %tmp2 = load i64, ptr %p2, align 2
324  %tmp3 = add i64 %tmp1, %tmp2
325  ret i64 %tmp3
326}
327
; i32 loads at element indices -64 and -65 (byte offsets -256 and -260), both
; sign-extended. The CHECK lines expect NO ldpsw: the -256 load stays an
; ldursw, and the -260 address is formed with a sub and loaded with ldrsw.
; NOTE(review): the comment inside the function says the checks should not be
; fragile about which loads or base-register manipulations are used, but the
; autogenerated CHECK-NEXT lines now pin the exact instruction sequence.
328define i64 @pairUpBarelyOutSext(ptr %a) nounwind ssp {
329; Don't be fragile about which loads or manipulations of the base register
330; are used---just check that there isn't an ldp before the add
331; CHECK-LABEL: pairUpBarelyOutSext:
332; CHECK:       // %bb.0:
333; CHECK-NEXT:    sub x8, x0, #260
334; CHECK-NEXT:    ldursw x9, [x0, #-256]
335; CHECK-NEXT:    ldrsw x8, [x8]
336; CHECK-NEXT:    add x0, x9, x8
337; CHECK-NEXT:    ret
338  %p1 = getelementptr inbounds i32, ptr %a, i64 -64
339  %tmp1 = load i32, ptr %p1, align 2
340  %p2 = getelementptr inbounds i32, ptr %a, i64 -65
341  %tmp2 = load i32, ptr %p2, align 2
342  %sexttmp1 = sext i32 %tmp1 to i64
343  %sexttmp2 = sext i32 %tmp2 to i64
344  %tmp3 = add i64 %sexttmp1, %sexttmp2
345  ret i64 %tmp3
346}
347
; i64 loads whose addresses are one byte past element indices -18 and -17,
; i.e. byte offsets -143 and -135, declared with align 1. The two loads are
; 8 bytes apart, but the CHECK lines expect them to stay as separate ldur
; instructions rather than forming an ldp.
; NOTE(review): each `bitcast ptr ... to ptr` casts to the same type, so the
; casts only forward the pointer through the GEP chain.
348define i64 @pairUpNotAligned(ptr %a) nounwind ssp {
349; CHECK-LABEL: pairUpNotAligned:
350; CHECK:       // %bb.0:
351; CHECK-NEXT:    ldur x8, [x0, #-143]
352; CHECK-NEXT:    ldur x9, [x0, #-135]
353; CHECK-NEXT:    add x0, x8, x9
354; CHECK-NEXT:    ret
355  %p1 = getelementptr inbounds i64, ptr %a, i64 -18
356  %bp1 = bitcast ptr %p1 to ptr
357  %bp1p1 = getelementptr inbounds i8, ptr %bp1, i64 1
358  %dp1 = bitcast ptr %bp1p1 to ptr
359  %tmp1 = load i64, ptr %dp1, align 1
360
361  %p2 = getelementptr inbounds i64, ptr %a, i64 -17
362  %bp2 = bitcast ptr %p2 to ptr
363  %bp2p1 = getelementptr inbounds i8, ptr %bp2, i64 1
364  %dp2 = bitcast ptr %bp2p1 to ptr
365  %tmp2 = load i64, ptr %dp2, align 1
366
367  %tmp3 = add i64 %tmp1, %tmp2
368  ret i64 %tmp3
369}
370
; i32 loads whose addresses are one byte past element indices -18 and -17,
; i.e. byte offsets -71 and -67, declared with align 1 and both sign-extended.
; The two loads are 4 bytes apart, but the CHECK lines expect them to stay as
; separate ldursw instructions rather than forming an ldpsw.
; NOTE(review): each `bitcast ptr ... to ptr` casts to the same type, so the
; casts only forward the pointer through the GEP chain.
371define i64 @pairUpNotAlignedSext(ptr %a) nounwind ssp {
372; CHECK-LABEL: pairUpNotAlignedSext:
373; CHECK:       // %bb.0:
374; CHECK-NEXT:    ldursw x8, [x0, #-71]
375; CHECK-NEXT:    ldursw x9, [x0, #-67]
376; CHECK-NEXT:    add x0, x8, x9
377; CHECK-NEXT:    ret
378  %p1 = getelementptr inbounds i32, ptr %a, i64 -18
379  %bp1 = bitcast ptr %p1 to ptr
380  %bp1p1 = getelementptr inbounds i8, ptr %bp1, i64 1
381  %dp1 = bitcast ptr %bp1p1 to ptr
382  %tmp1 = load i32, ptr %dp1, align 1
383
384  %p2 = getelementptr inbounds i32, ptr %a, i64 -17
385  %bp2 = bitcast ptr %p2 to ptr
386  %bp2p1 = getelementptr inbounds i8, ptr %bp2, i64 1
387  %dp2 = bitcast ptr %bp2p1 to ptr
388  %tmp2 = load i32, ptr %dp2, align 1
389
390  %sexttmp1 = sext i32 %tmp1 to i64
391  %sexttmp2 = sext i32 %tmp2 to i64
392  %tmp3 = add i64 %sexttmp1, %sexttmp2
393 ret i64 %tmp3
394}
395
; External function taking a pointer. ldp_sext_int_pre and ldp_sext_int_post
; call it so that the address they compute has a use besides the loads.
396declare void @use-ptr(ptr)
397
; %ptr = %p + 8 bytes is passed to @use-ptr first; afterwards two adjacent
; i32 values are loaded from %ptr and %ptr + 4 and sign-extended. The CHECK
; lines expect the original %p to be kept in x19 across the call and the
; loads to become one `ldpsw x8, x9, [x19, #8]` (immediate offset, no
; base-register writeback).
398define i64 @ldp_sext_int_pre(ptr %p) nounwind {
399; CHECK-LABEL: ldp_sext_int_pre:
400; CHECK:       // %bb.0:
401; CHECK-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
402; CHECK-NEXT:    mov x19, x0
403; CHECK-NEXT:    add x0, x0, #8
404; CHECK-NEXT:    bl "use-ptr"
405; CHECK-NEXT:    ldpsw x8, x9, [x19, #8]
406; CHECK-NEXT:    add x0, x9, x8
407; CHECK-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
408; CHECK-NEXT:    ret
409  %ptr = getelementptr inbounds i32, ptr %p, i64 2
410  call void @use-ptr(ptr %ptr)
411  %add.ptr = getelementptr inbounds i32, ptr %ptr, i64 0
412  %tmp = load i32, ptr %add.ptr, align 4
413  %add.ptr1 = getelementptr inbounds i32, ptr %ptr, i64 1
414  %tmp1 = load i32, ptr %add.ptr1, align 4
415  %sexttmp = sext i32 %tmp to i64
416  %sexttmp1 = sext i32 %tmp1 to i64
417  %add = add nsw i64 %sexttmp1, %sexttmp
418  ret i64 %add
419}
420
; Two adjacent i32 values are loaded from %p and %p + 4 and sign-extended,
; then %p + 8 is passed to @use-ptr before the sum is returned. The CHECK
; lines expect the loads and the pointer increment to merge into one
; post-indexed `ldpsw x19, x20, [x0], #8`, leaving x0 = %p + 8 as the call
; argument; the loaded values live in x19/x20 across the call.
421define i64 @ldp_sext_int_post(ptr %p) nounwind {
422; CHECK-LABEL: ldp_sext_int_post:
423; CHECK:       // %bb.0:
424; CHECK-NEXT:    str x30, [sp, #-32]! // 8-byte Folded Spill
425; CHECK-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
426; CHECK-NEXT:    ldpsw x19, x20, [x0], #8
427; CHECK-NEXT:    bl "use-ptr"
428; CHECK-NEXT:    add x0, x20, x19
429; CHECK-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
430; CHECK-NEXT:    ldr x30, [sp], #32 // 8-byte Folded Reload
431; CHECK-NEXT:    ret
432  %tmp = load i32, ptr %p, align 4
433  %add.ptr = getelementptr inbounds i32, ptr %p, i64 1
434  %tmp1 = load i32, ptr %add.ptr, align 4
435  %sexttmp = sext i32 %tmp to i64
436  %sexttmp1 = sext i32 %tmp1 to i64
437  %ptr = getelementptr inbounds i32, ptr %add.ptr, i64 1
438  call void @use-ptr(ptr %ptr)
439  %add = add nsw i64 %sexttmp1, %sexttmp
440  ret i64 %add
441}
442
443