; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-enable-stp-suppress=false -verify-machineinstrs -mcpu=cyclone -aarch64-enable-sink-fold=true | FileCheck %s

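; Two adjacent stores of the same type at consecutive positive offsets should
; be merged into a single stp of the matching register class (w, x, s, d, q).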
define void @stp_int(i32 %a, i32 %b, ptr nocapture %p) nounwind {
; CHECK-LABEL: stp_int:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp w0, w1, [x2]
; CHECK-NEXT:    ret
  store i32 %a, ptr %p, align 4
  %add.ptr = getelementptr inbounds i32, ptr %p, i64 1
  store i32 %b, ptr %add.ptr, align 4
  ret void
}

define void @stp_long(i64 %a, i64 %b, ptr nocapture %p) nounwind {
; CHECK-LABEL: stp_long:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp x0, x1, [x2]
; CHECK-NEXT:    ret
  store i64 %a, ptr %p, align 8
  %add.ptr = getelementptr inbounds i64, ptr %p, i64 1
  store i64 %b, ptr %add.ptr, align 8
  ret void
}

define void @stp_float(float %a, float %b, ptr nocapture %p) nounwind {
; CHECK-LABEL: stp_float:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp s0, s1, [x0]
; CHECK-NEXT:    ret
  store float %a, ptr %p, align 4
  %add.ptr = getelementptr inbounds float, ptr %p, i64 1
  store float %b, ptr %add.ptr, align 4
  ret void
}

define void @stp_double(double %a, double %b, ptr nocapture %p) nounwind {
; CHECK-LABEL: stp_double:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d0, d1, [x0]
; CHECK-NEXT:    ret
  store double %a, ptr %p, align 8
  %add.ptr = getelementptr inbounds double, ptr %p, i64 1
  store double %b, ptr %add.ptr, align 8
  ret void
}

define void @stp_doublex2(<2 x double> %a, <2 x double> %b, ptr nocapture %p) nounwind {
; CHECK-LABEL: stp_doublex2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp q0, q1, [x0]
; CHECK-NEXT:    ret
  store <2 x double> %a, ptr %p, align 16
  %add.ptr = getelementptr inbounds <2 x double>, ptr %p, i64 1
  store <2 x double> %b, ptr %add.ptr, align 16
  ret void
}

; Test the load/store optimizer---combine sturs (unscaled, negative-offset stores) into an stp, if appropriate
define void @stur_int(i32 %a, i32 %b, ptr nocapture %p) nounwind {
; CHECK-LABEL: stur_int:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp w1, w0, [x2, #-8]
; CHECK-NEXT:    ret
  %p1 = getelementptr inbounds i32, ptr %p, i32 -1
  store i32 %a, ptr %p1, align 2
  %p2 = getelementptr inbounds i32, ptr %p, i32 -2
  store i32 %b, ptr %p2, align 2
  ret void
}

define void @stur_long(i64 %a, i64 %b, ptr nocapture %p) nounwind {
; CHECK-LABEL: stur_long:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp x1, x0, [x2, #-16]
; CHECK-NEXT:    ret
  %p1 = getelementptr inbounds i64, ptr %p, i32 -1
  store i64 %a, ptr %p1, align 2
  %p2 = getelementptr inbounds i64, ptr %p, i32 -2
  store i64 %b, ptr %p2, align 2
  ret void
}

define void @stur_float(float %a, float %b, ptr nocapture %p) nounwind {
; CHECK-LABEL: stur_float:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp s1, s0, [x0, #-8]
; CHECK-NEXT:    ret
  %p1 = getelementptr inbounds float, ptr %p, i32 -1
  store float %a, ptr %p1, align 2
  %p2 = getelementptr inbounds float, ptr %p, i32 -2
  store float %b, ptr %p2, align 2
  ret void
}

define void @stur_double(double %a, double %b, ptr nocapture %p) nounwind {
; CHECK-LABEL: stur_double:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d1, d0, [x0, #-16]
; CHECK-NEXT:    ret
  %p1 = getelementptr inbounds double, ptr %p, i32 -1
  store double %a, ptr %p1, align 2
  %p2 = getelementptr inbounds double, ptr %p, i32 -2
  store double %b, ptr %p2, align 2
  ret void
}

define void @stur_doublex2(<2 x double> %a, <2 x double> %b, ptr nocapture %p) nounwind {
; CHECK-LABEL: stur_doublex2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp q1, q0, [x0, #-32]
; CHECK-NEXT:    ret
  %p1 = getelementptr inbounds <2 x double>, ptr %p, i32 -1
  store <2 x double> %a, ptr %p1, align 2
  %p2 = getelementptr inbounds <2 x double>, ptr %p, i32 -2
  store <2 x double> %b, ptr %p2, align 2
  ret void
}

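; A store of a splat built from 4 insertelements of the same value should be
; lowered to a dup plus a single str q, not split into scalar stores.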
define void @splat_v4i32(i32 %v, ptr %p) {
; CHECK-LABEL: splat_v4i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    dup v0.4s, w0
; CHECK-NEXT:    str q0, [x1]
; CHECK-NEXT:    ret
entry:
  %p17 = insertelement <4 x i32> undef, i32 %v, i32 0
  %p18 = insertelement <4 x i32> %p17, i32 %v, i32 1
  %p19 = insertelement <4 x i32> %p18, i32 %v, i32 2
  %p20 = insertelement <4 x i32> %p19, i32 %v, i32 3
  store <4 x i32> %p20, ptr %p, align 4
  ret void
}

; Check that a store of a vector built by 4 insertelements that is not a
; recognizable splat (the first insert uses a variable lane index) does not
; get split.
define void @nosplat_v4i32(i32 %v, ptr %p) {
; CHECK-LABEL: nosplat_v4i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    bfi x8, x0, #2, #2
; CHECK-NEXT:    str w0, [x8]
; CHECK-NEXT:    ldr q0, [sp]
; CHECK-NEXT:    mov v0.s[1], w0
; CHECK-NEXT:    mov v0.s[2], w0
; CHECK-NEXT:    mov v0.s[3], w0
; CHECK-NEXT:    str q0, [x1]
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
entry:
  %p17 = insertelement <4 x i32> undef, i32 %v, i32 %v
  %p18 = insertelement <4 x i32> %p17, i32 %v, i32 1
  %p19 = insertelement <4 x i32> %p18, i32 %v, i32 2
  %p20 = insertelement <4 x i32> %p19, i32 %v, i32 3
  store <4 x i32> %p20, ptr %p, align 4
  ret void
}

; Check that a store of a vector built by 3 insertelements into an incoming
; vector argument (so not a splat) does not get split.
define void @nosplat2_v4i32(i32 %v, ptr %p, <4 x i32> %vin) {
; CHECK-LABEL: nosplat2_v4i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov v0.s[1], w0
; CHECK-NEXT:    mov v0.s[2], w0
; CHECK-NEXT:    mov v0.s[3], w0
; CHECK-NEXT:    str q0, [x1]
; CHECK-NEXT:    ret
entry:
  %p18 = insertelement <4 x i32> %vin, i32 %v, i32 1
  %p19 = insertelement <4 x i32> %p18, i32 %v, i32 2
  %p20 = insertelement <4 x i32> %p19, i32 %v, i32 3
  store <4 x i32> %p20, ptr %p, align 4
  ret void
}

; Read of %b to compute %tmp2 shouldn't prevent formation of stp
define i32 @stp_int_rar_hazard(i32 %a, i32 %b, ptr nocapture %p) nounwind {
; CHECK-LABEL: stp_int_rar_hazard:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr w8, [x2, #8]
; CHECK-NEXT:    stp w0, w1, [x2]
; CHECK-NEXT:    add w0, w8, w1
; CHECK-NEXT:    ret
  store i32 %a, ptr %p, align 4
  %ld.ptr = getelementptr inbounds i32, ptr %p, i64 2
  %tmp = load i32, ptr %ld.ptr, align 4
  %tmp2 = add i32 %tmp, %b
  %add.ptr = getelementptr inbounds i32, ptr %p, i64 1
  store i32 %b, ptr %add.ptr, align 4
  ret i32 %tmp2
}

; Read of %b to compute %tmp2 shouldn't prevent formation of stp
define i32 @stp_int_rar_hazard_after(i32 %w0, i32 %a, i32 %b, ptr nocapture %p) nounwind {
; CHECK-LABEL: stp_int_rar_hazard_after:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr w8, [x3, #4]
; CHECK-NEXT:    stp w1, w2, [x3]
; CHECK-NEXT:    add w0, w8, w2
; CHECK-NEXT:    ret
  store i32 %a, ptr %p, align 4
  %ld.ptr = getelementptr inbounds i32, ptr %p, i64 1
  %tmp = load i32, ptr %ld.ptr, align 4
  %tmp2 = add i32 %tmp, %b
  %add.ptr = getelementptr inbounds i32, ptr %p, i64 1
  store i32 %b, ptr %add.ptr, align 4
  ret i32 %tmp2
}