; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64 -mattr=+sve %s -o - | FileCheck %s

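; These tests load from "base + scalable offset + constant offset", built as
; two getelementptrs in the two possible association orders. When the constant
; offset is applied first (the *s_*v functions), the scalable part can be
; selected into the [xN, #imm, mul vl] addressing mode. When the scalable
; offset comes first (the *v_*s functions), an rdvl/cnth plus a
; register-offset load is currently generated instead.

; i8: a 1*VL scalable offset and a +4-byte constant offset, in both orders.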
define <vscale x 16 x i8> @i8_1v_4s(ptr %b) {
; CHECK-LABEL: i8_1v_4s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    mov w9, #4 // =0x4
; CHECK-NEXT:    add x8, x0, x8
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x8, x9]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 4
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 4
  %2 = load <vscale x 16 x i8>, ptr %add.ptr1, align 16
  ret <vscale x 16 x i8> %2
}

define <vscale x 16 x i8> @i8_4s_1v(ptr %b) {
; CHECK-LABEL: i8_4s_1v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    add x8, x0, #4
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x8, #1, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 4
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 4
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 %1
  %2 = load <vscale x 16 x i8>, ptr %add.ptr1, align 16
  ret <vscale x 16 x i8> %2
}

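; i16: a 1*VL scalable offset and a +8-byte constant offset, in both orders.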
define <vscale x 8 x i16> @i16_1v_8s(ptr %b) {
; CHECK-LABEL: i16_1v_8s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    add x8, x0, x8
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 3
  %add.ptr = getelementptr inbounds i16, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 8
  %2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
  ret <vscale x 8 x i16> %2
}

define <vscale x 8 x i16> @i16_8s_1v(ptr %b) {
; CHECK-LABEL: i16_8s_1v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    add x8, x0, #8
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8, #1, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 8
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 3
  %add.ptr1 = getelementptr inbounds i16, ptr %add.ptr, i64 %1
  %2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
  ret <vscale x 8 x i16> %2
}

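; i16: a 2*VL scalable offset and a +8-byte constant offset, in both orders.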
define <vscale x 8 x i16> @i16_2v_8s(ptr %b) {
; CHECK-LABEL: i16_2v_8s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rdvl x8, #2
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    add x8, x0, x8
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 4
  %add.ptr = getelementptr inbounds i16, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 8
  %2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
  ret <vscale x 8 x i16> %2
}

define <vscale x 8 x i16> @i16_8s_2v(ptr %b) {
; CHECK-LABEL: i16_8s_2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    add x8, x0, #8
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8, #2, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 8
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 4
  %add.ptr1 = getelementptr inbounds i16, ptr %add.ptr, i64 %1
  %2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
  ret <vscale x 8 x i16> %2
}

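; i32: a 1*VL scalable offset and a +16-byte constant offset, in both orders.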
define <vscale x 4 x i32> @i32_1v_16s(ptr %b) {
; CHECK-LABEL: i32_1v_16s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    add x8, x0, x8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 2
  %add.ptr = getelementptr inbounds i32, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 16
  %2 = load <vscale x 4 x i32>, ptr %add.ptr1, align 16
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @i32_16s_2v(ptr %b) {
; CHECK-LABEL: i32_16s_2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    add x8, x0, #16
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8, #1, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 16
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 2
  %add.ptr1 = getelementptr inbounds i32, ptr %add.ptr, i64 %1
  %2 = load <vscale x 4 x i32>, ptr %add.ptr1, align 16
  ret <vscale x 4 x i32> %2
}

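; i64: a 1*VL scalable offset and a +32-byte constant offset, in both orders.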
define <vscale x 2 x i64> @i64_1v_32s(ptr %b) {
; CHECK-LABEL: i64_1v_32s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    add x8, x0, x8
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x8, x9, lsl #3]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 1
  %add.ptr = getelementptr inbounds i64, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 32
  %2 = load <vscale x 2 x i64>, ptr %add.ptr1, align 16
  ret <vscale x 2 x i64> %2
}

define <vscale x 2 x i64> @i64_32s_2v(ptr %b) {
; CHECK-LABEL: i64_32s_2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    add x8, x0, #32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x8, #1, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 32
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 1
  %add.ptr1 = getelementptr inbounds i64, ptr %add.ptr, i64 %1
  %2 = load <vscale x 2 x i64>, ptr %add.ptr1, align 16
  ret <vscale x 2 x i64> %2
}

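; The same combinations with a negative scalable offset of -2*VL. When the
; constant offset is applied first, the scalable part still folds into the
; load, here as [xN, #-2, mul vl].

; i8: a -2*VL scalable offset and a +4-byte constant offset, in both orders.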
define <vscale x 16 x i8> @i8_m2v_4s(ptr %b) {
; CHECK-LABEL: i8_m2v_4s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    cnth x8, all, mul #4
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    mov w9, #4 // =0x4
; CHECK-NEXT:    sub x8, x0, x8
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x8, x9]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 4
  %2 = load <vscale x 16 x i8>, ptr %add.ptr1, align 16
  ret <vscale x 16 x i8> %2
}

define <vscale x 16 x i8> @i8_4s_m2v(ptr %b) {
; CHECK-LABEL: i8_4s_m2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    add x8, x0, #4
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x8, #-2, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 4
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 %1
  %2 = load <vscale x 16 x i8>, ptr %add.ptr1, align 16
  ret <vscale x 16 x i8> %2
}

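; i16: a -2*VL scalable offset and a +8-byte constant offset, in both orders.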
define <vscale x 8 x i16> @i16_m2v_8s(ptr %b) {
; CHECK-LABEL: i16_m2v_8s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    cnth x8, all, mul #4
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    sub x8, x0, x8
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 8
  %2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
  ret <vscale x 8 x i16> %2
}

define <vscale x 8 x i16> @i16_8s_m2v(ptr %b) {
; CHECK-LABEL: i16_8s_m2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    add x8, x0, #8
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8, #-2, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 8
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 %1
  %2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
  ret <vscale x 8 x i16> %2
}

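; i32: a -2*VL scalable offset and a +16-byte constant offset, in both orders.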
define <vscale x 4 x i32> @i32_m2v_16s(ptr %b) {
; CHECK-LABEL: i32_m2v_16s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    cnth x8, all, mul #4
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    sub x8, x0, x8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 16
  %2 = load <vscale x 4 x i32>, ptr %add.ptr1, align 16
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @i32_16s_m2v(ptr %b) {
; CHECK-LABEL: i32_16s_m2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    add x8, x0, #16
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8, #-2, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 16
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 %1
  %2 = load <vscale x 4 x i32>, ptr %add.ptr1, align 16
  ret <vscale x 4 x i32> %2
}

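; i64: a -2*VL scalable offset and a +32-byte constant offset, in both orders.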
define <vscale x 2 x i64> @i64_m2v_32s(ptr %b) {
; CHECK-LABEL: i64_m2v_32s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    cnth x8, all, mul #4
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    sub x8, x0, x8
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x8, x9, lsl #3]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 32
  %2 = load <vscale x 2 x i64>, ptr %add.ptr1, align 16
  ret <vscale x 2 x i64> %2
}

define <vscale x 2 x i64> @i64_32s_m2v(ptr %b) {
; CHECK-LABEL: i64_32s_m2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    add x8, x0, #32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x8, #-2, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 32
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul i64 %0, -32
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 %1
  %2 = load <vscale x 2 x i64>, ptr %add.ptr1, align 16
  ret <vscale x 2 x i64> %2
}

declare i64 @llvm.vscale.i64()