xref: /llvm-project/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll (revision 61510b51c33464a6bc15e4cf5b1ee07e2e0ec1c9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=arm64-apple-ios7.0 -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3; RUN: llc -global-isel=1 -mtriple=arm64-apple-ios7.0 -o - %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4
; Global pointer slot, initially null. The pre/post-index tests store their
; updated address here so that the incremented pointer has a use.
5@ptr = global ptr null
6
; Loads a <8 x i8> from %addr + 40 bytes (5 vectors) and stores that same
; address to @ptr. Expected for both SelectionDAG and GlobalISel: a single
; pre-indexed `ldr d0, [x0, #40]!`.
7define <8 x i8> @test_v8i8_pre_load(ptr %addr) {
8; CHECK-LABEL: test_v8i8_pre_load:
9; CHECK:       ; %bb.0:
10; CHECK-NEXT:    ldr d0, [x0, #40]!
11; CHECK-NEXT:    adrp x8, _ptr@PAGE
12; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
13; CHECK-NEXT:    ret
14  %newaddr = getelementptr <8 x i8>, ptr %addr, i32 5
15  %val = load <8 x i8>, ptr %newaddr, align 8
16  store ptr %newaddr, ptr @ptr
17  ret <8 x i8> %val
18}
19
; Loads a <8 x i8> from %addr and stores %addr + 40 bytes (5 vectors) to @ptr.
; Expected for both SelectionDAG and GlobalISel: a single post-indexed
; `ldr d0, [x0], #40`.
20define <8 x i8> @test_v8i8_post_load(ptr %addr) {
21; CHECK-LABEL: test_v8i8_post_load:
22; CHECK:       ; %bb.0:
23; CHECK-NEXT:    ldr d0, [x0], #40
24; CHECK-NEXT:    adrp x8, _ptr@PAGE
25; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
26; CHECK-NEXT:    ret
27  %newaddr = getelementptr <8 x i8>, ptr %addr, i32 5
28  %val = load <8 x i8>, ptr %addr, align 8
29  store ptr %newaddr, ptr @ptr
30  ret <8 x i8> %val
31}
32
; Stores %in (<8 x i8>) to %addr + 40 bytes (5 vectors) and stores that same
; address to @ptr. Expected for both SelectionDAG and GlobalISel: a single
; pre-indexed `str d0, [x0, #40]!`.
33define void @test_v8i8_pre_store(<8 x i8> %in, ptr %addr) {
34; CHECK-LABEL: test_v8i8_pre_store:
35; CHECK:       ; %bb.0:
36; CHECK-NEXT:    adrp x8, _ptr@PAGE
37; CHECK-NEXT:    str d0, [x0, #40]!
38; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
39; CHECK-NEXT:    ret
40  %newaddr = getelementptr <8 x i8>, ptr %addr, i32 5
41  store <8 x i8> %in, ptr %newaddr, align 8
42  store ptr %newaddr, ptr @ptr
43  ret void
44}
45
; Stores %in (<8 x i8>) to %addr and stores %addr + 40 bytes (5 vectors) to
; @ptr. Expected for both SelectionDAG and GlobalISel: a single post-indexed
; `str d0, [x0], #40`.
46define void @test_v8i8_post_store(<8 x i8> %in, ptr %addr) {
47; CHECK-LABEL: test_v8i8_post_store:
48; CHECK:       ; %bb.0:
49; CHECK-NEXT:    adrp x8, _ptr@PAGE
50; CHECK-NEXT:    str d0, [x0], #40
51; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
52; CHECK-NEXT:    ret
53  %newaddr = getelementptr <8 x i8>, ptr %addr, i32 5
54  store <8 x i8> %in, ptr %addr, align 8
55  store ptr %newaddr, ptr @ptr
56  ret void
57}
58
; Loads a <4 x i16> from %addr + 40 bytes (5 vectors) and stores that same
; address to @ptr. Expected for both SelectionDAG and GlobalISel: a single
; pre-indexed `ldr d0, [x0, #40]!`.
59define <4 x i16> @test_v4i16_pre_load(ptr %addr) {
60; CHECK-LABEL: test_v4i16_pre_load:
61; CHECK:       ; %bb.0:
62; CHECK-NEXT:    ldr d0, [x0, #40]!
63; CHECK-NEXT:    adrp x8, _ptr@PAGE
64; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
65; CHECK-NEXT:    ret
66  %newaddr = getelementptr <4 x i16>, ptr %addr, i32 5
67  %val = load <4 x i16>, ptr %newaddr, align 8
68  store ptr %newaddr, ptr @ptr
69  ret <4 x i16> %val
70}
71
; Loads a <4 x i16> from %addr and stores %addr + 40 bytes (5 vectors) to
; @ptr. Expected for both SelectionDAG and GlobalISel: a single post-indexed
; `ldr d0, [x0], #40`.
72define <4 x i16> @test_v4i16_post_load(ptr %addr) {
73; CHECK-LABEL: test_v4i16_post_load:
74; CHECK:       ; %bb.0:
75; CHECK-NEXT:    ldr d0, [x0], #40
76; CHECK-NEXT:    adrp x8, _ptr@PAGE
77; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
78; CHECK-NEXT:    ret
79  %newaddr = getelementptr <4 x i16>, ptr %addr, i32 5
80  %val = load <4 x i16>, ptr %addr, align 8
81  store ptr %newaddr, ptr @ptr
82  ret <4 x i16> %val
83}
84
; Stores %in (<4 x i16>) to %addr + 40 bytes (5 vectors) and stores that same
; address to @ptr. Expected for both SelectionDAG and GlobalISel: a single
; pre-indexed `str d0, [x0, #40]!`.
85define void @test_v4i16_pre_store(<4 x i16> %in, ptr %addr) {
86; CHECK-LABEL: test_v4i16_pre_store:
87; CHECK:       ; %bb.0:
88; CHECK-NEXT:    adrp x8, _ptr@PAGE
89; CHECK-NEXT:    str d0, [x0, #40]!
90; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
91; CHECK-NEXT:    ret
92  %newaddr = getelementptr <4 x i16>, ptr %addr, i32 5
93  store <4 x i16> %in, ptr %newaddr, align 8
94  store ptr %newaddr, ptr @ptr
95  ret void
96}
97
; Stores %in (<4 x i16>) to %addr and stores %addr + 40 bytes (5 vectors) to
; @ptr. Expected for both SelectionDAG and GlobalISel: a single post-indexed
; `str d0, [x0], #40`.
98define void @test_v4i16_post_store(<4 x i16> %in, ptr %addr) {
99; CHECK-LABEL: test_v4i16_post_store:
100; CHECK:       ; %bb.0:
101; CHECK-NEXT:    adrp x8, _ptr@PAGE
102; CHECK-NEXT:    str d0, [x0], #40
103; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
104; CHECK-NEXT:    ret
105  %newaddr = getelementptr <4 x i16>, ptr %addr, i32 5
106  store <4 x i16> %in, ptr %addr, align 8
107  store ptr %newaddr, ptr @ptr
108  ret void
109}
110
; Loads a <2 x i32> from %addr + 40 bytes (5 vectors) and stores that same
; address to @ptr. Expected for both SelectionDAG and GlobalISel: a single
; pre-indexed `ldr d0, [x0, #40]!`.
111define <2 x i32> @test_v2i32_pre_load(ptr %addr) {
112; CHECK-LABEL: test_v2i32_pre_load:
113; CHECK:       ; %bb.0:
114; CHECK-NEXT:    ldr d0, [x0, #40]!
115; CHECK-NEXT:    adrp x8, _ptr@PAGE
116; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
117; CHECK-NEXT:    ret
118  %newaddr = getelementptr <2 x i32>, ptr %addr, i32 5
119  %val = load <2 x i32>, ptr %newaddr, align 8
120  store ptr %newaddr, ptr @ptr
121  ret <2 x i32> %val
122}
123
; Loads a <2 x i32> from %addr and stores %addr + 40 bytes (5 vectors) to
; @ptr. Expected for both SelectionDAG and GlobalISel: a single post-indexed
; `ldr d0, [x0], #40`.
124define <2 x i32> @test_v2i32_post_load(ptr %addr) {
125; CHECK-LABEL: test_v2i32_post_load:
126; CHECK:       ; %bb.0:
127; CHECK-NEXT:    ldr d0, [x0], #40
128; CHECK-NEXT:    adrp x8, _ptr@PAGE
129; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
130; CHECK-NEXT:    ret
131  %newaddr = getelementptr <2 x i32>, ptr %addr, i32 5
132  %val = load <2 x i32>, ptr %addr, align 8
133  store ptr %newaddr, ptr @ptr
134  ret <2 x i32> %val
135}
136
; Stores %in (<2 x i32>) to %addr + 40 bytes (5 vectors) and stores that same
; address to @ptr. Expected for both SelectionDAG and GlobalISel: a single
; pre-indexed `str d0, [x0, #40]!`.
137define void @test_v2i32_pre_store(<2 x i32> %in, ptr %addr) {
138; CHECK-LABEL: test_v2i32_pre_store:
139; CHECK:       ; %bb.0:
140; CHECK-NEXT:    adrp x8, _ptr@PAGE
141; CHECK-NEXT:    str d0, [x0, #40]!
142; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
143; CHECK-NEXT:    ret
144  %newaddr = getelementptr <2 x i32>, ptr %addr, i32 5
145  store <2 x i32> %in, ptr %newaddr, align 8
146  store ptr %newaddr, ptr @ptr
147  ret void
148}
149
; Stores %in (<2 x i32>) to %addr and stores %addr + 40 bytes (5 vectors) to
; @ptr. Expected for both SelectionDAG and GlobalISel: a single post-indexed
; `str d0, [x0], #40`.
150define void @test_v2i32_post_store(<2 x i32> %in, ptr %addr) {
151; CHECK-LABEL: test_v2i32_post_store:
152; CHECK:       ; %bb.0:
153; CHECK-NEXT:    adrp x8, _ptr@PAGE
154; CHECK-NEXT:    str d0, [x0], #40
155; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
156; CHECK-NEXT:    ret
157  %newaddr = getelementptr <2 x i32>, ptr %addr, i32 5
158  store <2 x i32> %in, ptr %addr, align 8
159  store ptr %newaddr, ptr @ptr
160  ret void
161}
162
; Loads a <2 x float> from %addr + 40 bytes (5 vectors) and stores that same
; address to @ptr. Expected for both SelectionDAG and GlobalISel: a single
; pre-indexed `ldr d0, [x0, #40]!`.
163define <2 x float> @test_v2f32_pre_load(ptr %addr) {
164; CHECK-LABEL: test_v2f32_pre_load:
165; CHECK:       ; %bb.0:
166; CHECK-NEXT:    ldr d0, [x0, #40]!
167; CHECK-NEXT:    adrp x8, _ptr@PAGE
168; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
169; CHECK-NEXT:    ret
170  %newaddr = getelementptr <2 x float>, ptr %addr, i32 5
171  %val = load <2 x float>, ptr %newaddr, align 8
172  store ptr %newaddr, ptr @ptr
173  ret <2 x float> %val
174}
175
; Loads a <2 x float> from %addr and stores %addr + 40 bytes (5 vectors) to
; @ptr. Expected for both SelectionDAG and GlobalISel: a single post-indexed
; `ldr d0, [x0], #40`.
176define <2 x float> @test_v2f32_post_load(ptr %addr) {
177; CHECK-LABEL: test_v2f32_post_load:
178; CHECK:       ; %bb.0:
179; CHECK-NEXT:    ldr d0, [x0], #40
180; CHECK-NEXT:    adrp x8, _ptr@PAGE
181; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
182; CHECK-NEXT:    ret
183  %newaddr = getelementptr <2 x float>, ptr %addr, i32 5
184  %val = load <2 x float>, ptr %addr, align 8
185  store ptr %newaddr, ptr @ptr
186  ret <2 x float> %val
187}
188
; Stores %in (<2 x float>) to %addr + 40 bytes (5 vectors) and stores that
; same address to @ptr. Expected for both SelectionDAG and GlobalISel: a
; single pre-indexed `str d0, [x0, #40]!`.
189define void @test_v2f32_pre_store(<2 x float> %in, ptr %addr) {
190; CHECK-LABEL: test_v2f32_pre_store:
191; CHECK:       ; %bb.0:
192; CHECK-NEXT:    adrp x8, _ptr@PAGE
193; CHECK-NEXT:    str d0, [x0, #40]!
194; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
195; CHECK-NEXT:    ret
196  %newaddr = getelementptr <2 x float>, ptr %addr, i32 5
197  store <2 x float> %in, ptr %newaddr, align 8
198  store ptr %newaddr, ptr @ptr
199  ret void
200}
201
; Stores %in (<2 x float>) to %addr and stores %addr + 40 bytes (5 vectors)
; to @ptr. Expected for both SelectionDAG and GlobalISel: a single
; post-indexed `str d0, [x0], #40`.
202define void @test_v2f32_post_store(<2 x float> %in, ptr %addr) {
203; CHECK-LABEL: test_v2f32_post_store:
204; CHECK:       ; %bb.0:
205; CHECK-NEXT:    adrp x8, _ptr@PAGE
206; CHECK-NEXT:    str d0, [x0], #40
207; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
208; CHECK-NEXT:    ret
209  %newaddr = getelementptr <2 x float>, ptr %addr, i32 5
210  store <2 x float> %in, ptr %addr, align 8
211  store ptr %newaddr, ptr @ptr
212  ret void
213}
214
; Loads a <1 x i64> from %addr + 40 bytes (5 vectors) and stores that same
; address to @ptr. The two pipelines differ: SelectionDAG uses a pre-indexed
; `ldr d0`, while GlobalISel uses a pre-indexed `ldr x8` and then moves the
; value into d0 with `fmov`.
215define <1 x i64> @test_v1i64_pre_load(ptr %addr) {
216; CHECK-SD-LABEL: test_v1i64_pre_load:
217; CHECK-SD:       ; %bb.0:
218; CHECK-SD-NEXT:    ldr d0, [x0, #40]!
219; CHECK-SD-NEXT:    adrp x8, _ptr@PAGE
220; CHECK-SD-NEXT:    str x0, [x8, _ptr@PAGEOFF]
221; CHECK-SD-NEXT:    ret
222;
223; CHECK-GI-LABEL: test_v1i64_pre_load:
224; CHECK-GI:       ; %bb.0:
225; CHECK-GI-NEXT:    ldr x8, [x0, #40]!
226; CHECK-GI-NEXT:    adrp x9, _ptr@PAGE
227; CHECK-GI-NEXT:    str x0, [x9, _ptr@PAGEOFF]
228; CHECK-GI-NEXT:    fmov d0, x8
229; CHECK-GI-NEXT:    ret
230  %newaddr = getelementptr <1 x i64>, ptr %addr, i32 5
231  %val = load <1 x i64>, ptr %newaddr, align 8
232  store ptr %newaddr, ptr @ptr
233  ret <1 x i64> %val
234}
235
; Loads a <1 x i64> from %addr and stores %addr + 40 bytes (5 vectors) to
; @ptr. The two pipelines differ: SelectionDAG uses a post-indexed `ldr d0`,
; while GlobalISel uses a post-indexed `ldr x8` and then moves the value into
; d0 with `fmov`.
236define <1 x i64> @test_v1i64_post_load(ptr %addr) {
237; CHECK-SD-LABEL: test_v1i64_post_load:
238; CHECK-SD:       ; %bb.0:
239; CHECK-SD-NEXT:    ldr d0, [x0], #40
240; CHECK-SD-NEXT:    adrp x8, _ptr@PAGE
241; CHECK-SD-NEXT:    str x0, [x8, _ptr@PAGEOFF]
242; CHECK-SD-NEXT:    ret
243;
244; CHECK-GI-LABEL: test_v1i64_post_load:
245; CHECK-GI:       ; %bb.0:
246; CHECK-GI-NEXT:    ldr x8, [x0], #40
247; CHECK-GI-NEXT:    adrp x9, _ptr@PAGE
248; CHECK-GI-NEXT:    str x0, [x9, _ptr@PAGEOFF]
249; CHECK-GI-NEXT:    fmov d0, x8
250; CHECK-GI-NEXT:    ret
251  %newaddr = getelementptr <1 x i64>, ptr %addr, i32 5
252  %val = load <1 x i64>, ptr %addr, align 8
253  store ptr %newaddr, ptr @ptr
254  ret <1 x i64> %val
255}
256
; Stores %in (<1 x i64>) to %addr + 40 bytes (5 vectors) and stores that same
; address to @ptr. Expected for both SelectionDAG and GlobalISel: a single
; pre-indexed `str d0, [x0, #40]!`.
257define void @test_v1i64_pre_store(<1 x i64> %in, ptr %addr) {
258; CHECK-LABEL: test_v1i64_pre_store:
259; CHECK:       ; %bb.0:
260; CHECK-NEXT:    adrp x8, _ptr@PAGE
261; CHECK-NEXT:    str d0, [x0, #40]!
262; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
263; CHECK-NEXT:    ret
264  %newaddr = getelementptr <1 x i64>, ptr %addr, i32 5
265  store <1 x i64> %in, ptr %newaddr, align 8
266  store ptr %newaddr, ptr @ptr
267  ret void
268}
269
; Stores %in (<1 x i64>) to %addr and stores %addr + 40 bytes (5 vectors) to
; @ptr. Expected for both SelectionDAG and GlobalISel: a single post-indexed
; `str d0, [x0], #40`.
270define void @test_v1i64_post_store(<1 x i64> %in, ptr %addr) {
271; CHECK-LABEL: test_v1i64_post_store:
272; CHECK:       ; %bb.0:
273; CHECK-NEXT:    adrp x8, _ptr@PAGE
274; CHECK-NEXT:    str d0, [x0], #40
275; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
276; CHECK-NEXT:    ret
277  %newaddr = getelementptr <1 x i64>, ptr %addr, i32 5
278  store <1 x i64> %in, ptr %addr, align 8
279  store ptr %newaddr, ptr @ptr
280  ret void
281}
282
; Loads a <16 x i8> from %addr + 80 bytes (5 vectors) and stores that same
; address to @ptr. Expected for both SelectionDAG and GlobalISel: a single
; pre-indexed `ldr q0, [x0, #80]!`.
283define <16 x i8> @test_v16i8_pre_load(ptr %addr) {
284; CHECK-LABEL: test_v16i8_pre_load:
285; CHECK:       ; %bb.0:
286; CHECK-NEXT:    ldr q0, [x0, #80]!
287; CHECK-NEXT:    adrp x8, _ptr@PAGE
288; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
289; CHECK-NEXT:    ret
290  %newaddr = getelementptr <16 x i8>, ptr %addr, i32 5
291  %val = load <16 x i8>, ptr %newaddr, align 8
292  store ptr %newaddr, ptr @ptr
293  ret <16 x i8> %val
294}
295
; Loads a <16 x i8> from %addr and stores %addr + 80 bytes (5 vectors) to
; @ptr. Expected for both SelectionDAG and GlobalISel: a single post-indexed
; `ldr q0, [x0], #80`.
296define <16 x i8> @test_v16i8_post_load(ptr %addr) {
297; CHECK-LABEL: test_v16i8_post_load:
298; CHECK:       ; %bb.0:
299; CHECK-NEXT:    ldr q0, [x0], #80
300; CHECK-NEXT:    adrp x8, _ptr@PAGE
301; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
302; CHECK-NEXT:    ret
303  %newaddr = getelementptr <16 x i8>, ptr %addr, i32 5
304  %val = load <16 x i8>, ptr %addr, align 8
305  store ptr %newaddr, ptr @ptr
306  ret <16 x i8> %val
307}
308
; Stores %in (<16 x i8>) to %addr + 80 bytes (5 vectors) and stores that same
; address to @ptr. Expected for both SelectionDAG and GlobalISel: a single
; pre-indexed `str q0, [x0, #80]!`.
309define void @test_v16i8_pre_store(<16 x i8> %in, ptr %addr) {
310; CHECK-LABEL: test_v16i8_pre_store:
311; CHECK:       ; %bb.0:
312; CHECK-NEXT:    adrp x8, _ptr@PAGE
313; CHECK-NEXT:    str q0, [x0, #80]!
314; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
315; CHECK-NEXT:    ret
316  %newaddr = getelementptr <16 x i8>, ptr %addr, i32 5
317  store <16 x i8> %in, ptr %newaddr, align 8
318  store ptr %newaddr, ptr @ptr
319  ret void
320}
321
; Stores %in (<16 x i8>) to %addr and stores %addr + 80 bytes (5 vectors) to
; @ptr. Expected for both SelectionDAG and GlobalISel: a single post-indexed
; `str q0, [x0], #80`.
322define void @test_v16i8_post_store(<16 x i8> %in, ptr %addr) {
323; CHECK-LABEL: test_v16i8_post_store:
324; CHECK:       ; %bb.0:
325; CHECK-NEXT:    adrp x8, _ptr@PAGE
326; CHECK-NEXT:    str q0, [x0], #80
327; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
328; CHECK-NEXT:    ret
329  %newaddr = getelementptr <16 x i8>, ptr %addr, i32 5
330  store <16 x i8> %in, ptr %addr, align 8
331  store ptr %newaddr, ptr @ptr
332  ret void
333}
334
; Loads a <8 x i16> from %addr + 80 bytes (5 vectors) and stores that same
; address to @ptr. Expected for both SelectionDAG and GlobalISel: a single
; pre-indexed `ldr q0, [x0, #80]!`.
335define <8 x i16> @test_v8i16_pre_load(ptr %addr) {
336; CHECK-LABEL: test_v8i16_pre_load:
337; CHECK:       ; %bb.0:
338; CHECK-NEXT:    ldr q0, [x0, #80]!
339; CHECK-NEXT:    adrp x8, _ptr@PAGE
340; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
341; CHECK-NEXT:    ret
342  %newaddr = getelementptr <8 x i16>, ptr %addr, i32 5
343  %val = load <8 x i16>, ptr %newaddr, align 8
344  store ptr %newaddr, ptr @ptr
345  ret <8 x i16> %val
346}
347
; Loads a <8 x i16> from %addr and stores %addr + 80 bytes (5 vectors) to
; @ptr. Expected for both SelectionDAG and GlobalISel: a single post-indexed
; `ldr q0, [x0], #80`.
348define <8 x i16> @test_v8i16_post_load(ptr %addr) {
349; CHECK-LABEL: test_v8i16_post_load:
350; CHECK:       ; %bb.0:
351; CHECK-NEXT:    ldr q0, [x0], #80
352; CHECK-NEXT:    adrp x8, _ptr@PAGE
353; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
354; CHECK-NEXT:    ret
355  %newaddr = getelementptr <8 x i16>, ptr %addr, i32 5
356  %val = load <8 x i16>, ptr %addr, align 8
357  store ptr %newaddr, ptr @ptr
358  ret <8 x i16> %val
359}
360
; Stores %in (<8 x i16>) to %addr + 80 bytes (5 vectors) and stores that same
; address to @ptr. Expected for both SelectionDAG and GlobalISel: a single
; pre-indexed `str q0, [x0, #80]!`.
361define void @test_v8i16_pre_store(<8 x i16> %in, ptr %addr) {
362; CHECK-LABEL: test_v8i16_pre_store:
363; CHECK:       ; %bb.0:
364; CHECK-NEXT:    adrp x8, _ptr@PAGE
365; CHECK-NEXT:    str q0, [x0, #80]!
366; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
367; CHECK-NEXT:    ret
368  %newaddr = getelementptr <8 x i16>, ptr %addr, i32 5
369  store <8 x i16> %in, ptr %newaddr, align 8
370  store ptr %newaddr, ptr @ptr
371  ret void
372}
373
; Stores %in (<8 x i16>) to %addr and stores %addr + 80 bytes (5 vectors) to
; @ptr. Expected for both SelectionDAG and GlobalISel: a single post-indexed
; `str q0, [x0], #80`.
374define void @test_v8i16_post_store(<8 x i16> %in, ptr %addr) {
375; CHECK-LABEL: test_v8i16_post_store:
376; CHECK:       ; %bb.0:
377; CHECK-NEXT:    adrp x8, _ptr@PAGE
378; CHECK-NEXT:    str q0, [x0], #80
379; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
380; CHECK-NEXT:    ret
381  %newaddr = getelementptr <8 x i16>, ptr %addr, i32 5
382  store <8 x i16> %in, ptr %addr, align 8
383  store ptr %newaddr, ptr @ptr
384  ret void
385}
386
; Loads a <4 x i32> from %addr + 80 bytes (5 vectors) and stores that same
; address to @ptr. Expected for both SelectionDAG and GlobalISel: a single
; pre-indexed `ldr q0, [x0, #80]!`.
387define <4 x i32> @test_v4i32_pre_load(ptr %addr) {
388; CHECK-LABEL: test_v4i32_pre_load:
389; CHECK:       ; %bb.0:
390; CHECK-NEXT:    ldr q0, [x0, #80]!
391; CHECK-NEXT:    adrp x8, _ptr@PAGE
392; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
393; CHECK-NEXT:    ret
394  %newaddr = getelementptr <4 x i32>, ptr %addr, i32 5
395  %val = load <4 x i32>, ptr %newaddr, align 8
396  store ptr %newaddr, ptr @ptr
397  ret <4 x i32> %val
398}
399
; Loads a <4 x i32> from %addr and stores %addr + 80 bytes (5 vectors) to
; @ptr. Expected for both SelectionDAG and GlobalISel: a single post-indexed
; `ldr q0, [x0], #80`.
400define <4 x i32> @test_v4i32_post_load(ptr %addr) {
401; CHECK-LABEL: test_v4i32_post_load:
402; CHECK:       ; %bb.0:
403; CHECK-NEXT:    ldr q0, [x0], #80
404; CHECK-NEXT:    adrp x8, _ptr@PAGE
405; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
406; CHECK-NEXT:    ret
407  %newaddr = getelementptr <4 x i32>, ptr %addr, i32 5
408  %val = load <4 x i32>, ptr %addr, align 8
409  store ptr %newaddr, ptr @ptr
410  ret <4 x i32> %val
411}
412
; Stores %in (<4 x i32>) to %addr + 80 bytes (5 vectors) and stores that same
; address to @ptr. Expected for both SelectionDAG and GlobalISel: a single
; pre-indexed `str q0, [x0, #80]!`.
413define void @test_v4i32_pre_store(<4 x i32> %in, ptr %addr) {
414; CHECK-LABEL: test_v4i32_pre_store:
415; CHECK:       ; %bb.0:
416; CHECK-NEXT:    adrp x8, _ptr@PAGE
417; CHECK-NEXT:    str q0, [x0, #80]!
418; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
419; CHECK-NEXT:    ret
420  %newaddr = getelementptr <4 x i32>, ptr %addr, i32 5
421  store <4 x i32> %in, ptr %newaddr, align 8
422  store ptr %newaddr, ptr @ptr
423  ret void
424}
425
; Stores %in (<4 x i32>) to %addr and stores %addr + 80 bytes (5 vectors) to
; @ptr. Expected for both SelectionDAG and GlobalISel: a single post-indexed
; `str q0, [x0], #80`.
426define void @test_v4i32_post_store(<4 x i32> %in, ptr %addr) {
427; CHECK-LABEL: test_v4i32_post_store:
428; CHECK:       ; %bb.0:
429; CHECK-NEXT:    adrp x8, _ptr@PAGE
430; CHECK-NEXT:    str q0, [x0], #80
431; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
432; CHECK-NEXT:    ret
433  %newaddr = getelementptr <4 x i32>, ptr %addr, i32 5
434  store <4 x i32> %in, ptr %addr, align 8
435  store ptr %newaddr, ptr @ptr
436  ret void
437}
438
439
; Loads a <4 x float> from %addr + 80 bytes (5 vectors) and stores that same
; address to @ptr. Expected for both SelectionDAG and GlobalISel: a single
; pre-indexed `ldr q0, [x0, #80]!`.
440define <4 x float> @test_v4f32_pre_load(ptr %addr) {
441; CHECK-LABEL: test_v4f32_pre_load:
442; CHECK:       ; %bb.0:
443; CHECK-NEXT:    ldr q0, [x0, #80]!
444; CHECK-NEXT:    adrp x8, _ptr@PAGE
445; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
446; CHECK-NEXT:    ret
447  %newaddr = getelementptr <4 x float>, ptr %addr, i32 5
448  %val = load <4 x float>, ptr %newaddr, align 8
449  store ptr %newaddr, ptr @ptr
450  ret <4 x float> %val
451}
452
; Loads a <4 x float> from %addr and stores %addr + 80 bytes (5 vectors) to
; @ptr. Expected for both SelectionDAG and GlobalISel: a single post-indexed
; `ldr q0, [x0], #80`.
453define <4 x float> @test_v4f32_post_load(ptr %addr) {
454; CHECK-LABEL: test_v4f32_post_load:
455; CHECK:       ; %bb.0:
456; CHECK-NEXT:    ldr q0, [x0], #80
457; CHECK-NEXT:    adrp x8, _ptr@PAGE
458; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
459; CHECK-NEXT:    ret
460  %newaddr = getelementptr <4 x float>, ptr %addr, i32 5
461  %val = load <4 x float>, ptr %addr, align 8
462  store ptr %newaddr, ptr @ptr
463  ret <4 x float> %val
464}
465
; Stores %in (<4 x float>) to %addr + 80 bytes (5 vectors) and stores that
; same address to @ptr. Expected for both SelectionDAG and GlobalISel: a
; single pre-indexed `str q0, [x0, #80]!`.
466define void @test_v4f32_pre_store(<4 x float> %in, ptr %addr) {
467; CHECK-LABEL: test_v4f32_pre_store:
468; CHECK:       ; %bb.0:
469; CHECK-NEXT:    adrp x8, _ptr@PAGE
470; CHECK-NEXT:    str q0, [x0, #80]!
471; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
472; CHECK-NEXT:    ret
473  %newaddr = getelementptr <4 x float>, ptr %addr, i32 5
474  store <4 x float> %in, ptr %newaddr, align 8
475  store ptr %newaddr, ptr @ptr
476  ret void
477}
478
; Stores %in (<4 x float>) to %addr and stores %addr + 80 bytes (5 vectors)
; to @ptr. Expected for both SelectionDAG and GlobalISel: a single
; post-indexed `str q0, [x0], #80`.
479define void @test_v4f32_post_store(<4 x float> %in, ptr %addr) {
480; CHECK-LABEL: test_v4f32_post_store:
481; CHECK:       ; %bb.0:
482; CHECK-NEXT:    adrp x8, _ptr@PAGE
483; CHECK-NEXT:    str q0, [x0], #80
484; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
485; CHECK-NEXT:    ret
486  %newaddr = getelementptr <4 x float>, ptr %addr, i32 5
487  store <4 x float> %in, ptr %addr, align 8
488  store ptr %newaddr, ptr @ptr
489  ret void
490}
491
492
; Loads a <2 x i64> from %addr + 80 bytes (5 vectors) and stores that same
; address to @ptr. Expected for both SelectionDAG and GlobalISel: a single
; pre-indexed `ldr q0, [x0, #80]!`.
493define <2 x i64> @test_v2i64_pre_load(ptr %addr) {
494; CHECK-LABEL: test_v2i64_pre_load:
495; CHECK:       ; %bb.0:
496; CHECK-NEXT:    ldr q0, [x0, #80]!
497; CHECK-NEXT:    adrp x8, _ptr@PAGE
498; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
499; CHECK-NEXT:    ret
500  %newaddr = getelementptr <2 x i64>, ptr %addr, i32 5
501  %val = load <2 x i64>, ptr %newaddr, align 8
502  store ptr %newaddr, ptr @ptr
503  ret <2 x i64> %val
504}
505
; Loads a <2 x i64> from %addr and stores %addr + 80 bytes (5 vectors) to
; @ptr. Expected for both SelectionDAG and GlobalISel: a single post-indexed
; `ldr q0, [x0], #80`.
506define <2 x i64> @test_v2i64_post_load(ptr %addr) {
507; CHECK-LABEL: test_v2i64_post_load:
508; CHECK:       ; %bb.0:
509; CHECK-NEXT:    ldr q0, [x0], #80
510; CHECK-NEXT:    adrp x8, _ptr@PAGE
511; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
512; CHECK-NEXT:    ret
513  %newaddr = getelementptr <2 x i64>, ptr %addr, i32 5
514  %val = load <2 x i64>, ptr %addr, align 8
515  store ptr %newaddr, ptr @ptr
516  ret <2 x i64> %val
517}
518
; Stores %in (<2 x i64>) to %addr + 80 bytes (5 vectors) and stores that same
; address to @ptr. Expected for both SelectionDAG and GlobalISel: a single
; pre-indexed `str q0, [x0, #80]!`.
519define void @test_v2i64_pre_store(<2 x i64> %in, ptr %addr) {
520; CHECK-LABEL: test_v2i64_pre_store:
521; CHECK:       ; %bb.0:
522; CHECK-NEXT:    adrp x8, _ptr@PAGE
523; CHECK-NEXT:    str q0, [x0, #80]!
524; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
525; CHECK-NEXT:    ret
526  %newaddr = getelementptr <2 x i64>, ptr %addr, i32 5
527  store <2 x i64> %in, ptr %newaddr, align 8
528  store ptr %newaddr, ptr @ptr
529  ret void
530}
531
; Stores %in (<2 x i64>) to %addr and stores %addr + 80 bytes (5 vectors) to
; @ptr. Expected for both SelectionDAG and GlobalISel: a single post-indexed
; `str q0, [x0], #80`.
532define void @test_v2i64_post_store(<2 x i64> %in, ptr %addr) {
533; CHECK-LABEL: test_v2i64_post_store:
534; CHECK:       ; %bb.0:
535; CHECK-NEXT:    adrp x8, _ptr@PAGE
536; CHECK-NEXT:    str q0, [x0], #80
537; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
538; CHECK-NEXT:    ret
539  %newaddr = getelementptr <2 x i64>, ptr %addr, i32 5
540  store <2 x i64> %in, ptr %addr, align 8
541  store ptr %newaddr, ptr @ptr
542  ret void
543}
544
545
; Loads a <2 x double> from %addr + 80 bytes (5 vectors) and stores that same
; address to @ptr. Expected for both SelectionDAG and GlobalISel: a single
; pre-indexed `ldr q0, [x0, #80]!`.
546define <2 x double> @test_v2f64_pre_load(ptr %addr) {
547; CHECK-LABEL: test_v2f64_pre_load:
548; CHECK:       ; %bb.0:
549; CHECK-NEXT:    ldr q0, [x0, #80]!
550; CHECK-NEXT:    adrp x8, _ptr@PAGE
551; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
552; CHECK-NEXT:    ret
553  %newaddr = getelementptr <2 x double>, ptr %addr, i32 5
554  %val = load <2 x double>, ptr %newaddr, align 8
555  store ptr %newaddr, ptr @ptr
556  ret <2 x double> %val
557}
558
; Loads a <2 x double> from %addr and stores %addr + 80 bytes (5 vectors) to
; @ptr. Expected for both SelectionDAG and GlobalISel: a single post-indexed
; `ldr q0, [x0], #80`.
559define <2 x double> @test_v2f64_post_load(ptr %addr) {
560; CHECK-LABEL: test_v2f64_post_load:
561; CHECK:       ; %bb.0:
562; CHECK-NEXT:    ldr q0, [x0], #80
563; CHECK-NEXT:    adrp x8, _ptr@PAGE
564; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
565; CHECK-NEXT:    ret
566  %newaddr = getelementptr <2 x double>, ptr %addr, i32 5
567  %val = load <2 x double>, ptr %addr, align 8
568  store ptr %newaddr, ptr @ptr
569  ret <2 x double> %val
570}
571
; Stores %in (<2 x double>) to %addr + 80 bytes (5 vectors) and stores that
; same address to @ptr. Expected for both SelectionDAG and GlobalISel: a
; single pre-indexed `str q0, [x0, #80]!`.
572define void @test_v2f64_pre_store(<2 x double> %in, ptr %addr) {
573; CHECK-LABEL: test_v2f64_pre_store:
574; CHECK:       ; %bb.0:
575; CHECK-NEXT:    adrp x8, _ptr@PAGE
576; CHECK-NEXT:    str q0, [x0, #80]!
577; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
578; CHECK-NEXT:    ret
579  %newaddr = getelementptr <2 x double>, ptr %addr, i32 5
580  store <2 x double> %in, ptr %newaddr, align 8
581  store ptr %newaddr, ptr @ptr
582  ret void
583}
584
; Stores %in (<2 x double>) to %addr and stores %addr + 80 bytes (5 vectors)
; to @ptr. Expected for both SelectionDAG and GlobalISel: a single
; post-indexed `str q0, [x0], #80`.
585define void @test_v2f64_post_store(<2 x double> %in, ptr %addr) {
586; CHECK-LABEL: test_v2f64_post_store:
587; CHECK:       ; %bb.0:
588; CHECK-NEXT:    adrp x8, _ptr@PAGE
589; CHECK-NEXT:    str q0, [x0], #80
590; CHECK-NEXT:    str x0, [x8, _ptr@PAGEOFF]
591; CHECK-NEXT:    ret
592  %newaddr = getelementptr <2 x double>, ptr %addr, i32 5
593  store <2 x double> %in, ptr %addr, align 8
594  store ptr %newaddr, ptr @ptr
595  ret void
596}
597
; Stores lane 3 of %in (<16 x i8>) to %addr and returns %addr + 1 byte (one
; i8 element). SelectionDAG selects the post-indexed lane store `st1.b` with
; immediate #1; GlobalISel copies the lane into b0 and uses a post-indexed
; scalar `str`.
598define ptr @test_v16i8_post_imm_st1_lane(<16 x i8> %in, ptr %addr) {
599; CHECK-SD-LABEL: test_v16i8_post_imm_st1_lane:
600; CHECK-SD:       ; %bb.0:
601; CHECK-SD-NEXT:    st1.b { v0 }[3], [x0], #1
602; CHECK-SD-NEXT:    ret
603;
604; CHECK-GI-LABEL: test_v16i8_post_imm_st1_lane:
605; CHECK-GI:       ; %bb.0:
606; CHECK-GI-NEXT:    mov b0, v0[3]
607; CHECK-GI-NEXT:    str b0, [x0], #1
608; CHECK-GI-NEXT:    ret
609  %elt = extractelement <16 x i8> %in, i32 3
610  store i8 %elt, ptr %addr
611
612  %newaddr = getelementptr i8, ptr %addr, i32 1
613  ret ptr %newaddr
614}
615
; Stores lane 3 of %in (<16 x i8>) to %addr and returns %addr + 2 bytes (two
; i8 elements). SelectionDAG materialises the increment in x8 and uses the
; register post-index form of `st1.b`; GlobalISel copies the lane into b0 and
; uses a scalar `str` with an immediate post-index of #2.
616define ptr @test_v16i8_post_reg_st1_lane(<16 x i8> %in, ptr %addr) {
617; CHECK-SD-LABEL: test_v16i8_post_reg_st1_lane:
618; CHECK-SD:       ; %bb.0:
619; CHECK-SD-NEXT:    mov w8, #2 ; =0x2
620; CHECK-SD-NEXT:    st1.b { v0 }[3], [x0], x8
621; CHECK-SD-NEXT:    ret
622;
623; CHECK-GI-LABEL: test_v16i8_post_reg_st1_lane:
624; CHECK-GI:       ; %bb.0:
625; CHECK-GI-NEXT:    mov b0, v0[3]
626; CHECK-GI-NEXT:    str b0, [x0], #2
627; CHECK-GI-NEXT:    ret
628  %elt = extractelement <16 x i8> %in, i32 3
629  store i8 %elt, ptr %addr
630
631  %newaddr = getelementptr i8, ptr %addr, i32 2
632  ret ptr %newaddr
633}
634
635
; Stores lane 3 of %in (<8 x i16>) to %addr and returns %addr + 2 bytes (one
; i16 element). SelectionDAG selects the post-indexed lane store `st1.h` with
; immediate #2; GlobalISel copies the lane into h0 and uses a post-indexed
; scalar `str`.
636define ptr @test_v8i16_post_imm_st1_lane(<8 x i16> %in, ptr %addr) {
637; CHECK-SD-LABEL: test_v8i16_post_imm_st1_lane:
638; CHECK-SD:       ; %bb.0:
639; CHECK-SD-NEXT:    st1.h { v0 }[3], [x0], #2
640; CHECK-SD-NEXT:    ret
641;
642; CHECK-GI-LABEL: test_v8i16_post_imm_st1_lane:
643; CHECK-GI:       ; %bb.0:
644; CHECK-GI-NEXT:    mov h0, v0[3]
645; CHECK-GI-NEXT:    str h0, [x0], #2
646; CHECK-GI-NEXT:    ret
647  %elt = extractelement <8 x i16> %in, i32 3
648  store i16 %elt, ptr %addr
649
650  %newaddr = getelementptr i16, ptr %addr, i32 1
651  ret ptr %newaddr
652}
653
; Stores lane 3 of %in (<8 x i16>) to %addr and returns %addr + 4 bytes (two
; i16 elements). SelectionDAG materialises the increment in x8 and uses the
; register post-index form of `st1.h`; GlobalISel copies the lane into h0 and
; uses a scalar `str` with an immediate post-index of #4.
654define ptr @test_v8i16_post_reg_st1_lane(<8 x i16> %in, ptr %addr) {
655; CHECK-SD-LABEL: test_v8i16_post_reg_st1_lane:
656; CHECK-SD:       ; %bb.0:
657; CHECK-SD-NEXT:    mov w8, #4 ; =0x4
658; CHECK-SD-NEXT:    st1.h { v0 }[3], [x0], x8
659; CHECK-SD-NEXT:    ret
660;
661; CHECK-GI-LABEL: test_v8i16_post_reg_st1_lane:
662; CHECK-GI:       ; %bb.0:
663; CHECK-GI-NEXT:    mov h0, v0[3]
664; CHECK-GI-NEXT:    str h0, [x0], #4
665; CHECK-GI-NEXT:    ret
666  %elt = extractelement <8 x i16> %in, i32 3
667  store i16 %elt, ptr %addr
668
669  %newaddr = getelementptr i16, ptr %addr, i32 2
670  ret ptr %newaddr
671}
672
; Stores lane 3 of %in (<4 x i32>) to %addr and returns %addr + 4 bytes (one
; i32 element). SelectionDAG selects the post-indexed lane store `st1.s` with
; immediate #4; GlobalISel copies the lane into s0 and uses a post-indexed
; scalar `str`.
673define ptr @test_v4i32_post_imm_st1_lane(<4 x i32> %in, ptr %addr) {
674; CHECK-SD-LABEL: test_v4i32_post_imm_st1_lane:
675; CHECK-SD:       ; %bb.0:
676; CHECK-SD-NEXT:    st1.s { v0 }[3], [x0], #4
677; CHECK-SD-NEXT:    ret
678;
679; CHECK-GI-LABEL: test_v4i32_post_imm_st1_lane:
680; CHECK-GI:       ; %bb.0:
681; CHECK-GI-NEXT:    mov s0, v0[3]
682; CHECK-GI-NEXT:    str s0, [x0], #4
683; CHECK-GI-NEXT:    ret
684  %elt = extractelement <4 x i32> %in, i32 3
685  store i32 %elt, ptr %addr
686
687  %newaddr = getelementptr i32, ptr %addr, i32 1
688  ret ptr %newaddr
689}
690
; Stores lane 3 of %in (<4 x i32>) to %addr and returns %addr + 8 bytes (two
; i32 elements). SelectionDAG materialises the increment in x8 and uses the
; register post-index form of `st1.s`; GlobalISel copies the lane into s0 and
; uses a scalar `str` with an immediate post-index of #8.
691define ptr @test_v4i32_post_reg_st1_lane(<4 x i32> %in, ptr %addr) {
692; CHECK-SD-LABEL: test_v4i32_post_reg_st1_lane:
693; CHECK-SD:       ; %bb.0:
694; CHECK-SD-NEXT:    mov w8, #8 ; =0x8
695; CHECK-SD-NEXT:    st1.s { v0 }[3], [x0], x8
696; CHECK-SD-NEXT:    ret
697;
698; CHECK-GI-LABEL: test_v4i32_post_reg_st1_lane:
699; CHECK-GI:       ; %bb.0:
700; CHECK-GI-NEXT:    mov s0, v0[3]
701; CHECK-GI-NEXT:    str s0, [x0], #8
702; CHECK-GI-NEXT:    ret
703  %elt = extractelement <4 x i32> %in, i32 3
704  store i32 %elt, ptr %addr
705
706  %newaddr = getelementptr i32, ptr %addr, i32 2
707  ret ptr %newaddr
708}
709
; Stores lane 3 of %in (<4 x float>) to %addr and returns %addr + 4 bytes
; (one float element). SelectionDAG selects the post-indexed lane store
; `st1.s` with immediate #4; GlobalISel copies the lane into s0 and uses a
; post-indexed scalar `str`.
710define ptr @test_v4f32_post_imm_st1_lane(<4 x float> %in, ptr %addr) {
711; CHECK-SD-LABEL: test_v4f32_post_imm_st1_lane:
712; CHECK-SD:       ; %bb.0:
713; CHECK-SD-NEXT:    st1.s { v0 }[3], [x0], #4
714; CHECK-SD-NEXT:    ret
715;
716; CHECK-GI-LABEL: test_v4f32_post_imm_st1_lane:
717; CHECK-GI:       ; %bb.0:
718; CHECK-GI-NEXT:    mov s0, v0[3]
719; CHECK-GI-NEXT:    str s0, [x0], #4
720; CHECK-GI-NEXT:    ret
721  %elt = extractelement <4 x float> %in, i32 3
722  store float %elt, ptr %addr
723
724  %newaddr = getelementptr float, ptr %addr, i32 1
725  ret ptr %newaddr
726}
727
; Stores lane 3 of %in (<4 x float>) to %addr and returns %addr + 8 bytes
; (two float elements). SelectionDAG materialises the increment in x8 and
; uses the register post-index form of `st1.s`; GlobalISel copies the lane
; into s0 and uses a scalar `str` with an immediate post-index of #8.
728define ptr @test_v4f32_post_reg_st1_lane(<4 x float> %in, ptr %addr) {
729; CHECK-SD-LABEL: test_v4f32_post_reg_st1_lane:
730; CHECK-SD:       ; %bb.0:
731; CHECK-SD-NEXT:    mov w8, #8 ; =0x8
732; CHECK-SD-NEXT:    st1.s { v0 }[3], [x0], x8
733; CHECK-SD-NEXT:    ret
734;
735; CHECK-GI-LABEL: test_v4f32_post_reg_st1_lane:
736; CHECK-GI:       ; %bb.0:
737; CHECK-GI-NEXT:    mov s0, v0[3]
738; CHECK-GI-NEXT:    str s0, [x0], #8
739; CHECK-GI-NEXT:    ret
740  %elt = extractelement <4 x float> %in, i32 3
741  store float %elt, ptr %addr
742
743  %newaddr = getelementptr float, ptr %addr, i32 2
744  ret ptr %newaddr
745}
746
; Stores lane 1 of %in (<2 x i64>) to %addr and returns %addr + 8 bytes (one
; i64 element). SelectionDAG selects the post-indexed lane store `st1.d` with
; immediate #8; GlobalISel copies the lane into d0 and uses a post-indexed
; scalar `str`.
747define ptr @test_v2i64_post_imm_st1_lane(<2 x i64> %in, ptr %addr) {
748; CHECK-SD-LABEL: test_v2i64_post_imm_st1_lane:
749; CHECK-SD:       ; %bb.0:
750; CHECK-SD-NEXT:    st1.d { v0 }[1], [x0], #8
751; CHECK-SD-NEXT:    ret
752;
753; CHECK-GI-LABEL: test_v2i64_post_imm_st1_lane:
754; CHECK-GI:       ; %bb.0:
755; CHECK-GI-NEXT:    mov d0, v0[1]
756; CHECK-GI-NEXT:    str d0, [x0], #8
757; CHECK-GI-NEXT:    ret
758  %elt = extractelement <2 x i64> %in, i64 1
759  store i64 %elt, ptr %addr
760
761  %newaddr = getelementptr i64, ptr %addr, i64 1
762  ret ptr %newaddr
763}
764
; Stores lane 1 of %in (<2 x i64>) to %addr and returns %addr + 16 bytes (two
; i64 elements). SelectionDAG materialises the increment in x8 and uses the
; register post-index form of `st1.d`; GlobalISel copies the lane into d0 and
; uses a scalar `str` with an immediate post-index of #16.
765define ptr @test_v2i64_post_reg_st1_lane(<2 x i64> %in, ptr %addr) {
766; CHECK-SD-LABEL: test_v2i64_post_reg_st1_lane:
767; CHECK-SD:       ; %bb.0:
768; CHECK-SD-NEXT:    mov w8, #16 ; =0x10
769; CHECK-SD-NEXT:    st1.d { v0 }[1], [x0], x8
770; CHECK-SD-NEXT:    ret
771;
772; CHECK-GI-LABEL: test_v2i64_post_reg_st1_lane:
773; CHECK-GI:       ; %bb.0:
774; CHECK-GI-NEXT:    mov d0, v0[1]
775; CHECK-GI-NEXT:    str d0, [x0], #16
776; CHECK-GI-NEXT:    ret
777  %elt = extractelement <2 x i64> %in, i64 1
778  store i64 %elt, ptr %addr
779
780  %newaddr = getelementptr i64, ptr %addr, i64 2
781  ret ptr %newaddr
782}
783
; Stores lane 1 of %in (<2 x double>) to %addr and returns %addr + 8 bytes
; (one double element). SelectionDAG selects the post-indexed lane store
; `st1.d` with immediate #8; GlobalISel copies the lane into d0 and uses a
; post-indexed scalar `str`.
784define ptr @test_v2f64_post_imm_st1_lane(<2 x double> %in, ptr %addr) {
785; CHECK-SD-LABEL: test_v2f64_post_imm_st1_lane:
786; CHECK-SD:       ; %bb.0:
787; CHECK-SD-NEXT:    st1.d { v0 }[1], [x0], #8
788; CHECK-SD-NEXT:    ret
789;
790; CHECK-GI-LABEL: test_v2f64_post_imm_st1_lane:
791; CHECK-GI:       ; %bb.0:
792; CHECK-GI-NEXT:    mov d0, v0[1]
793; CHECK-GI-NEXT:    str d0, [x0], #8
794; CHECK-GI-NEXT:    ret
795  %elt = extractelement <2 x double> %in, i32 1
796  store double %elt, ptr %addr
797
798  %newaddr = getelementptr double, ptr %addr, i32 1
799  ret ptr %newaddr
800}
801
; Stores lane 1 of %in (<2 x double>) to %addr and returns %addr + 16 bytes
; (two double elements). SelectionDAG materialises the increment in x8 and
; uses the register post-index form of `st1.d`; GlobalISel copies the lane
; into d0 and uses a scalar `str` with an immediate post-index of #16.
802define ptr @test_v2f64_post_reg_st1_lane(<2 x double> %in, ptr %addr) {
803; CHECK-SD-LABEL: test_v2f64_post_reg_st1_lane:
804; CHECK-SD:       ; %bb.0:
805; CHECK-SD-NEXT:    mov w8, #16 ; =0x10
806; CHECK-SD-NEXT:    st1.d { v0 }[1], [x0], x8
807; CHECK-SD-NEXT:    ret
808;
809; CHECK-GI-LABEL: test_v2f64_post_reg_st1_lane:
810; CHECK-GI:       ; %bb.0:
811; CHECK-GI-NEXT:    mov d0, v0[1]
812; CHECK-GI-NEXT:    str d0, [x0], #16
813; CHECK-GI-NEXT:    ret
814  %elt = extractelement <2 x double> %in, i32 1
815  store double %elt, ptr %addr
816
817  %newaddr = getelementptr double, ptr %addr, i32 2
818  ret ptr %newaddr
819}
820
; Stores lane 3 of %in (<8 x i8>, a 64-bit vector) to %addr and returns
; %addr + 1 byte (one i8 element). The `kill` annotation in the expected
; output re-declares the incoming d0 as part of q0. SelectionDAG then selects
; post-indexed `st1.b` with immediate #1; GlobalISel copies the lane into b0
; and uses a post-indexed scalar `str`.
821define ptr @test_v8i8_post_imm_st1_lane(<8 x i8> %in, ptr %addr) {
822; CHECK-SD-LABEL: test_v8i8_post_imm_st1_lane:
823; CHECK-SD:       ; %bb.0:
824; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
825; CHECK-SD-NEXT:    st1.b { v0 }[3], [x0], #1
826; CHECK-SD-NEXT:    ret
827;
828; CHECK-GI-LABEL: test_v8i8_post_imm_st1_lane:
829; CHECK-GI:       ; %bb.0:
830; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
831; CHECK-GI-NEXT:    mov b0, v0[3]
832; CHECK-GI-NEXT:    str b0, [x0], #1
833; CHECK-GI-NEXT:    ret
834  %elt = extractelement <8 x i8> %in, i32 3
835  store i8 %elt, ptr %addr
836
837  %newaddr = getelementptr i8, ptr %addr, i32 1
838  ret ptr %newaddr
839}
840
; Stores lane 3 of %in (<8 x i8>, a 64-bit vector) to %addr and returns
; %addr + 2 bytes (two i8 elements). The `kill` annotation re-declares the
; incoming d0 as part of q0. SelectionDAG materialises the increment in x8
; and uses the register post-index form of `st1.b`; GlobalISel copies the
; lane into b0 and uses a scalar `str` with an immediate post-index of #2.
841define ptr @test_v8i8_post_reg_st1_lane(<8 x i8> %in, ptr %addr) {
842; CHECK-SD-LABEL: test_v8i8_post_reg_st1_lane:
843; CHECK-SD:       ; %bb.0:
844; CHECK-SD-NEXT:    mov w8, #2 ; =0x2
845; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
846; CHECK-SD-NEXT:    st1.b { v0 }[3], [x0], x8
847; CHECK-SD-NEXT:    ret
848;
849; CHECK-GI-LABEL: test_v8i8_post_reg_st1_lane:
850; CHECK-GI:       ; %bb.0:
851; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
852; CHECK-GI-NEXT:    mov b0, v0[3]
853; CHECK-GI-NEXT:    str b0, [x0], #2
854; CHECK-GI-NEXT:    ret
855  %elt = extractelement <8 x i8> %in, i32 3
856  store i8 %elt, ptr %addr
857
858  %newaddr = getelementptr i8, ptr %addr, i32 2
859  ret ptr %newaddr
860}
861
; Stores lane 3 of %in (<4 x i16>, a 64-bit vector) to %addr and returns
; %addr + 2 bytes (one i16 element). The `kill` annotation re-declares the
; incoming d0 as part of q0. SelectionDAG then selects post-indexed `st1.h`
; with immediate #2; GlobalISel copies the lane into h0 and uses a
; post-indexed scalar `str`.
862define ptr @test_v4i16_post_imm_st1_lane(<4 x i16> %in, ptr %addr) {
863; CHECK-SD-LABEL: test_v4i16_post_imm_st1_lane:
864; CHECK-SD:       ; %bb.0:
865; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
866; CHECK-SD-NEXT:    st1.h { v0 }[3], [x0], #2
867; CHECK-SD-NEXT:    ret
868;
869; CHECK-GI-LABEL: test_v4i16_post_imm_st1_lane:
870; CHECK-GI:       ; %bb.0:
871; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
872; CHECK-GI-NEXT:    mov h0, v0[3]
873; CHECK-GI-NEXT:    str h0, [x0], #2
874; CHECK-GI-NEXT:    ret
875  %elt = extractelement <4 x i16> %in, i32 3
876  store i16 %elt, ptr %addr
877
878  %newaddr = getelementptr i16, ptr %addr, i32 1
879  ret ptr %newaddr
880}
881
; Stores lane 3 of %in (<4 x i16>, a 64-bit vector) to %addr and returns
; %addr + 4 bytes (two i16 elements). The `kill` annotation re-declares the
; incoming d0 as part of q0. SelectionDAG materialises the increment in x8
; and uses the register post-index form of `st1.h`; GlobalISel copies the
; lane into h0 and uses a scalar `str` with an immediate post-index of #4.
882define ptr @test_v4i16_post_reg_st1_lane(<4 x i16> %in, ptr %addr) {
883; CHECK-SD-LABEL: test_v4i16_post_reg_st1_lane:
884; CHECK-SD:       ; %bb.0:
885; CHECK-SD-NEXT:    mov w8, #4 ; =0x4
886; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
887; CHECK-SD-NEXT:    st1.h { v0 }[3], [x0], x8
888; CHECK-SD-NEXT:    ret
889;
890; CHECK-GI-LABEL: test_v4i16_post_reg_st1_lane:
891; CHECK-GI:       ; %bb.0:
892; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
893; CHECK-GI-NEXT:    mov h0, v0[3]
894; CHECK-GI-NEXT:    str h0, [x0], #4
895; CHECK-GI-NEXT:    ret
896  %elt = extractelement <4 x i16> %in, i32 3
897  store i16 %elt, ptr %addr
898
899  %newaddr = getelementptr i16, ptr %addr, i32 2
900  ret ptr %newaddr
901}
902
; Stores lane 1 of %in (<2 x i32>, a 64-bit vector) to %addr and returns
; %addr + 4 bytes (one i32 element). The `kill` annotation re-declares the
; incoming d0 as part of q0. SelectionDAG then selects post-indexed `st1.s`
; with immediate #4; GlobalISel copies the lane into s0 and uses a
; post-indexed scalar `str`.
903define ptr @test_v2i32_post_imm_st1_lane(<2 x i32> %in, ptr %addr) {
904; CHECK-SD-LABEL: test_v2i32_post_imm_st1_lane:
905; CHECK-SD:       ; %bb.0:
906; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
907; CHECK-SD-NEXT:    st1.s { v0 }[1], [x0], #4
908; CHECK-SD-NEXT:    ret
909;
910; CHECK-GI-LABEL: test_v2i32_post_imm_st1_lane:
911; CHECK-GI:       ; %bb.0:
912; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
913; CHECK-GI-NEXT:    mov s0, v0[1]
914; CHECK-GI-NEXT:    str s0, [x0], #4
915; CHECK-GI-NEXT:    ret
916  %elt = extractelement <2 x i32> %in, i32 1
917  store i32 %elt, ptr %addr
918
919  %newaddr = getelementptr i32, ptr %addr, i32 1
920  ret ptr %newaddr
921}
922
; Stores lane 1 of %in (<2 x i32>, a 64-bit vector) to %addr and returns
; %addr + 8 bytes (two i32 elements). The `kill` annotation re-declares the
; incoming d0 as part of q0. SelectionDAG materialises the increment in x8
; and uses the register post-index form of `st1.s`; GlobalISel copies the
; lane into s0 and uses a scalar `str` with an immediate post-index of #8.
923define ptr @test_v2i32_post_reg_st1_lane(<2 x i32> %in, ptr %addr) {
924; CHECK-SD-LABEL: test_v2i32_post_reg_st1_lane:
925; CHECK-SD:       ; %bb.0:
926; CHECK-SD-NEXT:    mov w8, #8 ; =0x8
927; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
928; CHECK-SD-NEXT:    st1.s { v0 }[1], [x0], x8
929; CHECK-SD-NEXT:    ret
930;
931; CHECK-GI-LABEL: test_v2i32_post_reg_st1_lane:
932; CHECK-GI:       ; %bb.0:
933; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
934; CHECK-GI-NEXT:    mov s0, v0[1]
935; CHECK-GI-NEXT:    str s0, [x0], #8
936; CHECK-GI-NEXT:    ret
937  %elt = extractelement <2 x i32> %in, i32 1
938  store i32 %elt, ptr %addr
939
940  %newaddr = getelementptr i32, ptr %addr, i32 2
941  ret ptr %newaddr
942}
943
; Stores lane 1 of %in to %addr and returns %addr advanced by one float element.
; SelectionDAG is expected to fold the 4-byte step into st1.s as an immediate
; post-index; GlobalISel is expected to extract the lane with mov and use a
; post-indexed str.
944define ptr @test_v2f32_post_imm_st1_lane(<2 x float> %in, ptr %addr) {
945; CHECK-SD-LABEL: test_v2f32_post_imm_st1_lane:
946; CHECK-SD:       ; %bb.0:
947; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
948; CHECK-SD-NEXT:    st1.s { v0 }[1], [x0], #4
949; CHECK-SD-NEXT:    ret
950;
951; CHECK-GI-LABEL: test_v2f32_post_imm_st1_lane:
952; CHECK-GI:       ; %bb.0:
953; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
954; CHECK-GI-NEXT:    mov s0, v0[1]
955; CHECK-GI-NEXT:    str s0, [x0], #4
956; CHECK-GI-NEXT:    ret
957  %elt = extractelement <2 x float> %in, i32 1
958  store float %elt, ptr %addr
959
960  %newaddr = getelementptr float, ptr %addr, i32 1
961  ret ptr %newaddr
962}
963
; Stores lane 1 of %in to %addr and returns %addr advanced by two float
; elements. The 8-byte step differs from the 4-byte element size; SelectionDAG
; is expected to materialize it in w8 and use the register post-index form of
; st1.s, while GlobalISel is expected to extract the lane with mov and use a
; post-indexed str.
964define ptr @test_v2f32_post_reg_st1_lane(<2 x float> %in, ptr %addr) {
965; CHECK-SD-LABEL: test_v2f32_post_reg_st1_lane:
966; CHECK-SD:       ; %bb.0:
967; CHECK-SD-NEXT:    mov w8, #8 ; =0x8
968; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
969; CHECK-SD-NEXT:    st1.s { v0 }[1], [x0], x8
970; CHECK-SD-NEXT:    ret
971;
972; CHECK-GI-LABEL: test_v2f32_post_reg_st1_lane:
973; CHECK-GI:       ; %bb.0:
974; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
975; CHECK-GI-NEXT:    mov s0, v0[1]
976; CHECK-GI-NEXT:    str s0, [x0], #8
977; CHECK-GI-NEXT:    ret
978  %elt = extractelement <2 x float> %in, i32 1
979  store float %elt, ptr %addr
980
981  %newaddr = getelementptr float, ptr %addr, i32 2
982  ret ptr %newaddr
983}
984
; Loads two <16 x i8> vectors from %A with the ld2 intrinsic and stores
; %A + 32 bytes to %ptr. SelectionDAG is expected to fold the pointer update
; into ld2.16b as an immediate post-index; GlobalISel is expected to keep a
; separate add.
985define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2(ptr %A, ptr %ptr) {
986; CHECK-SD-LABEL: test_v16i8_post_imm_ld2:
987; CHECK-SD:       ; %bb.0:
988; CHECK-SD-NEXT:    ld2.16b { v0, v1 }, [x0], #32
989; CHECK-SD-NEXT:    str x0, [x1]
990; CHECK-SD-NEXT:    ret
991;
992; CHECK-GI-LABEL: test_v16i8_post_imm_ld2:
993; CHECK-GI:       ; %bb.0:
994; CHECK-GI-NEXT:    ld2.16b { v0, v1 }, [x0]
995; CHECK-GI-NEXT:    add x8, x0, #32
996; CHECK-GI-NEXT:    str x8, [x1]
997; CHECK-GI-NEXT:    ret
998  %ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr %A)
999  %tmp = getelementptr i8, ptr %A, i32 32
1000  store ptr %tmp, ptr %ptr
1001  ret { <16 x i8>, <16 x i8> } %ld2
1002}
1003
; Loads two <16 x i8> vectors from %A with the ld2 intrinsic and stores
; %A + %inc bytes to %ptr. SelectionDAG is expected to use %inc (x2) directly as
; the register post-index of ld2.16b; GlobalISel is expected to keep a separate
; add.
1004define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2(ptr %A, ptr %ptr, i64 %inc) {
1005; CHECK-SD-LABEL: test_v16i8_post_reg_ld2:
1006; CHECK-SD:       ; %bb.0:
1007; CHECK-SD-NEXT:    ld2.16b { v0, v1 }, [x0], x2
1008; CHECK-SD-NEXT:    str x0, [x1]
1009; CHECK-SD-NEXT:    ret
1010;
1011; CHECK-GI-LABEL: test_v16i8_post_reg_ld2:
1012; CHECK-GI:       ; %bb.0:
1013; CHECK-GI-NEXT:    ld2.16b { v0, v1 }, [x0]
1014; CHECK-GI-NEXT:    add x8, x0, x2
1015; CHECK-GI-NEXT:    str x8, [x1]
1016; CHECK-GI-NEXT:    ret
1017  %ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr %A)
1018  %tmp = getelementptr i8, ptr %A, i64 %inc
1019  store ptr %tmp, ptr %ptr
1020  ret { <16 x i8>, <16 x i8> } %ld2
1021}
1022
1023declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr)
1024
1025
; Loads two <8 x i8> vectors from %A with the ld2 intrinsic and stores
; %A + 16 bytes to %ptr. SelectionDAG is expected to fold the pointer update
; into ld2.8b as an immediate post-index; GlobalISel is expected to keep a
; separate add.
1026define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2(ptr %A, ptr %ptr) {
1027; CHECK-SD-LABEL: test_v8i8_post_imm_ld2:
1028; CHECK-SD:       ; %bb.0:
1029; CHECK-SD-NEXT:    ld2.8b { v0, v1 }, [x0], #16
1030; CHECK-SD-NEXT:    str x0, [x1]
1031; CHECK-SD-NEXT:    ret
1032;
1033; CHECK-GI-LABEL: test_v8i8_post_imm_ld2:
1034; CHECK-GI:       ; %bb.0:
1035; CHECK-GI-NEXT:    ld2.8b { v0, v1 }, [x0]
1036; CHECK-GI-NEXT:    add x8, x0, #16
1037; CHECK-GI-NEXT:    str x8, [x1]
1038; CHECK-GI-NEXT:    ret
1039  %ld2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr %A)
1040  %tmp = getelementptr i8, ptr %A, i32 16
1041  store ptr %tmp, ptr %ptr
1042  ret { <8 x i8>, <8 x i8> } %ld2
1043}
1044
; Loads two <8 x i8> vectors from %A with the ld2 intrinsic and stores
; %A + %inc bytes to %ptr. SelectionDAG is expected to use %inc (x2) directly as
; the register post-index of ld2.8b; GlobalISel is expected to keep a separate
; add.
1045define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2(ptr %A, ptr %ptr, i64 %inc) {
1046; CHECK-SD-LABEL: test_v8i8_post_reg_ld2:
1047; CHECK-SD:       ; %bb.0:
1048; CHECK-SD-NEXT:    ld2.8b { v0, v1 }, [x0], x2
1049; CHECK-SD-NEXT:    str x0, [x1]
1050; CHECK-SD-NEXT:    ret
1051;
1052; CHECK-GI-LABEL: test_v8i8_post_reg_ld2:
1053; CHECK-GI:       ; %bb.0:
1054; CHECK-GI-NEXT:    ld2.8b { v0, v1 }, [x0]
1055; CHECK-GI-NEXT:    add x8, x0, x2
1056; CHECK-GI-NEXT:    str x8, [x1]
1057; CHECK-GI-NEXT:    ret
1058  %ld2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr %A)
1059  %tmp = getelementptr i8, ptr %A, i64 %inc
1060  store ptr %tmp, ptr %ptr
1061  ret { <8 x i8>, <8 x i8> } %ld2
1062}
1063
1064declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr)
1065
1066
; Loads two <8 x i16> vectors from %A with the ld2 intrinsic and stores %A
; advanced by 16 i16 elements (32 bytes) to %ptr. SelectionDAG is expected to
; fold the pointer update into ld2.8h as an immediate post-index; GlobalISel is
; expected to keep a separate add.
1067define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2(ptr %A, ptr %ptr) {
1068; CHECK-SD-LABEL: test_v8i16_post_imm_ld2:
1069; CHECK-SD:       ; %bb.0:
1070; CHECK-SD-NEXT:    ld2.8h { v0, v1 }, [x0], #32
1071; CHECK-SD-NEXT:    str x0, [x1]
1072; CHECK-SD-NEXT:    ret
1073;
1074; CHECK-GI-LABEL: test_v8i16_post_imm_ld2:
1075; CHECK-GI:       ; %bb.0:
1076; CHECK-GI-NEXT:    ld2.8h { v0, v1 }, [x0]
1077; CHECK-GI-NEXT:    add x8, x0, #32
1078; CHECK-GI-NEXT:    str x8, [x1]
1079; CHECK-GI-NEXT:    ret
1080  %ld2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0(ptr %A)
1081  %tmp = getelementptr i16, ptr %A, i32 16
1082  store ptr %tmp, ptr %ptr
1083  ret { <8 x i16>, <8 x i16> } %ld2
1084}
1085
; Loads two <8 x i16> vectors from %A with the ld2 intrinsic and stores %A
; advanced by %inc i16 elements to %ptr. SelectionDAG is expected to scale %inc
; with lsl #1 and use it as the register post-index of ld2.8h; GlobalISel is
; expected to keep a separate add with a shifted operand.
1086define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2(ptr %A, ptr %ptr, i64 %inc) {
1087; CHECK-SD-LABEL: test_v8i16_post_reg_ld2:
1088; CHECK-SD:       ; %bb.0:
1089; CHECK-SD-NEXT:    lsl x8, x2, #1
1090; CHECK-SD-NEXT:    ld2.8h { v0, v1 }, [x0], x8
1091; CHECK-SD-NEXT:    str x0, [x1]
1092; CHECK-SD-NEXT:    ret
1093;
1094; CHECK-GI-LABEL: test_v8i16_post_reg_ld2:
1095; CHECK-GI:       ; %bb.0:
1096; CHECK-GI-NEXT:    ld2.8h { v0, v1 }, [x0]
1097; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
1098; CHECK-GI-NEXT:    str x8, [x1]
1099; CHECK-GI-NEXT:    ret
1100  %ld2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0(ptr %A)
1101  %tmp = getelementptr i16, ptr %A, i64 %inc
1102  store ptr %tmp, ptr %ptr
1103  ret { <8 x i16>, <8 x i16> } %ld2
1104}
1105
1106declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0(ptr)
1107
1108
; Loads two <4 x i16> vectors from %A with the ld2 intrinsic and stores %A
; advanced by 8 i16 elements (16 bytes) to %ptr. SelectionDAG is expected to
; fold the pointer update into ld2.4h as an immediate post-index; GlobalISel is
; expected to keep a separate add.
1109define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2(ptr %A, ptr %ptr) {
1110; CHECK-SD-LABEL: test_v4i16_post_imm_ld2:
1111; CHECK-SD:       ; %bb.0:
1112; CHECK-SD-NEXT:    ld2.4h { v0, v1 }, [x0], #16
1113; CHECK-SD-NEXT:    str x0, [x1]
1114; CHECK-SD-NEXT:    ret
1115;
1116; CHECK-GI-LABEL: test_v4i16_post_imm_ld2:
1117; CHECK-GI:       ; %bb.0:
1118; CHECK-GI-NEXT:    ld2.4h { v0, v1 }, [x0]
1119; CHECK-GI-NEXT:    add x8, x0, #16
1120; CHECK-GI-NEXT:    str x8, [x1]
1121; CHECK-GI-NEXT:    ret
1122  %ld2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0(ptr %A)
1123  %tmp = getelementptr i16, ptr %A, i32 8
1124  store ptr %tmp, ptr %ptr
1125  ret { <4 x i16>, <4 x i16> } %ld2
1126}
1127
; Loads two <4 x i16> vectors from %A with the ld2 intrinsic and stores %A
; advanced by %inc i16 elements to %ptr. SelectionDAG is expected to scale %inc
; with lsl #1 and use it as the register post-index of ld2.4h; GlobalISel is
; expected to keep a separate add with a shifted operand.
1128define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2(ptr %A, ptr %ptr, i64 %inc) {
1129; CHECK-SD-LABEL: test_v4i16_post_reg_ld2:
1130; CHECK-SD:       ; %bb.0:
1131; CHECK-SD-NEXT:    lsl x8, x2, #1
1132; CHECK-SD-NEXT:    ld2.4h { v0, v1 }, [x0], x8
1133; CHECK-SD-NEXT:    str x0, [x1]
1134; CHECK-SD-NEXT:    ret
1135;
1136; CHECK-GI-LABEL: test_v4i16_post_reg_ld2:
1137; CHECK-GI:       ; %bb.0:
1138; CHECK-GI-NEXT:    ld2.4h { v0, v1 }, [x0]
1139; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
1140; CHECK-GI-NEXT:    str x8, [x1]
1141; CHECK-GI-NEXT:    ret
1142  %ld2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0(ptr %A)
1143  %tmp = getelementptr i16, ptr %A, i64 %inc
1144  store ptr %tmp, ptr %ptr
1145  ret { <4 x i16>, <4 x i16> } %ld2
1146}
1147
1148declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0(ptr)
1149
1150
; Loads two <4 x i32> vectors from %A with the ld2 intrinsic and stores %A
; advanced by 8 i32 elements (32 bytes) to %ptr. SelectionDAG is expected to
; fold the pointer update into ld2.4s as an immediate post-index; GlobalISel is
; expected to keep a separate add.
1151define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2(ptr %A, ptr %ptr) {
1152; CHECK-SD-LABEL: test_v4i32_post_imm_ld2:
1153; CHECK-SD:       ; %bb.0:
1154; CHECK-SD-NEXT:    ld2.4s { v0, v1 }, [x0], #32
1155; CHECK-SD-NEXT:    str x0, [x1]
1156; CHECK-SD-NEXT:    ret
1157;
1158; CHECK-GI-LABEL: test_v4i32_post_imm_ld2:
1159; CHECK-GI:       ; %bb.0:
1160; CHECK-GI-NEXT:    ld2.4s { v0, v1 }, [x0]
1161; CHECK-GI-NEXT:    add x8, x0, #32
1162; CHECK-GI-NEXT:    str x8, [x1]
1163; CHECK-GI-NEXT:    ret
1164  %ld2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr %A)
1165  %tmp = getelementptr i32, ptr %A, i32 8
1166  store ptr %tmp, ptr %ptr
1167  ret { <4 x i32>, <4 x i32> } %ld2
1168}
1169
; Loads two <4 x i32> vectors from %A with the ld2 intrinsic and stores %A
; advanced by %inc i32 elements to %ptr. SelectionDAG is expected to scale %inc
; with lsl #2 and use it as the register post-index of ld2.4s; GlobalISel is
; expected to keep a separate add with a shifted operand.
1170define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2(ptr %A, ptr %ptr, i64 %inc) {
1171; CHECK-SD-LABEL: test_v4i32_post_reg_ld2:
1172; CHECK-SD:       ; %bb.0:
1173; CHECK-SD-NEXT:    lsl x8, x2, #2
1174; CHECK-SD-NEXT:    ld2.4s { v0, v1 }, [x0], x8
1175; CHECK-SD-NEXT:    str x0, [x1]
1176; CHECK-SD-NEXT:    ret
1177;
1178; CHECK-GI-LABEL: test_v4i32_post_reg_ld2:
1179; CHECK-GI:       ; %bb.0:
1180; CHECK-GI-NEXT:    ld2.4s { v0, v1 }, [x0]
1181; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
1182; CHECK-GI-NEXT:    str x8, [x1]
1183; CHECK-GI-NEXT:    ret
1184  %ld2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr %A)
1185  %tmp = getelementptr i32, ptr %A, i64 %inc
1186  store ptr %tmp, ptr %ptr
1187  ret { <4 x i32>, <4 x i32> } %ld2
1188}
1189
1190declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr)
1191
1192
; Loads two <2 x i32> vectors from %A with the ld2 intrinsic and stores %A
; advanced by 4 i32 elements (16 bytes) to %ptr. SelectionDAG is expected to
; fold the pointer update into ld2.2s as an immediate post-index; GlobalISel is
; expected to keep a separate add.
1193define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2(ptr %A, ptr %ptr) {
1194; CHECK-SD-LABEL: test_v2i32_post_imm_ld2:
1195; CHECK-SD:       ; %bb.0:
1196; CHECK-SD-NEXT:    ld2.2s { v0, v1 }, [x0], #16
1197; CHECK-SD-NEXT:    str x0, [x1]
1198; CHECK-SD-NEXT:    ret
1199;
1200; CHECK-GI-LABEL: test_v2i32_post_imm_ld2:
1201; CHECK-GI:       ; %bb.0:
1202; CHECK-GI-NEXT:    ld2.2s { v0, v1 }, [x0]
1203; CHECK-GI-NEXT:    add x8, x0, #16
1204; CHECK-GI-NEXT:    str x8, [x1]
1205; CHECK-GI-NEXT:    ret
1206  %ld2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0(ptr %A)
1207  %tmp = getelementptr i32, ptr %A, i32 4
1208  store ptr %tmp, ptr %ptr
1209  ret { <2 x i32>, <2 x i32> } %ld2
1210}
1211
; Loads two <2 x i32> vectors from %A with the ld2 intrinsic and stores %A
; advanced by %inc i32 elements to %ptr. SelectionDAG is expected to scale %inc
; with lsl #2 and use it as the register post-index of ld2.2s; GlobalISel is
; expected to keep a separate add with a shifted operand.
1212define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2(ptr %A, ptr %ptr, i64 %inc) {
1213; CHECK-SD-LABEL: test_v2i32_post_reg_ld2:
1214; CHECK-SD:       ; %bb.0:
1215; CHECK-SD-NEXT:    lsl x8, x2, #2
1216; CHECK-SD-NEXT:    ld2.2s { v0, v1 }, [x0], x8
1217; CHECK-SD-NEXT:    str x0, [x1]
1218; CHECK-SD-NEXT:    ret
1219;
1220; CHECK-GI-LABEL: test_v2i32_post_reg_ld2:
1221; CHECK-GI:       ; %bb.0:
1222; CHECK-GI-NEXT:    ld2.2s { v0, v1 }, [x0]
1223; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
1224; CHECK-GI-NEXT:    str x8, [x1]
1225; CHECK-GI-NEXT:    ret
1226  %ld2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0(ptr %A)
1227  %tmp = getelementptr i32, ptr %A, i64 %inc
1228  store ptr %tmp, ptr %ptr
1229  ret { <2 x i32>, <2 x i32> } %ld2
1230}
1231
1232declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0(ptr)
1233
1234
; Loads two <2 x i64> vectors from %A with the ld2 intrinsic and stores %A
; advanced by 4 i64 elements (32 bytes) to %ptr. SelectionDAG is expected to
; fold the pointer update into ld2.2d as an immediate post-index; GlobalISel is
; expected to keep a separate add.
1235define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2(ptr %A, ptr %ptr) {
1236; CHECK-SD-LABEL: test_v2i64_post_imm_ld2:
1237; CHECK-SD:       ; %bb.0:
1238; CHECK-SD-NEXT:    ld2.2d { v0, v1 }, [x0], #32
1239; CHECK-SD-NEXT:    str x0, [x1]
1240; CHECK-SD-NEXT:    ret
1241;
1242; CHECK-GI-LABEL: test_v2i64_post_imm_ld2:
1243; CHECK-GI:       ; %bb.0:
1244; CHECK-GI-NEXT:    ld2.2d { v0, v1 }, [x0]
1245; CHECK-GI-NEXT:    add x8, x0, #32
1246; CHECK-GI-NEXT:    str x8, [x1]
1247; CHECK-GI-NEXT:    ret
1248  %ld2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0(ptr %A)
1249  %tmp = getelementptr i64, ptr %A, i32 4
1250  store ptr %tmp, ptr %ptr
1251  ret { <2 x i64>, <2 x i64> } %ld2
1252}
1253
; Loads two <2 x i64> vectors from %A with the ld2 intrinsic and stores %A
; advanced by %inc i64 elements to %ptr. SelectionDAG is expected to scale %inc
; with lsl #3 and use it as the register post-index of ld2.2d; GlobalISel is
; expected to keep a separate add with a shifted operand.
1254define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2(ptr %A, ptr %ptr, i64 %inc) {
1255; CHECK-SD-LABEL: test_v2i64_post_reg_ld2:
1256; CHECK-SD:       ; %bb.0:
1257; CHECK-SD-NEXT:    lsl x8, x2, #3
1258; CHECK-SD-NEXT:    ld2.2d { v0, v1 }, [x0], x8
1259; CHECK-SD-NEXT:    str x0, [x1]
1260; CHECK-SD-NEXT:    ret
1261;
1262; CHECK-GI-LABEL: test_v2i64_post_reg_ld2:
1263; CHECK-GI:       ; %bb.0:
1264; CHECK-GI-NEXT:    ld2.2d { v0, v1 }, [x0]
1265; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
1266; CHECK-GI-NEXT:    str x8, [x1]
1267; CHECK-GI-NEXT:    ret
1268  %ld2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0(ptr %A)
1269  %tmp = getelementptr i64, ptr %A, i64 %inc
1270  store ptr %tmp, ptr %ptr
1271  ret { <2 x i64>, <2 x i64> } %ld2
1272}
1273
1274declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0(ptr)
1275
1276
; Loads two <1 x i64> vectors from %A with the ld2 intrinsic and stores %A
; advanced by 2 i64 elements (16 bytes) to %ptr. Both selectors are expected to
; emit ld1.1d with a two-register list rather than an ld2 instruction;
; SelectionDAG folds the pointer update in as an immediate post-index, while
; GlobalISel is expected to keep a separate add.
1277define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2(ptr %A, ptr %ptr) {
1278; CHECK-SD-LABEL: test_v1i64_post_imm_ld2:
1279; CHECK-SD:       ; %bb.0:
1280; CHECK-SD-NEXT:    ld1.1d { v0, v1 }, [x0], #16
1281; CHECK-SD-NEXT:    str x0, [x1]
1282; CHECK-SD-NEXT:    ret
1283;
1284; CHECK-GI-LABEL: test_v1i64_post_imm_ld2:
1285; CHECK-GI:       ; %bb.0:
1286; CHECK-GI-NEXT:    ld1.1d { v0, v1 }, [x0]
1287; CHECK-GI-NEXT:    add x8, x0, #16
1288; CHECK-GI-NEXT:    str x8, [x1]
1289; CHECK-GI-NEXT:    ret
1290  %ld2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0(ptr %A)
1291  %tmp = getelementptr i64, ptr %A, i32 2
1292  store ptr %tmp, ptr %ptr
1293  ret { <1 x i64>, <1 x i64> } %ld2
1294}
1295
; Loads two <1 x i64> vectors from %A with the ld2 intrinsic and stores %A
; advanced by %inc i64 elements to %ptr. Both selectors are expected to emit
; ld1.1d with a two-register list rather than an ld2 instruction; SelectionDAG
; scales %inc with lsl #3 and uses it as the register post-index, while
; GlobalISel is expected to keep a separate add with a shifted operand.
1296define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2(ptr %A, ptr %ptr, i64 %inc) {
1297; CHECK-SD-LABEL: test_v1i64_post_reg_ld2:
1298; CHECK-SD:       ; %bb.0:
1299; CHECK-SD-NEXT:    lsl x8, x2, #3
1300; CHECK-SD-NEXT:    ld1.1d { v0, v1 }, [x0], x8
1301; CHECK-SD-NEXT:    str x0, [x1]
1302; CHECK-SD-NEXT:    ret
1303;
1304; CHECK-GI-LABEL: test_v1i64_post_reg_ld2:
1305; CHECK-GI:       ; %bb.0:
1306; CHECK-GI-NEXT:    ld1.1d { v0, v1 }, [x0]
1307; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
1308; CHECK-GI-NEXT:    str x8, [x1]
1309; CHECK-GI-NEXT:    ret
1310  %ld2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0(ptr %A)
1311  %tmp = getelementptr i64, ptr %A, i64 %inc
1312  store ptr %tmp, ptr %ptr
1313  ret { <1 x i64>, <1 x i64> } %ld2
1314}
1315
1316declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0(ptr)
1317
1318
; Loads two <4 x float> vectors from %A with the ld2 intrinsic and stores %A
; advanced by 8 float elements (32 bytes) to %ptr. SelectionDAG is expected to
; fold the pointer update into ld2.4s as an immediate post-index; GlobalISel is
; expected to keep a separate add.
1319define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2(ptr %A, ptr %ptr) {
1320; CHECK-SD-LABEL: test_v4f32_post_imm_ld2:
1321; CHECK-SD:       ; %bb.0:
1322; CHECK-SD-NEXT:    ld2.4s { v0, v1 }, [x0], #32
1323; CHECK-SD-NEXT:    str x0, [x1]
1324; CHECK-SD-NEXT:    ret
1325;
1326; CHECK-GI-LABEL: test_v4f32_post_imm_ld2:
1327; CHECK-GI:       ; %bb.0:
1328; CHECK-GI-NEXT:    ld2.4s { v0, v1 }, [x0]
1329; CHECK-GI-NEXT:    add x8, x0, #32
1330; CHECK-GI-NEXT:    str x8, [x1]
1331; CHECK-GI-NEXT:    ret
1332  %ld2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr %A)
1333  %tmp = getelementptr float, ptr %A, i32 8
1334  store ptr %tmp, ptr %ptr
1335  ret { <4 x float>, <4 x float> } %ld2
1336}
1337
; Loads two <4 x float> vectors from %A with the ld2 intrinsic and stores %A
; advanced by %inc float elements to %ptr. SelectionDAG is expected to scale
; %inc with lsl #2 and use it as the register post-index of ld2.4s; GlobalISel
; is expected to keep a separate add with a shifted operand.
1338define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2(ptr %A, ptr %ptr, i64 %inc) {
1339; CHECK-SD-LABEL: test_v4f32_post_reg_ld2:
1340; CHECK-SD:       ; %bb.0:
1341; CHECK-SD-NEXT:    lsl x8, x2, #2
1342; CHECK-SD-NEXT:    ld2.4s { v0, v1 }, [x0], x8
1343; CHECK-SD-NEXT:    str x0, [x1]
1344; CHECK-SD-NEXT:    ret
1345;
1346; CHECK-GI-LABEL: test_v4f32_post_reg_ld2:
1347; CHECK-GI:       ; %bb.0:
1348; CHECK-GI-NEXT:    ld2.4s { v0, v1 }, [x0]
1349; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
1350; CHECK-GI-NEXT:    str x8, [x1]
1351; CHECK-GI-NEXT:    ret
1352  %ld2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr %A)
1353  %tmp = getelementptr float, ptr %A, i64 %inc
1354  store ptr %tmp, ptr %ptr
1355  ret { <4 x float>, <4 x float> } %ld2
1356}
1357
1358declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr)
1359
1360
; Loads two <2 x float> vectors from %A with the ld2 intrinsic and stores %A
; advanced by 4 float elements (16 bytes) to %ptr. SelectionDAG is expected to
; fold the pointer update into ld2.2s as an immediate post-index; GlobalISel is
; expected to keep a separate add.
1361define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2(ptr %A, ptr %ptr) {
1362; CHECK-SD-LABEL: test_v2f32_post_imm_ld2:
1363; CHECK-SD:       ; %bb.0:
1364; CHECK-SD-NEXT:    ld2.2s { v0, v1 }, [x0], #16
1365; CHECK-SD-NEXT:    str x0, [x1]
1366; CHECK-SD-NEXT:    ret
1367;
1368; CHECK-GI-LABEL: test_v2f32_post_imm_ld2:
1369; CHECK-GI:       ; %bb.0:
1370; CHECK-GI-NEXT:    ld2.2s { v0, v1 }, [x0]
1371; CHECK-GI-NEXT:    add x8, x0, #16
1372; CHECK-GI-NEXT:    str x8, [x1]
1373; CHECK-GI-NEXT:    ret
1374  %ld2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0(ptr %A)
1375  %tmp = getelementptr float, ptr %A, i32 4
1376  store ptr %tmp, ptr %ptr
1377  ret { <2 x float>, <2 x float> } %ld2
1378}
1379
; Loads two <2 x float> vectors from %A with the ld2 intrinsic and stores %A
; advanced by %inc float elements to %ptr. SelectionDAG is expected to scale
; %inc with lsl #2 and use it as the register post-index of ld2.2s; GlobalISel
; is expected to keep a separate add with a shifted operand.
1380define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2(ptr %A, ptr %ptr, i64 %inc) {
1381; CHECK-SD-LABEL: test_v2f32_post_reg_ld2:
1382; CHECK-SD:       ; %bb.0:
1383; CHECK-SD-NEXT:    lsl x8, x2, #2
1384; CHECK-SD-NEXT:    ld2.2s { v0, v1 }, [x0], x8
1385; CHECK-SD-NEXT:    str x0, [x1]
1386; CHECK-SD-NEXT:    ret
1387;
1388; CHECK-GI-LABEL: test_v2f32_post_reg_ld2:
1389; CHECK-GI:       ; %bb.0:
1390; CHECK-GI-NEXT:    ld2.2s { v0, v1 }, [x0]
1391; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
1392; CHECK-GI-NEXT:    str x8, [x1]
1393; CHECK-GI-NEXT:    ret
1394  %ld2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0(ptr %A)
1395  %tmp = getelementptr float, ptr %A, i64 %inc
1396  store ptr %tmp, ptr %ptr
1397  ret { <2 x float>, <2 x float> } %ld2
1398}
1399
1400declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0(ptr)
1401
1402
; Loads two <2 x double> vectors from %A with the ld2 intrinsic and stores %A
; advanced by 4 double elements (32 bytes) to %ptr. SelectionDAG is expected to
; fold the pointer update into ld2.2d as an immediate post-index; GlobalISel is
; expected to keep a separate add.
1403define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2(ptr %A, ptr %ptr) {
1404; CHECK-SD-LABEL: test_v2f64_post_imm_ld2:
1405; CHECK-SD:       ; %bb.0:
1406; CHECK-SD-NEXT:    ld2.2d { v0, v1 }, [x0], #32
1407; CHECK-SD-NEXT:    str x0, [x1]
1408; CHECK-SD-NEXT:    ret
1409;
1410; CHECK-GI-LABEL: test_v2f64_post_imm_ld2:
1411; CHECK-GI:       ; %bb.0:
1412; CHECK-GI-NEXT:    ld2.2d { v0, v1 }, [x0]
1413; CHECK-GI-NEXT:    add x8, x0, #32
1414; CHECK-GI-NEXT:    str x8, [x1]
1415; CHECK-GI-NEXT:    ret
1416  %ld2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0(ptr %A)
1417  %tmp = getelementptr double, ptr %A, i32 4
1418  store ptr %tmp, ptr %ptr
1419  ret { <2 x double>, <2 x double> } %ld2
1420}
1421
; Loads two <2 x double> vectors from %A with the ld2 intrinsic and stores %A
; advanced by %inc double elements to %ptr. SelectionDAG is expected to scale
; %inc with lsl #3 and use it as the register post-index of ld2.2d; GlobalISel
; is expected to keep a separate add with a shifted operand.
1422define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2(ptr %A, ptr %ptr, i64 %inc) {
1423; CHECK-SD-LABEL: test_v2f64_post_reg_ld2:
1424; CHECK-SD:       ; %bb.0:
1425; CHECK-SD-NEXT:    lsl x8, x2, #3
1426; CHECK-SD-NEXT:    ld2.2d { v0, v1 }, [x0], x8
1427; CHECK-SD-NEXT:    str x0, [x1]
1428; CHECK-SD-NEXT:    ret
1429;
1430; CHECK-GI-LABEL: test_v2f64_post_reg_ld2:
1431; CHECK-GI:       ; %bb.0:
1432; CHECK-GI-NEXT:    ld2.2d { v0, v1 }, [x0]
1433; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
1434; CHECK-GI-NEXT:    str x8, [x1]
1435; CHECK-GI-NEXT:    ret
1436  %ld2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0(ptr %A)
1437  %tmp = getelementptr double, ptr %A, i64 %inc
1438  store ptr %tmp, ptr %ptr
1439  ret { <2 x double>, <2 x double> } %ld2
1440}
1441
1442declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0(ptr)
1443
1444
; Loads two <1 x double> vectors from %A with the ld2 intrinsic and stores %A
; advanced by 2 double elements (16 bytes) to %ptr. Both selectors are expected
; to emit ld1.1d with a two-register list rather than an ld2 instruction;
; SelectionDAG folds the pointer update in as an immediate post-index, while
; GlobalISel is expected to keep a separate add.
1445define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2(ptr %A, ptr %ptr) {
1446; CHECK-SD-LABEL: test_v1f64_post_imm_ld2:
1447; CHECK-SD:       ; %bb.0:
1448; CHECK-SD-NEXT:    ld1.1d { v0, v1 }, [x0], #16
1449; CHECK-SD-NEXT:    str x0, [x1]
1450; CHECK-SD-NEXT:    ret
1451;
1452; CHECK-GI-LABEL: test_v1f64_post_imm_ld2:
1453; CHECK-GI:       ; %bb.0:
1454; CHECK-GI-NEXT:    ld1.1d { v0, v1 }, [x0]
1455; CHECK-GI-NEXT:    add x8, x0, #16
1456; CHECK-GI-NEXT:    str x8, [x1]
1457; CHECK-GI-NEXT:    ret
1458  %ld2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0(ptr %A)
1459  %tmp = getelementptr double, ptr %A, i32 2
1460  store ptr %tmp, ptr %ptr
1461  ret { <1 x double>, <1 x double> } %ld2
1462}
1463
; Loads two <1 x double> vectors from %A with the ld2 intrinsic and stores %A
; advanced by %inc double elements to %ptr. Both selectors are expected to emit
; ld1.1d with a two-register list rather than an ld2 instruction; SelectionDAG
; scales %inc with lsl #3 and uses it as the register post-index, while
; GlobalISel is expected to keep a separate add with a shifted operand.
1464define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2(ptr %A, ptr %ptr, i64 %inc) {
1465; CHECK-SD-LABEL: test_v1f64_post_reg_ld2:
1466; CHECK-SD:       ; %bb.0:
1467; CHECK-SD-NEXT:    lsl x8, x2, #3
1468; CHECK-SD-NEXT:    ld1.1d { v0, v1 }, [x0], x8
1469; CHECK-SD-NEXT:    str x0, [x1]
1470; CHECK-SD-NEXT:    ret
1471;
1472; CHECK-GI-LABEL: test_v1f64_post_reg_ld2:
1473; CHECK-GI:       ; %bb.0:
1474; CHECK-GI-NEXT:    ld1.1d { v0, v1 }, [x0]
1475; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
1476; CHECK-GI-NEXT:    str x8, [x1]
1477; CHECK-GI-NEXT:    ret
1478  %ld2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0(ptr %A)
1479  %tmp = getelementptr double, ptr %A, i64 %inc
1480  store ptr %tmp, ptr %ptr
1481  ret { <1 x double>, <1 x double> } %ld2
1482}
1483
1484declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0(ptr)
1485
1486
; Loads three <16 x i8> vectors from %A with the ld3 intrinsic and stores
; %A + 48 bytes to %ptr. SelectionDAG is expected to fold the pointer update
; into ld3.16b as an immediate post-index; GlobalISel is expected to keep a
; separate add.
1487define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3(ptr %A, ptr %ptr) {
1488; CHECK-SD-LABEL: test_v16i8_post_imm_ld3:
1489; CHECK-SD:       ; %bb.0:
1490; CHECK-SD-NEXT:    ld3.16b { v0, v1, v2 }, [x0], #48
1491; CHECK-SD-NEXT:    str x0, [x1]
1492; CHECK-SD-NEXT:    ret
1493;
1494; CHECK-GI-LABEL: test_v16i8_post_imm_ld3:
1495; CHECK-GI:       ; %bb.0:
1496; CHECK-GI-NEXT:    ld3.16b { v0, v1, v2 }, [x0]
1497; CHECK-GI-NEXT:    add x8, x0, #48
1498; CHECK-GI-NEXT:    str x8, [x1]
1499; CHECK-GI-NEXT:    ret
1500  %ld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr %A)
1501  %tmp = getelementptr i8, ptr %A, i32 48
1502  store ptr %tmp, ptr %ptr
1503  ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
1504}
1505
; Loads three <16 x i8> vectors from %A with the ld3 intrinsic and stores
; %A + %inc bytes to %ptr. SelectionDAG is expected to use %inc (x2) directly as
; the register post-index of ld3.16b; GlobalISel is expected to keep a separate
; add.
1506define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3(ptr %A, ptr %ptr, i64 %inc) {
1507; CHECK-SD-LABEL: test_v16i8_post_reg_ld3:
1508; CHECK-SD:       ; %bb.0:
1509; CHECK-SD-NEXT:    ld3.16b { v0, v1, v2 }, [x0], x2
1510; CHECK-SD-NEXT:    str x0, [x1]
1511; CHECK-SD-NEXT:    ret
1512;
1513; CHECK-GI-LABEL: test_v16i8_post_reg_ld3:
1514; CHECK-GI:       ; %bb.0:
1515; CHECK-GI-NEXT:    ld3.16b { v0, v1, v2 }, [x0]
1516; CHECK-GI-NEXT:    add x8, x0, x2
1517; CHECK-GI-NEXT:    str x8, [x1]
1518; CHECK-GI-NEXT:    ret
1519  %ld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr %A)
1520  %tmp = getelementptr i8, ptr %A, i64 %inc
1521  store ptr %tmp, ptr %ptr
1522  ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
1523}
1524
1525declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr)
1526
1527
; Loads three <8 x i8> vectors from %A with the ld3 intrinsic and stores
; %A + 24 bytes to %ptr. SelectionDAG is expected to fold the pointer update
; into ld3.8b as an immediate post-index; GlobalISel is expected to keep a
; separate add.
1528define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3(ptr %A, ptr %ptr) {
1529; CHECK-SD-LABEL: test_v8i8_post_imm_ld3:
1530; CHECK-SD:       ; %bb.0:
1531; CHECK-SD-NEXT:    ld3.8b { v0, v1, v2 }, [x0], #24
1532; CHECK-SD-NEXT:    str x0, [x1]
1533; CHECK-SD-NEXT:    ret
1534;
1535; CHECK-GI-LABEL: test_v8i8_post_imm_ld3:
1536; CHECK-GI:       ; %bb.0:
1537; CHECK-GI-NEXT:    ld3.8b { v0, v1, v2 }, [x0]
1538; CHECK-GI-NEXT:    add x8, x0, #24
1539; CHECK-GI-NEXT:    str x8, [x1]
1540; CHECK-GI-NEXT:    ret
1541  %ld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr %A)
1542  %tmp = getelementptr i8, ptr %A, i32 24
1543  store ptr %tmp, ptr %ptr
1544  ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
1545}
1546
; Loads three <8 x i8> vectors from %A with the ld3 intrinsic and stores
; %A + %inc bytes to %ptr. SelectionDAG is expected to use %inc (x2) directly as
; the register post-index of ld3.8b; GlobalISel is expected to keep a separate
; add.
1547define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3(ptr %A, ptr %ptr, i64 %inc) {
1548; CHECK-SD-LABEL: test_v8i8_post_reg_ld3:
1549; CHECK-SD:       ; %bb.0:
1550; CHECK-SD-NEXT:    ld3.8b { v0, v1, v2 }, [x0], x2
1551; CHECK-SD-NEXT:    str x0, [x1]
1552; CHECK-SD-NEXT:    ret
1553;
1554; CHECK-GI-LABEL: test_v8i8_post_reg_ld3:
1555; CHECK-GI:       ; %bb.0:
1556; CHECK-GI-NEXT:    ld3.8b { v0, v1, v2 }, [x0]
1557; CHECK-GI-NEXT:    add x8, x0, x2
1558; CHECK-GI-NEXT:    str x8, [x1]
1559; CHECK-GI-NEXT:    ret
1560  %ld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr %A)
1561  %tmp = getelementptr i8, ptr %A, i64 %inc
1562  store ptr %tmp, ptr %ptr
1563  ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
1564}
1565
1566declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr)
1567
1568
; Loads three <8 x i16> vectors from %A with the ld3 intrinsic and stores %A
; advanced by 24 i16 elements (48 bytes) to %ptr. SelectionDAG is expected to
; fold the pointer update into ld3.8h as an immediate post-index; GlobalISel is
; expected to keep a separate add.
1569define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3(ptr %A, ptr %ptr) {
1570; CHECK-SD-LABEL: test_v8i16_post_imm_ld3:
1571; CHECK-SD:       ; %bb.0:
1572; CHECK-SD-NEXT:    ld3.8h { v0, v1, v2 }, [x0], #48
1573; CHECK-SD-NEXT:    str x0, [x1]
1574; CHECK-SD-NEXT:    ret
1575;
1576; CHECK-GI-LABEL: test_v8i16_post_imm_ld3:
1577; CHECK-GI:       ; %bb.0:
1578; CHECK-GI-NEXT:    ld3.8h { v0, v1, v2 }, [x0]
1579; CHECK-GI-NEXT:    add x8, x0, #48
1580; CHECK-GI-NEXT:    str x8, [x1]
1581; CHECK-GI-NEXT:    ret
1582  %ld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0(ptr %A)
1583  %tmp = getelementptr i16, ptr %A, i32 24
1584  store ptr %tmp, ptr %ptr
1585  ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
1586}
1587
; Loads three <8 x i16> vectors from %A with the ld3 intrinsic and stores %A
; advanced by %inc i16 elements to %ptr. SelectionDAG is expected to scale %inc
; with lsl #1 and use it as the register post-index of ld3.8h; GlobalISel is
; expected to keep a separate add with a shifted operand.
1588define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3(ptr %A, ptr %ptr, i64 %inc) {
1589; CHECK-SD-LABEL: test_v8i16_post_reg_ld3:
1590; CHECK-SD:       ; %bb.0:
1591; CHECK-SD-NEXT:    lsl x8, x2, #1
1592; CHECK-SD-NEXT:    ld3.8h { v0, v1, v2 }, [x0], x8
1593; CHECK-SD-NEXT:    str x0, [x1]
1594; CHECK-SD-NEXT:    ret
1595;
1596; CHECK-GI-LABEL: test_v8i16_post_reg_ld3:
1597; CHECK-GI:       ; %bb.0:
1598; CHECK-GI-NEXT:    ld3.8h { v0, v1, v2 }, [x0]
1599; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
1600; CHECK-GI-NEXT:    str x8, [x1]
1601; CHECK-GI-NEXT:    ret
1602  %ld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0(ptr %A)
1603  %tmp = getelementptr i16, ptr %A, i64 %inc
1604  store ptr %tmp, ptr %ptr
1605  ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
1606}
1607
1608declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0(ptr)
1609
1610
; Loads three <4 x i16> vectors from %A with the ld3 intrinsic and stores %A
; advanced by 12 i16 elements (24 bytes) to %ptr. SelectionDAG is expected to
; fold the pointer update into ld3.4h as an immediate post-index; GlobalISel is
; expected to keep a separate add.
1611define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3(ptr %A, ptr %ptr) {
1612; CHECK-SD-LABEL: test_v4i16_post_imm_ld3:
1613; CHECK-SD:       ; %bb.0:
1614; CHECK-SD-NEXT:    ld3.4h { v0, v1, v2 }, [x0], #24
1615; CHECK-SD-NEXT:    str x0, [x1]
1616; CHECK-SD-NEXT:    ret
1617;
1618; CHECK-GI-LABEL: test_v4i16_post_imm_ld3:
1619; CHECK-GI:       ; %bb.0:
1620; CHECK-GI-NEXT:    ld3.4h { v0, v1, v2 }, [x0]
1621; CHECK-GI-NEXT:    add x8, x0, #24
1622; CHECK-GI-NEXT:    str x8, [x1]
1623; CHECK-GI-NEXT:    ret
1624  %ld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0(ptr %A)
1625  %tmp = getelementptr i16, ptr %A, i32 12
1626  store ptr %tmp, ptr %ptr
1627  ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
1628}
1629
; Loads three <4 x i16> vectors from %A with the ld3 intrinsic and stores %A
; advanced by %inc i16 elements to %ptr. SelectionDAG is expected to scale %inc
; with lsl #1 and use it as the register post-index of ld3.4h; GlobalISel is
; expected to keep a separate add with a shifted operand.
1630define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3(ptr %A, ptr %ptr, i64 %inc) {
1631; CHECK-SD-LABEL: test_v4i16_post_reg_ld3:
1632; CHECK-SD:       ; %bb.0:
1633; CHECK-SD-NEXT:    lsl x8, x2, #1
1634; CHECK-SD-NEXT:    ld3.4h { v0, v1, v2 }, [x0], x8
1635; CHECK-SD-NEXT:    str x0, [x1]
1636; CHECK-SD-NEXT:    ret
1637;
1638; CHECK-GI-LABEL: test_v4i16_post_reg_ld3:
1639; CHECK-GI:       ; %bb.0:
1640; CHECK-GI-NEXT:    ld3.4h { v0, v1, v2 }, [x0]
1641; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
1642; CHECK-GI-NEXT:    str x8, [x1]
1643; CHECK-GI-NEXT:    ret
1644  %ld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0(ptr %A)
1645  %tmp = getelementptr i16, ptr %A, i64 %inc
1646  store ptr %tmp, ptr %ptr
1647  ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
1648}
1649
1650declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0(ptr)
1651
1652
; Loads three <4 x i32> vectors from %A with the ld3 intrinsic and stores %A
; advanced by 12 i32 elements (48 bytes) to %ptr. SelectionDAG is expected to
; fold the pointer update into ld3.4s as an immediate post-index; GlobalISel is
; expected to keep a separate add.
1653define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3(ptr %A, ptr %ptr) {
1654; CHECK-SD-LABEL: test_v4i32_post_imm_ld3:
1655; CHECK-SD:       ; %bb.0:
1656; CHECK-SD-NEXT:    ld3.4s { v0, v1, v2 }, [x0], #48
1657; CHECK-SD-NEXT:    str x0, [x1]
1658; CHECK-SD-NEXT:    ret
1659;
1660; CHECK-GI-LABEL: test_v4i32_post_imm_ld3:
1661; CHECK-GI:       ; %bb.0:
1662; CHECK-GI-NEXT:    ld3.4s { v0, v1, v2 }, [x0]
1663; CHECK-GI-NEXT:    add x8, x0, #48
1664; CHECK-GI-NEXT:    str x8, [x1]
1665; CHECK-GI-NEXT:    ret
1666  %ld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr %A)
1667  %tmp = getelementptr i32, ptr %A, i32 12
1668  store ptr %tmp, ptr %ptr
1669  ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
1670}
1671
; Loads three <4 x i32> vectors from %A with the ld3 intrinsic and stores %A
; advanced by %inc i32 elements to %ptr. SelectionDAG is expected to scale %inc
; with lsl #2 and use it as the register post-index of ld3.4s; GlobalISel is
; expected to keep a separate add with a shifted operand.
1672define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3(ptr %A, ptr %ptr, i64 %inc) {
1673; CHECK-SD-LABEL: test_v4i32_post_reg_ld3:
1674; CHECK-SD:       ; %bb.0:
1675; CHECK-SD-NEXT:    lsl x8, x2, #2
1676; CHECK-SD-NEXT:    ld3.4s { v0, v1, v2 }, [x0], x8
1677; CHECK-SD-NEXT:    str x0, [x1]
1678; CHECK-SD-NEXT:    ret
1679;
1680; CHECK-GI-LABEL: test_v4i32_post_reg_ld3:
1681; CHECK-GI:       ; %bb.0:
1682; CHECK-GI-NEXT:    ld3.4s { v0, v1, v2 }, [x0]
1683; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
1684; CHECK-GI-NEXT:    str x8, [x1]
1685; CHECK-GI-NEXT:    ret
1686  %ld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr %A)
1687  %tmp = getelementptr i32, ptr %A, i64 %inc
1688  store ptr %tmp, ptr %ptr
1689  ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
1690}
1691
1692declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr)
1693
1694
; Loads three <2 x i32> vectors from %A with the ld3 intrinsic and stores %A
; advanced by 6 i32 elements (24 bytes) to %ptr. SelectionDAG is expected to
; fold the pointer update into ld3.2s as an immediate post-index; GlobalISel is
; expected to keep a separate add.
1695define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3(ptr %A, ptr %ptr) {
1696; CHECK-SD-LABEL: test_v2i32_post_imm_ld3:
1697; CHECK-SD:       ; %bb.0:
1698; CHECK-SD-NEXT:    ld3.2s { v0, v1, v2 }, [x0], #24
1699; CHECK-SD-NEXT:    str x0, [x1]
1700; CHECK-SD-NEXT:    ret
1701;
1702; CHECK-GI-LABEL: test_v2i32_post_imm_ld3:
1703; CHECK-GI:       ; %bb.0:
1704; CHECK-GI-NEXT:    ld3.2s { v0, v1, v2 }, [x0]
1705; CHECK-GI-NEXT:    add x8, x0, #24
1706; CHECK-GI-NEXT:    str x8, [x1]
1707; CHECK-GI-NEXT:    ret
1708  %ld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0(ptr %A)
1709  %tmp = getelementptr i32, ptr %A, i32 6
1710  store ptr %tmp, ptr %ptr
1711  ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
1712}
1713
; Loads three <2 x i32> vectors from %A with the ld3 intrinsic and stores %A
; advanced by %inc i32 elements to %ptr. SelectionDAG is expected to scale %inc
; with lsl #2 and use it as the register post-index of ld3.2s; GlobalISel is
; expected to keep a separate add with a shifted operand.
1714define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3(ptr %A, ptr %ptr, i64 %inc) {
1715; CHECK-SD-LABEL: test_v2i32_post_reg_ld3:
1716; CHECK-SD:       ; %bb.0:
1717; CHECK-SD-NEXT:    lsl x8, x2, #2
1718; CHECK-SD-NEXT:    ld3.2s { v0, v1, v2 }, [x0], x8
1719; CHECK-SD-NEXT:    str x0, [x1]
1720; CHECK-SD-NEXT:    ret
1721;
1722; CHECK-GI-LABEL: test_v2i32_post_reg_ld3:
1723; CHECK-GI:       ; %bb.0:
1724; CHECK-GI-NEXT:    ld3.2s { v0, v1, v2 }, [x0]
1725; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
1726; CHECK-GI-NEXT:    str x8, [x1]
1727; CHECK-GI-NEXT:    ret
1728  %ld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0(ptr %A)
1729  %tmp = getelementptr i32, ptr %A, i64 %inc
1730  store ptr %tmp, ptr %ptr
1731  ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
1732}
1733
1734declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0(ptr)
1735
1736
; Loads three <2 x i64> vectors from %A with the ld3 intrinsic and stores %A
; advanced by 6 i64 elements (48 bytes) to %ptr. SelectionDAG is expected to
; fold the pointer update into ld3.2d as an immediate post-index; GlobalISel is
; expected to keep a separate add.
1737define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3(ptr %A, ptr %ptr) {
1738; CHECK-SD-LABEL: test_v2i64_post_imm_ld3:
1739; CHECK-SD:       ; %bb.0:
1740; CHECK-SD-NEXT:    ld3.2d { v0, v1, v2 }, [x0], #48
1741; CHECK-SD-NEXT:    str x0, [x1]
1742; CHECK-SD-NEXT:    ret
1743;
1744; CHECK-GI-LABEL: test_v2i64_post_imm_ld3:
1745; CHECK-GI:       ; %bb.0:
1746; CHECK-GI-NEXT:    ld3.2d { v0, v1, v2 }, [x0]
1747; CHECK-GI-NEXT:    add x8, x0, #48
1748; CHECK-GI-NEXT:    str x8, [x1]
1749; CHECK-GI-NEXT:    ret
1750  %ld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0(ptr %A)
1751  %tmp = getelementptr i64, ptr %A, i32 6
1752  store ptr %tmp, ptr %ptr
1753  ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
1754}
1755
; Loads three <2 x i64> vectors from %A with the ld3 intrinsic and stores %A
; advanced by %inc i64 elements to %ptr. SelectionDAG is expected to scale %inc
; with lsl #3 and use it as the register post-index of ld3.2d; GlobalISel is
; expected to keep a separate add with a shifted operand.
1756define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3(ptr %A, ptr %ptr, i64 %inc) {
1757; CHECK-SD-LABEL: test_v2i64_post_reg_ld3:
1758; CHECK-SD:       ; %bb.0:
1759; CHECK-SD-NEXT:    lsl x8, x2, #3
1760; CHECK-SD-NEXT:    ld3.2d { v0, v1, v2 }, [x0], x8
1761; CHECK-SD-NEXT:    str x0, [x1]
1762; CHECK-SD-NEXT:    ret
1763;
1764; CHECK-GI-LABEL: test_v2i64_post_reg_ld3:
1765; CHECK-GI:       ; %bb.0:
1766; CHECK-GI-NEXT:    ld3.2d { v0, v1, v2 }, [x0]
1767; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
1768; CHECK-GI-NEXT:    str x8, [x1]
1769; CHECK-GI-NEXT:    ret
1770  %ld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0(ptr %A)
1771  %tmp = getelementptr i64, ptr %A, i64 %inc
1772  store ptr %tmp, ptr %ptr
1773  ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
1774}
1775
; ld3 intrinsic returning three <2 x i64> vectors; called by the test_v2i64_post_*_ld3 tests above.
declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0(ptr)
1777
1778
; Immediate post-increment: loads three <1 x i64> vectors from %A and stores %A
; advanced by 3 i64 elements (24 bytes, the total size loaded) to %ptr. Both
; runs expect a three-register ld1.1d rather than an ld3; the SelectionDAG run
; expects it post-indexed with #24, the GlobalISel run expects a separate add.
define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v1i64_post_imm_ld3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.1d { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_imm_ld3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.1d { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #24
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i32 3
  store ptr %tmp, ptr %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
}
1797
; Register post-increment: loads three <1 x i64> vectors from %A and stores %A
; advanced by %inc i64 elements to %ptr. Both runs expect a three-register
; ld1.1d rather than an ld3; the SelectionDAG run expects %inc scaled to bytes
; (lsl #3) as its post-index register, the GlobalISel run a separate add.
define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v1i64_post_reg_ld3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld1.1d { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_reg_ld3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.1d { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
}
1817
; ld3 intrinsic returning three <1 x i64> vectors; called by the test_v1i64_post_*_ld3 tests above.
declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0(ptr)
1819
1820
; Immediate post-increment: loads three <4 x float> vectors from %A and stores
; %A advanced by 12 float elements (48 bytes, the total size loaded) to %ptr.
; The SelectionDAG run expects one post-indexed ld3.4s with #48; the GlobalISel
; run expects a plain ld3.4s followed by a separate add.
define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v4f32_post_imm_ld3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld3.4s { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_ld3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3.4s { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #48
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i32 12
  store ptr %tmp, ptr %ptr
  ret { <4 x float>, <4 x float>, <4 x float> } %ld3
}
1839
; Register post-increment: loads three <4 x float> vectors from %A and stores
; %A advanced by %inc float elements to %ptr. The SelectionDAG run expects %inc
; to be scaled to bytes (lsl #2) and used as the post-index register of ld3.4s;
; the GlobalISel run expects a plain ld3.4s followed by a separate add.
define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v4f32_post_reg_ld3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld3.4s { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_reg_ld3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3.4s { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x float>, <4 x float>, <4 x float> } %ld3
}
1859
; ld3 intrinsic returning three <4 x float> vectors; called by the test_v4f32_post_*_ld3 tests above.
declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0(ptr)
1861
1862
; Immediate post-increment: loads three <2 x float> vectors from %A and stores
; %A advanced by 6 float elements (24 bytes, the total size loaded) to %ptr.
; The SelectionDAG run expects one post-indexed ld3.2s with #24; the GlobalISel
; run expects a plain ld3.2s followed by a separate add.
define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v2f32_post_imm_ld3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld3.2s { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_ld3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3.2s { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #24
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i32 6
  store ptr %tmp, ptr %ptr
  ret { <2 x float>, <2 x float>, <2 x float> } %ld3
}
1881
; Register post-increment: loads three <2 x float> vectors from %A and stores
; %A advanced by %inc float elements to %ptr. The SelectionDAG run expects %inc
; to be scaled to bytes (lsl #2) and used as the post-index register of ld3.2s;
; the GlobalISel run expects a plain ld3.2s followed by a separate add.
define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v2f32_post_reg_ld3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld3.2s { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_reg_ld3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3.2s { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x float>, <2 x float>, <2 x float> } %ld3
}
1901
; ld3 intrinsic returning three <2 x float> vectors; called by the test_v2f32_post_*_ld3 tests above.
declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0(ptr)
1903
1904
; Immediate post-increment: loads three <2 x double> vectors from %A and stores
; %A advanced by 6 double elements (48 bytes, the total size loaded) to %ptr.
; The SelectionDAG run expects one post-indexed ld3.2d with #48; the GlobalISel
; run expects a plain ld3.2d followed by a separate add.
define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v2f64_post_imm_ld3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld3.2d { v0, v1, v2 }, [x0], #48
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_ld3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3.2d { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #48
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i32 6
  store ptr %tmp, ptr %ptr
  ret { <2 x double>, <2 x double>, <2 x double> } %ld3
}
1923
; Register post-increment: loads three <2 x double> vectors from %A and stores
; %A advanced by %inc double elements to %ptr. The SelectionDAG run expects
; %inc to be scaled to bytes (lsl #3) and used as the post-index register of
; ld3.2d; the GlobalISel run expects a plain ld3.2d followed by a separate add.
define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v2f64_post_reg_ld3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld3.2d { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_reg_ld3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld3.2d { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x double>, <2 x double>, <2 x double> } %ld3
}
1943
; ld3 intrinsic returning three <2 x double> vectors; called by the test_v2f64_post_*_ld3 tests above.
declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0(ptr)
1945
1946
; Immediate post-increment: loads three <1 x double> vectors from %A and stores
; %A advanced by 3 double elements (24 bytes, the total size loaded) to %ptr.
; Both runs expect a three-register ld1.1d rather than an ld3; the SelectionDAG
; run expects it post-indexed with #24, the GlobalISel run a separate add.
define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v1f64_post_imm_ld3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.1d { v0, v1, v2 }, [x0], #24
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_imm_ld3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.1d { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #24
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i32 3
  store ptr %tmp, ptr %ptr
  ret { <1 x double>, <1 x double>, <1 x double> } %ld3
}
1965
; Register post-increment: loads three <1 x double> vectors from %A and stores
; %A advanced by %inc double elements to %ptr. Both runs expect a
; three-register ld1.1d rather than an ld3; the SelectionDAG run expects %inc
; scaled to bytes (lsl #3) as its post-index register, the GlobalISel run a
; separate add.
define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v1f64_post_reg_ld3:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld1.1d { v0, v1, v2 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_reg_ld3:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.1d { v0, v1, v2 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <1 x double>, <1 x double>, <1 x double> } %ld3
}
1985
; ld3 intrinsic returning three <1 x double> vectors; called by the test_v1f64_post_*_ld3 tests above.
declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0(ptr)
1987
1988
; Immediate post-increment: loads four <16 x i8> vectors from %A and stores %A
; advanced by 64 bytes (the total size loaded) to %ptr. The SelectionDAG run
; expects one post-indexed ld4.16b with #64; the GlobalISel run expects a plain
; ld4.16b followed by a separate add.
define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v16i8_post_imm_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4.16b { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.16b { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #64
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 64
  store ptr %tmp, ptr %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
}
2007
; Register post-increment: loads four <16 x i8> vectors from %A and stores %A
; advanced by %inc bytes to %ptr. The element size is one byte, so the
; SelectionDAG run expects %inc (x2) unscaled as the post-index register of
; ld4.16b; the GlobalISel run expects a plain ld4.16b and a separate add.
define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v16i8_post_reg_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4.16b { v0, v1, v2, v3 }, [x0], x2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.16b { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
}
2026
; ld4 intrinsic returning four <16 x i8> vectors; called by the test_v16i8_post_*_ld4 tests above.
declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr)
2028
2029
; Immediate post-increment: loads four <8 x i8> vectors from %A and stores %A
; advanced by 32 bytes (the total size loaded) to %ptr. The SelectionDAG run
; expects one post-indexed ld4.8b with #32; the GlobalISel run expects a plain
; ld4.8b followed by a separate add.
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v8i8_post_imm_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4.8b { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_imm_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.8b { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 32
  store ptr %tmp, ptr %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
}
2048
; Register post-increment: loads four <8 x i8> vectors from %A and stores %A
; advanced by %inc bytes to %ptr. The element size is one byte, so the
; SelectionDAG run expects %inc (x2) unscaled as the post-index register of
; ld4.8b; the GlobalISel run expects a plain ld4.8b and a separate add.
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v8i8_post_reg_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4.8b { v0, v1, v2, v3 }, [x0], x2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i8_post_reg_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.8b { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
}
2067
; ld4 intrinsic returning four <8 x i8> vectors; called by the test_v8i8_post_*_ld4 tests above.
declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0(ptr)
2069
2070
; Immediate post-increment: loads four <8 x i16> vectors from %A and stores %A
; advanced by 32 i16 elements (64 bytes, the total size loaded) to %ptr. The
; SelectionDAG run expects one post-indexed ld4.8h with #64; the GlobalISel run
; expects a plain ld4.8h followed by a separate add.
define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v8i16_post_imm_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4.8h { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_imm_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.8h { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #64
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 32
  store ptr %tmp, ptr %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
}
2089
; Register post-increment: loads four <8 x i16> vectors from %A and stores %A
; advanced by %inc i16 elements to %ptr. The SelectionDAG run expects %inc to
; be scaled to bytes (lsl #1) and used as the post-index register of ld4.8h;
; the GlobalISel run expects a plain ld4.8h followed by a separate add.
define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v8i16_post_reg_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ld4.8h { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v8i16_post_reg_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.8h { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
}
2109
; ld4 intrinsic returning four <8 x i16> vectors; called by the test_v8i16_post_*_ld4 tests above.
declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0(ptr)
2111
2112
; Immediate post-increment: loads four <4 x i16> vectors from %A and stores %A
; advanced by 16 i16 elements (32 bytes, the total size loaded) to %ptr. The
; SelectionDAG run expects one post-indexed ld4.4h with #32; the GlobalISel run
; expects a plain ld4.4h followed by a separate add.
define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v4i16_post_imm_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4.4h { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_imm_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.4h { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i32 16
  store ptr %tmp, ptr %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
}
2131
; Register post-increment: loads four <4 x i16> vectors from %A and stores %A
; advanced by %inc i16 elements to %ptr. The SelectionDAG run expects %inc to
; be scaled to bytes (lsl #1) and used as the post-index register of ld4.4h;
; the GlobalISel run expects a plain ld4.4h followed by a separate add.
define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v4i16_post_reg_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #1
; CHECK-SD-NEXT:    ld4.4h { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i16_post_reg_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.4h { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0(ptr %A)
  %tmp = getelementptr i16, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
}
2151
; ld4 intrinsic returning four <4 x i16> vectors; called by the test_v4i16_post_*_ld4 tests above.
declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0(ptr)
2153
2154
; Immediate post-increment: loads four <4 x i32> vectors from %A and stores %A
; advanced by 16 i32 elements (64 bytes, the total size loaded) to %ptr. The
; SelectionDAG run expects one post-indexed ld4.4s with #64; the GlobalISel run
; expects a plain ld4.4s followed by a separate add.
define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v4i32_post_imm_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4.4s { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_imm_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.4s { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #64
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 16
  store ptr %tmp, ptr %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
}
2173
; Register post-increment: loads four <4 x i32> vectors from %A and stores %A
; advanced by %inc i32 elements to %ptr. The SelectionDAG run expects %inc to
; be scaled to bytes (lsl #2) and used as the post-index register of ld4.4s;
; the GlobalISel run expects a plain ld4.4s followed by a separate add.
define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v4i32_post_reg_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld4.4s { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4i32_post_reg_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.4s { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
}
2193
; ld4 intrinsic returning four <4 x i32> vectors; called by the test_v4i32_post_*_ld4 tests above.
declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0(ptr)
2195
2196
; Immediate post-increment: loads four <2 x i32> vectors from %A and stores %A
; advanced by 8 i32 elements (32 bytes, the total size loaded) to %ptr. The
; SelectionDAG run expects one post-indexed ld4.2s with #32; the GlobalISel run
; expects a plain ld4.2s followed by a separate add.
define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v2i32_post_imm_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4.2s { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_imm_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.2s { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i32 8
  store ptr %tmp, ptr %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
}
2215
; Register post-increment: loads four <2 x i32> vectors from %A and stores %A
; advanced by %inc i32 elements to %ptr. The SelectionDAG run expects %inc to
; be scaled to bytes (lsl #2) and used as the post-index register of ld4.2s;
; the GlobalISel run expects a plain ld4.2s followed by a separate add.
define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v2i32_post_reg_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld4.2s { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i32_post_reg_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.2s { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0(ptr %A)
  %tmp = getelementptr i32, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
}
2235
; ld4 intrinsic returning four <2 x i32> vectors; called by the test_v2i32_post_*_ld4 tests above.
declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0(ptr)
2237
2238
; Immediate post-increment: loads four <2 x i64> vectors from %A and stores %A
; advanced by 8 i64 elements (64 bytes, the total size loaded) to %ptr. The
; SelectionDAG run expects one post-indexed ld4.2d with #64; the GlobalISel run
; expects a plain ld4.2d followed by a separate add.
define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v2i64_post_imm_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4.2d { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_imm_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.2d { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #64
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i32 8
  store ptr %tmp, ptr %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
}
2257
; Register post-increment: loads four <2 x i64> vectors from %A and stores %A
; advanced by %inc i64 elements to %ptr. The SelectionDAG run expects %inc to
; be scaled to bytes (lsl #3) and used as the post-index register of ld4.2d;
; the GlobalISel run expects a plain ld4.2d followed by a separate add.
define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v2i64_post_reg_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld4.2d { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2i64_post_reg_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.2d { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
}
2277
; ld4 intrinsic returning four <2 x i64> vectors; called by the test_v2i64_post_*_ld4 tests above.
declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0(ptr)
2279
2280
; Immediate post-increment: loads four <1 x i64> vectors from %A and stores %A
; advanced by 4 i64 elements (32 bytes, the total size loaded) to %ptr. Both
; runs expect a four-register ld1.1d rather than an ld4; the SelectionDAG run
; expects it post-indexed with #32, the GlobalISel run expects a separate add.
define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v1i64_post_imm_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_imm_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
}
2299
; Register post-increment: loads four <1 x i64> vectors from %A and stores %A
; advanced by %inc i64 elements to %ptr. Both runs expect a four-register
; ld1.1d rather than an ld4; the SelectionDAG run expects %inc scaled to bytes
; (lsl #3) as its post-index register, the GlobalISel run a separate add.
define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v1i64_post_reg_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1i64_post_reg_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0(ptr %A)
  %tmp = getelementptr i64, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
}
2319
; ld4 intrinsic returning four <1 x i64> vectors; called by the test_v1i64_post_*_ld4 tests above.
declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0(ptr)
2321
2322
; Immediate post-increment: loads four <4 x float> vectors from %A and stores
; %A advanced by 16 float elements (64 bytes, the total size loaded) to %ptr.
; The SelectionDAG run expects one post-indexed ld4.4s with #64; the GlobalISel
; run expects a plain ld4.4s followed by a separate add.
define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v4f32_post_imm_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4.4s { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_imm_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.4s { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #64
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i32 16
  store ptr %tmp, ptr %ptr
  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
}
2341
; Register post-increment: loads four <4 x float> vectors from %A and stores %A
; advanced by %inc float elements to %ptr. The SelectionDAG run expects %inc to
; be scaled to bytes (lsl #2) and used as the post-index register of ld4.4s;
; the GlobalISel run expects a plain ld4.4s followed by a separate add.
define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v4f32_post_reg_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld4.4s { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v4f32_post_reg_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.4s { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
}
2361
; ld4 intrinsic returning four <4 x float> vectors; called by the test_v4f32_post_*_ld4 tests above.
declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0(ptr)
2363
2364
; Immediate post-increment: loads four <2 x float> vectors from %A and stores
; %A advanced by 8 float elements (32 bytes, the total size loaded) to %ptr.
; The SelectionDAG run expects one post-indexed ld4.2s with #32; the GlobalISel
; run expects a plain ld4.2s followed by a separate add.
define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v2f32_post_imm_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4.2s { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_imm_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.2s { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i32 8
  store ptr %tmp, ptr %ptr
  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
}
2383
; Register post-increment: loads four <2 x float> vectors from %A and stores %A
; advanced by %inc float elements to %ptr. The SelectionDAG run expects %inc to
; be scaled to bytes (lsl #2) and used as the post-index register of ld4.2s;
; the GlobalISel run expects a plain ld4.2s followed by a separate add.
define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v2f32_post_reg_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #2
; CHECK-SD-NEXT:    ld4.2s { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f32_post_reg_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.2s { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0(ptr %A)
  %tmp = getelementptr float, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
}
2403
; ld4 intrinsic returning four <2 x float> vectors; called by the test_v2f32_post_*_ld4 tests above.
declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0(ptr)
2405
2406
; Immediate post-increment: loads four <2 x double> vectors from %A and stores
; %A advanced by 8 double elements (64 bytes, the total size loaded) to %ptr.
; The SelectionDAG run expects one post-indexed ld4.2d with #64; the GlobalISel
; run expects a plain ld4.2d followed by a separate add.
define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v2f64_post_imm_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld4.2d { v0, v1, v2, v3 }, [x0], #64
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_imm_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.2d { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #64
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i32 8
  store ptr %tmp, ptr %ptr
  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
}
2425
; Register post-increment: loads four <2 x double> vectors from %A and stores
; %A advanced by %inc double elements to %ptr. The SelectionDAG run expects
; %inc to be scaled to bytes (lsl #3) and used as the post-index register of
; ld4.2d; the GlobalISel run expects a plain ld4.2d followed by a separate add.
define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v2f64_post_reg_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld4.2d { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v2f64_post_reg_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld4.2d { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
}
2445
; ld4 intrinsic returning four <2 x double> vectors; called by the test_v2f64_post_*_ld4 tests above.
declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0(ptr)
2447
2448
; Immediate post-increment: loads four <1 x double> vectors from %A and stores
; %A advanced by 4 double elements (32 bytes, the total size loaded) to %ptr.
; Both runs expect a four-register ld1.1d rather than an ld4; the SelectionDAG
; run expects it post-indexed with #32, the GlobalISel run a separate add.
define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v1f64_post_imm_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_imm_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i32 4
  store ptr %tmp, ptr %ptr
  ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
}
2467
; Register post-increment: loads four <1 x double> vectors from %A and stores
; %A advanced by %inc double elements to %ptr. Both runs expect a four-register
; ld1.1d rather than an ld4; the SelectionDAG run expects %inc scaled to bytes
; (lsl #3) as its post-index register, the GlobalISel run a separate add.
define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v1f64_post_reg_ld4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    lsl x8, x2, #3
; CHECK-SD-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0], x8
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v1f64_post_reg_ld4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0(ptr %A)
  %tmp = getelementptr double, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
}
2487
; ld4 intrinsic returning four <1 x double> vectors; called by the test_v1f64_post_*_ld4 tests above.
declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0(ptr)
2489
; Immediate post-increment: loads two <16 x i8> vectors from %A via ld1x2 and
; stores %A advanced by 32 bytes (the total size loaded) to %ptr. The
; SelectionDAG run expects one post-indexed two-register ld1.16b with #32; the
; GlobalISel run expects a plain ld1.16b followed by a separate add.
define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x2(ptr %A, ptr %ptr) {
; CHECK-SD-LABEL: test_v16i8_post_imm_ld1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.16b { v0, v1 }, [x0], #32
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_imm_ld1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.16b { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, #32
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i32 32
  store ptr %tmp, ptr %ptr
  ret { <16 x i8>, <16 x i8> } %ld1x2
}
2508
; Register post-increment: loads two <16 x i8> vectors from %A via ld1x2 and
; stores %A advanced by %inc bytes to %ptr. The element size is one byte, so
; the SelectionDAG run expects %inc (x2) unscaled as the post-index register of
; ld1.16b; the GlobalISel run expects a plain ld1.16b and a separate add.
define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x2(ptr %A, ptr %ptr, i64 %inc) {
; CHECK-SD-LABEL: test_v16i8_post_reg_ld1x2:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    ld1.16b { v0, v1 }, [x0], x2
; CHECK-SD-NEXT:    str x0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: test_v16i8_post_reg_ld1x2:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    ld1.16b { v0, v1 }, [x0]
; CHECK-GI-NEXT:    add x8, x0, x2
; CHECK-GI-NEXT:    str x8, [x1]
; CHECK-GI-NEXT:    ret
  %ld1x2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0(ptr %A)
  %tmp = getelementptr i8, ptr %A, i64 %inc
  store ptr %tmp, ptr %ptr
  ret { <16 x i8>, <16 x i8> } %ld1x2
}
2527
2528declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0(ptr)
2529
2530
; Post-increment test, immediate form: loads two <8 x i8> vectors from %A via
; llvm.aarch64.neon.ld1x2, then stores %A + 16 bytes (the size of the data
; loaded) to %ptr. The default llc run expects the add folded into a
; post-indexed ld1.8b with writeback #16; the -global-isel=1 run expects a
; plain ld1.8b followed by a separate add.
2531define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x2(ptr %A, ptr %ptr) {
2532; CHECK-SD-LABEL: test_v8i8_post_imm_ld1x2:
2533; CHECK-SD:       ; %bb.0:
2534; CHECK-SD-NEXT:    ld1.8b { v0, v1 }, [x0], #16
2535; CHECK-SD-NEXT:    str x0, [x1]
2536; CHECK-SD-NEXT:    ret
2537;
2538; CHECK-GI-LABEL: test_v8i8_post_imm_ld1x2:
2539; CHECK-GI:       ; %bb.0:
2540; CHECK-GI-NEXT:    ld1.8b { v0, v1 }, [x0]
2541; CHECK-GI-NEXT:    add x8, x0, #16
2542; CHECK-GI-NEXT:    str x8, [x1]
2543; CHECK-GI-NEXT:    ret
2544  %ld1x2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0(ptr %A)
2545  %tmp = getelementptr i8, ptr %A, i32 16
2546  store ptr %tmp, ptr %ptr
2547  ret { <8 x i8>, <8 x i8> } %ld1x2
2548}
2549
; Post-increment test, register form: loads two <8 x i8> vectors from %A via
; llvm.aarch64.neon.ld1x2, then stores %A advanced by %inc bytes to %ptr.
; Elements are one byte, so no scaling is needed: the default llc run expects
; %inc (x2) used directly as the register post-index of ld1.8b; the
; -global-isel=1 run expects a plain ld1.8b followed by a separate add.
2550define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x2(ptr %A, ptr %ptr, i64 %inc) {
2551; CHECK-SD-LABEL: test_v8i8_post_reg_ld1x2:
2552; CHECK-SD:       ; %bb.0:
2553; CHECK-SD-NEXT:    ld1.8b { v0, v1 }, [x0], x2
2554; CHECK-SD-NEXT:    str x0, [x1]
2555; CHECK-SD-NEXT:    ret
2556;
2557; CHECK-GI-LABEL: test_v8i8_post_reg_ld1x2:
2558; CHECK-GI:       ; %bb.0:
2559; CHECK-GI-NEXT:    ld1.8b { v0, v1 }, [x0]
2560; CHECK-GI-NEXT:    add x8, x0, x2
2561; CHECK-GI-NEXT:    str x8, [x1]
2562; CHECK-GI-NEXT:    ret
2563  %ld1x2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0(ptr %A)
2564  %tmp = getelementptr i8, ptr %A, i64 %inc
2565  store ptr %tmp, ptr %ptr
2566  ret { <8 x i8>, <8 x i8> } %ld1x2
2567}
2568
2569declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0(ptr)
2570
2571
; Post-increment test, immediate form: loads two <8 x i16> vectors from %A via
; llvm.aarch64.neon.ld1x2, then stores %A + 16 i16 elements (32 bytes, the
; size of the data loaded) to %ptr. The default llc run expects the add folded
; into a post-indexed ld1.8h with writeback #32; the -global-isel=1 run
; expects a plain ld1.8h followed by a separate add.
2572define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x2(ptr %A, ptr %ptr) {
2573; CHECK-SD-LABEL: test_v8i16_post_imm_ld1x2:
2574; CHECK-SD:       ; %bb.0:
2575; CHECK-SD-NEXT:    ld1.8h { v0, v1 }, [x0], #32
2576; CHECK-SD-NEXT:    str x0, [x1]
2577; CHECK-SD-NEXT:    ret
2578;
2579; CHECK-GI-LABEL: test_v8i16_post_imm_ld1x2:
2580; CHECK-GI:       ; %bb.0:
2581; CHECK-GI-NEXT:    ld1.8h { v0, v1 }, [x0]
2582; CHECK-GI-NEXT:    add x8, x0, #32
2583; CHECK-GI-NEXT:    str x8, [x1]
2584; CHECK-GI-NEXT:    ret
2585  %ld1x2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0(ptr %A)
2586  %tmp = getelementptr i16, ptr %A, i32 16
2587  store ptr %tmp, ptr %ptr
2588  ret { <8 x i16>, <8 x i16> } %ld1x2
2589}
2590
; Post-increment test, register form: loads two <8 x i16> vectors from %A via
; llvm.aarch64.neon.ld1x2, then stores %A advanced by %inc i16 elements to
; %ptr. The default llc run expects %inc scaled to bytes (lsl #1) and used as
; the register post-index of ld1.8h; the -global-isel=1 run expects a plain
; ld1.8h followed by a separate add with the shift folded in.
2591define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x2(ptr %A, ptr %ptr, i64 %inc) {
2592; CHECK-SD-LABEL: test_v8i16_post_reg_ld1x2:
2593; CHECK-SD:       ; %bb.0:
2594; CHECK-SD-NEXT:    lsl x8, x2, #1
2595; CHECK-SD-NEXT:    ld1.8h { v0, v1 }, [x0], x8
2596; CHECK-SD-NEXT:    str x0, [x1]
2597; CHECK-SD-NEXT:    ret
2598;
2599; CHECK-GI-LABEL: test_v8i16_post_reg_ld1x2:
2600; CHECK-GI:       ; %bb.0:
2601; CHECK-GI-NEXT:    ld1.8h { v0, v1 }, [x0]
2602; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
2603; CHECK-GI-NEXT:    str x8, [x1]
2604; CHECK-GI-NEXT:    ret
2605  %ld1x2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0(ptr %A)
2606  %tmp = getelementptr i16, ptr %A, i64 %inc
2607  store ptr %tmp, ptr %ptr
2608  ret { <8 x i16>, <8 x i16> } %ld1x2
2609}
2610
2611declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0(ptr)
2612
2613
; Post-increment test, immediate form: loads two <4 x i16> vectors from %A via
; llvm.aarch64.neon.ld1x2, then stores %A + 8 i16 elements (16 bytes, the size
; of the data loaded) to %ptr. The default llc run expects the add folded into
; a post-indexed ld1.4h with writeback #16; the -global-isel=1 run expects a
; plain ld1.4h followed by a separate add.
2614define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x2(ptr %A, ptr %ptr) {
2615; CHECK-SD-LABEL: test_v4i16_post_imm_ld1x2:
2616; CHECK-SD:       ; %bb.0:
2617; CHECK-SD-NEXT:    ld1.4h { v0, v1 }, [x0], #16
2618; CHECK-SD-NEXT:    str x0, [x1]
2619; CHECK-SD-NEXT:    ret
2620;
2621; CHECK-GI-LABEL: test_v4i16_post_imm_ld1x2:
2622; CHECK-GI:       ; %bb.0:
2623; CHECK-GI-NEXT:    ld1.4h { v0, v1 }, [x0]
2624; CHECK-GI-NEXT:    add x8, x0, #16
2625; CHECK-GI-NEXT:    str x8, [x1]
2626; CHECK-GI-NEXT:    ret
2627  %ld1x2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0(ptr %A)
2628  %tmp = getelementptr i16, ptr %A, i32 8
2629  store ptr %tmp, ptr %ptr
2630  ret { <4 x i16>, <4 x i16> } %ld1x2
2631}
2632
; Post-increment test, register form: loads two <4 x i16> vectors from %A via
; llvm.aarch64.neon.ld1x2, then stores %A advanced by %inc i16 elements to
; %ptr. The default llc run expects %inc scaled to bytes (lsl #1) and used as
; the register post-index of ld1.4h; the -global-isel=1 run expects a plain
; ld1.4h followed by a separate add with the shift folded in.
2633define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x2(ptr %A, ptr %ptr, i64 %inc) {
2634; CHECK-SD-LABEL: test_v4i16_post_reg_ld1x2:
2635; CHECK-SD:       ; %bb.0:
2636; CHECK-SD-NEXT:    lsl x8, x2, #1
2637; CHECK-SD-NEXT:    ld1.4h { v0, v1 }, [x0], x8
2638; CHECK-SD-NEXT:    str x0, [x1]
2639; CHECK-SD-NEXT:    ret
2640;
2641; CHECK-GI-LABEL: test_v4i16_post_reg_ld1x2:
2642; CHECK-GI:       ; %bb.0:
2643; CHECK-GI-NEXT:    ld1.4h { v0, v1 }, [x0]
2644; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
2645; CHECK-GI-NEXT:    str x8, [x1]
2646; CHECK-GI-NEXT:    ret
2647  %ld1x2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0(ptr %A)
2648  %tmp = getelementptr i16, ptr %A, i64 %inc
2649  store ptr %tmp, ptr %ptr
2650  ret { <4 x i16>, <4 x i16> } %ld1x2
2651}
2652
2653declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0(ptr)
2654
2655
; Post-increment test, immediate form: loads two <4 x i32> vectors from %A via
; llvm.aarch64.neon.ld1x2, then stores %A + 8 i32 elements (32 bytes, the size
; of the data loaded) to %ptr. The default llc run expects the add folded into
; a post-indexed ld1.4s with writeback #32; the -global-isel=1 run expects a
; plain ld1.4s followed by a separate add.
2656define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x2(ptr %A, ptr %ptr) {
2657; CHECK-SD-LABEL: test_v4i32_post_imm_ld1x2:
2658; CHECK-SD:       ; %bb.0:
2659; CHECK-SD-NEXT:    ld1.4s { v0, v1 }, [x0], #32
2660; CHECK-SD-NEXT:    str x0, [x1]
2661; CHECK-SD-NEXT:    ret
2662;
2663; CHECK-GI-LABEL: test_v4i32_post_imm_ld1x2:
2664; CHECK-GI:       ; %bb.0:
2665; CHECK-GI-NEXT:    ld1.4s { v0, v1 }, [x0]
2666; CHECK-GI-NEXT:    add x8, x0, #32
2667; CHECK-GI-NEXT:    str x8, [x1]
2668; CHECK-GI-NEXT:    ret
2669  %ld1x2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0(ptr %A)
2670  %tmp = getelementptr i32, ptr %A, i32 8
2671  store ptr %tmp, ptr %ptr
2672  ret { <4 x i32>, <4 x i32> } %ld1x2
2673}
2674
; Post-increment test, register form: loads two <4 x i32> vectors from %A via
; llvm.aarch64.neon.ld1x2, then stores %A advanced by %inc i32 elements to
; %ptr. The default llc run expects %inc scaled to bytes (lsl #2) and used as
; the register post-index of ld1.4s; the -global-isel=1 run expects a plain
; ld1.4s followed by a separate add with the shift folded in.
2675define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x2(ptr %A, ptr %ptr, i64 %inc) {
2676; CHECK-SD-LABEL: test_v4i32_post_reg_ld1x2:
2677; CHECK-SD:       ; %bb.0:
2678; CHECK-SD-NEXT:    lsl x8, x2, #2
2679; CHECK-SD-NEXT:    ld1.4s { v0, v1 }, [x0], x8
2680; CHECK-SD-NEXT:    str x0, [x1]
2681; CHECK-SD-NEXT:    ret
2682;
2683; CHECK-GI-LABEL: test_v4i32_post_reg_ld1x2:
2684; CHECK-GI:       ; %bb.0:
2685; CHECK-GI-NEXT:    ld1.4s { v0, v1 }, [x0]
2686; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
2687; CHECK-GI-NEXT:    str x8, [x1]
2688; CHECK-GI-NEXT:    ret
2689  %ld1x2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0(ptr %A)
2690  %tmp = getelementptr i32, ptr %A, i64 %inc
2691  store ptr %tmp, ptr %ptr
2692  ret { <4 x i32>, <4 x i32> } %ld1x2
2693}
2694
2695declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0(ptr)
2696
2697
; Post-increment test, immediate form: loads two <2 x i32> vectors from %A via
; llvm.aarch64.neon.ld1x2, then stores %A + 4 i32 elements (16 bytes, the size
; of the data loaded) to %ptr. The default llc run expects the add folded into
; a post-indexed ld1.2s with writeback #16; the -global-isel=1 run expects a
; plain ld1.2s followed by a separate add.
2698define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x2(ptr %A, ptr %ptr) {
2699; CHECK-SD-LABEL: test_v2i32_post_imm_ld1x2:
2700; CHECK-SD:       ; %bb.0:
2701; CHECK-SD-NEXT:    ld1.2s { v0, v1 }, [x0], #16
2702; CHECK-SD-NEXT:    str x0, [x1]
2703; CHECK-SD-NEXT:    ret
2704;
2705; CHECK-GI-LABEL: test_v2i32_post_imm_ld1x2:
2706; CHECK-GI:       ; %bb.0:
2707; CHECK-GI-NEXT:    ld1.2s { v0, v1 }, [x0]
2708; CHECK-GI-NEXT:    add x8, x0, #16
2709; CHECK-GI-NEXT:    str x8, [x1]
2710; CHECK-GI-NEXT:    ret
2711  %ld1x2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0(ptr %A)
2712  %tmp = getelementptr i32, ptr %A, i32 4
2713  store ptr %tmp, ptr %ptr
2714  ret { <2 x i32>, <2 x i32> } %ld1x2
2715}
2716
; Post-increment test, register form: loads two <2 x i32> vectors from %A via
; llvm.aarch64.neon.ld1x2, then stores %A advanced by %inc i32 elements to
; %ptr. The default llc run expects %inc scaled to bytes (lsl #2) and used as
; the register post-index of ld1.2s; the -global-isel=1 run expects a plain
; ld1.2s followed by a separate add with the shift folded in.
2717define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x2(ptr %A, ptr %ptr, i64 %inc) {
2718; CHECK-SD-LABEL: test_v2i32_post_reg_ld1x2:
2719; CHECK-SD:       ; %bb.0:
2720; CHECK-SD-NEXT:    lsl x8, x2, #2
2721; CHECK-SD-NEXT:    ld1.2s { v0, v1 }, [x0], x8
2722; CHECK-SD-NEXT:    str x0, [x1]
2723; CHECK-SD-NEXT:    ret
2724;
2725; CHECK-GI-LABEL: test_v2i32_post_reg_ld1x2:
2726; CHECK-GI:       ; %bb.0:
2727; CHECK-GI-NEXT:    ld1.2s { v0, v1 }, [x0]
2728; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
2729; CHECK-GI-NEXT:    str x8, [x1]
2730; CHECK-GI-NEXT:    ret
2731  %ld1x2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0(ptr %A)
2732  %tmp = getelementptr i32, ptr %A, i64 %inc
2733  store ptr %tmp, ptr %ptr
2734  ret { <2 x i32>, <2 x i32> } %ld1x2
2735}
2736
2737declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0(ptr)
2738
2739
; Post-increment test, immediate form: loads two <2 x i64> vectors from %A via
; llvm.aarch64.neon.ld1x2, then stores %A + 4 i64 elements (32 bytes, the size
; of the data loaded) to %ptr. The default llc run expects the add folded into
; a post-indexed ld1.2d with writeback #32; the -global-isel=1 run expects a
; plain ld1.2d followed by a separate add.
2740define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x2(ptr %A, ptr %ptr) {
2741; CHECK-SD-LABEL: test_v2i64_post_imm_ld1x2:
2742; CHECK-SD:       ; %bb.0:
2743; CHECK-SD-NEXT:    ld1.2d { v0, v1 }, [x0], #32
2744; CHECK-SD-NEXT:    str x0, [x1]
2745; CHECK-SD-NEXT:    ret
2746;
2747; CHECK-GI-LABEL: test_v2i64_post_imm_ld1x2:
2748; CHECK-GI:       ; %bb.0:
2749; CHECK-GI-NEXT:    ld1.2d { v0, v1 }, [x0]
2750; CHECK-GI-NEXT:    add x8, x0, #32
2751; CHECK-GI-NEXT:    str x8, [x1]
2752; CHECK-GI-NEXT:    ret
2753  %ld1x2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0(ptr %A)
2754  %tmp = getelementptr i64, ptr %A, i32 4
2755  store ptr %tmp, ptr %ptr
2756  ret { <2 x i64>, <2 x i64> } %ld1x2
2757}
2758
; Post-increment test, register form: loads two <2 x i64> vectors from %A via
; llvm.aarch64.neon.ld1x2, then stores %A advanced by %inc i64 elements to
; %ptr. The default llc run expects %inc scaled to bytes (lsl #3) and used as
; the register post-index of ld1.2d; the -global-isel=1 run expects a plain
; ld1.2d followed by a separate add with the shift folded in.
2759define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x2(ptr %A, ptr %ptr, i64 %inc) {
2760; CHECK-SD-LABEL: test_v2i64_post_reg_ld1x2:
2761; CHECK-SD:       ; %bb.0:
2762; CHECK-SD-NEXT:    lsl x8, x2, #3
2763; CHECK-SD-NEXT:    ld1.2d { v0, v1 }, [x0], x8
2764; CHECK-SD-NEXT:    str x0, [x1]
2765; CHECK-SD-NEXT:    ret
2766;
2767; CHECK-GI-LABEL: test_v2i64_post_reg_ld1x2:
2768; CHECK-GI:       ; %bb.0:
2769; CHECK-GI-NEXT:    ld1.2d { v0, v1 }, [x0]
2770; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
2771; CHECK-GI-NEXT:    str x8, [x1]
2772; CHECK-GI-NEXT:    ret
2773  %ld1x2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0(ptr %A)
2774  %tmp = getelementptr i64, ptr %A, i64 %inc
2775  store ptr %tmp, ptr %ptr
2776  ret { <2 x i64>, <2 x i64> } %ld1x2
2777}
2778
2779declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0(ptr)
2780
2781
; Post-increment test, immediate form: loads two <1 x i64> vectors from %A via
; llvm.aarch64.neon.ld1x2, then stores %A + 2 i64 elements (16 bytes, the size
; of the data loaded) to %ptr. The default llc run expects the add folded into
; a post-indexed ld1.1d with writeback #16; the -global-isel=1 run expects a
; plain ld1.1d followed by a separate add.
2782define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x2(ptr %A, ptr %ptr) {
2783; CHECK-SD-LABEL: test_v1i64_post_imm_ld1x2:
2784; CHECK-SD:       ; %bb.0:
2785; CHECK-SD-NEXT:    ld1.1d { v0, v1 }, [x0], #16
2786; CHECK-SD-NEXT:    str x0, [x1]
2787; CHECK-SD-NEXT:    ret
2788;
2789; CHECK-GI-LABEL: test_v1i64_post_imm_ld1x2:
2790; CHECK-GI:       ; %bb.0:
2791; CHECK-GI-NEXT:    ld1.1d { v0, v1 }, [x0]
2792; CHECK-GI-NEXT:    add x8, x0, #16
2793; CHECK-GI-NEXT:    str x8, [x1]
2794; CHECK-GI-NEXT:    ret
2795  %ld1x2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0(ptr %A)
2796  %tmp = getelementptr i64, ptr %A, i32 2
2797  store ptr %tmp, ptr %ptr
2798  ret { <1 x i64>, <1 x i64> } %ld1x2
2799}
2800
; Post-increment test, register form: loads two <1 x i64> vectors from %A via
; llvm.aarch64.neon.ld1x2, then stores %A advanced by %inc i64 elements to
; %ptr. The default llc run expects %inc scaled to bytes (lsl #3) and used as
; the register post-index of ld1.1d; the -global-isel=1 run expects a plain
; ld1.1d followed by a separate add with the shift folded in.
2801define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x2(ptr %A, ptr %ptr, i64 %inc) {
2802; CHECK-SD-LABEL: test_v1i64_post_reg_ld1x2:
2803; CHECK-SD:       ; %bb.0:
2804; CHECK-SD-NEXT:    lsl x8, x2, #3
2805; CHECK-SD-NEXT:    ld1.1d { v0, v1 }, [x0], x8
2806; CHECK-SD-NEXT:    str x0, [x1]
2807; CHECK-SD-NEXT:    ret
2808;
2809; CHECK-GI-LABEL: test_v1i64_post_reg_ld1x2:
2810; CHECK-GI:       ; %bb.0:
2811; CHECK-GI-NEXT:    ld1.1d { v0, v1 }, [x0]
2812; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
2813; CHECK-GI-NEXT:    str x8, [x1]
2814; CHECK-GI-NEXT:    ret
2815  %ld1x2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0(ptr %A)
2816  %tmp = getelementptr i64, ptr %A, i64 %inc
2817  store ptr %tmp, ptr %ptr
2818  ret { <1 x i64>, <1 x i64> } %ld1x2
2819}
2820
2821declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0(ptr)
2822
2823
; Post-increment test, immediate form: loads two <4 x float> vectors from %A
; via llvm.aarch64.neon.ld1x2, then stores %A + 8 float elements (32 bytes,
; the size of the data loaded) to %ptr. The default llc run expects the add
; folded into a post-indexed ld1.4s with writeback #32; the -global-isel=1 run
; expects a plain ld1.4s followed by a separate add.
2824define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x2(ptr %A, ptr %ptr) {
2825; CHECK-SD-LABEL: test_v4f32_post_imm_ld1x2:
2826; CHECK-SD:       ; %bb.0:
2827; CHECK-SD-NEXT:    ld1.4s { v0, v1 }, [x0], #32
2828; CHECK-SD-NEXT:    str x0, [x1]
2829; CHECK-SD-NEXT:    ret
2830;
2831; CHECK-GI-LABEL: test_v4f32_post_imm_ld1x2:
2832; CHECK-GI:       ; %bb.0:
2833; CHECK-GI-NEXT:    ld1.4s { v0, v1 }, [x0]
2834; CHECK-GI-NEXT:    add x8, x0, #32
2835; CHECK-GI-NEXT:    str x8, [x1]
2836; CHECK-GI-NEXT:    ret
2837  %ld1x2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0(ptr %A)
2838  %tmp = getelementptr float, ptr %A, i32 8
2839  store ptr %tmp, ptr %ptr
2840  ret { <4 x float>, <4 x float> } %ld1x2
2841}
2842
; Post-increment test, register form: loads two <4 x float> vectors from %A
; via llvm.aarch64.neon.ld1x2, then stores %A advanced by %inc float elements
; to %ptr. The default llc run expects %inc scaled to bytes (lsl #2) and used
; as the register post-index of ld1.4s; the -global-isel=1 run expects a plain
; ld1.4s followed by a separate add with the shift folded in.
2843define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x2(ptr %A, ptr %ptr, i64 %inc) {
2844; CHECK-SD-LABEL: test_v4f32_post_reg_ld1x2:
2845; CHECK-SD:       ; %bb.0:
2846; CHECK-SD-NEXT:    lsl x8, x2, #2
2847; CHECK-SD-NEXT:    ld1.4s { v0, v1 }, [x0], x8
2848; CHECK-SD-NEXT:    str x0, [x1]
2849; CHECK-SD-NEXT:    ret
2850;
2851; CHECK-GI-LABEL: test_v4f32_post_reg_ld1x2:
2852; CHECK-GI:       ; %bb.0:
2853; CHECK-GI-NEXT:    ld1.4s { v0, v1 }, [x0]
2854; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
2855; CHECK-GI-NEXT:    str x8, [x1]
2856; CHECK-GI-NEXT:    ret
2857  %ld1x2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0(ptr %A)
2858  %tmp = getelementptr float, ptr %A, i64 %inc
2859  store ptr %tmp, ptr %ptr
2860  ret { <4 x float>, <4 x float> } %ld1x2
2861}
2862
2863declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0(ptr)
2864
2865
; Post-increment test, immediate form: loads two <2 x float> vectors from %A
; via llvm.aarch64.neon.ld1x2, then stores %A + 4 float elements (16 bytes,
; the size of the data loaded) to %ptr. The default llc run expects the add
; folded into a post-indexed ld1.2s with writeback #16; the -global-isel=1 run
; expects a plain ld1.2s followed by a separate add.
2866define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x2(ptr %A, ptr %ptr) {
2867; CHECK-SD-LABEL: test_v2f32_post_imm_ld1x2:
2868; CHECK-SD:       ; %bb.0:
2869; CHECK-SD-NEXT:    ld1.2s { v0, v1 }, [x0], #16
2870; CHECK-SD-NEXT:    str x0, [x1]
2871; CHECK-SD-NEXT:    ret
2872;
2873; CHECK-GI-LABEL: test_v2f32_post_imm_ld1x2:
2874; CHECK-GI:       ; %bb.0:
2875; CHECK-GI-NEXT:    ld1.2s { v0, v1 }, [x0]
2876; CHECK-GI-NEXT:    add x8, x0, #16
2877; CHECK-GI-NEXT:    str x8, [x1]
2878; CHECK-GI-NEXT:    ret
2879  %ld1x2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0(ptr %A)
2880  %tmp = getelementptr float, ptr %A, i32 4
2881  store ptr %tmp, ptr %ptr
2882  ret { <2 x float>, <2 x float> } %ld1x2
2883}
2884
; Post-increment test, register form: loads two <2 x float> vectors from %A
; via llvm.aarch64.neon.ld1x2, then stores %A advanced by %inc float elements
; to %ptr. The default llc run expects %inc scaled to bytes (lsl #2) and used
; as the register post-index of ld1.2s; the -global-isel=1 run expects a plain
; ld1.2s followed by a separate add with the shift folded in.
2885define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x2(ptr %A, ptr %ptr, i64 %inc) {
2886; CHECK-SD-LABEL: test_v2f32_post_reg_ld1x2:
2887; CHECK-SD:       ; %bb.0:
2888; CHECK-SD-NEXT:    lsl x8, x2, #2
2889; CHECK-SD-NEXT:    ld1.2s { v0, v1 }, [x0], x8
2890; CHECK-SD-NEXT:    str x0, [x1]
2891; CHECK-SD-NEXT:    ret
2892;
2893; CHECK-GI-LABEL: test_v2f32_post_reg_ld1x2:
2894; CHECK-GI:       ; %bb.0:
2895; CHECK-GI-NEXT:    ld1.2s { v0, v1 }, [x0]
2896; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
2897; CHECK-GI-NEXT:    str x8, [x1]
2898; CHECK-GI-NEXT:    ret
2899  %ld1x2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0(ptr %A)
2900  %tmp = getelementptr float, ptr %A, i64 %inc
2901  store ptr %tmp, ptr %ptr
2902  ret { <2 x float>, <2 x float> } %ld1x2
2903}
2904
2905declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0(ptr)
2906
2907
; Post-increment test, immediate form: loads two <2 x double> vectors from %A
; via llvm.aarch64.neon.ld1x2, then stores %A + 4 double elements (32 bytes,
; the size of the data loaded) to %ptr. The default llc run expects the add
; folded into a post-indexed ld1.2d with writeback #32; the -global-isel=1 run
; expects a plain ld1.2d followed by a separate add.
2908define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x2(ptr %A, ptr %ptr) {
2909; CHECK-SD-LABEL: test_v2f64_post_imm_ld1x2:
2910; CHECK-SD:       ; %bb.0:
2911; CHECK-SD-NEXT:    ld1.2d { v0, v1 }, [x0], #32
2912; CHECK-SD-NEXT:    str x0, [x1]
2913; CHECK-SD-NEXT:    ret
2914;
2915; CHECK-GI-LABEL: test_v2f64_post_imm_ld1x2:
2916; CHECK-GI:       ; %bb.0:
2917; CHECK-GI-NEXT:    ld1.2d { v0, v1 }, [x0]
2918; CHECK-GI-NEXT:    add x8, x0, #32
2919; CHECK-GI-NEXT:    str x8, [x1]
2920; CHECK-GI-NEXT:    ret
2921  %ld1x2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0(ptr %A)
2922  %tmp = getelementptr double, ptr %A, i32 4
2923  store ptr %tmp, ptr %ptr
2924  ret { <2 x double>, <2 x double> } %ld1x2
2925}
2926
; Post-increment test, register form: loads two <2 x double> vectors from %A
; via llvm.aarch64.neon.ld1x2, then stores %A advanced by %inc double elements
; to %ptr. The default llc run expects %inc scaled to bytes (lsl #3) and used
; as the register post-index of ld1.2d; the -global-isel=1 run expects a plain
; ld1.2d followed by a separate add with the shift folded in.
2927define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x2(ptr %A, ptr %ptr, i64 %inc) {
2928; CHECK-SD-LABEL: test_v2f64_post_reg_ld1x2:
2929; CHECK-SD:       ; %bb.0:
2930; CHECK-SD-NEXT:    lsl x8, x2, #3
2931; CHECK-SD-NEXT:    ld1.2d { v0, v1 }, [x0], x8
2932; CHECK-SD-NEXT:    str x0, [x1]
2933; CHECK-SD-NEXT:    ret
2934;
2935; CHECK-GI-LABEL: test_v2f64_post_reg_ld1x2:
2936; CHECK-GI:       ; %bb.0:
2937; CHECK-GI-NEXT:    ld1.2d { v0, v1 }, [x0]
2938; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
2939; CHECK-GI-NEXT:    str x8, [x1]
2940; CHECK-GI-NEXT:    ret
2941  %ld1x2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0(ptr %A)
2942  %tmp = getelementptr double, ptr %A, i64 %inc
2943  store ptr %tmp, ptr %ptr
2944  ret { <2 x double>, <2 x double> } %ld1x2
2945}
2946
2947declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0(ptr)
2948
2949
; Post-increment test, immediate form: loads two <1 x double> vectors from %A
; via llvm.aarch64.neon.ld1x2, then stores %A + 2 double elements (16 bytes,
; the size of the data loaded) to %ptr. The default llc run expects the add
; folded into a post-indexed ld1.1d with writeback #16; the -global-isel=1 run
; expects a plain ld1.1d followed by a separate add.
2950define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x2(ptr %A, ptr %ptr) {
2951; CHECK-SD-LABEL: test_v1f64_post_imm_ld1x2:
2952; CHECK-SD:       ; %bb.0:
2953; CHECK-SD-NEXT:    ld1.1d { v0, v1 }, [x0], #16
2954; CHECK-SD-NEXT:    str x0, [x1]
2955; CHECK-SD-NEXT:    ret
2956;
2957; CHECK-GI-LABEL: test_v1f64_post_imm_ld1x2:
2958; CHECK-GI:       ; %bb.0:
2959; CHECK-GI-NEXT:    ld1.1d { v0, v1 }, [x0]
2960; CHECK-GI-NEXT:    add x8, x0, #16
2961; CHECK-GI-NEXT:    str x8, [x1]
2962; CHECK-GI-NEXT:    ret
2963  %ld1x2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0(ptr %A)
2964  %tmp = getelementptr double, ptr %A, i32 2
2965  store ptr %tmp, ptr %ptr
2966  ret { <1 x double>, <1 x double> } %ld1x2
2967}
2968
; Post-increment test, register form: loads two <1 x double> vectors from %A
; via llvm.aarch64.neon.ld1x2, then stores %A advanced by %inc double elements
; to %ptr. The default llc run expects %inc scaled to bytes (lsl #3) and used
; as the register post-index of ld1.1d; the -global-isel=1 run expects a plain
; ld1.1d followed by a separate add with the shift folded in.
2969define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x2(ptr %A, ptr %ptr, i64 %inc) {
2970; CHECK-SD-LABEL: test_v1f64_post_reg_ld1x2:
2971; CHECK-SD:       ; %bb.0:
2972; CHECK-SD-NEXT:    lsl x8, x2, #3
2973; CHECK-SD-NEXT:    ld1.1d { v0, v1 }, [x0], x8
2974; CHECK-SD-NEXT:    str x0, [x1]
2975; CHECK-SD-NEXT:    ret
2976;
2977; CHECK-GI-LABEL: test_v1f64_post_reg_ld1x2:
2978; CHECK-GI:       ; %bb.0:
2979; CHECK-GI-NEXT:    ld1.1d { v0, v1 }, [x0]
2980; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
2981; CHECK-GI-NEXT:    str x8, [x1]
2982; CHECK-GI-NEXT:    ret
2983  %ld1x2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0(ptr %A)
2984  %tmp = getelementptr double, ptr %A, i64 %inc
2985  store ptr %tmp, ptr %ptr
2986  ret { <1 x double>, <1 x double> } %ld1x2
2987}
2988
2989declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0(ptr)
2990
2991
; Post-increment test, immediate form: loads three <16 x i8> vectors from %A
; via llvm.aarch64.neon.ld1x3, then stores %A + 48 bytes (the size of the data
; loaded) to %ptr. The default llc run expects the add folded into a
; post-indexed ld1.16b with writeback #48; the -global-isel=1 run expects a
; plain ld1.16b followed by a separate add.
2992define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x3(ptr %A, ptr %ptr) {
2993; CHECK-SD-LABEL: test_v16i8_post_imm_ld1x3:
2994; CHECK-SD:       ; %bb.0:
2995; CHECK-SD-NEXT:    ld1.16b { v0, v1, v2 }, [x0], #48
2996; CHECK-SD-NEXT:    str x0, [x1]
2997; CHECK-SD-NEXT:    ret
2998;
2999; CHECK-GI-LABEL: test_v16i8_post_imm_ld1x3:
3000; CHECK-GI:       ; %bb.0:
3001; CHECK-GI-NEXT:    ld1.16b { v0, v1, v2 }, [x0]
3002; CHECK-GI-NEXT:    add x8, x0, #48
3003; CHECK-GI-NEXT:    str x8, [x1]
3004; CHECK-GI-NEXT:    ret
3005  %ld1x3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0(ptr %A)
3006  %tmp = getelementptr i8, ptr %A, i32 48
3007  store ptr %tmp, ptr %ptr
3008  ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld1x3
3009}
3010
; Post-increment test, register form: loads three <16 x i8> vectors from %A
; via llvm.aarch64.neon.ld1x3, then stores %A advanced by %inc bytes to %ptr.
; Elements are one byte, so no scaling is needed: the default llc run expects
; %inc (x2) used directly as the register post-index of ld1.16b; the
; -global-isel=1 run expects a plain ld1.16b followed by a separate add.
3011define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x3(ptr %A, ptr %ptr, i64 %inc) {
3012; CHECK-SD-LABEL: test_v16i8_post_reg_ld1x3:
3013; CHECK-SD:       ; %bb.0:
3014; CHECK-SD-NEXT:    ld1.16b { v0, v1, v2 }, [x0], x2
3015; CHECK-SD-NEXT:    str x0, [x1]
3016; CHECK-SD-NEXT:    ret
3017;
3018; CHECK-GI-LABEL: test_v16i8_post_reg_ld1x3:
3019; CHECK-GI:       ; %bb.0:
3020; CHECK-GI-NEXT:    ld1.16b { v0, v1, v2 }, [x0]
3021; CHECK-GI-NEXT:    add x8, x0, x2
3022; CHECK-GI-NEXT:    str x8, [x1]
3023; CHECK-GI-NEXT:    ret
3024  %ld1x3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0(ptr %A)
3025  %tmp = getelementptr i8, ptr %A, i64 %inc
3026  store ptr %tmp, ptr %ptr
3027  ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld1x3
3028}
3029
3030declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0(ptr)
3031
3032
; Post-increment test, immediate form: loads three <8 x i8> vectors from %A
; via llvm.aarch64.neon.ld1x3, then stores %A + 24 bytes (the size of the data
; loaded) to %ptr. The default llc run expects the add folded into a
; post-indexed ld1.8b with writeback #24; the -global-isel=1 run expects a
; plain ld1.8b followed by a separate add.
3033define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x3(ptr %A, ptr %ptr) {
3034; CHECK-SD-LABEL: test_v8i8_post_imm_ld1x3:
3035; CHECK-SD:       ; %bb.0:
3036; CHECK-SD-NEXT:    ld1.8b { v0, v1, v2 }, [x0], #24
3037; CHECK-SD-NEXT:    str x0, [x1]
3038; CHECK-SD-NEXT:    ret
3039;
3040; CHECK-GI-LABEL: test_v8i8_post_imm_ld1x3:
3041; CHECK-GI:       ; %bb.0:
3042; CHECK-GI-NEXT:    ld1.8b { v0, v1, v2 }, [x0]
3043; CHECK-GI-NEXT:    add x8, x0, #24
3044; CHECK-GI-NEXT:    str x8, [x1]
3045; CHECK-GI-NEXT:    ret
3046  %ld1x3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0(ptr %A)
3047  %tmp = getelementptr i8, ptr %A, i32 24
3048  store ptr %tmp, ptr %ptr
3049  ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld1x3
3050}
3051
; Post-increment test, register form: loads three <8 x i8> vectors from %A via
; llvm.aarch64.neon.ld1x3, then stores %A advanced by %inc bytes to %ptr.
; Elements are one byte, so no scaling is needed: the default llc run expects
; %inc (x2) used directly as the register post-index of ld1.8b; the
; -global-isel=1 run expects a plain ld1.8b followed by a separate add.
3052define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x3(ptr %A, ptr %ptr, i64 %inc) {
3053; CHECK-SD-LABEL: test_v8i8_post_reg_ld1x3:
3054; CHECK-SD:       ; %bb.0:
3055; CHECK-SD-NEXT:    ld1.8b { v0, v1, v2 }, [x0], x2
3056; CHECK-SD-NEXT:    str x0, [x1]
3057; CHECK-SD-NEXT:    ret
3058;
3059; CHECK-GI-LABEL: test_v8i8_post_reg_ld1x3:
3060; CHECK-GI:       ; %bb.0:
3061; CHECK-GI-NEXT:    ld1.8b { v0, v1, v2 }, [x0]
3062; CHECK-GI-NEXT:    add x8, x0, x2
3063; CHECK-GI-NEXT:    str x8, [x1]
3064; CHECK-GI-NEXT:    ret
3065  %ld1x3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0(ptr %A)
3066  %tmp = getelementptr i8, ptr %A, i64 %inc
3067  store ptr %tmp, ptr %ptr
3068  ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld1x3
3069}
3070
3071declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0(ptr)
3072
3073
; Post-increment test, immediate form: loads three <8 x i16> vectors from %A
; via llvm.aarch64.neon.ld1x3, then stores %A + 24 i16 elements (48 bytes, the
; size of the data loaded) to %ptr. The default llc run expects the add folded
; into a post-indexed ld1.8h with writeback #48; the -global-isel=1 run
; expects a plain ld1.8h followed by a separate add.
3074define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x3(ptr %A, ptr %ptr) {
3075; CHECK-SD-LABEL: test_v8i16_post_imm_ld1x3:
3076; CHECK-SD:       ; %bb.0:
3077; CHECK-SD-NEXT:    ld1.8h { v0, v1, v2 }, [x0], #48
3078; CHECK-SD-NEXT:    str x0, [x1]
3079; CHECK-SD-NEXT:    ret
3080;
3081; CHECK-GI-LABEL: test_v8i16_post_imm_ld1x3:
3082; CHECK-GI:       ; %bb.0:
3083; CHECK-GI-NEXT:    ld1.8h { v0, v1, v2 }, [x0]
3084; CHECK-GI-NEXT:    add x8, x0, #48
3085; CHECK-GI-NEXT:    str x8, [x1]
3086; CHECK-GI-NEXT:    ret
3087  %ld1x3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0(ptr %A)
3088  %tmp = getelementptr i16, ptr %A, i32 24
3089  store ptr %tmp, ptr %ptr
3090  ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld1x3
3091}
3092
; Post-increment test, register form: loads three <8 x i16> vectors from %A
; via llvm.aarch64.neon.ld1x3, then stores %A advanced by %inc i16 elements to
; %ptr. The default llc run expects %inc scaled to bytes (lsl #1) and used as
; the register post-index of ld1.8h; the -global-isel=1 run expects a plain
; ld1.8h followed by a separate add with the shift folded in.
3093define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x3(ptr %A, ptr %ptr, i64 %inc) {
3094; CHECK-SD-LABEL: test_v8i16_post_reg_ld1x3:
3095; CHECK-SD:       ; %bb.0:
3096; CHECK-SD-NEXT:    lsl x8, x2, #1
3097; CHECK-SD-NEXT:    ld1.8h { v0, v1, v2 }, [x0], x8
3098; CHECK-SD-NEXT:    str x0, [x1]
3099; CHECK-SD-NEXT:    ret
3100;
3101; CHECK-GI-LABEL: test_v8i16_post_reg_ld1x3:
3102; CHECK-GI:       ; %bb.0:
3103; CHECK-GI-NEXT:    ld1.8h { v0, v1, v2 }, [x0]
3104; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
3105; CHECK-GI-NEXT:    str x8, [x1]
3106; CHECK-GI-NEXT:    ret
3107  %ld1x3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0(ptr %A)
3108  %tmp = getelementptr i16, ptr %A, i64 %inc
3109  store ptr %tmp, ptr %ptr
3110  ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld1x3
3111}
3112
3113declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0(ptr)
3114
3115
; Post-increment test, immediate form: loads three <4 x i16> vectors from %A
; via llvm.aarch64.neon.ld1x3, then stores %A + 12 i16 elements (24 bytes, the
; size of the data loaded) to %ptr. The default llc run expects the add folded
; into a post-indexed ld1.4h with writeback #24; the -global-isel=1 run
; expects a plain ld1.4h followed by a separate add.
3116define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x3(ptr %A, ptr %ptr) {
3117; CHECK-SD-LABEL: test_v4i16_post_imm_ld1x3:
3118; CHECK-SD:       ; %bb.0:
3119; CHECK-SD-NEXT:    ld1.4h { v0, v1, v2 }, [x0], #24
3120; CHECK-SD-NEXT:    str x0, [x1]
3121; CHECK-SD-NEXT:    ret
3122;
3123; CHECK-GI-LABEL: test_v4i16_post_imm_ld1x3:
3124; CHECK-GI:       ; %bb.0:
3125; CHECK-GI-NEXT:    ld1.4h { v0, v1, v2 }, [x0]
3126; CHECK-GI-NEXT:    add x8, x0, #24
3127; CHECK-GI-NEXT:    str x8, [x1]
3128; CHECK-GI-NEXT:    ret
3129  %ld1x3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0(ptr %A)
3130  %tmp = getelementptr i16, ptr %A, i32 12
3131  store ptr %tmp, ptr %ptr
3132  ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld1x3
3133}
3134
; Post-increment test, register form: loads three <4 x i16> vectors from %A
; via llvm.aarch64.neon.ld1x3, then stores %A advanced by %inc i16 elements to
; %ptr. The default llc run expects %inc scaled to bytes (lsl #1) and used as
; the register post-index of ld1.4h; the -global-isel=1 run expects a plain
; ld1.4h followed by a separate add with the shift folded in.
3135define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x3(ptr %A, ptr %ptr, i64 %inc) {
3136; CHECK-SD-LABEL: test_v4i16_post_reg_ld1x3:
3137; CHECK-SD:       ; %bb.0:
3138; CHECK-SD-NEXT:    lsl x8, x2, #1
3139; CHECK-SD-NEXT:    ld1.4h { v0, v1, v2 }, [x0], x8
3140; CHECK-SD-NEXT:    str x0, [x1]
3141; CHECK-SD-NEXT:    ret
3142;
3143; CHECK-GI-LABEL: test_v4i16_post_reg_ld1x3:
3144; CHECK-GI:       ; %bb.0:
3145; CHECK-GI-NEXT:    ld1.4h { v0, v1, v2 }, [x0]
3146; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
3147; CHECK-GI-NEXT:    str x8, [x1]
3148; CHECK-GI-NEXT:    ret
3149  %ld1x3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0(ptr %A)
3150  %tmp = getelementptr i16, ptr %A, i64 %inc
3151  store ptr %tmp, ptr %ptr
3152  ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld1x3
3153}
3154
3155declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0(ptr)
3156
3157
; Post-increment test, immediate form: loads three <4 x i32> vectors from %A
; via llvm.aarch64.neon.ld1x3, then stores %A + 12 i32 elements (48 bytes, the
; size of the data loaded) to %ptr. The default llc run expects the add folded
; into a post-indexed ld1.4s with writeback #48; the -global-isel=1 run
; expects a plain ld1.4s followed by a separate add.
3158define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x3(ptr %A, ptr %ptr) {
3159; CHECK-SD-LABEL: test_v4i32_post_imm_ld1x3:
3160; CHECK-SD:       ; %bb.0:
3161; CHECK-SD-NEXT:    ld1.4s { v0, v1, v2 }, [x0], #48
3162; CHECK-SD-NEXT:    str x0, [x1]
3163; CHECK-SD-NEXT:    ret
3164;
3165; CHECK-GI-LABEL: test_v4i32_post_imm_ld1x3:
3166; CHECK-GI:       ; %bb.0:
3167; CHECK-GI-NEXT:    ld1.4s { v0, v1, v2 }, [x0]
3168; CHECK-GI-NEXT:    add x8, x0, #48
3169; CHECK-GI-NEXT:    str x8, [x1]
3170; CHECK-GI-NEXT:    ret
3171  %ld1x3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0(ptr %A)
3172  %tmp = getelementptr i32, ptr %A, i32 12
3173  store ptr %tmp, ptr %ptr
3174  ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld1x3
3175}
3176
; Post-increment test, register form: loads three <4 x i32> vectors from %A
; via llvm.aarch64.neon.ld1x3, then stores %A advanced by %inc i32 elements to
; %ptr. The default llc run expects %inc scaled to bytes (lsl #2) and used as
; the register post-index of ld1.4s; the -global-isel=1 run expects a plain
; ld1.4s followed by a separate add with the shift folded in.
3177define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x3(ptr %A, ptr %ptr, i64 %inc) {
3178; CHECK-SD-LABEL: test_v4i32_post_reg_ld1x3:
3179; CHECK-SD:       ; %bb.0:
3180; CHECK-SD-NEXT:    lsl x8, x2, #2
3181; CHECK-SD-NEXT:    ld1.4s { v0, v1, v2 }, [x0], x8
3182; CHECK-SD-NEXT:    str x0, [x1]
3183; CHECK-SD-NEXT:    ret
3184;
3185; CHECK-GI-LABEL: test_v4i32_post_reg_ld1x3:
3186; CHECK-GI:       ; %bb.0:
3187; CHECK-GI-NEXT:    ld1.4s { v0, v1, v2 }, [x0]
3188; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
3189; CHECK-GI-NEXT:    str x8, [x1]
3190; CHECK-GI-NEXT:    ret
3191  %ld1x3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0(ptr %A)
3192  %tmp = getelementptr i32, ptr %A, i64 %inc
3193  store ptr %tmp, ptr %ptr
3194  ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld1x3
3195}
3196
3197declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0(ptr)
3198
3199
; Post-increment test, immediate form: loads three <2 x i32> vectors from %A
; via llvm.aarch64.neon.ld1x3, then stores %A + 6 i32 elements (24 bytes, the
; size of the data loaded) to %ptr. The default llc run expects the add folded
; into a post-indexed ld1.2s with writeback #24; the -global-isel=1 run
; expects a plain ld1.2s followed by a separate add.
3200define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x3(ptr %A, ptr %ptr) {
3201; CHECK-SD-LABEL: test_v2i32_post_imm_ld1x3:
3202; CHECK-SD:       ; %bb.0:
3203; CHECK-SD-NEXT:    ld1.2s { v0, v1, v2 }, [x0], #24
3204; CHECK-SD-NEXT:    str x0, [x1]
3205; CHECK-SD-NEXT:    ret
3206;
3207; CHECK-GI-LABEL: test_v2i32_post_imm_ld1x3:
3208; CHECK-GI:       ; %bb.0:
3209; CHECK-GI-NEXT:    ld1.2s { v0, v1, v2 }, [x0]
3210; CHECK-GI-NEXT:    add x8, x0, #24
3211; CHECK-GI-NEXT:    str x8, [x1]
3212; CHECK-GI-NEXT:    ret
3213  %ld1x3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0(ptr %A)
3214  %tmp = getelementptr i32, ptr %A, i32 6
3215  store ptr %tmp, ptr %ptr
3216  ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld1x3
3217}
3218
; Post-increment test, register form: loads three <2 x i32> vectors from %A
; via llvm.aarch64.neon.ld1x3, then stores %A advanced by %inc i32 elements to
; %ptr. The default llc run expects %inc scaled to bytes (lsl #2) and used as
; the register post-index of ld1.2s; the -global-isel=1 run expects a plain
; ld1.2s followed by a separate add with the shift folded in.
3219define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x3(ptr %A, ptr %ptr, i64 %inc) {
3220; CHECK-SD-LABEL: test_v2i32_post_reg_ld1x3:
3221; CHECK-SD:       ; %bb.0:
3222; CHECK-SD-NEXT:    lsl x8, x2, #2
3223; CHECK-SD-NEXT:    ld1.2s { v0, v1, v2 }, [x0], x8
3224; CHECK-SD-NEXT:    str x0, [x1]
3225; CHECK-SD-NEXT:    ret
3226;
3227; CHECK-GI-LABEL: test_v2i32_post_reg_ld1x3:
3228; CHECK-GI:       ; %bb.0:
3229; CHECK-GI-NEXT:    ld1.2s { v0, v1, v2 }, [x0]
3230; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
3231; CHECK-GI-NEXT:    str x8, [x1]
3232; CHECK-GI-NEXT:    ret
3233  %ld1x3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0(ptr %A)
3234  %tmp = getelementptr i32, ptr %A, i64 %inc
3235  store ptr %tmp, ptr %ptr
3236  ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld1x3
3237}
3238
3239declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0(ptr)
3240
3241
; Post-increment test, immediate form: loads three <2 x i64> vectors from %A
; via llvm.aarch64.neon.ld1x3, then stores %A + 6 i64 elements (48 bytes, the
; size of the data loaded) to %ptr. The default llc run expects the add folded
; into a post-indexed ld1.2d with writeback #48; the -global-isel=1 run
; expects a plain ld1.2d followed by a separate add.
3242define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x3(ptr %A, ptr %ptr) {
3243; CHECK-SD-LABEL: test_v2i64_post_imm_ld1x3:
3244; CHECK-SD:       ; %bb.0:
3245; CHECK-SD-NEXT:    ld1.2d { v0, v1, v2 }, [x0], #48
3246; CHECK-SD-NEXT:    str x0, [x1]
3247; CHECK-SD-NEXT:    ret
3248;
3249; CHECK-GI-LABEL: test_v2i64_post_imm_ld1x3:
3250; CHECK-GI:       ; %bb.0:
3251; CHECK-GI-NEXT:    ld1.2d { v0, v1, v2 }, [x0]
3252; CHECK-GI-NEXT:    add x8, x0, #48
3253; CHECK-GI-NEXT:    str x8, [x1]
3254; CHECK-GI-NEXT:    ret
3255  %ld1x3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0(ptr %A)
3256  %tmp = getelementptr i64, ptr %A, i32 6
3257  store ptr %tmp, ptr %ptr
3258  ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld1x3
3259}
3260
; Post-increment test, register form: loads three <2 x i64> vectors from %A
; via llvm.aarch64.neon.ld1x3, then stores %A advanced by %inc i64 elements to
; %ptr. The default llc run expects %inc scaled to bytes (lsl #3) and used as
; the register post-index of ld1.2d; the -global-isel=1 run expects a plain
; ld1.2d followed by a separate add with the shift folded in.
3261define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x3(ptr %A, ptr %ptr, i64 %inc) {
3262; CHECK-SD-LABEL: test_v2i64_post_reg_ld1x3:
3263; CHECK-SD:       ; %bb.0:
3264; CHECK-SD-NEXT:    lsl x8, x2, #3
3265; CHECK-SD-NEXT:    ld1.2d { v0, v1, v2 }, [x0], x8
3266; CHECK-SD-NEXT:    str x0, [x1]
3267; CHECK-SD-NEXT:    ret
3268;
3269; CHECK-GI-LABEL: test_v2i64_post_reg_ld1x3:
3270; CHECK-GI:       ; %bb.0:
3271; CHECK-GI-NEXT:    ld1.2d { v0, v1, v2 }, [x0]
3272; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
3273; CHECK-GI-NEXT:    str x8, [x1]
3274; CHECK-GI-NEXT:    ret
3275  %ld1x3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0(ptr %A)
3276  %tmp = getelementptr i64, ptr %A, i64 %inc
3277  store ptr %tmp, ptr %ptr
3278  ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld1x3
3279}
3280
3281declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0(ptr)
3282
3283
; Post-increment test, immediate form: loads three <1 x i64> vectors from %A
; via llvm.aarch64.neon.ld1x3, then stores %A + 3 i64 elements (24 bytes, the
; size of the data loaded) to %ptr. The default llc run expects the add folded
; into a post-indexed ld1.1d with writeback #24; the -global-isel=1 run
; expects a plain ld1.1d followed by a separate add.
3284define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x3(ptr %A, ptr %ptr) {
3285; CHECK-SD-LABEL: test_v1i64_post_imm_ld1x3:
3286; CHECK-SD:       ; %bb.0:
3287; CHECK-SD-NEXT:    ld1.1d { v0, v1, v2 }, [x0], #24
3288; CHECK-SD-NEXT:    str x0, [x1]
3289; CHECK-SD-NEXT:    ret
3290;
3291; CHECK-GI-LABEL: test_v1i64_post_imm_ld1x3:
3292; CHECK-GI:       ; %bb.0:
3293; CHECK-GI-NEXT:    ld1.1d { v0, v1, v2 }, [x0]
3294; CHECK-GI-NEXT:    add x8, x0, #24
3295; CHECK-GI-NEXT:    str x8, [x1]
3296; CHECK-GI-NEXT:    ret
3297  %ld1x3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0(ptr %A)
3298  %tmp = getelementptr i64, ptr %A, i32 3
3299  store ptr %tmp, ptr %ptr
3300  ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld1x3
3301}
3302
; Post-increment test, register form: loads three <1 x i64> vectors from %A
; via llvm.aarch64.neon.ld1x3, then stores %A advanced by %inc i64 elements to
; %ptr. The default llc run expects %inc scaled to bytes (lsl #3) and used as
; the register post-index of ld1.1d; the -global-isel=1 run expects a plain
; ld1.1d followed by a separate add with the shift folded in.
3303define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x3(ptr %A, ptr %ptr, i64 %inc) {
3304; CHECK-SD-LABEL: test_v1i64_post_reg_ld1x3:
3305; CHECK-SD:       ; %bb.0:
3306; CHECK-SD-NEXT:    lsl x8, x2, #3
3307; CHECK-SD-NEXT:    ld1.1d { v0, v1, v2 }, [x0], x8
3308; CHECK-SD-NEXT:    str x0, [x1]
3309; CHECK-SD-NEXT:    ret
3310;
3311; CHECK-GI-LABEL: test_v1i64_post_reg_ld1x3:
3312; CHECK-GI:       ; %bb.0:
3313; CHECK-GI-NEXT:    ld1.1d { v0, v1, v2 }, [x0]
3314; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
3315; CHECK-GI-NEXT:    str x8, [x1]
3316; CHECK-GI-NEXT:    ret
3317  %ld1x3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0(ptr %A)
3318  %tmp = getelementptr i64, ptr %A, i64 %inc
3319  store ptr %tmp, ptr %ptr
3320  ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld1x3
3321}
3322
3323declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0(ptr)
3324
3325
; Post-increment by immediate: ld1x3 of <4 x float> from %A, then %A + 12 x float
; (48 bytes) is stored to %ptr. CHECK-SD expects the increment folded into the
; load as a post-index immediate (#48); CHECK-GI expects a separate add.
3326define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x3(ptr %A, ptr %ptr) {
3327; CHECK-SD-LABEL: test_v4f32_post_imm_ld1x3:
3328; CHECK-SD:       ; %bb.0:
3329; CHECK-SD-NEXT:    ld1.4s { v0, v1, v2 }, [x0], #48
3330; CHECK-SD-NEXT:    str x0, [x1]
3331; CHECK-SD-NEXT:    ret
3332;
3333; CHECK-GI-LABEL: test_v4f32_post_imm_ld1x3:
3334; CHECK-GI:       ; %bb.0:
3335; CHECK-GI-NEXT:    ld1.4s { v0, v1, v2 }, [x0]
3336; CHECK-GI-NEXT:    add x8, x0, #48
3337; CHECK-GI-NEXT:    str x8, [x1]
3338; CHECK-GI-NEXT:    ret
3339  %ld1x3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0(ptr %A)
3340  %tmp = getelementptr float, ptr %A, i32 12
3341  store ptr %tmp, ptr %ptr
3342  ret { <4 x float>, <4 x float>, <4 x float> } %ld1x3
3343}
3344
; Post-increment by register: ld1x3 of <4 x float> from %A, then %A + %inc x float
; is stored to %ptr. CHECK-SD expects %inc scaled by 4 (lsl #2) and used as the
; post-index register; CHECK-GI expects a separate add with the shift folded in.
3345define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x3(ptr %A, ptr %ptr, i64 %inc) {
3346; CHECK-SD-LABEL: test_v4f32_post_reg_ld1x3:
3347; CHECK-SD:       ; %bb.0:
3348; CHECK-SD-NEXT:    lsl x8, x2, #2
3349; CHECK-SD-NEXT:    ld1.4s { v0, v1, v2 }, [x0], x8
3350; CHECK-SD-NEXT:    str x0, [x1]
3351; CHECK-SD-NEXT:    ret
3352;
3353; CHECK-GI-LABEL: test_v4f32_post_reg_ld1x3:
3354; CHECK-GI:       ; %bb.0:
3355; CHECK-GI-NEXT:    ld1.4s { v0, v1, v2 }, [x0]
3356; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
3357; CHECK-GI-NEXT:    str x8, [x1]
3358; CHECK-GI-NEXT:    ret
3359  %ld1x3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0(ptr %A)
3360  %tmp = getelementptr float, ptr %A, i64 %inc
3361  store ptr %tmp, ptr %ptr
3362  ret { <4 x float>, <4 x float>, <4 x float> } %ld1x3
3363}
3364
3365declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0(ptr)
3366
3367
; Post-increment by immediate: ld1x3 of <2 x float> from %A, then %A + 6 x float
; (24 bytes) is stored to %ptr. CHECK-SD expects the increment folded into the
; load as a post-index immediate (#24); CHECK-GI expects a separate add.
3368define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x3(ptr %A, ptr %ptr) {
3369; CHECK-SD-LABEL: test_v2f32_post_imm_ld1x3:
3370; CHECK-SD:       ; %bb.0:
3371; CHECK-SD-NEXT:    ld1.2s { v0, v1, v2 }, [x0], #24
3372; CHECK-SD-NEXT:    str x0, [x1]
3373; CHECK-SD-NEXT:    ret
3374;
3375; CHECK-GI-LABEL: test_v2f32_post_imm_ld1x3:
3376; CHECK-GI:       ; %bb.0:
3377; CHECK-GI-NEXT:    ld1.2s { v0, v1, v2 }, [x0]
3378; CHECK-GI-NEXT:    add x8, x0, #24
3379; CHECK-GI-NEXT:    str x8, [x1]
3380; CHECK-GI-NEXT:    ret
3381  %ld1x3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0(ptr %A)
3382  %tmp = getelementptr float, ptr %A, i32 6
3383  store ptr %tmp, ptr %ptr
3384  ret { <2 x float>, <2 x float>, <2 x float> } %ld1x3
3385}
3386
; Post-increment by register: ld1x3 of <2 x float> from %A, then %A + %inc x float
; is stored to %ptr. CHECK-SD expects %inc scaled by 4 (lsl #2) and used as the
; post-index register; CHECK-GI expects a separate add with the shift folded in.
3387define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x3(ptr %A, ptr %ptr, i64 %inc) {
3388; CHECK-SD-LABEL: test_v2f32_post_reg_ld1x3:
3389; CHECK-SD:       ; %bb.0:
3390; CHECK-SD-NEXT:    lsl x8, x2, #2
3391; CHECK-SD-NEXT:    ld1.2s { v0, v1, v2 }, [x0], x8
3392; CHECK-SD-NEXT:    str x0, [x1]
3393; CHECK-SD-NEXT:    ret
3394;
3395; CHECK-GI-LABEL: test_v2f32_post_reg_ld1x3:
3396; CHECK-GI:       ; %bb.0:
3397; CHECK-GI-NEXT:    ld1.2s { v0, v1, v2 }, [x0]
3398; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
3399; CHECK-GI-NEXT:    str x8, [x1]
3400; CHECK-GI-NEXT:    ret
3401  %ld1x3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0(ptr %A)
3402  %tmp = getelementptr float, ptr %A, i64 %inc
3403  store ptr %tmp, ptr %ptr
3404  ret { <2 x float>, <2 x float>, <2 x float> } %ld1x3
3405}
3406
3407declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0(ptr)
3408
3409
; Post-increment by immediate: ld1x3 of <2 x double> from %A, then %A + 6 x double
; (48 bytes) is stored to %ptr. CHECK-SD expects the increment folded into the
; load as a post-index immediate (#48); CHECK-GI expects a separate add.
3410define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x3(ptr %A, ptr %ptr) {
3411; CHECK-SD-LABEL: test_v2f64_post_imm_ld1x3:
3412; CHECK-SD:       ; %bb.0:
3413; CHECK-SD-NEXT:    ld1.2d { v0, v1, v2 }, [x0], #48
3414; CHECK-SD-NEXT:    str x0, [x1]
3415; CHECK-SD-NEXT:    ret
3416;
3417; CHECK-GI-LABEL: test_v2f64_post_imm_ld1x3:
3418; CHECK-GI:       ; %bb.0:
3419; CHECK-GI-NEXT:    ld1.2d { v0, v1, v2 }, [x0]
3420; CHECK-GI-NEXT:    add x8, x0, #48
3421; CHECK-GI-NEXT:    str x8, [x1]
3422; CHECK-GI-NEXT:    ret
3423  %ld1x3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0(ptr %A)
3424  %tmp = getelementptr double, ptr %A, i32 6
3425  store ptr %tmp, ptr %ptr
3426  ret { <2 x double>, <2 x double>, <2 x double> } %ld1x3
3427}
3428
; Post-increment by register: ld1x3 of <2 x double> from %A, then %A + %inc x double
; is stored to %ptr. CHECK-SD expects %inc scaled by 8 (lsl #3) and used as the
; post-index register; CHECK-GI expects a separate add with the shift folded in.
3429define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x3(ptr %A, ptr %ptr, i64 %inc) {
3430; CHECK-SD-LABEL: test_v2f64_post_reg_ld1x3:
3431; CHECK-SD:       ; %bb.0:
3432; CHECK-SD-NEXT:    lsl x8, x2, #3
3433; CHECK-SD-NEXT:    ld1.2d { v0, v1, v2 }, [x0], x8
3434; CHECK-SD-NEXT:    str x0, [x1]
3435; CHECK-SD-NEXT:    ret
3436;
3437; CHECK-GI-LABEL: test_v2f64_post_reg_ld1x3:
3438; CHECK-GI:       ; %bb.0:
3439; CHECK-GI-NEXT:    ld1.2d { v0, v1, v2 }, [x0]
3440; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
3441; CHECK-GI-NEXT:    str x8, [x1]
3442; CHECK-GI-NEXT:    ret
3443  %ld1x3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0(ptr %A)
3444  %tmp = getelementptr double, ptr %A, i64 %inc
3445  store ptr %tmp, ptr %ptr
3446  ret { <2 x double>, <2 x double>, <2 x double> } %ld1x3
3447}
3448
3449declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0(ptr)
3450
3451
; Post-increment by immediate: ld1x3 of <1 x double> from %A, then %A + 3 x double
; (24 bytes) is stored to %ptr. CHECK-SD expects the increment folded into the
; load as a post-index immediate (#24); CHECK-GI expects a separate add.
3452define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x3(ptr %A, ptr %ptr) {
3453; CHECK-SD-LABEL: test_v1f64_post_imm_ld1x3:
3454; CHECK-SD:       ; %bb.0:
3455; CHECK-SD-NEXT:    ld1.1d { v0, v1, v2 }, [x0], #24
3456; CHECK-SD-NEXT:    str x0, [x1]
3457; CHECK-SD-NEXT:    ret
3458;
3459; CHECK-GI-LABEL: test_v1f64_post_imm_ld1x3:
3460; CHECK-GI:       ; %bb.0:
3461; CHECK-GI-NEXT:    ld1.1d { v0, v1, v2 }, [x0]
3462; CHECK-GI-NEXT:    add x8, x0, #24
3463; CHECK-GI-NEXT:    str x8, [x1]
3464; CHECK-GI-NEXT:    ret
3465  %ld1x3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0(ptr %A)
3466  %tmp = getelementptr double, ptr %A, i32 3
3467  store ptr %tmp, ptr %ptr
3468  ret { <1 x double>, <1 x double>, <1 x double> } %ld1x3
3469}
3470
; Post-increment by register: ld1x3 of <1 x double> from %A, then %A + %inc x double
; is stored to %ptr. CHECK-SD expects %inc scaled by 8 (lsl #3) and used as the
; post-index register; CHECK-GI expects a separate add with the shift folded in.
3471define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x3(ptr %A, ptr %ptr, i64 %inc) {
3472; CHECK-SD-LABEL: test_v1f64_post_reg_ld1x3:
3473; CHECK-SD:       ; %bb.0:
3474; CHECK-SD-NEXT:    lsl x8, x2, #3
3475; CHECK-SD-NEXT:    ld1.1d { v0, v1, v2 }, [x0], x8
3476; CHECK-SD-NEXT:    str x0, [x1]
3477; CHECK-SD-NEXT:    ret
3478;
3479; CHECK-GI-LABEL: test_v1f64_post_reg_ld1x3:
3480; CHECK-GI:       ; %bb.0:
3481; CHECK-GI-NEXT:    ld1.1d { v0, v1, v2 }, [x0]
3482; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
3483; CHECK-GI-NEXT:    str x8, [x1]
3484; CHECK-GI-NEXT:    ret
3485  %ld1x3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0(ptr %A)
3486  %tmp = getelementptr double, ptr %A, i64 %inc
3487  store ptr %tmp, ptr %ptr
3488  ret { <1 x double>, <1 x double>, <1 x double> } %ld1x3
3489}
3490
3491declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0(ptr)
3492
3493
; Post-increment by immediate: ld1x4 of <16 x i8> from %A, then %A + 64 x i8
; (64 bytes) is stored to %ptr. CHECK-SD expects the increment folded into the
; load as a post-index immediate (#64); CHECK-GI expects a separate add.
3494define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x4(ptr %A, ptr %ptr) {
3495; CHECK-SD-LABEL: test_v16i8_post_imm_ld1x4:
3496; CHECK-SD:       ; %bb.0:
3497; CHECK-SD-NEXT:    ld1.16b { v0, v1, v2, v3 }, [x0], #64
3498; CHECK-SD-NEXT:    str x0, [x1]
3499; CHECK-SD-NEXT:    ret
3500;
3501; CHECK-GI-LABEL: test_v16i8_post_imm_ld1x4:
3502; CHECK-GI:       ; %bb.0:
3503; CHECK-GI-NEXT:    ld1.16b { v0, v1, v2, v3 }, [x0]
3504; CHECK-GI-NEXT:    add x8, x0, #64
3505; CHECK-GI-NEXT:    str x8, [x1]
3506; CHECK-GI-NEXT:    ret
3507  %ld1x4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0(ptr %A)
3508  %tmp = getelementptr i8, ptr %A, i32 64
3509  store ptr %tmp, ptr %ptr
3510  ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld1x4
3511}
3512
; Post-increment by register: ld1x4 of <16 x i8> from %A, then %A + %inc bytes
; is stored to %ptr. CHECK-SD expects the increment register (x2) used unscaled
; as the post-index register; CHECK-GI expects a separate add.
3513define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x4(ptr %A, ptr %ptr, i64 %inc) {
3514; CHECK-SD-LABEL: test_v16i8_post_reg_ld1x4:
3515; CHECK-SD:       ; %bb.0:
3516; CHECK-SD-NEXT:    ld1.16b { v0, v1, v2, v3 }, [x0], x2
3517; CHECK-SD-NEXT:    str x0, [x1]
3518; CHECK-SD-NEXT:    ret
3519;
3520; CHECK-GI-LABEL: test_v16i8_post_reg_ld1x4:
3521; CHECK-GI:       ; %bb.0:
3522; CHECK-GI-NEXT:    ld1.16b { v0, v1, v2, v3 }, [x0]
3523; CHECK-GI-NEXT:    add x8, x0, x2
3524; CHECK-GI-NEXT:    str x8, [x1]
3525; CHECK-GI-NEXT:    ret
3526  %ld1x4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0(ptr %A)
3527  %tmp = getelementptr i8, ptr %A, i64 %inc
3528  store ptr %tmp, ptr %ptr
3529  ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld1x4
3530}
3531
3532declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0(ptr)
3533
3534
; Post-increment by immediate: ld1x4 of <8 x i8> from %A, then %A + 32 x i8
; (32 bytes) is stored to %ptr. CHECK-SD expects the increment folded into the
; load as a post-index immediate (#32); CHECK-GI expects a separate add.
3535define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x4(ptr %A, ptr %ptr) {
3536; CHECK-SD-LABEL: test_v8i8_post_imm_ld1x4:
3537; CHECK-SD:       ; %bb.0:
3538; CHECK-SD-NEXT:    ld1.8b { v0, v1, v2, v3 }, [x0], #32
3539; CHECK-SD-NEXT:    str x0, [x1]
3540; CHECK-SD-NEXT:    ret
3541;
3542; CHECK-GI-LABEL: test_v8i8_post_imm_ld1x4:
3543; CHECK-GI:       ; %bb.0:
3544; CHECK-GI-NEXT:    ld1.8b { v0, v1, v2, v3 }, [x0]
3545; CHECK-GI-NEXT:    add x8, x0, #32
3546; CHECK-GI-NEXT:    str x8, [x1]
3547; CHECK-GI-NEXT:    ret
3548  %ld1x4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0(ptr %A)
3549  %tmp = getelementptr i8, ptr %A, i32 32
3550  store ptr %tmp, ptr %ptr
3551  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld1x4
3552}
3553
; Post-increment by register: ld1x4 of <8 x i8> from %A, then %A + %inc bytes
; is stored to %ptr. CHECK-SD expects the increment register (x2) used unscaled
; as the post-index register; CHECK-GI expects a separate add.
3554define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x4(ptr %A, ptr %ptr, i64 %inc) {
3555; CHECK-SD-LABEL: test_v8i8_post_reg_ld1x4:
3556; CHECK-SD:       ; %bb.0:
3557; CHECK-SD-NEXT:    ld1.8b { v0, v1, v2, v3 }, [x0], x2
3558; CHECK-SD-NEXT:    str x0, [x1]
3559; CHECK-SD-NEXT:    ret
3560;
3561; CHECK-GI-LABEL: test_v8i8_post_reg_ld1x4:
3562; CHECK-GI:       ; %bb.0:
3563; CHECK-GI-NEXT:    ld1.8b { v0, v1, v2, v3 }, [x0]
3564; CHECK-GI-NEXT:    add x8, x0, x2
3565; CHECK-GI-NEXT:    str x8, [x1]
3566; CHECK-GI-NEXT:    ret
3567  %ld1x4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0(ptr %A)
3568  %tmp = getelementptr i8, ptr %A, i64 %inc
3569  store ptr %tmp, ptr %ptr
3570  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld1x4
3571}
3572
3573declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0(ptr)
3574
3575
; Post-increment by immediate: ld1x4 of <8 x i16> from %A, then %A + 32 x i16
; (64 bytes) is stored to %ptr. CHECK-SD expects the increment folded into the
; load as a post-index immediate (#64); CHECK-GI expects a separate add.
3576define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x4(ptr %A, ptr %ptr) {
3577; CHECK-SD-LABEL: test_v8i16_post_imm_ld1x4:
3578; CHECK-SD:       ; %bb.0:
3579; CHECK-SD-NEXT:    ld1.8h { v0, v1, v2, v3 }, [x0], #64
3580; CHECK-SD-NEXT:    str x0, [x1]
3581; CHECK-SD-NEXT:    ret
3582;
3583; CHECK-GI-LABEL: test_v8i16_post_imm_ld1x4:
3584; CHECK-GI:       ; %bb.0:
3585; CHECK-GI-NEXT:    ld1.8h { v0, v1, v2, v3 }, [x0]
3586; CHECK-GI-NEXT:    add x8, x0, #64
3587; CHECK-GI-NEXT:    str x8, [x1]
3588; CHECK-GI-NEXT:    ret
3589  %ld1x4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0(ptr %A)
3590  %tmp = getelementptr i16, ptr %A, i32 32
3591  store ptr %tmp, ptr %ptr
3592  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld1x4
3593}
3594
; Post-increment by register: ld1x4 of <8 x i16> from %A, then %A + %inc x i16
; is stored to %ptr. CHECK-SD expects %inc scaled by 2 (lsl #1) and used as the
; post-index register; CHECK-GI expects a separate add with the shift folded in.
3595define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x4(ptr %A, ptr %ptr, i64 %inc) {
3596; CHECK-SD-LABEL: test_v8i16_post_reg_ld1x4:
3597; CHECK-SD:       ; %bb.0:
3598; CHECK-SD-NEXT:    lsl x8, x2, #1
3599; CHECK-SD-NEXT:    ld1.8h { v0, v1, v2, v3 }, [x0], x8
3600; CHECK-SD-NEXT:    str x0, [x1]
3601; CHECK-SD-NEXT:    ret
3602;
3603; CHECK-GI-LABEL: test_v8i16_post_reg_ld1x4:
3604; CHECK-GI:       ; %bb.0:
3605; CHECK-GI-NEXT:    ld1.8h { v0, v1, v2, v3 }, [x0]
3606; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
3607; CHECK-GI-NEXT:    str x8, [x1]
3608; CHECK-GI-NEXT:    ret
3609  %ld1x4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0(ptr %A)
3610  %tmp = getelementptr i16, ptr %A, i64 %inc
3611  store ptr %tmp, ptr %ptr
3612  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld1x4
3613}
3614
3615declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0(ptr)
3616
3617
; Post-increment by immediate: ld1x4 of <4 x i16> from %A, then %A + 16 x i16
; (32 bytes) is stored to %ptr. CHECK-SD expects the increment folded into the
; load as a post-index immediate (#32); CHECK-GI expects a separate add.
3618define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x4(ptr %A, ptr %ptr) {
3619; CHECK-SD-LABEL: test_v4i16_post_imm_ld1x4:
3620; CHECK-SD:       ; %bb.0:
3621; CHECK-SD-NEXT:    ld1.4h { v0, v1, v2, v3 }, [x0], #32
3622; CHECK-SD-NEXT:    str x0, [x1]
3623; CHECK-SD-NEXT:    ret
3624;
3625; CHECK-GI-LABEL: test_v4i16_post_imm_ld1x4:
3626; CHECK-GI:       ; %bb.0:
3627; CHECK-GI-NEXT:    ld1.4h { v0, v1, v2, v3 }, [x0]
3628; CHECK-GI-NEXT:    add x8, x0, #32
3629; CHECK-GI-NEXT:    str x8, [x1]
3630; CHECK-GI-NEXT:    ret
3631  %ld1x4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0(ptr %A)
3632  %tmp = getelementptr i16, ptr %A, i32 16
3633  store ptr %tmp, ptr %ptr
3634  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld1x4
3635}
3636
; Post-increment by register: ld1x4 of <4 x i16> from %A, then %A + %inc x i16
; is stored to %ptr. CHECK-SD expects %inc scaled by 2 (lsl #1) and used as the
; post-index register; CHECK-GI expects a separate add with the shift folded in.
3637define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x4(ptr %A, ptr %ptr, i64 %inc) {
3638; CHECK-SD-LABEL: test_v4i16_post_reg_ld1x4:
3639; CHECK-SD:       ; %bb.0:
3640; CHECK-SD-NEXT:    lsl x8, x2, #1
3641; CHECK-SD-NEXT:    ld1.4h { v0, v1, v2, v3 }, [x0], x8
3642; CHECK-SD-NEXT:    str x0, [x1]
3643; CHECK-SD-NEXT:    ret
3644;
3645; CHECK-GI-LABEL: test_v4i16_post_reg_ld1x4:
3646; CHECK-GI:       ; %bb.0:
3647; CHECK-GI-NEXT:    ld1.4h { v0, v1, v2, v3 }, [x0]
3648; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
3649; CHECK-GI-NEXT:    str x8, [x1]
3650; CHECK-GI-NEXT:    ret
3651  %ld1x4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0(ptr %A)
3652  %tmp = getelementptr i16, ptr %A, i64 %inc
3653  store ptr %tmp, ptr %ptr
3654  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld1x4
3655}
3656
3657declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0(ptr)
3658
3659
; Post-increment by immediate: ld1x4 of <4 x i32> from %A, then %A + 16 x i32
; (64 bytes) is stored to %ptr. CHECK-SD expects the increment folded into the
; load as a post-index immediate (#64); CHECK-GI expects a separate add.
3660define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x4(ptr %A, ptr %ptr) {
3661; CHECK-SD-LABEL: test_v4i32_post_imm_ld1x4:
3662; CHECK-SD:       ; %bb.0:
3663; CHECK-SD-NEXT:    ld1.4s { v0, v1, v2, v3 }, [x0], #64
3664; CHECK-SD-NEXT:    str x0, [x1]
3665; CHECK-SD-NEXT:    ret
3666;
3667; CHECK-GI-LABEL: test_v4i32_post_imm_ld1x4:
3668; CHECK-GI:       ; %bb.0:
3669; CHECK-GI-NEXT:    ld1.4s { v0, v1, v2, v3 }, [x0]
3670; CHECK-GI-NEXT:    add x8, x0, #64
3671; CHECK-GI-NEXT:    str x8, [x1]
3672; CHECK-GI-NEXT:    ret
3673  %ld1x4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0(ptr %A)
3674  %tmp = getelementptr i32, ptr %A, i32 16
3675  store ptr %tmp, ptr %ptr
3676  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld1x4
3677}
3678
; Post-increment by register: ld1x4 of <4 x i32> from %A, then %A + %inc x i32
; is stored to %ptr. CHECK-SD expects %inc scaled by 4 (lsl #2) and used as the
; post-index register; CHECK-GI expects a separate add with the shift folded in.
3679define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x4(ptr %A, ptr %ptr, i64 %inc) {
3680; CHECK-SD-LABEL: test_v4i32_post_reg_ld1x4:
3681; CHECK-SD:       ; %bb.0:
3682; CHECK-SD-NEXT:    lsl x8, x2, #2
3683; CHECK-SD-NEXT:    ld1.4s { v0, v1, v2, v3 }, [x0], x8
3684; CHECK-SD-NEXT:    str x0, [x1]
3685; CHECK-SD-NEXT:    ret
3686;
3687; CHECK-GI-LABEL: test_v4i32_post_reg_ld1x4:
3688; CHECK-GI:       ; %bb.0:
3689; CHECK-GI-NEXT:    ld1.4s { v0, v1, v2, v3 }, [x0]
3690; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
3691; CHECK-GI-NEXT:    str x8, [x1]
3692; CHECK-GI-NEXT:    ret
3693  %ld1x4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0(ptr %A)
3694  %tmp = getelementptr i32, ptr %A, i64 %inc
3695  store ptr %tmp, ptr %ptr
3696  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld1x4
3697}
3698
3699declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0(ptr)
3700
3701
; Post-increment by immediate: ld1x4 of <2 x i32> from %A, then %A + 8 x i32
; (32 bytes) is stored to %ptr. CHECK-SD expects the increment folded into the
; load as a post-index immediate (#32); CHECK-GI expects a separate add.
3702define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x4(ptr %A, ptr %ptr) {
3703; CHECK-SD-LABEL: test_v2i32_post_imm_ld1x4:
3704; CHECK-SD:       ; %bb.0:
3705; CHECK-SD-NEXT:    ld1.2s { v0, v1, v2, v3 }, [x0], #32
3706; CHECK-SD-NEXT:    str x0, [x1]
3707; CHECK-SD-NEXT:    ret
3708;
3709; CHECK-GI-LABEL: test_v2i32_post_imm_ld1x4:
3710; CHECK-GI:       ; %bb.0:
3711; CHECK-GI-NEXT:    ld1.2s { v0, v1, v2, v3 }, [x0]
3712; CHECK-GI-NEXT:    add x8, x0, #32
3713; CHECK-GI-NEXT:    str x8, [x1]
3714; CHECK-GI-NEXT:    ret
3715  %ld1x4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0(ptr %A)
3716  %tmp = getelementptr i32, ptr %A, i32 8
3717  store ptr %tmp, ptr %ptr
3718  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld1x4
3719}
3720
; Post-increment by register: ld1x4 of <2 x i32> from %A, then %A + %inc x i32
; is stored to %ptr. CHECK-SD expects %inc scaled by 4 (lsl #2) and used as the
; post-index register; CHECK-GI expects a separate add with the shift folded in.
3721define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x4(ptr %A, ptr %ptr, i64 %inc) {
3722; CHECK-SD-LABEL: test_v2i32_post_reg_ld1x4:
3723; CHECK-SD:       ; %bb.0:
3724; CHECK-SD-NEXT:    lsl x8, x2, #2
3725; CHECK-SD-NEXT:    ld1.2s { v0, v1, v2, v3 }, [x0], x8
3726; CHECK-SD-NEXT:    str x0, [x1]
3727; CHECK-SD-NEXT:    ret
3728;
3729; CHECK-GI-LABEL: test_v2i32_post_reg_ld1x4:
3730; CHECK-GI:       ; %bb.0:
3731; CHECK-GI-NEXT:    ld1.2s { v0, v1, v2, v3 }, [x0]
3732; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
3733; CHECK-GI-NEXT:    str x8, [x1]
3734; CHECK-GI-NEXT:    ret
3735  %ld1x4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0(ptr %A)
3736  %tmp = getelementptr i32, ptr %A, i64 %inc
3737  store ptr %tmp, ptr %ptr
3738  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld1x4
3739}
3740
3741declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0(ptr)
3742
3743
; Post-increment by immediate: ld1x4 of <2 x i64> from %A, then %A + 8 x i64
; (64 bytes) is stored to %ptr. CHECK-SD expects the increment folded into the
; load as a post-index immediate (#64); CHECK-GI expects a separate add.
3744define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x4(ptr %A, ptr %ptr) {
3745; CHECK-SD-LABEL: test_v2i64_post_imm_ld1x4:
3746; CHECK-SD:       ; %bb.0:
3747; CHECK-SD-NEXT:    ld1.2d { v0, v1, v2, v3 }, [x0], #64
3748; CHECK-SD-NEXT:    str x0, [x1]
3749; CHECK-SD-NEXT:    ret
3750;
3751; CHECK-GI-LABEL: test_v2i64_post_imm_ld1x4:
3752; CHECK-GI:       ; %bb.0:
3753; CHECK-GI-NEXT:    ld1.2d { v0, v1, v2, v3 }, [x0]
3754; CHECK-GI-NEXT:    add x8, x0, #64
3755; CHECK-GI-NEXT:    str x8, [x1]
3756; CHECK-GI-NEXT:    ret
3757  %ld1x4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0(ptr %A)
3758  %tmp = getelementptr i64, ptr %A, i32 8
3759  store ptr %tmp, ptr %ptr
3760  ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld1x4
3761}
3762
; Post-increment by register: ld1x4 of <2 x i64> from %A, then %A + %inc x i64
; is stored to %ptr. CHECK-SD expects %inc scaled by 8 (lsl #3) and used as the
; post-index register; CHECK-GI expects a separate add with the shift folded in.
3763define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x4(ptr %A, ptr %ptr, i64 %inc) {
3764; CHECK-SD-LABEL: test_v2i64_post_reg_ld1x4:
3765; CHECK-SD:       ; %bb.0:
3766; CHECK-SD-NEXT:    lsl x8, x2, #3
3767; CHECK-SD-NEXT:    ld1.2d { v0, v1, v2, v3 }, [x0], x8
3768; CHECK-SD-NEXT:    str x0, [x1]
3769; CHECK-SD-NEXT:    ret
3770;
3771; CHECK-GI-LABEL: test_v2i64_post_reg_ld1x4:
3772; CHECK-GI:       ; %bb.0:
3773; CHECK-GI-NEXT:    ld1.2d { v0, v1, v2, v3 }, [x0]
3774; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
3775; CHECK-GI-NEXT:    str x8, [x1]
3776; CHECK-GI-NEXT:    ret
3777  %ld1x4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0(ptr %A)
3778  %tmp = getelementptr i64, ptr %A, i64 %inc
3779  store ptr %tmp, ptr %ptr
3780  ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld1x4
3781}
3782
3783declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0(ptr)
3784
3785
; Post-increment by immediate: ld1x4 of <1 x i64> from %A, then %A + 4 x i64
; (32 bytes) is stored to %ptr. CHECK-SD expects the increment folded into the
; load as a post-index immediate (#32); CHECK-GI expects a separate add.
3786define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x4(ptr %A, ptr %ptr) {
3787; CHECK-SD-LABEL: test_v1i64_post_imm_ld1x4:
3788; CHECK-SD:       ; %bb.0:
3789; CHECK-SD-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0], #32
3790; CHECK-SD-NEXT:    str x0, [x1]
3791; CHECK-SD-NEXT:    ret
3792;
3793; CHECK-GI-LABEL: test_v1i64_post_imm_ld1x4:
3794; CHECK-GI:       ; %bb.0:
3795; CHECK-GI-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0]
3796; CHECK-GI-NEXT:    add x8, x0, #32
3797; CHECK-GI-NEXT:    str x8, [x1]
3798; CHECK-GI-NEXT:    ret
3799  %ld1x4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0(ptr %A)
3800  %tmp = getelementptr i64, ptr %A, i32 4
3801  store ptr %tmp, ptr %ptr
3802  ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld1x4
3803}
3804
; Post-increment by register: ld1x4 of <1 x i64> from %A, then %A + %inc x i64
; is stored to %ptr. CHECK-SD expects %inc scaled by 8 (lsl #3) and used as the
; post-index register; CHECK-GI expects a separate add with the shift folded in.
3805define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x4(ptr %A, ptr %ptr, i64 %inc) {
3806; CHECK-SD-LABEL: test_v1i64_post_reg_ld1x4:
3807; CHECK-SD:       ; %bb.0:
3808; CHECK-SD-NEXT:    lsl x8, x2, #3
3809; CHECK-SD-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0], x8
3810; CHECK-SD-NEXT:    str x0, [x1]
3811; CHECK-SD-NEXT:    ret
3812;
3813; CHECK-GI-LABEL: test_v1i64_post_reg_ld1x4:
3814; CHECK-GI:       ; %bb.0:
3815; CHECK-GI-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0]
3816; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
3817; CHECK-GI-NEXT:    str x8, [x1]
3818; CHECK-GI-NEXT:    ret
3819  %ld1x4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0(ptr %A)
3820  %tmp = getelementptr i64, ptr %A, i64 %inc
3821  store ptr %tmp, ptr %ptr
3822  ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld1x4
3823}
3824
3825declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0(ptr)
3826
3827
; Post-increment by immediate: ld1x4 of <4 x float> from %A, then %A + 16 x float
; (64 bytes) is stored to %ptr. CHECK-SD expects the increment folded into the
; load as a post-index immediate (#64); CHECK-GI expects a separate add.
3828define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x4(ptr %A, ptr %ptr) {
3829; CHECK-SD-LABEL: test_v4f32_post_imm_ld1x4:
3830; CHECK-SD:       ; %bb.0:
3831; CHECK-SD-NEXT:    ld1.4s { v0, v1, v2, v3 }, [x0], #64
3832; CHECK-SD-NEXT:    str x0, [x1]
3833; CHECK-SD-NEXT:    ret
3834;
3835; CHECK-GI-LABEL: test_v4f32_post_imm_ld1x4:
3836; CHECK-GI:       ; %bb.0:
3837; CHECK-GI-NEXT:    ld1.4s { v0, v1, v2, v3 }, [x0]
3838; CHECK-GI-NEXT:    add x8, x0, #64
3839; CHECK-GI-NEXT:    str x8, [x1]
3840; CHECK-GI-NEXT:    ret
3841  %ld1x4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0(ptr %A)
3842  %tmp = getelementptr float, ptr %A, i32 16
3843  store ptr %tmp, ptr %ptr
3844  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld1x4
3845}
3846
; Post-increment by register: ld1x4 of <4 x float> from %A, then %A + %inc x float
; is stored to %ptr. CHECK-SD expects %inc scaled by 4 (lsl #2) and used as the
; post-index register; CHECK-GI expects a separate add with the shift folded in.
3847define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x4(ptr %A, ptr %ptr, i64 %inc) {
3848; CHECK-SD-LABEL: test_v4f32_post_reg_ld1x4:
3849; CHECK-SD:       ; %bb.0:
3850; CHECK-SD-NEXT:    lsl x8, x2, #2
3851; CHECK-SD-NEXT:    ld1.4s { v0, v1, v2, v3 }, [x0], x8
3852; CHECK-SD-NEXT:    str x0, [x1]
3853; CHECK-SD-NEXT:    ret
3854;
3855; CHECK-GI-LABEL: test_v4f32_post_reg_ld1x4:
3856; CHECK-GI:       ; %bb.0:
3857; CHECK-GI-NEXT:    ld1.4s { v0, v1, v2, v3 }, [x0]
3858; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
3859; CHECK-GI-NEXT:    str x8, [x1]
3860; CHECK-GI-NEXT:    ret
3861  %ld1x4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0(ptr %A)
3862  %tmp = getelementptr float, ptr %A, i64 %inc
3863  store ptr %tmp, ptr %ptr
3864  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld1x4
3865}
3866
3867declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0(ptr)
3868
3869
; Post-increment by immediate: ld1x4 of <2 x float> from %A, then %A + 8 x float
; (32 bytes) is stored to %ptr. CHECK-SD expects the increment folded into the
; load as a post-index immediate (#32); CHECK-GI expects a separate add.
3870define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x4(ptr %A, ptr %ptr) {
3871; CHECK-SD-LABEL: test_v2f32_post_imm_ld1x4:
3872; CHECK-SD:       ; %bb.0:
3873; CHECK-SD-NEXT:    ld1.2s { v0, v1, v2, v3 }, [x0], #32
3874; CHECK-SD-NEXT:    str x0, [x1]
3875; CHECK-SD-NEXT:    ret
3876;
3877; CHECK-GI-LABEL: test_v2f32_post_imm_ld1x4:
3878; CHECK-GI:       ; %bb.0:
3879; CHECK-GI-NEXT:    ld1.2s { v0, v1, v2, v3 }, [x0]
3880; CHECK-GI-NEXT:    add x8, x0, #32
3881; CHECK-GI-NEXT:    str x8, [x1]
3882; CHECK-GI-NEXT:    ret
3883  %ld1x4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0(ptr %A)
3884  %tmp = getelementptr float, ptr %A, i32 8
3885  store ptr %tmp, ptr %ptr
3886  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld1x4
3887}
3888
; Post-increment by register: ld1x4 of <2 x float> from %A, then %A + %inc x float
; is stored to %ptr. CHECK-SD expects %inc scaled by 4 (lsl #2) and used as the
; post-index register; CHECK-GI expects a separate add with the shift folded in.
3889define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x4(ptr %A, ptr %ptr, i64 %inc) {
3890; CHECK-SD-LABEL: test_v2f32_post_reg_ld1x4:
3891; CHECK-SD:       ; %bb.0:
3892; CHECK-SD-NEXT:    lsl x8, x2, #2
3893; CHECK-SD-NEXT:    ld1.2s { v0, v1, v2, v3 }, [x0], x8
3894; CHECK-SD-NEXT:    str x0, [x1]
3895; CHECK-SD-NEXT:    ret
3896;
3897; CHECK-GI-LABEL: test_v2f32_post_reg_ld1x4:
3898; CHECK-GI:       ; %bb.0:
3899; CHECK-GI-NEXT:    ld1.2s { v0, v1, v2, v3 }, [x0]
3900; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
3901; CHECK-GI-NEXT:    str x8, [x1]
3902; CHECK-GI-NEXT:    ret
3903  %ld1x4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0(ptr %A)
3904  %tmp = getelementptr float, ptr %A, i64 %inc
3905  store ptr %tmp, ptr %ptr
3906  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld1x4
3907}
3908
3909declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0(ptr)
3910
3911
; Post-increment by immediate: ld1x4 of <2 x double> from %A, then %A + 8 x double
; (64 bytes) is stored to %ptr. CHECK-SD expects the increment folded into the
; load as a post-index immediate (#64); CHECK-GI expects a separate add.
3912define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x4(ptr %A, ptr %ptr) {
3913; CHECK-SD-LABEL: test_v2f64_post_imm_ld1x4:
3914; CHECK-SD:       ; %bb.0:
3915; CHECK-SD-NEXT:    ld1.2d { v0, v1, v2, v3 }, [x0], #64
3916; CHECK-SD-NEXT:    str x0, [x1]
3917; CHECK-SD-NEXT:    ret
3918;
3919; CHECK-GI-LABEL: test_v2f64_post_imm_ld1x4:
3920; CHECK-GI:       ; %bb.0:
3921; CHECK-GI-NEXT:    ld1.2d { v0, v1, v2, v3 }, [x0]
3922; CHECK-GI-NEXT:    add x8, x0, #64
3923; CHECK-GI-NEXT:    str x8, [x1]
3924; CHECK-GI-NEXT:    ret
3925  %ld1x4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0(ptr %A)
3926  %tmp = getelementptr double, ptr %A, i32 8
3927  store ptr %tmp, ptr %ptr
3928  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld1x4
3929}
3930
; Post-increment by register: ld1x4 of <2 x double> from %A, then %A + %inc x double
; is stored to %ptr. CHECK-SD expects %inc scaled by 8 (lsl #3) and used as the
; post-index register; CHECK-GI expects a separate add with the shift folded in.
3931define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x4(ptr %A, ptr %ptr, i64 %inc) {
3932; CHECK-SD-LABEL: test_v2f64_post_reg_ld1x4:
3933; CHECK-SD:       ; %bb.0:
3934; CHECK-SD-NEXT:    lsl x8, x2, #3
3935; CHECK-SD-NEXT:    ld1.2d { v0, v1, v2, v3 }, [x0], x8
3936; CHECK-SD-NEXT:    str x0, [x1]
3937; CHECK-SD-NEXT:    ret
3938;
3939; CHECK-GI-LABEL: test_v2f64_post_reg_ld1x4:
3940; CHECK-GI:       ; %bb.0:
3941; CHECK-GI-NEXT:    ld1.2d { v0, v1, v2, v3 }, [x0]
3942; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
3943; CHECK-GI-NEXT:    str x8, [x1]
3944; CHECK-GI-NEXT:    ret
3945  %ld1x4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0(ptr %A)
3946  %tmp = getelementptr double, ptr %A, i64 %inc
3947  store ptr %tmp, ptr %ptr
3948  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld1x4
3949}
3950
3951declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0(ptr)
3952
3953
; Post-increment by immediate: ld1x4 of <1 x double> from %A, then %A + 4 x double
; (32 bytes) is stored to %ptr. CHECK-SD expects the increment folded into the
; load as a post-index immediate (#32); CHECK-GI expects a separate add.
3954define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x4(ptr %A, ptr %ptr) {
3955; CHECK-SD-LABEL: test_v1f64_post_imm_ld1x4:
3956; CHECK-SD:       ; %bb.0:
3957; CHECK-SD-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0], #32
3958; CHECK-SD-NEXT:    str x0, [x1]
3959; CHECK-SD-NEXT:    ret
3960;
3961; CHECK-GI-LABEL: test_v1f64_post_imm_ld1x4:
3962; CHECK-GI:       ; %bb.0:
3963; CHECK-GI-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0]
3964; CHECK-GI-NEXT:    add x8, x0, #32
3965; CHECK-GI-NEXT:    str x8, [x1]
3966; CHECK-GI-NEXT:    ret
3967  %ld1x4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0(ptr %A)
3968  %tmp = getelementptr double, ptr %A, i32 4
3969  store ptr %tmp, ptr %ptr
3970  ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld1x4
3971}
3972
; Post-increment by register: ld1x4 of <1 x double> from %A, then %A + %inc x double
; is stored to %ptr. CHECK-SD expects %inc scaled by 8 (lsl #3) and used as the
; post-index register; CHECK-GI expects a separate add with the shift folded in.
3973define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x4(ptr %A, ptr %ptr, i64 %inc) {
3974; CHECK-SD-LABEL: test_v1f64_post_reg_ld1x4:
3975; CHECK-SD:       ; %bb.0:
3976; CHECK-SD-NEXT:    lsl x8, x2, #3
3977; CHECK-SD-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0], x8
3978; CHECK-SD-NEXT:    str x0, [x1]
3979; CHECK-SD-NEXT:    ret
3980;
3981; CHECK-GI-LABEL: test_v1f64_post_reg_ld1x4:
3982; CHECK-GI:       ; %bb.0:
3983; CHECK-GI-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0]
3984; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
3985; CHECK-GI-NEXT:    str x8, [x1]
3986; CHECK-GI-NEXT:    ret
3987  %ld1x4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0(ptr %A)
3988  %tmp = getelementptr double, ptr %A, i64 %inc
3989  store ptr %tmp, ptr %ptr
3990  ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld1x4
3991}
3992
3993declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0(ptr)
3994
3995
; Post-increment by immediate: ld2r producing two <16 x i8> from %A, then
; %A + 2 x i8 (2 bytes) is stored to %ptr. CHECK-SD expects the increment folded
; into the load as a post-index immediate (#2); CHECK-GI expects a separate add.
3996define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2r(ptr %A, ptr %ptr) nounwind {
3997; CHECK-SD-LABEL: test_v16i8_post_imm_ld2r:
3998; CHECK-SD:       ; %bb.0:
3999; CHECK-SD-NEXT:    ld2r.16b { v0, v1 }, [x0], #2
4000; CHECK-SD-NEXT:    str x0, [x1]
4001; CHECK-SD-NEXT:    ret
4002;
4003; CHECK-GI-LABEL: test_v16i8_post_imm_ld2r:
4004; CHECK-GI:       ; %bb.0:
4005; CHECK-GI-NEXT:    ld2r.16b { v0, v1 }, [x0]
4006; CHECK-GI-NEXT:    add x8, x0, #2
4007; CHECK-GI-NEXT:    str x8, [x1]
4008; CHECK-GI-NEXT:    ret
4009  %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0(ptr %A)
4010  %tmp = getelementptr i8, ptr %A, i32 2
4011  store ptr %tmp, ptr %ptr
4012  ret { <16 x i8>, <16 x i8> } %ld2
4013}
4014
; Post-increment by register: ld2r producing two <16 x i8> from %A, then
; %A + %inc bytes is stored to %ptr. CHECK-SD expects the increment register (x2)
; used unscaled as the post-index register; CHECK-GI expects a separate add.
4015define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2r(ptr %A, ptr %ptr, i64 %inc) nounwind {
4016; CHECK-SD-LABEL: test_v16i8_post_reg_ld2r:
4017; CHECK-SD:       ; %bb.0:
4018; CHECK-SD-NEXT:    ld2r.16b { v0, v1 }, [x0], x2
4019; CHECK-SD-NEXT:    str x0, [x1]
4020; CHECK-SD-NEXT:    ret
4021;
4022; CHECK-GI-LABEL: test_v16i8_post_reg_ld2r:
4023; CHECK-GI:       ; %bb.0:
4024; CHECK-GI-NEXT:    ld2r.16b { v0, v1 }, [x0]
4025; CHECK-GI-NEXT:    add x8, x0, x2
4026; CHECK-GI-NEXT:    str x8, [x1]
4027; CHECK-GI-NEXT:    ret
4028  %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0(ptr %A)
4029  %tmp = getelementptr i8, ptr %A, i64 %inc
4030  store ptr %tmp, ptr %ptr
4031  ret { <16 x i8>, <16 x i8> } %ld2
4032}
4033
4034declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0(ptr) nounwind readonly
4035
4036
; Post-increment by immediate: ld2r producing two <8 x i8> from %A, then
; %A + 2 x i8 (2 bytes) is stored to %ptr. CHECK-SD expects the increment folded
; into the load as a post-index immediate (#2); CHECK-GI expects a separate add.
4037define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2r(ptr %A, ptr %ptr) nounwind {
4038; CHECK-SD-LABEL: test_v8i8_post_imm_ld2r:
4039; CHECK-SD:       ; %bb.0:
4040; CHECK-SD-NEXT:    ld2r.8b { v0, v1 }, [x0], #2
4041; CHECK-SD-NEXT:    str x0, [x1]
4042; CHECK-SD-NEXT:    ret
4043;
4044; CHECK-GI-LABEL: test_v8i8_post_imm_ld2r:
4045; CHECK-GI:       ; %bb.0:
4046; CHECK-GI-NEXT:    ld2r.8b { v0, v1 }, [x0]
4047; CHECK-GI-NEXT:    add x8, x0, #2
4048; CHECK-GI-NEXT:    str x8, [x1]
4049; CHECK-GI-NEXT:    ret
4050  %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0(ptr %A)
4051  %tmp = getelementptr i8, ptr %A, i32 2
4052  store ptr %tmp, ptr %ptr
4053  ret { <8 x i8>, <8 x i8> } %ld2
4054}
4055
; Post-increment by register: ld2r producing two <8 x i8> from %A, then
; %A + %inc bytes is stored to %ptr. CHECK-SD expects the increment register (x2)
; used unscaled as the post-index register; CHECK-GI expects a separate add.
4056define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2r(ptr %A, ptr %ptr, i64 %inc) nounwind {
4057; CHECK-SD-LABEL: test_v8i8_post_reg_ld2r:
4058; CHECK-SD:       ; %bb.0:
4059; CHECK-SD-NEXT:    ld2r.8b { v0, v1 }, [x0], x2
4060; CHECK-SD-NEXT:    str x0, [x1]
4061; CHECK-SD-NEXT:    ret
4062;
4063; CHECK-GI-LABEL: test_v8i8_post_reg_ld2r:
4064; CHECK-GI:       ; %bb.0:
4065; CHECK-GI-NEXT:    ld2r.8b { v0, v1 }, [x0]
4066; CHECK-GI-NEXT:    add x8, x0, x2
4067; CHECK-GI-NEXT:    str x8, [x1]
4068; CHECK-GI-NEXT:    ret
4069  %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0(ptr %A)
4070  %tmp = getelementptr i8, ptr %A, i64 %inc
4071  store ptr %tmp, ptr %ptr
4072  ret { <8 x i8>, <8 x i8> } %ld2
4073}
4074
4075declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0(ptr) nounwind readonly
4076
4077
; ld2r followed by a constant pointer bump: %A + 2 x i16 (4 bytes) is stored to %ptr.
; The default llc run expects the bump folded into a post-indexed ld2r (#4);
; the -global-isel=1 run expects a plain ld2r plus a separate add.
4078define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2r(ptr %A, ptr %ptr) nounwind {
4079; CHECK-SD-LABEL: test_v8i16_post_imm_ld2r:
4080; CHECK-SD:       ; %bb.0:
4081; CHECK-SD-NEXT:    ld2r.8h { v0, v1 }, [x0], #4
4082; CHECK-SD-NEXT:    str x0, [x1]
4083; CHECK-SD-NEXT:    ret
4084;
4085; CHECK-GI-LABEL: test_v8i16_post_imm_ld2r:
4086; CHECK-GI:       ; %bb.0:
4087; CHECK-GI-NEXT:    ld2r.8h { v0, v1 }, [x0]
4088; CHECK-GI-NEXT:    add x8, x0, #4
4089; CHECK-GI-NEXT:    str x8, [x1]
4090; CHECK-GI-NEXT:    ret
4091  %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0(ptr %A)
4092  %tmp = getelementptr i16, ptr %A, i32 2
4093  store ptr %tmp, ptr %ptr
4094  ret { <8 x i16>, <8 x i16> } %ld2
4095}
4096
; ld2r followed by a variable pointer bump: %A + %inc x i16 is stored to %ptr.
; The default llc run expects %inc scaled by lsl #1 and used as the register
; post-index of the ld2r; the -global-isel=1 run expects a plain ld2r plus an
; add with an lsl #1 operand.
4097define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2r(ptr %A, ptr %ptr, i64 %inc) nounwind {
4098; CHECK-SD-LABEL: test_v8i16_post_reg_ld2r:
4099; CHECK-SD:       ; %bb.0:
4100; CHECK-SD-NEXT:    lsl x8, x2, #1
4101; CHECK-SD-NEXT:    ld2r.8h { v0, v1 }, [x0], x8
4102; CHECK-SD-NEXT:    str x0, [x1]
4103; CHECK-SD-NEXT:    ret
4104;
4105; CHECK-GI-LABEL: test_v8i16_post_reg_ld2r:
4106; CHECK-GI:       ; %bb.0:
4107; CHECK-GI-NEXT:    ld2r.8h { v0, v1 }, [x0]
4108; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
4109; CHECK-GI-NEXT:    str x8, [x1]
4110; CHECK-GI-NEXT:    ret
4111  %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0(ptr %A)
4112  %tmp = getelementptr i16, ptr %A, i64 %inc
4113  store ptr %tmp, ptr %ptr
4114  ret { <8 x i16>, <8 x i16> } %ld2
4115}
4116
4117declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0(ptr) nounwind readonly
4118
4119
; ld2r followed by a constant pointer bump: %A + 2 x i16 (4 bytes) is stored to %ptr.
; The default llc run expects the bump folded into a post-indexed ld2r (#4);
; the -global-isel=1 run expects a plain ld2r plus a separate add.
4120define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2r(ptr %A, ptr %ptr) nounwind {
4121; CHECK-SD-LABEL: test_v4i16_post_imm_ld2r:
4122; CHECK-SD:       ; %bb.0:
4123; CHECK-SD-NEXT:    ld2r.4h { v0, v1 }, [x0], #4
4124; CHECK-SD-NEXT:    str x0, [x1]
4125; CHECK-SD-NEXT:    ret
4126;
4127; CHECK-GI-LABEL: test_v4i16_post_imm_ld2r:
4128; CHECK-GI:       ; %bb.0:
4129; CHECK-GI-NEXT:    ld2r.4h { v0, v1 }, [x0]
4130; CHECK-GI-NEXT:    add x8, x0, #4
4131; CHECK-GI-NEXT:    str x8, [x1]
4132; CHECK-GI-NEXT:    ret
4133  %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0(ptr %A)
4134  %tmp = getelementptr i16, ptr %A, i32 2
4135  store ptr %tmp, ptr %ptr
4136  ret { <4 x i16>, <4 x i16> } %ld2
4137}
4138
; ld2r followed by a variable pointer bump: %A + %inc x i16 is stored to %ptr.
; The default llc run expects %inc scaled by lsl #1 and used as the register
; post-index of the ld2r; the -global-isel=1 run expects a plain ld2r plus an
; add with an lsl #1 operand.
4139define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2r(ptr %A, ptr %ptr, i64 %inc) nounwind {
4140; CHECK-SD-LABEL: test_v4i16_post_reg_ld2r:
4141; CHECK-SD:       ; %bb.0:
4142; CHECK-SD-NEXT:    lsl x8, x2, #1
4143; CHECK-SD-NEXT:    ld2r.4h { v0, v1 }, [x0], x8
4144; CHECK-SD-NEXT:    str x0, [x1]
4145; CHECK-SD-NEXT:    ret
4146;
4147; CHECK-GI-LABEL: test_v4i16_post_reg_ld2r:
4148; CHECK-GI:       ; %bb.0:
4149; CHECK-GI-NEXT:    ld2r.4h { v0, v1 }, [x0]
4150; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
4151; CHECK-GI-NEXT:    str x8, [x1]
4152; CHECK-GI-NEXT:    ret
4153  %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0(ptr %A)
4154  %tmp = getelementptr i16, ptr %A, i64 %inc
4155  store ptr %tmp, ptr %ptr
4156  ret { <4 x i16>, <4 x i16> } %ld2
4157}
4158
4159declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0(ptr) nounwind readonly
4160
4161
; ld2r followed by a constant pointer bump: %A + 2 x i32 (8 bytes) is stored to %ptr.
; The default llc run expects the bump folded into a post-indexed ld2r (#8);
; the -global-isel=1 run expects a plain ld2r plus a separate add.
4162define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2r(ptr %A, ptr %ptr) nounwind {
4163; CHECK-SD-LABEL: test_v4i32_post_imm_ld2r:
4164; CHECK-SD:       ; %bb.0:
4165; CHECK-SD-NEXT:    ld2r.4s { v0, v1 }, [x0], #8
4166; CHECK-SD-NEXT:    str x0, [x1]
4167; CHECK-SD-NEXT:    ret
4168;
4169; CHECK-GI-LABEL: test_v4i32_post_imm_ld2r:
4170; CHECK-GI:       ; %bb.0:
4171; CHECK-GI-NEXT:    ld2r.4s { v0, v1 }, [x0]
4172; CHECK-GI-NEXT:    add x8, x0, #8
4173; CHECK-GI-NEXT:    str x8, [x1]
4174; CHECK-GI-NEXT:    ret
4175  %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0(ptr %A)
4176  %tmp = getelementptr i32, ptr %A, i32 2
4177  store ptr %tmp, ptr %ptr
4178  ret { <4 x i32>, <4 x i32> } %ld2
4179}
4180
; ld2r followed by a variable pointer bump: %A + %inc x i32 is stored to %ptr.
; The default llc run expects %inc scaled by lsl #2 and used as the register
; post-index of the ld2r; the -global-isel=1 run expects a plain ld2r plus an
; add with an lsl #2 operand.
4181define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2r(ptr %A, ptr %ptr, i64 %inc) nounwind {
4182; CHECK-SD-LABEL: test_v4i32_post_reg_ld2r:
4183; CHECK-SD:       ; %bb.0:
4184; CHECK-SD-NEXT:    lsl x8, x2, #2
4185; CHECK-SD-NEXT:    ld2r.4s { v0, v1 }, [x0], x8
4186; CHECK-SD-NEXT:    str x0, [x1]
4187; CHECK-SD-NEXT:    ret
4188;
4189; CHECK-GI-LABEL: test_v4i32_post_reg_ld2r:
4190; CHECK-GI:       ; %bb.0:
4191; CHECK-GI-NEXT:    ld2r.4s { v0, v1 }, [x0]
4192; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
4193; CHECK-GI-NEXT:    str x8, [x1]
4194; CHECK-GI-NEXT:    ret
4195  %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0(ptr %A)
4196  %tmp = getelementptr i32, ptr %A, i64 %inc
4197  store ptr %tmp, ptr %ptr
4198  ret { <4 x i32>, <4 x i32> } %ld2
4199}
4200
4201declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0(ptr) nounwind readonly
4202
; ld2r followed by a constant pointer bump: %A + 2 x i32 (8 bytes) is stored to %ptr.
; The default llc run expects the bump folded into a post-indexed ld2r (#8);
; the -global-isel=1 run expects a plain ld2r plus a separate add.
4203define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2r(ptr %A, ptr %ptr) nounwind {
4204; CHECK-SD-LABEL: test_v2i32_post_imm_ld2r:
4205; CHECK-SD:       ; %bb.0:
4206; CHECK-SD-NEXT:    ld2r.2s { v0, v1 }, [x0], #8
4207; CHECK-SD-NEXT:    str x0, [x1]
4208; CHECK-SD-NEXT:    ret
4209;
4210; CHECK-GI-LABEL: test_v2i32_post_imm_ld2r:
4211; CHECK-GI:       ; %bb.0:
4212; CHECK-GI-NEXT:    ld2r.2s { v0, v1 }, [x0]
4213; CHECK-GI-NEXT:    add x8, x0, #8
4214; CHECK-GI-NEXT:    str x8, [x1]
4215; CHECK-GI-NEXT:    ret
4216  %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0(ptr %A)
4217  %tmp = getelementptr i32, ptr %A, i32 2
4218  store ptr %tmp, ptr %ptr
4219  ret { <2 x i32>, <2 x i32> } %ld2
4220}
4221
; ld2r followed by a variable pointer bump: %A + %inc x i32 is stored to %ptr.
; The default llc run expects %inc scaled by lsl #2 and used as the register
; post-index of the ld2r; the -global-isel=1 run expects a plain ld2r plus an
; add with an lsl #2 operand.
4222define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2r(ptr %A, ptr %ptr, i64 %inc) nounwind {
4223; CHECK-SD-LABEL: test_v2i32_post_reg_ld2r:
4224; CHECK-SD:       ; %bb.0:
4225; CHECK-SD-NEXT:    lsl x8, x2, #2
4226; CHECK-SD-NEXT:    ld2r.2s { v0, v1 }, [x0], x8
4227; CHECK-SD-NEXT:    str x0, [x1]
4228; CHECK-SD-NEXT:    ret
4229;
4230; CHECK-GI-LABEL: test_v2i32_post_reg_ld2r:
4231; CHECK-GI:       ; %bb.0:
4232; CHECK-GI-NEXT:    ld2r.2s { v0, v1 }, [x0]
4233; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
4234; CHECK-GI-NEXT:    str x8, [x1]
4235; CHECK-GI-NEXT:    ret
4236  %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0(ptr %A)
4237  %tmp = getelementptr i32, ptr %A, i64 %inc
4238  store ptr %tmp, ptr %ptr
4239  ret { <2 x i32>, <2 x i32> } %ld2
4240}
4241
4242declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0(ptr) nounwind readonly
4243
4244
; ld2r followed by a constant pointer bump: %A + 2 x i64 (16 bytes) is stored to %ptr.
; The default llc run expects the bump folded into a post-indexed ld2r (#16);
; the -global-isel=1 run expects a plain ld2r plus a separate add.
4245define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2r(ptr %A, ptr %ptr) nounwind {
4246; CHECK-SD-LABEL: test_v2i64_post_imm_ld2r:
4247; CHECK-SD:       ; %bb.0:
4248; CHECK-SD-NEXT:    ld2r.2d { v0, v1 }, [x0], #16
4249; CHECK-SD-NEXT:    str x0, [x1]
4250; CHECK-SD-NEXT:    ret
4251;
4252; CHECK-GI-LABEL: test_v2i64_post_imm_ld2r:
4253; CHECK-GI:       ; %bb.0:
4254; CHECK-GI-NEXT:    ld2r.2d { v0, v1 }, [x0]
4255; CHECK-GI-NEXT:    add x8, x0, #16
4256; CHECK-GI-NEXT:    str x8, [x1]
4257; CHECK-GI-NEXT:    ret
4258  %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0(ptr %A)
4259  %tmp = getelementptr i64, ptr %A, i32 2
4260  store ptr %tmp, ptr %ptr
4261  ret { <2 x i64>, <2 x i64> } %ld2
4262}
4263
; ld2r followed by a variable pointer bump: %A + %inc x i64 is stored to %ptr.
; The default llc run expects %inc scaled by lsl #3 and used as the register
; post-index of the ld2r; the -global-isel=1 run expects a plain ld2r plus an
; add with an lsl #3 operand.
4264define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2r(ptr %A, ptr %ptr, i64 %inc) nounwind {
4265; CHECK-SD-LABEL: test_v2i64_post_reg_ld2r:
4266; CHECK-SD:       ; %bb.0:
4267; CHECK-SD-NEXT:    lsl x8, x2, #3
4268; CHECK-SD-NEXT:    ld2r.2d { v0, v1 }, [x0], x8
4269; CHECK-SD-NEXT:    str x0, [x1]
4270; CHECK-SD-NEXT:    ret
4271;
4272; CHECK-GI-LABEL: test_v2i64_post_reg_ld2r:
4273; CHECK-GI:       ; %bb.0:
4274; CHECK-GI-NEXT:    ld2r.2d { v0, v1 }, [x0]
4275; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
4276; CHECK-GI-NEXT:    str x8, [x1]
4277; CHECK-GI-NEXT:    ret
4278  %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0(ptr %A)
4279  %tmp = getelementptr i64, ptr %A, i64 %inc
4280  store ptr %tmp, ptr %ptr
4281  ret { <2 x i64>, <2 x i64> } %ld2
4282}
4283
4284declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0(ptr) nounwind readonly
4285
; ld2r followed by a constant pointer bump: %A + 2 x i64 (16 bytes) is stored to %ptr.
; The default llc run expects the bump folded into a post-indexed ld2r (#16);
; the -global-isel=1 run expects a plain ld2r plus a separate add.
4286define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2r(ptr %A, ptr %ptr) nounwind {
4287; CHECK-SD-LABEL: test_v1i64_post_imm_ld2r:
4288; CHECK-SD:       ; %bb.0:
4289; CHECK-SD-NEXT:    ld2r.1d { v0, v1 }, [x0], #16
4290; CHECK-SD-NEXT:    str x0, [x1]
4291; CHECK-SD-NEXT:    ret
4292;
4293; CHECK-GI-LABEL: test_v1i64_post_imm_ld2r:
4294; CHECK-GI:       ; %bb.0:
4295; CHECK-GI-NEXT:    ld2r.1d { v0, v1 }, [x0]
4296; CHECK-GI-NEXT:    add x8, x0, #16
4297; CHECK-GI-NEXT:    str x8, [x1]
4298; CHECK-GI-NEXT:    ret
4299  %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0(ptr %A)
4300  %tmp = getelementptr i64, ptr %A, i32 2
4301  store ptr %tmp, ptr %ptr
4302  ret { <1 x i64>, <1 x i64> } %ld2
4303}
4304
; ld2r followed by a variable pointer bump: %A + %inc x i64 is stored to %ptr.
; The default llc run expects %inc scaled by lsl #3 and used as the register
; post-index of the ld2r; the -global-isel=1 run expects a plain ld2r plus an
; add with an lsl #3 operand.
4305define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2r(ptr %A, ptr %ptr, i64 %inc) nounwind {
4306; CHECK-SD-LABEL: test_v1i64_post_reg_ld2r:
4307; CHECK-SD:       ; %bb.0:
4308; CHECK-SD-NEXT:    lsl x8, x2, #3
4309; CHECK-SD-NEXT:    ld2r.1d { v0, v1 }, [x0], x8
4310; CHECK-SD-NEXT:    str x0, [x1]
4311; CHECK-SD-NEXT:    ret
4312;
4313; CHECK-GI-LABEL: test_v1i64_post_reg_ld2r:
4314; CHECK-GI:       ; %bb.0:
4315; CHECK-GI-NEXT:    ld2r.1d { v0, v1 }, [x0]
4316; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
4317; CHECK-GI-NEXT:    str x8, [x1]
4318; CHECK-GI-NEXT:    ret
4319  %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0(ptr %A)
4320  %tmp = getelementptr i64, ptr %A, i64 %inc
4321  store ptr %tmp, ptr %ptr
4322  ret { <1 x i64>, <1 x i64> } %ld2
4323}
4324
4325declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0(ptr) nounwind readonly
4326
4327
; ld2r followed by a constant pointer bump: %A + 2 x float (8 bytes) is stored to %ptr.
; The default llc run expects the bump folded into a post-indexed ld2r (#8);
; the -global-isel=1 run expects a plain ld2r plus a separate add.
4328define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2r(ptr %A, ptr %ptr) nounwind {
4329; CHECK-SD-LABEL: test_v4f32_post_imm_ld2r:
4330; CHECK-SD:       ; %bb.0:
4331; CHECK-SD-NEXT:    ld2r.4s { v0, v1 }, [x0], #8
4332; CHECK-SD-NEXT:    str x0, [x1]
4333; CHECK-SD-NEXT:    ret
4334;
4335; CHECK-GI-LABEL: test_v4f32_post_imm_ld2r:
4336; CHECK-GI:       ; %bb.0:
4337; CHECK-GI-NEXT:    ld2r.4s { v0, v1 }, [x0]
4338; CHECK-GI-NEXT:    add x8, x0, #8
4339; CHECK-GI-NEXT:    str x8, [x1]
4340; CHECK-GI-NEXT:    ret
4341  %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0(ptr %A)
4342  %tmp = getelementptr float, ptr %A, i32 2
4343  store ptr %tmp, ptr %ptr
4344  ret { <4 x float>, <4 x float> } %ld2
4345}
4346
; ld2r followed by a variable pointer bump: %A + %inc x float is stored to %ptr.
; The default llc run expects %inc scaled by lsl #2 and used as the register
; post-index of the ld2r; the -global-isel=1 run expects a plain ld2r plus an
; add with an lsl #2 operand.
4347define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2r(ptr %A, ptr %ptr, i64 %inc) nounwind {
4348; CHECK-SD-LABEL: test_v4f32_post_reg_ld2r:
4349; CHECK-SD:       ; %bb.0:
4350; CHECK-SD-NEXT:    lsl x8, x2, #2
4351; CHECK-SD-NEXT:    ld2r.4s { v0, v1 }, [x0], x8
4352; CHECK-SD-NEXT:    str x0, [x1]
4353; CHECK-SD-NEXT:    ret
4354;
4355; CHECK-GI-LABEL: test_v4f32_post_reg_ld2r:
4356; CHECK-GI:       ; %bb.0:
4357; CHECK-GI-NEXT:    ld2r.4s { v0, v1 }, [x0]
4358; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
4359; CHECK-GI-NEXT:    str x8, [x1]
4360; CHECK-GI-NEXT:    ret
4361  %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0(ptr %A)
4362  %tmp = getelementptr float, ptr %A, i64 %inc
4363  store ptr %tmp, ptr %ptr
4364  ret { <4 x float>, <4 x float> } %ld2
4365}
4366
4367declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0(ptr) nounwind readonly
4368
; ld2r followed by a constant pointer bump: %A + 2 x float (8 bytes) is stored to %ptr.
; The default llc run expects the bump folded into a post-indexed ld2r (#8);
; the -global-isel=1 run expects a plain ld2r plus a separate add.
4369define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2r(ptr %A, ptr %ptr) nounwind {
4370; CHECK-SD-LABEL: test_v2f32_post_imm_ld2r:
4371; CHECK-SD:       ; %bb.0:
4372; CHECK-SD-NEXT:    ld2r.2s { v0, v1 }, [x0], #8
4373; CHECK-SD-NEXT:    str x0, [x1]
4374; CHECK-SD-NEXT:    ret
4375;
4376; CHECK-GI-LABEL: test_v2f32_post_imm_ld2r:
4377; CHECK-GI:       ; %bb.0:
4378; CHECK-GI-NEXT:    ld2r.2s { v0, v1 }, [x0]
4379; CHECK-GI-NEXT:    add x8, x0, #8
4380; CHECK-GI-NEXT:    str x8, [x1]
4381; CHECK-GI-NEXT:    ret
4382  %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0(ptr %A)
4383  %tmp = getelementptr float, ptr %A, i32 2
4384  store ptr %tmp, ptr %ptr
4385  ret { <2 x float>, <2 x float> } %ld2
4386}
4387
; ld2r followed by a variable pointer bump: %A + %inc x float is stored to %ptr.
; The default llc run expects %inc scaled by lsl #2 and used as the register
; post-index of the ld2r; the -global-isel=1 run expects a plain ld2r plus an
; add with an lsl #2 operand.
4388define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2r(ptr %A, ptr %ptr, i64 %inc) nounwind {
4389; CHECK-SD-LABEL: test_v2f32_post_reg_ld2r:
4390; CHECK-SD:       ; %bb.0:
4391; CHECK-SD-NEXT:    lsl x8, x2, #2
4392; CHECK-SD-NEXT:    ld2r.2s { v0, v1 }, [x0], x8
4393; CHECK-SD-NEXT:    str x0, [x1]
4394; CHECK-SD-NEXT:    ret
4395;
4396; CHECK-GI-LABEL: test_v2f32_post_reg_ld2r:
4397; CHECK-GI:       ; %bb.0:
4398; CHECK-GI-NEXT:    ld2r.2s { v0, v1 }, [x0]
4399; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
4400; CHECK-GI-NEXT:    str x8, [x1]
4401; CHECK-GI-NEXT:    ret
4402  %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0(ptr %A)
4403  %tmp = getelementptr float, ptr %A, i64 %inc
4404  store ptr %tmp, ptr %ptr
4405  ret { <2 x float>, <2 x float> } %ld2
4406}
4407
4408declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0(ptr) nounwind readonly
4409
4410
; ld2r followed by a constant pointer bump: %A + 2 x double (16 bytes) is stored to %ptr.
; The default llc run expects the bump folded into a post-indexed ld2r (#16);
; the -global-isel=1 run expects a plain ld2r plus a separate add.
4411define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2r(ptr %A, ptr %ptr) nounwind {
4412; CHECK-SD-LABEL: test_v2f64_post_imm_ld2r:
4413; CHECK-SD:       ; %bb.0:
4414; CHECK-SD-NEXT:    ld2r.2d { v0, v1 }, [x0], #16
4415; CHECK-SD-NEXT:    str x0, [x1]
4416; CHECK-SD-NEXT:    ret
4417;
4418; CHECK-GI-LABEL: test_v2f64_post_imm_ld2r:
4419; CHECK-GI:       ; %bb.0:
4420; CHECK-GI-NEXT:    ld2r.2d { v0, v1 }, [x0]
4421; CHECK-GI-NEXT:    add x8, x0, #16
4422; CHECK-GI-NEXT:    str x8, [x1]
4423; CHECK-GI-NEXT:    ret
4424  %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0(ptr %A)
4425  %tmp = getelementptr double, ptr %A, i32 2
4426  store ptr %tmp, ptr %ptr
4427  ret { <2 x double>, <2 x double> } %ld2
4428}
4429
; ld2r followed by a variable pointer bump: %A + %inc x double is stored to %ptr.
; The default llc run expects %inc scaled by lsl #3 and used as the register
; post-index of the ld2r; the -global-isel=1 run expects a plain ld2r plus an
; add with an lsl #3 operand.
4430define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2r(ptr %A, ptr %ptr, i64 %inc) nounwind {
4431; CHECK-SD-LABEL: test_v2f64_post_reg_ld2r:
4432; CHECK-SD:       ; %bb.0:
4433; CHECK-SD-NEXT:    lsl x8, x2, #3
4434; CHECK-SD-NEXT:    ld2r.2d { v0, v1 }, [x0], x8
4435; CHECK-SD-NEXT:    str x0, [x1]
4436; CHECK-SD-NEXT:    ret
4437;
4438; CHECK-GI-LABEL: test_v2f64_post_reg_ld2r:
4439; CHECK-GI:       ; %bb.0:
4440; CHECK-GI-NEXT:    ld2r.2d { v0, v1 }, [x0]
4441; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
4442; CHECK-GI-NEXT:    str x8, [x1]
4443; CHECK-GI-NEXT:    ret
4444  %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0(ptr %A)
4445  %tmp = getelementptr double, ptr %A, i64 %inc
4446  store ptr %tmp, ptr %ptr
4447  ret { <2 x double>, <2 x double> } %ld2
4448}
4449
4450declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0(ptr) nounwind readonly
4451
; ld2r followed by a constant pointer bump: %A + 2 x double (16 bytes) is stored to %ptr.
; The default llc run expects the bump folded into a post-indexed ld2r (#16);
; the -global-isel=1 run expects a plain ld2r plus a separate add.
4452define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2r(ptr %A, ptr %ptr) nounwind {
4453; CHECK-SD-LABEL: test_v1f64_post_imm_ld2r:
4454; CHECK-SD:       ; %bb.0:
4455; CHECK-SD-NEXT:    ld2r.1d { v0, v1 }, [x0], #16
4456; CHECK-SD-NEXT:    str x0, [x1]
4457; CHECK-SD-NEXT:    ret
4458;
4459; CHECK-GI-LABEL: test_v1f64_post_imm_ld2r:
4460; CHECK-GI:       ; %bb.0:
4461; CHECK-GI-NEXT:    ld2r.1d { v0, v1 }, [x0]
4462; CHECK-GI-NEXT:    add x8, x0, #16
4463; CHECK-GI-NEXT:    str x8, [x1]
4464; CHECK-GI-NEXT:    ret
4465  %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0(ptr %A)
4466  %tmp = getelementptr double, ptr %A, i32 2
4467  store ptr %tmp, ptr %ptr
4468  ret { <1 x double>, <1 x double> } %ld2
4469}
4470
; ld2r followed by a variable pointer bump: %A + %inc x double is stored to %ptr.
; The default llc run expects %inc scaled by lsl #3 and used as the register
; post-index of the ld2r; the -global-isel=1 run expects a plain ld2r plus an
; add with an lsl #3 operand.
4471define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2r(ptr %A, ptr %ptr, i64 %inc) nounwind {
4472; CHECK-SD-LABEL: test_v1f64_post_reg_ld2r:
4473; CHECK-SD:       ; %bb.0:
4474; CHECK-SD-NEXT:    lsl x8, x2, #3
4475; CHECK-SD-NEXT:    ld2r.1d { v0, v1 }, [x0], x8
4476; CHECK-SD-NEXT:    str x0, [x1]
4477; CHECK-SD-NEXT:    ret
4478;
4479; CHECK-GI-LABEL: test_v1f64_post_reg_ld2r:
4480; CHECK-GI:       ; %bb.0:
4481; CHECK-GI-NEXT:    ld2r.1d { v0, v1 }, [x0]
4482; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
4483; CHECK-GI-NEXT:    str x8, [x1]
4484; CHECK-GI-NEXT:    ret
4485  %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0(ptr %A)
4486  %tmp = getelementptr double, ptr %A, i64 %inc
4487  store ptr %tmp, ptr %ptr
4488  ret { <1 x double>, <1 x double> } %ld2
4489}
4490
4491declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0(ptr) nounwind readonly
4492
4493
; ld3r followed by a constant pointer bump: %A + 3 x i8 (3 bytes) is stored to %ptr.
; The default llc run expects the bump folded into a post-indexed ld3r (#3);
; the -global-isel=1 run expects a plain ld3r plus a separate add.
4494define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3r(ptr %A, ptr %ptr) nounwind {
4495; CHECK-SD-LABEL: test_v16i8_post_imm_ld3r:
4496; CHECK-SD:       ; %bb.0:
4497; CHECK-SD-NEXT:    ld3r.16b { v0, v1, v2 }, [x0], #3
4498; CHECK-SD-NEXT:    str x0, [x1]
4499; CHECK-SD-NEXT:    ret
4500;
4501; CHECK-GI-LABEL: test_v16i8_post_imm_ld3r:
4502; CHECK-GI:       ; %bb.0:
4503; CHECK-GI-NEXT:    ld3r.16b { v0, v1, v2 }, [x0]
4504; CHECK-GI-NEXT:    add x8, x0, #3
4505; CHECK-GI-NEXT:    str x8, [x1]
4506; CHECK-GI-NEXT:    ret
4507  %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0(ptr %A)
4508  %tmp = getelementptr i8, ptr %A, i32 3
4509  store ptr %tmp, ptr %ptr
4510  ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
4511}
4512
; ld3r followed by a variable pointer bump: %A + %inc bytes is stored to %ptr.
; The default llc run expects %inc used directly as the register post-index of
; the ld3r; the -global-isel=1 run expects a plain ld3r plus a separate add.
4513define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3r(ptr %A, ptr %ptr, i64 %inc) nounwind {
4514; CHECK-SD-LABEL: test_v16i8_post_reg_ld3r:
4515; CHECK-SD:       ; %bb.0:
4516; CHECK-SD-NEXT:    ld3r.16b { v0, v1, v2 }, [x0], x2
4517; CHECK-SD-NEXT:    str x0, [x1]
4518; CHECK-SD-NEXT:    ret
4519;
4520; CHECK-GI-LABEL: test_v16i8_post_reg_ld3r:
4521; CHECK-GI:       ; %bb.0:
4522; CHECK-GI-NEXT:    ld3r.16b { v0, v1, v2 }, [x0]
4523; CHECK-GI-NEXT:    add x8, x0, x2
4524; CHECK-GI-NEXT:    str x8, [x1]
4525; CHECK-GI-NEXT:    ret
4526  %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0(ptr %A)
4527  %tmp = getelementptr i8, ptr %A, i64 %inc
4528  store ptr %tmp, ptr %ptr
4529  ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
4530}
4531
4532declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0(ptr) nounwind readonly
4533
4534
; ld3r followed by a constant pointer bump: %A + 3 x i8 (3 bytes) is stored to %ptr.
; The default llc run expects the bump folded into a post-indexed ld3r (#3);
; the -global-isel=1 run expects a plain ld3r plus a separate add.
4535define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3r(ptr %A, ptr %ptr) nounwind {
4536; CHECK-SD-LABEL: test_v8i8_post_imm_ld3r:
4537; CHECK-SD:       ; %bb.0:
4538; CHECK-SD-NEXT:    ld3r.8b { v0, v1, v2 }, [x0], #3
4539; CHECK-SD-NEXT:    str x0, [x1]
4540; CHECK-SD-NEXT:    ret
4541;
4542; CHECK-GI-LABEL: test_v8i8_post_imm_ld3r:
4543; CHECK-GI:       ; %bb.0:
4544; CHECK-GI-NEXT:    ld3r.8b { v0, v1, v2 }, [x0]
4545; CHECK-GI-NEXT:    add x8, x0, #3
4546; CHECK-GI-NEXT:    str x8, [x1]
4547; CHECK-GI-NEXT:    ret
4548  %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0(ptr %A)
4549  %tmp = getelementptr i8, ptr %A, i32 3
4550  store ptr %tmp, ptr %ptr
4551  ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
4552}
4553
; ld3r followed by a variable pointer bump: %A + %inc bytes is stored to %ptr.
; The default llc run expects %inc used directly as the register post-index of
; the ld3r; the -global-isel=1 run expects a plain ld3r plus a separate add.
4554define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3r(ptr %A, ptr %ptr, i64 %inc) nounwind {
4555; CHECK-SD-LABEL: test_v8i8_post_reg_ld3r:
4556; CHECK-SD:       ; %bb.0:
4557; CHECK-SD-NEXT:    ld3r.8b { v0, v1, v2 }, [x0], x2
4558; CHECK-SD-NEXT:    str x0, [x1]
4559; CHECK-SD-NEXT:    ret
4560;
4561; CHECK-GI-LABEL: test_v8i8_post_reg_ld3r:
4562; CHECK-GI:       ; %bb.0:
4563; CHECK-GI-NEXT:    ld3r.8b { v0, v1, v2 }, [x0]
4564; CHECK-GI-NEXT:    add x8, x0, x2
4565; CHECK-GI-NEXT:    str x8, [x1]
4566; CHECK-GI-NEXT:    ret
4567  %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0(ptr %A)
4568  %tmp = getelementptr i8, ptr %A, i64 %inc
4569  store ptr %tmp, ptr %ptr
4570  ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
4571}
4572
4573declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0(ptr) nounwind readonly
4574
4575
; ld3r followed by a constant pointer bump: %A + 3 x i16 (6 bytes) is stored to %ptr.
; The default llc run expects the bump folded into a post-indexed ld3r (#6);
; the -global-isel=1 run expects a plain ld3r plus a separate add.
4576define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3r(ptr %A, ptr %ptr) nounwind {
4577; CHECK-SD-LABEL: test_v8i16_post_imm_ld3r:
4578; CHECK-SD:       ; %bb.0:
4579; CHECK-SD-NEXT:    ld3r.8h { v0, v1, v2 }, [x0], #6
4580; CHECK-SD-NEXT:    str x0, [x1]
4581; CHECK-SD-NEXT:    ret
4582;
4583; CHECK-GI-LABEL: test_v8i16_post_imm_ld3r:
4584; CHECK-GI:       ; %bb.0:
4585; CHECK-GI-NEXT:    ld3r.8h { v0, v1, v2 }, [x0]
4586; CHECK-GI-NEXT:    add x8, x0, #6
4587; CHECK-GI-NEXT:    str x8, [x1]
4588; CHECK-GI-NEXT:    ret
4589  %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0(ptr %A)
4590  %tmp = getelementptr i16, ptr %A, i32 3
4591  store ptr %tmp, ptr %ptr
4592  ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
4593}
4594
; ld3r followed by a variable pointer bump: %A + %inc x i16 is stored to %ptr.
; The default llc run expects %inc scaled by lsl #1 and used as the register
; post-index of the ld3r; the -global-isel=1 run expects a plain ld3r plus an
; add with an lsl #1 operand.
4595define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3r(ptr %A, ptr %ptr, i64 %inc) nounwind {
4596; CHECK-SD-LABEL: test_v8i16_post_reg_ld3r:
4597; CHECK-SD:       ; %bb.0:
4598; CHECK-SD-NEXT:    lsl x8, x2, #1
4599; CHECK-SD-NEXT:    ld3r.8h { v0, v1, v2 }, [x0], x8
4600; CHECK-SD-NEXT:    str x0, [x1]
4601; CHECK-SD-NEXT:    ret
4602;
4603; CHECK-GI-LABEL: test_v8i16_post_reg_ld3r:
4604; CHECK-GI:       ; %bb.0:
4605; CHECK-GI-NEXT:    ld3r.8h { v0, v1, v2 }, [x0]
4606; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
4607; CHECK-GI-NEXT:    str x8, [x1]
4608; CHECK-GI-NEXT:    ret
4609  %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0(ptr %A)
4610  %tmp = getelementptr i16, ptr %A, i64 %inc
4611  store ptr %tmp, ptr %ptr
4612  ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
4613}
4614
4615declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0(ptr) nounwind readonly
4616
4617
; ld3r followed by a constant pointer bump: %A + 3 x i16 (6 bytes) is stored to %ptr.
; The default llc run expects the bump folded into a post-indexed ld3r (#6);
; the -global-isel=1 run expects a plain ld3r plus a separate add.
4618define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3r(ptr %A, ptr %ptr) nounwind {
4619; CHECK-SD-LABEL: test_v4i16_post_imm_ld3r:
4620; CHECK-SD:       ; %bb.0:
4621; CHECK-SD-NEXT:    ld3r.4h { v0, v1, v2 }, [x0], #6
4622; CHECK-SD-NEXT:    str x0, [x1]
4623; CHECK-SD-NEXT:    ret
4624;
4625; CHECK-GI-LABEL: test_v4i16_post_imm_ld3r:
4626; CHECK-GI:       ; %bb.0:
4627; CHECK-GI-NEXT:    ld3r.4h { v0, v1, v2 }, [x0]
4628; CHECK-GI-NEXT:    add x8, x0, #6
4629; CHECK-GI-NEXT:    str x8, [x1]
4630; CHECK-GI-NEXT:    ret
4631  %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0(ptr %A)
4632  %tmp = getelementptr i16, ptr %A, i32 3
4633  store ptr %tmp, ptr %ptr
4634  ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
4635}
4636
; ld3r followed by a variable pointer bump: %A + %inc x i16 is stored to %ptr.
; The default llc run expects %inc scaled by lsl #1 and used as the register
; post-index of the ld3r; the -global-isel=1 run expects a plain ld3r plus an
; add with an lsl #1 operand.
4637define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3r(ptr %A, ptr %ptr, i64 %inc) nounwind {
4638; CHECK-SD-LABEL: test_v4i16_post_reg_ld3r:
4639; CHECK-SD:       ; %bb.0:
4640; CHECK-SD-NEXT:    lsl x8, x2, #1
4641; CHECK-SD-NEXT:    ld3r.4h { v0, v1, v2 }, [x0], x8
4642; CHECK-SD-NEXT:    str x0, [x1]
4643; CHECK-SD-NEXT:    ret
4644;
4645; CHECK-GI-LABEL: test_v4i16_post_reg_ld3r:
4646; CHECK-GI:       ; %bb.0:
4647; CHECK-GI-NEXT:    ld3r.4h { v0, v1, v2 }, [x0]
4648; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
4649; CHECK-GI-NEXT:    str x8, [x1]
4650; CHECK-GI-NEXT:    ret
4651  %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0(ptr %A)
4652  %tmp = getelementptr i16, ptr %A, i64 %inc
4653  store ptr %tmp, ptr %ptr
4654  ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
4655}
4656
4657declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0(ptr) nounwind readonly
4658
4659
; ld3r followed by a constant pointer bump: %A + 3 x i32 (12 bytes) is stored to %ptr.
; The default llc run expects the bump folded into a post-indexed ld3r (#12);
; the -global-isel=1 run expects a plain ld3r plus a separate add.
4660define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3r(ptr %A, ptr %ptr) nounwind {
4661; CHECK-SD-LABEL: test_v4i32_post_imm_ld3r:
4662; CHECK-SD:       ; %bb.0:
4663; CHECK-SD-NEXT:    ld3r.4s { v0, v1, v2 }, [x0], #12
4664; CHECK-SD-NEXT:    str x0, [x1]
4665; CHECK-SD-NEXT:    ret
4666;
4667; CHECK-GI-LABEL: test_v4i32_post_imm_ld3r:
4668; CHECK-GI:       ; %bb.0:
4669; CHECK-GI-NEXT:    ld3r.4s { v0, v1, v2 }, [x0]
4670; CHECK-GI-NEXT:    add x8, x0, #12
4671; CHECK-GI-NEXT:    str x8, [x1]
4672; CHECK-GI-NEXT:    ret
4673  %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0(ptr %A)
4674  %tmp = getelementptr i32, ptr %A, i32 3
4675  store ptr %tmp, ptr %ptr
4676  ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
4677}
4678
; ld3r followed by a variable pointer bump: %A + %inc x i32 is stored to %ptr.
; The default llc run expects %inc scaled by lsl #2 and used as the register
; post-index of the ld3r; the -global-isel=1 run expects a plain ld3r plus an
; add with an lsl #2 operand.
4679define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3r(ptr %A, ptr %ptr, i64 %inc) nounwind {
4680; CHECK-SD-LABEL: test_v4i32_post_reg_ld3r:
4681; CHECK-SD:       ; %bb.0:
4682; CHECK-SD-NEXT:    lsl x8, x2, #2
4683; CHECK-SD-NEXT:    ld3r.4s { v0, v1, v2 }, [x0], x8
4684; CHECK-SD-NEXT:    str x0, [x1]
4685; CHECK-SD-NEXT:    ret
4686;
4687; CHECK-GI-LABEL: test_v4i32_post_reg_ld3r:
4688; CHECK-GI:       ; %bb.0:
4689; CHECK-GI-NEXT:    ld3r.4s { v0, v1, v2 }, [x0]
4690; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
4691; CHECK-GI-NEXT:    str x8, [x1]
4692; CHECK-GI-NEXT:    ret
4693  %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0(ptr %A)
4694  %tmp = getelementptr i32, ptr %A, i64 %inc
4695  store ptr %tmp, ptr %ptr
4696  ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
4697}
4698
4699declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0(ptr) nounwind readonly
4700
; ld3r followed by a constant pointer bump: %A + 3 x i32 (12 bytes) is stored to %ptr.
; The default llc run expects the bump folded into a post-indexed ld3r (#12);
; the -global-isel=1 run expects a plain ld3r plus a separate add.
4701define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3r(ptr %A, ptr %ptr) nounwind {
4702; CHECK-SD-LABEL: test_v2i32_post_imm_ld3r:
4703; CHECK-SD:       ; %bb.0:
4704; CHECK-SD-NEXT:    ld3r.2s { v0, v1, v2 }, [x0], #12
4705; CHECK-SD-NEXT:    str x0, [x1]
4706; CHECK-SD-NEXT:    ret
4707;
4708; CHECK-GI-LABEL: test_v2i32_post_imm_ld3r:
4709; CHECK-GI:       ; %bb.0:
4710; CHECK-GI-NEXT:    ld3r.2s { v0, v1, v2 }, [x0]
4711; CHECK-GI-NEXT:    add x8, x0, #12
4712; CHECK-GI-NEXT:    str x8, [x1]
4713; CHECK-GI-NEXT:    ret
4714  %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0(ptr %A)
4715  %tmp = getelementptr i32, ptr %A, i32 3
4716  store ptr %tmp, ptr %ptr
4717  ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
4718}
4719
; ld3r followed by a variable pointer bump: %A + %inc x i32 is stored to %ptr.
; The default llc run expects %inc scaled by lsl #2 and used as the register
; post-index of the ld3r; the -global-isel=1 run expects a plain ld3r plus an
; add with an lsl #2 operand.
4720define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3r(ptr %A, ptr %ptr, i64 %inc) nounwind {
4721; CHECK-SD-LABEL: test_v2i32_post_reg_ld3r:
4722; CHECK-SD:       ; %bb.0:
4723; CHECK-SD-NEXT:    lsl x8, x2, #2
4724; CHECK-SD-NEXT:    ld3r.2s { v0, v1, v2 }, [x0], x8
4725; CHECK-SD-NEXT:    str x0, [x1]
4726; CHECK-SD-NEXT:    ret
4727;
4728; CHECK-GI-LABEL: test_v2i32_post_reg_ld3r:
4729; CHECK-GI:       ; %bb.0:
4730; CHECK-GI-NEXT:    ld3r.2s { v0, v1, v2 }, [x0]
4731; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
4732; CHECK-GI-NEXT:    str x8, [x1]
4733; CHECK-GI-NEXT:    ret
4734  %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0(ptr %A)
4735  %tmp = getelementptr i32, ptr %A, i64 %inc
4736  store ptr %tmp, ptr %ptr
4737  ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
4738}
4739
4740declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0(ptr) nounwind readonly
4741
4742
; ld3r followed by a constant pointer bump: %A + 3 x i64 (24 bytes) is stored to %ptr.
; The default llc run expects the bump folded into a post-indexed ld3r (#24);
; the -global-isel=1 run expects a plain ld3r plus a separate add.
4743define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3r(ptr %A, ptr %ptr) nounwind {
4744; CHECK-SD-LABEL: test_v2i64_post_imm_ld3r:
4745; CHECK-SD:       ; %bb.0:
4746; CHECK-SD-NEXT:    ld3r.2d { v0, v1, v2 }, [x0], #24
4747; CHECK-SD-NEXT:    str x0, [x1]
4748; CHECK-SD-NEXT:    ret
4749;
4750; CHECK-GI-LABEL: test_v2i64_post_imm_ld3r:
4751; CHECK-GI:       ; %bb.0:
4752; CHECK-GI-NEXT:    ld3r.2d { v0, v1, v2 }, [x0]
4753; CHECK-GI-NEXT:    add x8, x0, #24
4754; CHECK-GI-NEXT:    str x8, [x1]
4755; CHECK-GI-NEXT:    ret
4756  %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0(ptr %A)
4757  %tmp = getelementptr i64, ptr %A, i32 3
4758  store ptr %tmp, ptr %ptr
4759  ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
4760}
4761
; ld3r followed by a variable pointer bump: %A + %inc x i64 is stored to %ptr.
; The default llc run expects %inc scaled by lsl #3 and used as the register
; post-index of the ld3r; the -global-isel=1 run expects a plain ld3r plus an
; add with an lsl #3 operand.
4762define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3r(ptr %A, ptr %ptr, i64 %inc) nounwind {
4763; CHECK-SD-LABEL: test_v2i64_post_reg_ld3r:
4764; CHECK-SD:       ; %bb.0:
4765; CHECK-SD-NEXT:    lsl x8, x2, #3
4766; CHECK-SD-NEXT:    ld3r.2d { v0, v1, v2 }, [x0], x8
4767; CHECK-SD-NEXT:    str x0, [x1]
4768; CHECK-SD-NEXT:    ret
4769;
4770; CHECK-GI-LABEL: test_v2i64_post_reg_ld3r:
4771; CHECK-GI:       ; %bb.0:
4772; CHECK-GI-NEXT:    ld3r.2d { v0, v1, v2 }, [x0]
4773; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
4774; CHECK-GI-NEXT:    str x8, [x1]
4775; CHECK-GI-NEXT:    ret
4776  %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0(ptr %A)
4777  %tmp = getelementptr i64, ptr %A, i64 %inc
4778  store ptr %tmp, ptr %ptr
4779  ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
4780}
4781
4782declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0(ptr) nounwind readonly
4783
; ld3r followed by a constant pointer bump: %A + 3 x i64 (24 bytes) is stored to %ptr.
; The default llc run expects the bump folded into a post-indexed ld3r (#24);
; the -global-isel=1 run expects a plain ld3r plus a separate add.
4784define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3r(ptr %A, ptr %ptr) nounwind {
4785; CHECK-SD-LABEL: test_v1i64_post_imm_ld3r:
4786; CHECK-SD:       ; %bb.0:
4787; CHECK-SD-NEXT:    ld3r.1d { v0, v1, v2 }, [x0], #24
4788; CHECK-SD-NEXT:    str x0, [x1]
4789; CHECK-SD-NEXT:    ret
4790;
4791; CHECK-GI-LABEL: test_v1i64_post_imm_ld3r:
4792; CHECK-GI:       ; %bb.0:
4793; CHECK-GI-NEXT:    ld3r.1d { v0, v1, v2 }, [x0]
4794; CHECK-GI-NEXT:    add x8, x0, #24
4795; CHECK-GI-NEXT:    str x8, [x1]
4796; CHECK-GI-NEXT:    ret
4797  %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0(ptr %A)
4798  %tmp = getelementptr i64, ptr %A, i32 3
4799  store ptr %tmp, ptr %ptr
4800  ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
4801}
4802
; ld3r followed by a variable pointer bump: %A + %inc x i64 is stored to %ptr.
; The default llc run expects %inc scaled by lsl #3 and used as the register
; post-index of the ld3r; the -global-isel=1 run expects a plain ld3r plus an
; add with an lsl #3 operand.
4803define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3r(ptr %A, ptr %ptr, i64 %inc) nounwind {
4804; CHECK-SD-LABEL: test_v1i64_post_reg_ld3r:
4805; CHECK-SD:       ; %bb.0:
4806; CHECK-SD-NEXT:    lsl x8, x2, #3
4807; CHECK-SD-NEXT:    ld3r.1d { v0, v1, v2 }, [x0], x8
4808; CHECK-SD-NEXT:    str x0, [x1]
4809; CHECK-SD-NEXT:    ret
4810;
4811; CHECK-GI-LABEL: test_v1i64_post_reg_ld3r:
4812; CHECK-GI:       ; %bb.0:
4813; CHECK-GI-NEXT:    ld3r.1d { v0, v1, v2 }, [x0]
4814; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
4815; CHECK-GI-NEXT:    str x8, [x1]
4816; CHECK-GI-NEXT:    ret
4817  %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0(ptr %A)
4818  %tmp = getelementptr i64, ptr %A, i64 %inc
4819  store ptr %tmp, ptr %ptr
4820  ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
4821}
4822
4823declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0(ptr) nounwind readonly
4824
4825
; ld3r followed by a constant pointer bump: %A + 3 x float (12 bytes) is stored to %ptr.
; The default llc run expects the bump folded into a post-indexed ld3r (#12);
; the -global-isel=1 run expects a plain ld3r plus a separate add.
4826define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3r(ptr %A, ptr %ptr) nounwind {
4827; CHECK-SD-LABEL: test_v4f32_post_imm_ld3r:
4828; CHECK-SD:       ; %bb.0:
4829; CHECK-SD-NEXT:    ld3r.4s { v0, v1, v2 }, [x0], #12
4830; CHECK-SD-NEXT:    str x0, [x1]
4831; CHECK-SD-NEXT:    ret
4832;
4833; CHECK-GI-LABEL: test_v4f32_post_imm_ld3r:
4834; CHECK-GI:       ; %bb.0:
4835; CHECK-GI-NEXT:    ld3r.4s { v0, v1, v2 }, [x0]
4836; CHECK-GI-NEXT:    add x8, x0, #12
4837; CHECK-GI-NEXT:    str x8, [x1]
4838; CHECK-GI-NEXT:    ret
4839  %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0(ptr %A)
4840  %tmp = getelementptr float, ptr %A, i32 3
4841  store ptr %tmp, ptr %ptr
4842  ret { <4 x float>, <4 x float>, <4 x float> } %ld3
4843}
4844
; ld3r followed by a variable pointer bump: %A + %inc x float is stored to %ptr.
; The default llc run expects %inc scaled by lsl #2 and used as the register
; post-index of the ld3r; the -global-isel=1 run expects a plain ld3r plus an
; add with an lsl #2 operand.
4845define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3r(ptr %A, ptr %ptr, i64 %inc) nounwind {
4846; CHECK-SD-LABEL: test_v4f32_post_reg_ld3r:
4847; CHECK-SD:       ; %bb.0:
4848; CHECK-SD-NEXT:    lsl x8, x2, #2
4849; CHECK-SD-NEXT:    ld3r.4s { v0, v1, v2 }, [x0], x8
4850; CHECK-SD-NEXT:    str x0, [x1]
4851; CHECK-SD-NEXT:    ret
4852;
4853; CHECK-GI-LABEL: test_v4f32_post_reg_ld3r:
4854; CHECK-GI:       ; %bb.0:
4855; CHECK-GI-NEXT:    ld3r.4s { v0, v1, v2 }, [x0]
4856; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
4857; CHECK-GI-NEXT:    str x8, [x1]
4858; CHECK-GI-NEXT:    ret
4859  %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0(ptr %A)
4860  %tmp = getelementptr float, ptr %A, i64 %inc
4861  store ptr %tmp, ptr %ptr
4862  ret { <4 x float>, <4 x float>, <4 x float> } %ld3
4863}
4864
4865declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0(ptr) nounwind readonly
4866
; Immediate post-increment case for the 64-bit vector form: ld3r of float from
; %A, then %A + 3 floats (12 bytes) is stored to %ptr. The default llc run is
; expected to fold the increment into the load's writeback (#12); the
; -global-isel=1 run is expected to keep a separate add.
4867define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3r(ptr %A, ptr %ptr) nounwind {
4868; CHECK-SD-LABEL: test_v2f32_post_imm_ld3r:
4869; CHECK-SD:       ; %bb.0:
4870; CHECK-SD-NEXT:    ld3r.2s { v0, v1, v2 }, [x0], #12
4871; CHECK-SD-NEXT:    str x0, [x1]
4872; CHECK-SD-NEXT:    ret
4873;
4874; CHECK-GI-LABEL: test_v2f32_post_imm_ld3r:
4875; CHECK-GI:       ; %bb.0:
4876; CHECK-GI-NEXT:    ld3r.2s { v0, v1, v2 }, [x0]
4877; CHECK-GI-NEXT:    add x8, x0, #12
4878; CHECK-GI-NEXT:    str x8, [x1]
4879; CHECK-GI-NEXT:    ret
4880  %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0(ptr %A)
4881  %tmp = getelementptr float, ptr %A, i32 3
4882  store ptr %tmp, ptr %ptr
4883  ret { <2 x float>, <2 x float>, <2 x float> } %ld3
4884}
4885
; Register post-increment case for the 64-bit vector form: ld3r of float from
; %A, then %A + %inc floats is stored to %ptr. The default llc run is expected
; to pre-scale %inc (lsl #2) and use it as the load's writeback register; the
; -global-isel=1 run is expected to keep a separate shifted add.
4886define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3r(ptr %A, ptr %ptr, i64 %inc) nounwind {
4887; CHECK-SD-LABEL: test_v2f32_post_reg_ld3r:
4888; CHECK-SD:       ; %bb.0:
4889; CHECK-SD-NEXT:    lsl x8, x2, #2
4890; CHECK-SD-NEXT:    ld3r.2s { v0, v1, v2 }, [x0], x8
4891; CHECK-SD-NEXT:    str x0, [x1]
4892; CHECK-SD-NEXT:    ret
4893;
4894; CHECK-GI-LABEL: test_v2f32_post_reg_ld3r:
4895; CHECK-GI:       ; %bb.0:
4896; CHECK-GI-NEXT:    ld3r.2s { v0, v1, v2 }, [x0]
4897; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
4898; CHECK-GI-NEXT:    str x8, [x1]
4899; CHECK-GI-NEXT:    ret
4900  %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0(ptr %A)
4901  %tmp = getelementptr float, ptr %A, i64 %inc
4902  store ptr %tmp, ptr %ptr
4903  ret { <2 x float>, <2 x float>, <2 x float> } %ld3
4904}
4905
4906declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0(ptr) nounwind readonly
4907
4908
; Immediate post-increment case: ld3r of double from %A, then %A + 3 doubles
; (24 bytes) is stored to %ptr. The default llc run is expected to fold the
; increment into the load's writeback (#24); the -global-isel=1 run is
; expected to keep a separate add.
4909define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3r(ptr %A, ptr %ptr) nounwind {
4910; CHECK-SD-LABEL: test_v2f64_post_imm_ld3r:
4911; CHECK-SD:       ; %bb.0:
4912; CHECK-SD-NEXT:    ld3r.2d { v0, v1, v2 }, [x0], #24
4913; CHECK-SD-NEXT:    str x0, [x1]
4914; CHECK-SD-NEXT:    ret
4915;
4916; CHECK-GI-LABEL: test_v2f64_post_imm_ld3r:
4917; CHECK-GI:       ; %bb.0:
4918; CHECK-GI-NEXT:    ld3r.2d { v0, v1, v2 }, [x0]
4919; CHECK-GI-NEXT:    add x8, x0, #24
4920; CHECK-GI-NEXT:    str x8, [x1]
4921; CHECK-GI-NEXT:    ret
4922  %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0(ptr %A)
4923  %tmp = getelementptr double, ptr %A, i32 3
4924  store ptr %tmp, ptr %ptr
4925  ret { <2 x double>, <2 x double>, <2 x double> } %ld3
4926}
4927
; Register post-increment case: ld3r of double from %A, then %A + %inc doubles
; is stored to %ptr. The default llc run is expected to pre-scale %inc
; (lsl #3) and use it as the load's writeback register; the -global-isel=1
; run is expected to keep a separate shifted add.
4928define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3r(ptr %A, ptr %ptr, i64 %inc) nounwind {
4929; CHECK-SD-LABEL: test_v2f64_post_reg_ld3r:
4930; CHECK-SD:       ; %bb.0:
4931; CHECK-SD-NEXT:    lsl x8, x2, #3
4932; CHECK-SD-NEXT:    ld3r.2d { v0, v1, v2 }, [x0], x8
4933; CHECK-SD-NEXT:    str x0, [x1]
4934; CHECK-SD-NEXT:    ret
4935;
4936; CHECK-GI-LABEL: test_v2f64_post_reg_ld3r:
4937; CHECK-GI:       ; %bb.0:
4938; CHECK-GI-NEXT:    ld3r.2d { v0, v1, v2 }, [x0]
4939; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
4940; CHECK-GI-NEXT:    str x8, [x1]
4941; CHECK-GI-NEXT:    ret
4942  %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0(ptr %A)
4943  %tmp = getelementptr double, ptr %A, i64 %inc
4944  store ptr %tmp, ptr %ptr
4945  ret { <2 x double>, <2 x double>, <2 x double> } %ld3
4946}
4947
4948declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0(ptr) nounwind readonly
4949
; Immediate post-increment case for <1 x double>: ld3r from %A, then %A + 3
; doubles (24 bytes) is stored to %ptr. The default llc run is expected to
; fold the increment into the load's writeback (#24); the -global-isel=1 run
; is expected to keep a separate add.
4950define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3r(ptr %A, ptr %ptr) nounwind {
4951; CHECK-SD-LABEL: test_v1f64_post_imm_ld3r:
4952; CHECK-SD:       ; %bb.0:
4953; CHECK-SD-NEXT:    ld3r.1d { v0, v1, v2 }, [x0], #24
4954; CHECK-SD-NEXT:    str x0, [x1]
4955; CHECK-SD-NEXT:    ret
4956;
4957; CHECK-GI-LABEL: test_v1f64_post_imm_ld3r:
4958; CHECK-GI:       ; %bb.0:
4959; CHECK-GI-NEXT:    ld3r.1d { v0, v1, v2 }, [x0]
4960; CHECK-GI-NEXT:    add x8, x0, #24
4961; CHECK-GI-NEXT:    str x8, [x1]
4962; CHECK-GI-NEXT:    ret
4963  %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0(ptr %A)
4964  %tmp = getelementptr double, ptr %A, i32 3
4965  store ptr %tmp, ptr %ptr
4966  ret { <1 x double>, <1 x double>, <1 x double> } %ld3
4967}
4968
; Register post-increment case for <1 x double>: ld3r from %A, then %A + %inc
; doubles is stored to %ptr. The default llc run is expected to pre-scale %inc
; (lsl #3) and use it as the load's writeback register; the -global-isel=1
; run is expected to keep a separate shifted add.
4969define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3r(ptr %A, ptr %ptr, i64 %inc) nounwind {
4970; CHECK-SD-LABEL: test_v1f64_post_reg_ld3r:
4971; CHECK-SD:       ; %bb.0:
4972; CHECK-SD-NEXT:    lsl x8, x2, #3
4973; CHECK-SD-NEXT:    ld3r.1d { v0, v1, v2 }, [x0], x8
4974; CHECK-SD-NEXT:    str x0, [x1]
4975; CHECK-SD-NEXT:    ret
4976;
4977; CHECK-GI-LABEL: test_v1f64_post_reg_ld3r:
4978; CHECK-GI:       ; %bb.0:
4979; CHECK-GI-NEXT:    ld3r.1d { v0, v1, v2 }, [x0]
4980; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
4981; CHECK-GI-NEXT:    str x8, [x1]
4982; CHECK-GI-NEXT:    ret
4983  %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0(ptr %A)
4984  %tmp = getelementptr double, ptr %A, i64 %inc
4985  store ptr %tmp, ptr %ptr
4986  ret { <1 x double>, <1 x double>, <1 x double> } %ld3
4987}
4988
4989declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0(ptr) nounwind readonly
4990
4991
; Immediate post-increment case: ld4r of i8 from %A, then %A + 4 bytes is
; stored to %ptr. The default llc run is expected to fold the increment into
; the load's writeback (#4); the -global-isel=1 run is expected to keep a
; separate add.
4992define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4r(ptr %A, ptr %ptr) nounwind {
4993; CHECK-SD-LABEL: test_v16i8_post_imm_ld4r:
4994; CHECK-SD:       ; %bb.0:
4995; CHECK-SD-NEXT:    ld4r.16b { v0, v1, v2, v3 }, [x0], #4
4996; CHECK-SD-NEXT:    str x0, [x1]
4997; CHECK-SD-NEXT:    ret
4998;
4999; CHECK-GI-LABEL: test_v16i8_post_imm_ld4r:
5000; CHECK-GI:       ; %bb.0:
5001; CHECK-GI-NEXT:    ld4r.16b { v0, v1, v2, v3 }, [x0]
5002; CHECK-GI-NEXT:    add x8, x0, #4
5003; CHECK-GI-NEXT:    str x8, [x1]
5004; CHECK-GI-NEXT:    ret
5005  %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0(ptr %A)
5006  %tmp = getelementptr i8, ptr %A, i32 4
5007  store ptr %tmp, ptr %ptr
5008  ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
5009}
5010
; Register post-increment case: ld4r of i8 from %A, then %A + %inc bytes is
; stored to %ptr. The default llc run is expected to use x2 directly as the
; load's writeback register (no scaling for byte elements); the
; -global-isel=1 run is expected to keep a separate add.
5011define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4r(ptr %A, ptr %ptr, i64 %inc) nounwind {
5012; CHECK-SD-LABEL: test_v16i8_post_reg_ld4r:
5013; CHECK-SD:       ; %bb.0:
5014; CHECK-SD-NEXT:    ld4r.16b { v0, v1, v2, v3 }, [x0], x2
5015; CHECK-SD-NEXT:    str x0, [x1]
5016; CHECK-SD-NEXT:    ret
5017;
5018; CHECK-GI-LABEL: test_v16i8_post_reg_ld4r:
5019; CHECK-GI:       ; %bb.0:
5020; CHECK-GI-NEXT:    ld4r.16b { v0, v1, v2, v3 }, [x0]
5021; CHECK-GI-NEXT:    add x8, x0, x2
5022; CHECK-GI-NEXT:    str x8, [x1]
5023; CHECK-GI-NEXT:    ret
5024  %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0(ptr %A)
5025  %tmp = getelementptr i8, ptr %A, i64 %inc
5026  store ptr %tmp, ptr %ptr
5027  ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
5028}
5029
5030declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0(ptr) nounwind readonly
5031
5032
; Immediate post-increment case for the 64-bit vector form: ld4r of i8 from
; %A, then %A + 4 bytes is stored to %ptr. The default llc run is expected to
; fold the increment into the load's writeback (#4); the -global-isel=1 run
; is expected to keep a separate add.
5033define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4r(ptr %A, ptr %ptr) nounwind {
5034; CHECK-SD-LABEL: test_v8i8_post_imm_ld4r:
5035; CHECK-SD:       ; %bb.0:
5036; CHECK-SD-NEXT:    ld4r.8b { v0, v1, v2, v3 }, [x0], #4
5037; CHECK-SD-NEXT:    str x0, [x1]
5038; CHECK-SD-NEXT:    ret
5039;
5040; CHECK-GI-LABEL: test_v8i8_post_imm_ld4r:
5041; CHECK-GI:       ; %bb.0:
5042; CHECK-GI-NEXT:    ld4r.8b { v0, v1, v2, v3 }, [x0]
5043; CHECK-GI-NEXT:    add x8, x0, #4
5044; CHECK-GI-NEXT:    str x8, [x1]
5045; CHECK-GI-NEXT:    ret
5046  %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0(ptr %A)
5047  %tmp = getelementptr i8, ptr %A, i32 4
5048  store ptr %tmp, ptr %ptr
5049  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
5050}
5051
; Register post-increment case for the 64-bit vector form: ld4r of i8 from %A,
; then %A + %inc bytes is stored to %ptr. The default llc run is expected to
; use x2 directly as the load's writeback register (no scaling for byte
; elements); the -global-isel=1 run is expected to keep a separate add.
5052define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4r(ptr %A, ptr %ptr, i64 %inc) nounwind {
5053; CHECK-SD-LABEL: test_v8i8_post_reg_ld4r:
5054; CHECK-SD:       ; %bb.0:
5055; CHECK-SD-NEXT:    ld4r.8b { v0, v1, v2, v3 }, [x0], x2
5056; CHECK-SD-NEXT:    str x0, [x1]
5057; CHECK-SD-NEXT:    ret
5058;
5059; CHECK-GI-LABEL: test_v8i8_post_reg_ld4r:
5060; CHECK-GI:       ; %bb.0:
5061; CHECK-GI-NEXT:    ld4r.8b { v0, v1, v2, v3 }, [x0]
5062; CHECK-GI-NEXT:    add x8, x0, x2
5063; CHECK-GI-NEXT:    str x8, [x1]
5064; CHECK-GI-NEXT:    ret
5065  %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0(ptr %A)
5066  %tmp = getelementptr i8, ptr %A, i64 %inc
5067  store ptr %tmp, ptr %ptr
5068  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
5069}
5070
5071declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0(ptr) nounwind readonly
5072
5073
; Immediate post-increment case: ld4r of i16 from %A, then %A + 4 x i16
; (8 bytes) is stored to %ptr. The default llc run is expected to fold the
; increment into the load's writeback (#8); the -global-isel=1 run is
; expected to keep a separate add.
5074define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4r(ptr %A, ptr %ptr) nounwind {
5075; CHECK-SD-LABEL: test_v8i16_post_imm_ld4r:
5076; CHECK-SD:       ; %bb.0:
5077; CHECK-SD-NEXT:    ld4r.8h { v0, v1, v2, v3 }, [x0], #8
5078; CHECK-SD-NEXT:    str x0, [x1]
5079; CHECK-SD-NEXT:    ret
5080;
5081; CHECK-GI-LABEL: test_v8i16_post_imm_ld4r:
5082; CHECK-GI:       ; %bb.0:
5083; CHECK-GI-NEXT:    ld4r.8h { v0, v1, v2, v3 }, [x0]
5084; CHECK-GI-NEXT:    add x8, x0, #8
5085; CHECK-GI-NEXT:    str x8, [x1]
5086; CHECK-GI-NEXT:    ret
5087  %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0(ptr %A)
5088  %tmp = getelementptr i16, ptr %A, i32 4
5089  store ptr %tmp, ptr %ptr
5090  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
5091}
5092
; Register post-increment case: ld4r of i16 from %A, then %A + %inc x i16 is
; stored to %ptr. The default llc run is expected to pre-scale %inc (lsl #1)
; and use it as the load's writeback register; the -global-isel=1 run is
; expected to keep a separate shifted add.
5093define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4r(ptr %A, ptr %ptr, i64 %inc) nounwind {
5094; CHECK-SD-LABEL: test_v8i16_post_reg_ld4r:
5095; CHECK-SD:       ; %bb.0:
5096; CHECK-SD-NEXT:    lsl x8, x2, #1
5097; CHECK-SD-NEXT:    ld4r.8h { v0, v1, v2, v3 }, [x0], x8
5098; CHECK-SD-NEXT:    str x0, [x1]
5099; CHECK-SD-NEXT:    ret
5100;
5101; CHECK-GI-LABEL: test_v8i16_post_reg_ld4r:
5102; CHECK-GI:       ; %bb.0:
5103; CHECK-GI-NEXT:    ld4r.8h { v0, v1, v2, v3 }, [x0]
5104; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
5105; CHECK-GI-NEXT:    str x8, [x1]
5106; CHECK-GI-NEXT:    ret
5107  %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0(ptr %A)
5108  %tmp = getelementptr i16, ptr %A, i64 %inc
5109  store ptr %tmp, ptr %ptr
5110  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
5111}
5112
5113declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0(ptr) nounwind readonly
5114
5115
; Immediate post-increment case for the 64-bit vector form: ld4r of i16 from
; %A, then %A + 4 x i16 (8 bytes) is stored to %ptr. The default llc run is
; expected to fold the increment into the load's writeback (#8); the
; -global-isel=1 run is expected to keep a separate add.
5116define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4r(ptr %A, ptr %ptr) nounwind {
5117; CHECK-SD-LABEL: test_v4i16_post_imm_ld4r:
5118; CHECK-SD:       ; %bb.0:
5119; CHECK-SD-NEXT:    ld4r.4h { v0, v1, v2, v3 }, [x0], #8
5120; CHECK-SD-NEXT:    str x0, [x1]
5121; CHECK-SD-NEXT:    ret
5122;
5123; CHECK-GI-LABEL: test_v4i16_post_imm_ld4r:
5124; CHECK-GI:       ; %bb.0:
5125; CHECK-GI-NEXT:    ld4r.4h { v0, v1, v2, v3 }, [x0]
5126; CHECK-GI-NEXT:    add x8, x0, #8
5127; CHECK-GI-NEXT:    str x8, [x1]
5128; CHECK-GI-NEXT:    ret
5129  %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0(ptr %A)
5130  %tmp = getelementptr i16, ptr %A, i32 4
5131  store ptr %tmp, ptr %ptr
5132  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
5133}
5134
; Register post-increment case for the 64-bit vector form: ld4r of i16 from
; %A, then %A + %inc x i16 is stored to %ptr. The default llc run is expected
; to pre-scale %inc (lsl #1) and use it as the load's writeback register; the
; -global-isel=1 run is expected to keep a separate shifted add.
5135define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4r(ptr %A, ptr %ptr, i64 %inc) nounwind {
5136; CHECK-SD-LABEL: test_v4i16_post_reg_ld4r:
5137; CHECK-SD:       ; %bb.0:
5138; CHECK-SD-NEXT:    lsl x8, x2, #1
5139; CHECK-SD-NEXT:    ld4r.4h { v0, v1, v2, v3 }, [x0], x8
5140; CHECK-SD-NEXT:    str x0, [x1]
5141; CHECK-SD-NEXT:    ret
5142;
5143; CHECK-GI-LABEL: test_v4i16_post_reg_ld4r:
5144; CHECK-GI:       ; %bb.0:
5145; CHECK-GI-NEXT:    ld4r.4h { v0, v1, v2, v3 }, [x0]
5146; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
5147; CHECK-GI-NEXT:    str x8, [x1]
5148; CHECK-GI-NEXT:    ret
5149  %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0(ptr %A)
5150  %tmp = getelementptr i16, ptr %A, i64 %inc
5151  store ptr %tmp, ptr %ptr
5152  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
5153}
5154
5155declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0(ptr) nounwind readonly
5156
5157
; Immediate post-increment case: ld4r of i32 from %A, then %A + 4 x i32
; (16 bytes) is stored to %ptr. The default llc run is expected to fold the
; increment into the load's writeback (#16); the -global-isel=1 run is
; expected to keep a separate add.
5158define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4r(ptr %A, ptr %ptr) nounwind {
5159; CHECK-SD-LABEL: test_v4i32_post_imm_ld4r:
5160; CHECK-SD:       ; %bb.0:
5161; CHECK-SD-NEXT:    ld4r.4s { v0, v1, v2, v3 }, [x0], #16
5162; CHECK-SD-NEXT:    str x0, [x1]
5163; CHECK-SD-NEXT:    ret
5164;
5165; CHECK-GI-LABEL: test_v4i32_post_imm_ld4r:
5166; CHECK-GI:       ; %bb.0:
5167; CHECK-GI-NEXT:    ld4r.4s { v0, v1, v2, v3 }, [x0]
5168; CHECK-GI-NEXT:    add x8, x0, #16
5169; CHECK-GI-NEXT:    str x8, [x1]
5170; CHECK-GI-NEXT:    ret
5171  %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0(ptr %A)
5172  %tmp = getelementptr i32, ptr %A, i32 4
5173  store ptr %tmp, ptr %ptr
5174  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
5175}
5176
; Register post-increment case: ld4r of i32 from %A, then %A + %inc x i32 is
; stored to %ptr. The default llc run is expected to pre-scale %inc (lsl #2)
; and use it as the load's writeback register; the -global-isel=1 run is
; expected to keep a separate shifted add.
5177define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4r(ptr %A, ptr %ptr, i64 %inc) nounwind {
5178; CHECK-SD-LABEL: test_v4i32_post_reg_ld4r:
5179; CHECK-SD:       ; %bb.0:
5180; CHECK-SD-NEXT:    lsl x8, x2, #2
5181; CHECK-SD-NEXT:    ld4r.4s { v0, v1, v2, v3 }, [x0], x8
5182; CHECK-SD-NEXT:    str x0, [x1]
5183; CHECK-SD-NEXT:    ret
5184;
5185; CHECK-GI-LABEL: test_v4i32_post_reg_ld4r:
5186; CHECK-GI:       ; %bb.0:
5187; CHECK-GI-NEXT:    ld4r.4s { v0, v1, v2, v3 }, [x0]
5188; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
5189; CHECK-GI-NEXT:    str x8, [x1]
5190; CHECK-GI-NEXT:    ret
5191  %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0(ptr %A)
5192  %tmp = getelementptr i32, ptr %A, i64 %inc
5193  store ptr %tmp, ptr %ptr
5194  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
5195}
5196
5197declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0(ptr) nounwind readonly
5198
; Immediate post-increment case for the 64-bit vector form: ld4r of i32 from
; %A, then %A + 4 x i32 (16 bytes) is stored to %ptr. The default llc run is
; expected to fold the increment into the load's writeback (#16); the
; -global-isel=1 run is expected to keep a separate add.
5199define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4r(ptr %A, ptr %ptr) nounwind {
5200; CHECK-SD-LABEL: test_v2i32_post_imm_ld4r:
5201; CHECK-SD:       ; %bb.0:
5202; CHECK-SD-NEXT:    ld4r.2s { v0, v1, v2, v3 }, [x0], #16
5203; CHECK-SD-NEXT:    str x0, [x1]
5204; CHECK-SD-NEXT:    ret
5205;
5206; CHECK-GI-LABEL: test_v2i32_post_imm_ld4r:
5207; CHECK-GI:       ; %bb.0:
5208; CHECK-GI-NEXT:    ld4r.2s { v0, v1, v2, v3 }, [x0]
5209; CHECK-GI-NEXT:    add x8, x0, #16
5210; CHECK-GI-NEXT:    str x8, [x1]
5211; CHECK-GI-NEXT:    ret
5212  %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0(ptr %A)
5213  %tmp = getelementptr i32, ptr %A, i32 4
5214  store ptr %tmp, ptr %ptr
5215  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
5216}
5217
; Register post-increment case for the 64-bit vector form: ld4r of i32 from
; %A, then %A + %inc x i32 is stored to %ptr. The default llc run is expected
; to pre-scale %inc (lsl #2) and use it as the load's writeback register; the
; -global-isel=1 run is expected to keep a separate shifted add.
5218define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4r(ptr %A, ptr %ptr, i64 %inc) nounwind {
5219; CHECK-SD-LABEL: test_v2i32_post_reg_ld4r:
5220; CHECK-SD:       ; %bb.0:
5221; CHECK-SD-NEXT:    lsl x8, x2, #2
5222; CHECK-SD-NEXT:    ld4r.2s { v0, v1, v2, v3 }, [x0], x8
5223; CHECK-SD-NEXT:    str x0, [x1]
5224; CHECK-SD-NEXT:    ret
5225;
5226; CHECK-GI-LABEL: test_v2i32_post_reg_ld4r:
5227; CHECK-GI:       ; %bb.0:
5228; CHECK-GI-NEXT:    ld4r.2s { v0, v1, v2, v3 }, [x0]
5229; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
5230; CHECK-GI-NEXT:    str x8, [x1]
5231; CHECK-GI-NEXT:    ret
5232  %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0(ptr %A)
5233  %tmp = getelementptr i32, ptr %A, i64 %inc
5234  store ptr %tmp, ptr %ptr
5235  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
5236}
5237
5238declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0(ptr) nounwind readonly
5239
5240
; Immediate post-increment case: ld4r of i64 from %A, then %A + 4 x i64
; (32 bytes) is stored to %ptr. The default llc run is expected to fold the
; increment into the load's writeback (#32); the -global-isel=1 run is
; expected to keep a separate add.
5241define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4r(ptr %A, ptr %ptr) nounwind {
5242; CHECK-SD-LABEL: test_v2i64_post_imm_ld4r:
5243; CHECK-SD:       ; %bb.0:
5244; CHECK-SD-NEXT:    ld4r.2d { v0, v1, v2, v3 }, [x0], #32
5245; CHECK-SD-NEXT:    str x0, [x1]
5246; CHECK-SD-NEXT:    ret
5247;
5248; CHECK-GI-LABEL: test_v2i64_post_imm_ld4r:
5249; CHECK-GI:       ; %bb.0:
5250; CHECK-GI-NEXT:    ld4r.2d { v0, v1, v2, v3 }, [x0]
5251; CHECK-GI-NEXT:    add x8, x0, #32
5252; CHECK-GI-NEXT:    str x8, [x1]
5253; CHECK-GI-NEXT:    ret
5254  %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0(ptr %A)
5255  %tmp = getelementptr i64, ptr %A, i32 4
5256  store ptr %tmp, ptr %ptr
5257  ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
5258}
5259
; Register post-increment case: ld4r of i64 from %A, then %A + %inc x i64 is
; stored to %ptr. The default llc run is expected to pre-scale %inc (lsl #3)
; and use it as the load's writeback register; the -global-isel=1 run is
; expected to keep a separate shifted add.
5260define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4r(ptr %A, ptr %ptr, i64 %inc) nounwind {
5261; CHECK-SD-LABEL: test_v2i64_post_reg_ld4r:
5262; CHECK-SD:       ; %bb.0:
5263; CHECK-SD-NEXT:    lsl x8, x2, #3
5264; CHECK-SD-NEXT:    ld4r.2d { v0, v1, v2, v3 }, [x0], x8
5265; CHECK-SD-NEXT:    str x0, [x1]
5266; CHECK-SD-NEXT:    ret
5267;
5268; CHECK-GI-LABEL: test_v2i64_post_reg_ld4r:
5269; CHECK-GI:       ; %bb.0:
5270; CHECK-GI-NEXT:    ld4r.2d { v0, v1, v2, v3 }, [x0]
5271; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
5272; CHECK-GI-NEXT:    str x8, [x1]
5273; CHECK-GI-NEXT:    ret
5274  %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0(ptr %A)
5275  %tmp = getelementptr i64, ptr %A, i64 %inc
5276  store ptr %tmp, ptr %ptr
5277  ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
5278}
5279
5280declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0(ptr) nounwind readonly
5281
; Immediate post-increment case for <1 x i64>: ld4r from %A, then %A + 4 x i64
; (32 bytes) is stored to %ptr. The default llc run is expected to fold the
; increment into the load's writeback (#32); the -global-isel=1 run is
; expected to keep a separate add.
5282define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4r(ptr %A, ptr %ptr) nounwind {
5283; CHECK-SD-LABEL: test_v1i64_post_imm_ld4r:
5284; CHECK-SD:       ; %bb.0:
5285; CHECK-SD-NEXT:    ld4r.1d { v0, v1, v2, v3 }, [x0], #32
5286; CHECK-SD-NEXT:    str x0, [x1]
5287; CHECK-SD-NEXT:    ret
5288;
5289; CHECK-GI-LABEL: test_v1i64_post_imm_ld4r:
5290; CHECK-GI:       ; %bb.0:
5291; CHECK-GI-NEXT:    ld4r.1d { v0, v1, v2, v3 }, [x0]
5292; CHECK-GI-NEXT:    add x8, x0, #32
5293; CHECK-GI-NEXT:    str x8, [x1]
5294; CHECK-GI-NEXT:    ret
5295  %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0(ptr %A)
5296  %tmp = getelementptr i64, ptr %A, i32 4
5297  store ptr %tmp, ptr %ptr
5298  ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
5299}
5300
; Register post-increment case for <1 x i64>: ld4r from %A, then %A + %inc x
; i64 is stored to %ptr. The default llc run is expected to pre-scale %inc
; (lsl #3) and use it as the load's writeback register; the -global-isel=1
; run is expected to keep a separate shifted add.
5301define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4r(ptr %A, ptr %ptr, i64 %inc) nounwind {
5302; CHECK-SD-LABEL: test_v1i64_post_reg_ld4r:
5303; CHECK-SD:       ; %bb.0:
5304; CHECK-SD-NEXT:    lsl x8, x2, #3
5305; CHECK-SD-NEXT:    ld4r.1d { v0, v1, v2, v3 }, [x0], x8
5306; CHECK-SD-NEXT:    str x0, [x1]
5307; CHECK-SD-NEXT:    ret
5308;
5309; CHECK-GI-LABEL: test_v1i64_post_reg_ld4r:
5310; CHECK-GI:       ; %bb.0:
5311; CHECK-GI-NEXT:    ld4r.1d { v0, v1, v2, v3 }, [x0]
5312; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
5313; CHECK-GI-NEXT:    str x8, [x1]
5314; CHECK-GI-NEXT:    ret
5315  %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0(ptr %A)
5316  %tmp = getelementptr i64, ptr %A, i64 %inc
5317  store ptr %tmp, ptr %ptr
5318  ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
5319}
5320
5321declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0(ptr) nounwind readonly
5322
5323
; Immediate post-increment case: ld4r of float from %A, then %A + 4 floats
; (16 bytes) is stored to %ptr. The default llc run is expected to fold the
; increment into the load's writeback (#16); the -global-isel=1 run is
; expected to keep a separate add.
5324define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4r(ptr %A, ptr %ptr) nounwind {
5325; CHECK-SD-LABEL: test_v4f32_post_imm_ld4r:
5326; CHECK-SD:       ; %bb.0:
5327; CHECK-SD-NEXT:    ld4r.4s { v0, v1, v2, v3 }, [x0], #16
5328; CHECK-SD-NEXT:    str x0, [x1]
5329; CHECK-SD-NEXT:    ret
5330;
5331; CHECK-GI-LABEL: test_v4f32_post_imm_ld4r:
5332; CHECK-GI:       ; %bb.0:
5333; CHECK-GI-NEXT:    ld4r.4s { v0, v1, v2, v3 }, [x0]
5334; CHECK-GI-NEXT:    add x8, x0, #16
5335; CHECK-GI-NEXT:    str x8, [x1]
5336; CHECK-GI-NEXT:    ret
5337  %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0(ptr %A)
5338  %tmp = getelementptr float, ptr %A, i32 4
5339  store ptr %tmp, ptr %ptr
5340  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
5341}
5342
; Register post-increment case: ld4r of float from %A, then %A + %inc floats
; is stored to %ptr. The default llc run is expected to pre-scale %inc
; (lsl #2) and use it as the load's writeback register; the -global-isel=1
; run is expected to keep a separate shifted add.
5343define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4r(ptr %A, ptr %ptr, i64 %inc) nounwind {
5344; CHECK-SD-LABEL: test_v4f32_post_reg_ld4r:
5345; CHECK-SD:       ; %bb.0:
5346; CHECK-SD-NEXT:    lsl x8, x2, #2
5347; CHECK-SD-NEXT:    ld4r.4s { v0, v1, v2, v3 }, [x0], x8
5348; CHECK-SD-NEXT:    str x0, [x1]
5349; CHECK-SD-NEXT:    ret
5350;
5351; CHECK-GI-LABEL: test_v4f32_post_reg_ld4r:
5352; CHECK-GI:       ; %bb.0:
5353; CHECK-GI-NEXT:    ld4r.4s { v0, v1, v2, v3 }, [x0]
5354; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
5355; CHECK-GI-NEXT:    str x8, [x1]
5356; CHECK-GI-NEXT:    ret
5357  %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0(ptr %A)
5358  %tmp = getelementptr float, ptr %A, i64 %inc
5359  store ptr %tmp, ptr %ptr
5360  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
5361}
5362
5363declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0(ptr) nounwind readonly
5364
; Immediate post-increment case for the 64-bit vector form: ld4r of float from
; %A, then %A + 4 floats (16 bytes) is stored to %ptr. The default llc run is
; expected to fold the increment into the load's writeback (#16); the
; -global-isel=1 run is expected to keep a separate add.
5365define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4r(ptr %A, ptr %ptr) nounwind {
5366; CHECK-SD-LABEL: test_v2f32_post_imm_ld4r:
5367; CHECK-SD:       ; %bb.0:
5368; CHECK-SD-NEXT:    ld4r.2s { v0, v1, v2, v3 }, [x0], #16
5369; CHECK-SD-NEXT:    str x0, [x1]
5370; CHECK-SD-NEXT:    ret
5371;
5372; CHECK-GI-LABEL: test_v2f32_post_imm_ld4r:
5373; CHECK-GI:       ; %bb.0:
5374; CHECK-GI-NEXT:    ld4r.2s { v0, v1, v2, v3 }, [x0]
5375; CHECK-GI-NEXT:    add x8, x0, #16
5376; CHECK-GI-NEXT:    str x8, [x1]
5377; CHECK-GI-NEXT:    ret
5378  %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0(ptr %A)
5379  %tmp = getelementptr float, ptr %A, i32 4
5380  store ptr %tmp, ptr %ptr
5381  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
5382}
5383
; Register post-increment case for the 64-bit vector form: ld4r of float from
; %A, then %A + %inc floats is stored to %ptr. The default llc run is expected
; to pre-scale %inc (lsl #2) and use it as the load's writeback register; the
; -global-isel=1 run is expected to keep a separate shifted add.
5384define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4r(ptr %A, ptr %ptr, i64 %inc) nounwind {
5385; CHECK-SD-LABEL: test_v2f32_post_reg_ld4r:
5386; CHECK-SD:       ; %bb.0:
5387; CHECK-SD-NEXT:    lsl x8, x2, #2
5388; CHECK-SD-NEXT:    ld4r.2s { v0, v1, v2, v3 }, [x0], x8
5389; CHECK-SD-NEXT:    str x0, [x1]
5390; CHECK-SD-NEXT:    ret
5391;
5392; CHECK-GI-LABEL: test_v2f32_post_reg_ld4r:
5393; CHECK-GI:       ; %bb.0:
5394; CHECK-GI-NEXT:    ld4r.2s { v0, v1, v2, v3 }, [x0]
5395; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
5396; CHECK-GI-NEXT:    str x8, [x1]
5397; CHECK-GI-NEXT:    ret
5398  %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0(ptr %A)
5399  %tmp = getelementptr float, ptr %A, i64 %inc
5400  store ptr %tmp, ptr %ptr
5401  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
5402}
5403
5404declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0(ptr) nounwind readonly
5405
5406
; Immediate post-increment case: ld4r of double from %A, then %A + 4 doubles
; (32 bytes) is stored to %ptr. The default llc run is expected to fold the
; increment into the load's writeback (#32); the -global-isel=1 run is
; expected to keep a separate add.
5407define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4r(ptr %A, ptr %ptr) nounwind {
5408; CHECK-SD-LABEL: test_v2f64_post_imm_ld4r:
5409; CHECK-SD:       ; %bb.0:
5410; CHECK-SD-NEXT:    ld4r.2d { v0, v1, v2, v3 }, [x0], #32
5411; CHECK-SD-NEXT:    str x0, [x1]
5412; CHECK-SD-NEXT:    ret
5413;
5414; CHECK-GI-LABEL: test_v2f64_post_imm_ld4r:
5415; CHECK-GI:       ; %bb.0:
5416; CHECK-GI-NEXT:    ld4r.2d { v0, v1, v2, v3 }, [x0]
5417; CHECK-GI-NEXT:    add x8, x0, #32
5418; CHECK-GI-NEXT:    str x8, [x1]
5419; CHECK-GI-NEXT:    ret
5420  %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0(ptr %A)
5421  %tmp = getelementptr double, ptr %A, i32 4
5422  store ptr %tmp, ptr %ptr
5423  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
5424}
5425
; Register post-increment case: ld4r of double from %A, then %A + %inc doubles
; is stored to %ptr. The default llc run is expected to pre-scale %inc
; (lsl #3) and use it as the load's writeback register; the -global-isel=1
; run is expected to keep a separate shifted add.
5426define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4r(ptr %A, ptr %ptr, i64 %inc) nounwind {
5427; CHECK-SD-LABEL: test_v2f64_post_reg_ld4r:
5428; CHECK-SD:       ; %bb.0:
5429; CHECK-SD-NEXT:    lsl x8, x2, #3
5430; CHECK-SD-NEXT:    ld4r.2d { v0, v1, v2, v3 }, [x0], x8
5431; CHECK-SD-NEXT:    str x0, [x1]
5432; CHECK-SD-NEXT:    ret
5433;
5434; CHECK-GI-LABEL: test_v2f64_post_reg_ld4r:
5435; CHECK-GI:       ; %bb.0:
5436; CHECK-GI-NEXT:    ld4r.2d { v0, v1, v2, v3 }, [x0]
5437; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
5438; CHECK-GI-NEXT:    str x8, [x1]
5439; CHECK-GI-NEXT:    ret
5440  %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0(ptr %A)
5441  %tmp = getelementptr double, ptr %A, i64 %inc
5442  store ptr %tmp, ptr %ptr
5443  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
5444}
5445
5446declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0(ptr) nounwind readonly
5447
; Immediate post-increment case for <1 x double>: ld4r from %A, then %A + 4
; doubles (32 bytes) is stored to %ptr. The default llc run is expected to
; fold the increment into the load's writeback (#32); the -global-isel=1 run
; is expected to keep a separate add.
5448define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4r(ptr %A, ptr %ptr) nounwind {
5449; CHECK-SD-LABEL: test_v1f64_post_imm_ld4r:
5450; CHECK-SD:       ; %bb.0:
5451; CHECK-SD-NEXT:    ld4r.1d { v0, v1, v2, v3 }, [x0], #32
5452; CHECK-SD-NEXT:    str x0, [x1]
5453; CHECK-SD-NEXT:    ret
5454;
5455; CHECK-GI-LABEL: test_v1f64_post_imm_ld4r:
5456; CHECK-GI:       ; %bb.0:
5457; CHECK-GI-NEXT:    ld4r.1d { v0, v1, v2, v3 }, [x0]
5458; CHECK-GI-NEXT:    add x8, x0, #32
5459; CHECK-GI-NEXT:    str x8, [x1]
5460; CHECK-GI-NEXT:    ret
5461  %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0(ptr %A)
5462  %tmp = getelementptr double, ptr %A, i32 4
5463  store ptr %tmp, ptr %ptr
5464  ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
5465}
5466
; Register post-increment case for <1 x double>: ld4r from %A, then %A + %inc
; doubles is stored to %ptr. The default llc run is expected to pre-scale %inc
; (lsl #3) and use it as the load's writeback register; the -global-isel=1
; run is expected to keep a separate shifted add.
5467define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4r(ptr %A, ptr %ptr, i64 %inc) nounwind {
5468; CHECK-SD-LABEL: test_v1f64_post_reg_ld4r:
5469; CHECK-SD:       ; %bb.0:
5470; CHECK-SD-NEXT:    lsl x8, x2, #3
5471; CHECK-SD-NEXT:    ld4r.1d { v0, v1, v2, v3 }, [x0], x8
5472; CHECK-SD-NEXT:    str x0, [x1]
5473; CHECK-SD-NEXT:    ret
5474;
5475; CHECK-GI-LABEL: test_v1f64_post_reg_ld4r:
5476; CHECK-GI:       ; %bb.0:
5477; CHECK-GI-NEXT:    ld4r.1d { v0, v1, v2, v3 }, [x0]
5478; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
5479; CHECK-GI-NEXT:    str x8, [x1]
5480; CHECK-GI-NEXT:    ret
5481  %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0(ptr %A)
5482  %tmp = getelementptr double, ptr %A, i64 %inc
5483  store ptr %tmp, ptr %ptr
5484  ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
5485}
5486
5487declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0(ptr) nounwind readonly
5488
5489
; Immediate post-increment case: ld2lane of i8 from %A with %B/%C as the
; vector operands and lane index 0, then %A + 2 bytes is stored to %ptr. The
; default llc run is expected to fold the increment into the load's writeback
; (#2); the -global-isel=1 run is expected to compute the new pointer with a
; separate add.
5490define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
5491; CHECK-SD-LABEL: test_v16i8_post_imm_ld2lane:
5492; CHECK-SD:       ; %bb.0:
5493; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
5494; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
5495; CHECK-SD-NEXT:    ld2.b { v0, v1 }[0], [x0], #2
5496; CHECK-SD-NEXT:    str x0, [x1]
5497; CHECK-SD-NEXT:    ret
5498;
5499; CHECK-GI-LABEL: test_v16i8_post_imm_ld2lane:
5500; CHECK-GI:       ; %bb.0:
5501; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
5502; CHECK-GI-NEXT:    add x8, x0, #2
5503; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
5504; CHECK-GI-NEXT:    ld2.b { v0, v1 }[0], [x0]
5505; CHECK-GI-NEXT:    str x8, [x1]
5506; CHECK-GI-NEXT:    ret
5507  %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, i64 0, ptr %A)
5508  %tmp = getelementptr i8, ptr %A, i32 2
5509  store ptr %tmp, ptr %ptr
5510  ret { <16 x i8>, <16 x i8> } %ld2
5511}
5512
; Register post-increment case: ld2lane of i8 from %A with %B/%C as the vector
; operands and lane index 0, then %A + %inc bytes is stored to %ptr. The
; default llc run is expected to use x2 directly as the load's writeback
; register (no scaling for byte elements); the -global-isel=1 run is expected
; to compute the new pointer with a separate add.
5513define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C) nounwind {
5514; CHECK-SD-LABEL: test_v16i8_post_reg_ld2lane:
5515; CHECK-SD:       ; %bb.0:
5516; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
5517; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
5518; CHECK-SD-NEXT:    ld2.b { v0, v1 }[0], [x0], x2
5519; CHECK-SD-NEXT:    str x0, [x1]
5520; CHECK-SD-NEXT:    ret
5521;
5522; CHECK-GI-LABEL: test_v16i8_post_reg_ld2lane:
5523; CHECK-GI:       ; %bb.0:
5524; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
5525; CHECK-GI-NEXT:    add x8, x0, x2
5526; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
5527; CHECK-GI-NEXT:    ld2.b { v0, v1 }[0], [x0]
5528; CHECK-GI-NEXT:    str x8, [x1]
5529; CHECK-GI-NEXT:    ret
5530  %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, i64 0, ptr %A)
5531  %tmp = getelementptr i8, ptr %A, i64 %inc
5532  store ptr %tmp, ptr %ptr
5533  ret { <16 x i8>, <16 x i8> } %ld2
5534}
5535
5536declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8>, <16 x i8>, i64, ptr) nounwind readonly
5537
5538
; Immediate post-increment case for the 64-bit vector form: ld2lane of i8 from
; %A with %B/%C as the vector operands and lane index 0, then %A + 2 bytes is
; stored to %ptr. The default llc run is expected to fold the increment into
; the load's writeback (#2); the -global-isel=1 run is expected to compute the
; new pointer with a separate add.
5539define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
5540; CHECK-SD-LABEL: test_v8i8_post_imm_ld2lane:
5541; CHECK-SD:       ; %bb.0:
5542; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
5543; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
5544; CHECK-SD-NEXT:    ld2.b { v0, v1 }[0], [x0], #2
5545; CHECK-SD-NEXT:    str x0, [x1]
5546; CHECK-SD-NEXT:    ret
5547;
5548; CHECK-GI-LABEL: test_v8i8_post_imm_ld2lane:
5549; CHECK-GI:       ; %bb.0:
5550; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
5551; CHECK-GI-NEXT:    add x8, x0, #2
5552; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
5553; CHECK-GI-NEXT:    ld2.b { v0, v1 }[0], [x0]
5554; CHECK-GI-NEXT:    str x8, [x1]
5555; CHECK-GI-NEXT:    ret
5556  %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, i64 0, ptr %A)
5557  %tmp = getelementptr i8, ptr %A, i32 2
5558  store ptr %tmp, ptr %ptr
5559  ret { <8 x i8>, <8 x i8> } %ld2
5560}
5561
; Register post-increment case for the 64-bit vector form: ld2lane of i8 from
; %A with %B/%C as the vector operands and lane index 0, then %A + %inc bytes
; is stored to %ptr. The default llc run is expected to use x2 directly as the
; load's writeback register (no scaling for byte elements); the
; -global-isel=1 run is expected to compute the new pointer with a separate
; add.
5562define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C) nounwind {
5563; CHECK-SD-LABEL: test_v8i8_post_reg_ld2lane:
5564; CHECK-SD:       ; %bb.0:
5565; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
5566; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
5567; CHECK-SD-NEXT:    ld2.b { v0, v1 }[0], [x0], x2
5568; CHECK-SD-NEXT:    str x0, [x1]
5569; CHECK-SD-NEXT:    ret
5570;
5571; CHECK-GI-LABEL: test_v8i8_post_reg_ld2lane:
5572; CHECK-GI:       ; %bb.0:
5573; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
5574; CHECK-GI-NEXT:    add x8, x0, x2
5575; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
5576; CHECK-GI-NEXT:    ld2.b { v0, v1 }[0], [x0]
5577; CHECK-GI-NEXT:    str x8, [x1]
5578; CHECK-GI-NEXT:    ret
5579  %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, i64 0, ptr %A)
5580  %tmp = getelementptr i8, ptr %A, i64 %inc
5581  store ptr %tmp, ptr %ptr
5582  ret { <8 x i8>, <8 x i8> } %ld2
5583}
5584
5585declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8>, <8 x i8>, i64, ptr) nounwind readonly
5586
5587
; Lane-0 ld2lane from %A into %B/%C, then %A advanced by two i16 elements is
; stored through %ptr. SD checks (default llc) expect the advance folded into
; the load as a post-index immediate of #4; GI checks (-global-isel=1) expect a
; separate `add x8, x0, #4` and an unindexed load.
5588define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
5589; CHECK-SD-LABEL: test_v8i16_post_imm_ld2lane:
5590; CHECK-SD:       ; %bb.0:
5591; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
5592; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
5593; CHECK-SD-NEXT:    ld2.h { v0, v1 }[0], [x0], #4
5594; CHECK-SD-NEXT:    str x0, [x1]
5595; CHECK-SD-NEXT:    ret
5596;
5597; CHECK-GI-LABEL: test_v8i16_post_imm_ld2lane:
5598; CHECK-GI:       ; %bb.0:
5599; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
5600; CHECK-GI-NEXT:    add x8, x0, #4
5601; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
5602; CHECK-GI-NEXT:    ld2.h { v0, v1 }[0], [x0]
5603; CHECK-GI-NEXT:    str x8, [x1]
5604; CHECK-GI-NEXT:    ret
5605  %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, i64 0, ptr %A)
5606  %tmp = getelementptr i16, ptr %A, i32 2
5607  store ptr %tmp, ptr %ptr
5608  ret { <8 x i16>, <8 x i16> } %ld2
5609}
5610
; Lane-0 ld2lane from %A into %B/%C, then %A advanced by %inc i16 elements is
; stored through %ptr. SD checks (default llc) expect x2 scaled by `lsl #1`
; into x8 and used as the post-index register; GI checks (-global-isel=1)
; expect `add x8, x0, x2, lsl #1` and an unindexed load.
5611define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C) nounwind {
5612; CHECK-SD-LABEL: test_v8i16_post_reg_ld2lane:
5613; CHECK-SD:       ; %bb.0:
5614; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
5615; CHECK-SD-NEXT:    lsl x8, x2, #1
5616; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
5617; CHECK-SD-NEXT:    ld2.h { v0, v1 }[0], [x0], x8
5618; CHECK-SD-NEXT:    str x0, [x1]
5619; CHECK-SD-NEXT:    ret
5620;
5621; CHECK-GI-LABEL: test_v8i16_post_reg_ld2lane:
5622; CHECK-GI:       ; %bb.0:
5623; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
5624; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
5625; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
5626; CHECK-GI-NEXT:    ld2.h { v0, v1 }[0], [x0]
5627; CHECK-GI-NEXT:    str x8, [x1]
5628; CHECK-GI-NEXT:    ret
5629  %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, i64 0, ptr %A)
5630  %tmp = getelementptr i16, ptr %A, i64 %inc
5631  store ptr %tmp, ptr %ptr
5632  ret { <8 x i16>, <8 x i16> } %ld2
5633}
5634
5635declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16>, <8 x i16>, i64, ptr) nounwind readonly
5636
5637
; Lane-0 ld2lane from %A into the 64-bit vectors %B/%C, then %A advanced by two
; i16 elements is stored through %ptr. SD checks (default llc) expect the
; advance folded into the load as a post-index immediate of #4; GI checks
; (-global-isel=1) expect a separate `add x8, x0, #4` and an unindexed load.
5638define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
5639; CHECK-SD-LABEL: test_v4i16_post_imm_ld2lane:
5640; CHECK-SD:       ; %bb.0:
5641; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
5642; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
5643; CHECK-SD-NEXT:    ld2.h { v0, v1 }[0], [x0], #4
5644; CHECK-SD-NEXT:    str x0, [x1]
5645; CHECK-SD-NEXT:    ret
5646;
5647; CHECK-GI-LABEL: test_v4i16_post_imm_ld2lane:
5648; CHECK-GI:       ; %bb.0:
5649; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
5650; CHECK-GI-NEXT:    add x8, x0, #4
5651; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
5652; CHECK-GI-NEXT:    ld2.h { v0, v1 }[0], [x0]
5653; CHECK-GI-NEXT:    str x8, [x1]
5654; CHECK-GI-NEXT:    ret
5655  %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, i64 0, ptr %A)
5656  %tmp = getelementptr i16, ptr %A, i32 2
5657  store ptr %tmp, ptr %ptr
5658  ret { <4 x i16>, <4 x i16> } %ld2
5659}
5660
; Lane-0 ld2lane from %A into the 64-bit vectors %B/%C, then %A advanced by
; %inc i16 elements is stored through %ptr. SD checks (default llc) expect x2
; scaled by `lsl #1` into x8 and used as the post-index register; GI checks
; (-global-isel=1) expect `add x8, x0, x2, lsl #1` and an unindexed load.
5661define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C) nounwind {
5662; CHECK-SD-LABEL: test_v4i16_post_reg_ld2lane:
5663; CHECK-SD:       ; %bb.0:
5664; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
5665; CHECK-SD-NEXT:    lsl x8, x2, #1
5666; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
5667; CHECK-SD-NEXT:    ld2.h { v0, v1 }[0], [x0], x8
5668; CHECK-SD-NEXT:    str x0, [x1]
5669; CHECK-SD-NEXT:    ret
5670;
5671; CHECK-GI-LABEL: test_v4i16_post_reg_ld2lane:
5672; CHECK-GI:       ; %bb.0:
5673; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
5674; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
5675; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
5676; CHECK-GI-NEXT:    ld2.h { v0, v1 }[0], [x0]
5677; CHECK-GI-NEXT:    str x8, [x1]
5678; CHECK-GI-NEXT:    ret
5679  %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, i64 0, ptr %A)
5680  %tmp = getelementptr i16, ptr %A, i64 %inc
5681  store ptr %tmp, ptr %ptr
5682  ret { <4 x i16>, <4 x i16> } %ld2
5683}
5684
5685declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0(<4 x i16>, <4 x i16>, i64, ptr) nounwind readonly
5686
5687
; Lane-0 ld2lane from %A into %B/%C, then %A advanced by two i32 elements is
; stored through %ptr. SD checks (default llc) expect the advance folded into
; the load as a post-index immediate of #8; GI checks (-global-isel=1) expect a
; separate `add x8, x0, #8` and an unindexed load.
5688define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
5689; CHECK-SD-LABEL: test_v4i32_post_imm_ld2lane:
5690; CHECK-SD:       ; %bb.0:
5691; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
5692; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
5693; CHECK-SD-NEXT:    ld2.s { v0, v1 }[0], [x0], #8
5694; CHECK-SD-NEXT:    str x0, [x1]
5695; CHECK-SD-NEXT:    ret
5696;
5697; CHECK-GI-LABEL: test_v4i32_post_imm_ld2lane:
5698; CHECK-GI:       ; %bb.0:
5699; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
5700; CHECK-GI-NEXT:    add x8, x0, #8
5701; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
5702; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
5703; CHECK-GI-NEXT:    str x8, [x1]
5704; CHECK-GI-NEXT:    ret
5705  %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, i64 0, ptr %A)
5706  %tmp = getelementptr i32, ptr %A, i32 2
5707  store ptr %tmp, ptr %ptr
5708  ret { <4 x i32>, <4 x i32> } %ld2
5709}
5710
; Lane-0 ld2lane from %A into %B/%C, then %A advanced by %inc i32 elements is
; stored through %ptr. SD checks (default llc) expect x2 scaled by `lsl #2`
; into x8 and used as the post-index register; GI checks (-global-isel=1)
; expect `add x8, x0, x2, lsl #2` and an unindexed load.
5711define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C) nounwind {
5712; CHECK-SD-LABEL: test_v4i32_post_reg_ld2lane:
5713; CHECK-SD:       ; %bb.0:
5714; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
5715; CHECK-SD-NEXT:    lsl x8, x2, #2
5716; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
5717; CHECK-SD-NEXT:    ld2.s { v0, v1 }[0], [x0], x8
5718; CHECK-SD-NEXT:    str x0, [x1]
5719; CHECK-SD-NEXT:    ret
5720;
5721; CHECK-GI-LABEL: test_v4i32_post_reg_ld2lane:
5722; CHECK-GI:       ; %bb.0:
5723; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
5724; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
5725; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
5726; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
5727; CHECK-GI-NEXT:    str x8, [x1]
5728; CHECK-GI-NEXT:    ret
5729  %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, i64 0, ptr %A)
5730  %tmp = getelementptr i32, ptr %A, i64 %inc
5731  store ptr %tmp, ptr %ptr
5732  ret { <4 x i32>, <4 x i32> } %ld2
5733}
5734
5735declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32>, <4 x i32>, i64, ptr) nounwind readonly
5736
5737
; Lane-0 ld2lane from %A into the 64-bit vectors %B/%C, then %A advanced by two
; i32 elements is stored through %ptr. SD checks (default llc) expect the
; advance folded into the load as a post-index immediate of #8; GI checks
; (-global-isel=1) expect a separate `add x8, x0, #8` and an unindexed load.
5738define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
5739; CHECK-SD-LABEL: test_v2i32_post_imm_ld2lane:
5740; CHECK-SD:       ; %bb.0:
5741; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
5742; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
5743; CHECK-SD-NEXT:    ld2.s { v0, v1 }[0], [x0], #8
5744; CHECK-SD-NEXT:    str x0, [x1]
5745; CHECK-SD-NEXT:    ret
5746;
5747; CHECK-GI-LABEL: test_v2i32_post_imm_ld2lane:
5748; CHECK-GI:       ; %bb.0:
5749; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
5750; CHECK-GI-NEXT:    add x8, x0, #8
5751; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
5752; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
5753; CHECK-GI-NEXT:    str x8, [x1]
5754; CHECK-GI-NEXT:    ret
5755  %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, i64 0, ptr %A)
5756  %tmp = getelementptr i32, ptr %A, i32 2
5757  store ptr %tmp, ptr %ptr
5758  ret { <2 x i32>, <2 x i32> } %ld2
5759}
5760
; Lane-0 ld2lane from %A into the 64-bit vectors %B/%C, then %A advanced by
; %inc i32 elements is stored through %ptr. SD checks (default llc) expect x2
; scaled by `lsl #2` into x8 and used as the post-index register; GI checks
; (-global-isel=1) expect `add x8, x0, x2, lsl #2` and an unindexed load.
5761define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C) nounwind {
5762; CHECK-SD-LABEL: test_v2i32_post_reg_ld2lane:
5763; CHECK-SD:       ; %bb.0:
5764; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
5765; CHECK-SD-NEXT:    lsl x8, x2, #2
5766; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
5767; CHECK-SD-NEXT:    ld2.s { v0, v1 }[0], [x0], x8
5768; CHECK-SD-NEXT:    str x0, [x1]
5769; CHECK-SD-NEXT:    ret
5770;
5771; CHECK-GI-LABEL: test_v2i32_post_reg_ld2lane:
5772; CHECK-GI:       ; %bb.0:
5773; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
5774; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
5775; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
5776; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
5777; CHECK-GI-NEXT:    str x8, [x1]
5778; CHECK-GI-NEXT:    ret
5779  %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, i64 0, ptr %A)
5780  %tmp = getelementptr i32, ptr %A, i64 %inc
5781  store ptr %tmp, ptr %ptr
5782  ret { <2 x i32>, <2 x i32> } %ld2
5783}
5784
5785declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0(<2 x i32>, <2 x i32>, i64, ptr) nounwind readonly
5786
5787
; Lane-0 ld2lane from %A into %B/%C, then %A advanced by two i64 elements is
; stored through %ptr. SD checks (default llc) expect the advance folded into
; the load as a post-index immediate of #16; GI checks (-global-isel=1) expect
; a separate `add x8, x0, #16` and an unindexed load.
5788define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
5789; CHECK-SD-LABEL: test_v2i64_post_imm_ld2lane:
5790; CHECK-SD:       ; %bb.0:
5791; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
5792; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
5793; CHECK-SD-NEXT:    ld2.d { v0, v1 }[0], [x0], #16
5794; CHECK-SD-NEXT:    str x0, [x1]
5795; CHECK-SD-NEXT:    ret
5796;
5797; CHECK-GI-LABEL: test_v2i64_post_imm_ld2lane:
5798; CHECK-GI:       ; %bb.0:
5799; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
5800; CHECK-GI-NEXT:    add x8, x0, #16
5801; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
5802; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
5803; CHECK-GI-NEXT:    str x8, [x1]
5804; CHECK-GI-NEXT:    ret
5805  %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, i64 0, ptr %A)
5806  %tmp = getelementptr i64, ptr %A, i32 2
5807  store ptr %tmp, ptr %ptr
5808  ret { <2 x i64>, <2 x i64> } %ld2
5809}
5810
; Lane-0 ld2lane from %A into %B/%C, then %A advanced by %inc i64 elements is
; stored through %ptr. SD checks (default llc) expect x2 scaled by `lsl #3`
; into x8 and used as the post-index register; GI checks (-global-isel=1)
; expect `add x8, x0, x2, lsl #3` and an unindexed load.
5811define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C) nounwind {
5812; CHECK-SD-LABEL: test_v2i64_post_reg_ld2lane:
5813; CHECK-SD:       ; %bb.0:
5814; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
5815; CHECK-SD-NEXT:    lsl x8, x2, #3
5816; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
5817; CHECK-SD-NEXT:    ld2.d { v0, v1 }[0], [x0], x8
5818; CHECK-SD-NEXT:    str x0, [x1]
5819; CHECK-SD-NEXT:    ret
5820;
5821; CHECK-GI-LABEL: test_v2i64_post_reg_ld2lane:
5822; CHECK-GI:       ; %bb.0:
5823; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
5824; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
5825; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
5826; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
5827; CHECK-GI-NEXT:    str x8, [x1]
5828; CHECK-GI-NEXT:    ret
5829  %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, i64 0, ptr %A)
5830  %tmp = getelementptr i64, ptr %A, i64 %inc
5831  store ptr %tmp, ptr %ptr
5832  ret { <2 x i64>, <2 x i64> } %ld2
5833}
5834
5835declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64>, <2 x i64>, i64, ptr) nounwind readonly
5836
5837
; Lane-0 ld2lane from %A into the single-element vectors %B/%C, then %A
; advanced by two i64 elements is stored through %ptr. SD checks (default llc)
; expect the advance folded into the load as a post-index immediate of #16; GI
; checks (-global-isel=1) expect a separate `add x8, x0, #16` and an unindexed
; load.
5838define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
5839; CHECK-SD-LABEL: test_v1i64_post_imm_ld2lane:
5840; CHECK-SD:       ; %bb.0:
5841; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
5842; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
5843; CHECK-SD-NEXT:    ld2.d { v0, v1 }[0], [x0], #16
5844; CHECK-SD-NEXT:    str x0, [x1]
5845; CHECK-SD-NEXT:    ret
5846;
5847; CHECK-GI-LABEL: test_v1i64_post_imm_ld2lane:
5848; CHECK-GI:       ; %bb.0:
5849; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
5850; CHECK-GI-NEXT:    add x8, x0, #16
5851; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
5852; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
5853; CHECK-GI-NEXT:    str x8, [x1]
5854; CHECK-GI-NEXT:    ret
5855  %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, i64 0, ptr %A)
5856  %tmp = getelementptr i64, ptr %A, i32 2
5857  store ptr %tmp, ptr %ptr
5858  ret { <1 x i64>, <1 x i64> } %ld2
5859}
5860
; Lane-0 ld2lane from %A into the single-element vectors %B/%C, then %A
; advanced by %inc i64 elements is stored through %ptr. SD checks (default llc)
; expect x2 scaled by `lsl #3` into x8 and used as the post-index register; GI
; checks (-global-isel=1) expect `add x8, x0, x2, lsl #3` and an unindexed load.
5861define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C) nounwind {
5862; CHECK-SD-LABEL: test_v1i64_post_reg_ld2lane:
5863; CHECK-SD:       ; %bb.0:
5864; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
5865; CHECK-SD-NEXT:    lsl x8, x2, #3
5866; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
5867; CHECK-SD-NEXT:    ld2.d { v0, v1 }[0], [x0], x8
5868; CHECK-SD-NEXT:    str x0, [x1]
5869; CHECK-SD-NEXT:    ret
5870;
5871; CHECK-GI-LABEL: test_v1i64_post_reg_ld2lane:
5872; CHECK-GI:       ; %bb.0:
5873; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
5874; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
5875; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
5876; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
5877; CHECK-GI-NEXT:    str x8, [x1]
5878; CHECK-GI-NEXT:    ret
5879  %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, i64 0, ptr %A)
5880  %tmp = getelementptr i64, ptr %A, i64 %inc
5881  store ptr %tmp, ptr %ptr
5882  ret { <1 x i64>, <1 x i64> } %ld2
5883}
5884
5885declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0(<1 x i64>, <1 x i64>, i64, ptr) nounwind readonly
5886
5887
; Lane-0 ld2lane from %A into %B/%C, then %A advanced by two float elements is
; stored through %ptr. SD checks (default llc) expect the advance folded into
; the load as a post-index immediate of #8; GI checks (-global-isel=1) expect a
; separate `add x8, x0, #8` and an unindexed load.
5888define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C) nounwind {
5889; CHECK-SD-LABEL: test_v4f32_post_imm_ld2lane:
5890; CHECK-SD:       ; %bb.0:
5891; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
5892; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
5893; CHECK-SD-NEXT:    ld2.s { v0, v1 }[0], [x0], #8
5894; CHECK-SD-NEXT:    str x0, [x1]
5895; CHECK-SD-NEXT:    ret
5896;
5897; CHECK-GI-LABEL: test_v4f32_post_imm_ld2lane:
5898; CHECK-GI:       ; %bb.0:
5899; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
5900; CHECK-GI-NEXT:    add x8, x0, #8
5901; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
5902; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
5903; CHECK-GI-NEXT:    str x8, [x1]
5904; CHECK-GI-NEXT:    ret
5905  %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0(<4 x float> %B, <4 x float> %C, i64 0, ptr %A)
5906  %tmp = getelementptr float, ptr %A, i32 2
5907  store ptr %tmp, ptr %ptr
5908  ret { <4 x float>, <4 x float> } %ld2
5909}
5910
; Lane-0 ld2lane from %A into %B/%C, then %A advanced by %inc float elements is
; stored through %ptr. SD checks (default llc) expect x2 scaled by `lsl #2`
; into x8 and used as the post-index register; GI checks (-global-isel=1)
; expect `add x8, x0, x2, lsl #2` and an unindexed load.
5911define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <4 x float> %B, <4 x float> %C) nounwind {
5912; CHECK-SD-LABEL: test_v4f32_post_reg_ld2lane:
5913; CHECK-SD:       ; %bb.0:
5914; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
5915; CHECK-SD-NEXT:    lsl x8, x2, #2
5916; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
5917; CHECK-SD-NEXT:    ld2.s { v0, v1 }[0], [x0], x8
5918; CHECK-SD-NEXT:    str x0, [x1]
5919; CHECK-SD-NEXT:    ret
5920;
5921; CHECK-GI-LABEL: test_v4f32_post_reg_ld2lane:
5922; CHECK-GI:       ; %bb.0:
5923; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
5924; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
5925; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
5926; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
5927; CHECK-GI-NEXT:    str x8, [x1]
5928; CHECK-GI-NEXT:    ret
5929  %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0(<4 x float> %B, <4 x float> %C, i64 0, ptr %A)
5930  %tmp = getelementptr float, ptr %A, i64 %inc
5931  store ptr %tmp, ptr %ptr
5932  ret { <4 x float>, <4 x float> } %ld2
5933}
5934
5935declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0(<4 x float>, <4 x float>, i64, ptr) nounwind readonly
5936
5937
; Lane-0 ld2lane from %A into the 64-bit vectors %B/%C, then %A advanced by two
; float elements is stored through %ptr. SD checks (default llc) expect the
; advance folded into the load as a post-index immediate of #8; GI checks
; (-global-isel=1) expect a separate `add x8, x0, #8` and an unindexed load.
5938define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C) nounwind {
5939; CHECK-SD-LABEL: test_v2f32_post_imm_ld2lane:
5940; CHECK-SD:       ; %bb.0:
5941; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
5942; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
5943; CHECK-SD-NEXT:    ld2.s { v0, v1 }[0], [x0], #8
5944; CHECK-SD-NEXT:    str x0, [x1]
5945; CHECK-SD-NEXT:    ret
5946;
5947; CHECK-GI-LABEL: test_v2f32_post_imm_ld2lane:
5948; CHECK-GI:       ; %bb.0:
5949; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
5950; CHECK-GI-NEXT:    add x8, x0, #8
5951; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
5952; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
5953; CHECK-GI-NEXT:    str x8, [x1]
5954; CHECK-GI-NEXT:    ret
5955  %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0(<2 x float> %B, <2 x float> %C, i64 0, ptr %A)
5956  %tmp = getelementptr float, ptr %A, i32 2
5957  store ptr %tmp, ptr %ptr
5958  ret { <2 x float>, <2 x float> } %ld2
5959}
5960
; Lane-0 ld2lane from %A into the 64-bit vectors %B/%C, then %A advanced by
; %inc float elements is stored through %ptr. SD checks (default llc) expect x2
; scaled by `lsl #2` into x8 and used as the post-index register; GI checks
; (-global-isel=1) expect `add x8, x0, x2, lsl #2` and an unindexed load.
5961define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x float> %B, <2 x float> %C) nounwind {
5962; CHECK-SD-LABEL: test_v2f32_post_reg_ld2lane:
5963; CHECK-SD:       ; %bb.0:
5964; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
5965; CHECK-SD-NEXT:    lsl x8, x2, #2
5966; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
5967; CHECK-SD-NEXT:    ld2.s { v0, v1 }[0], [x0], x8
5968; CHECK-SD-NEXT:    str x0, [x1]
5969; CHECK-SD-NEXT:    ret
5970;
5971; CHECK-GI-LABEL: test_v2f32_post_reg_ld2lane:
5972; CHECK-GI:       ; %bb.0:
5973; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
5974; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
5975; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
5976; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
5977; CHECK-GI-NEXT:    str x8, [x1]
5978; CHECK-GI-NEXT:    ret
5979  %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0(<2 x float> %B, <2 x float> %C, i64 0, ptr %A)
5980  %tmp = getelementptr float, ptr %A, i64 %inc
5981  store ptr %tmp, ptr %ptr
5982  ret { <2 x float>, <2 x float> } %ld2
5983}
5984
5985declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0(<2 x float>, <2 x float>, i64, ptr) nounwind readonly
5986
5987
; Lane-0 ld2lane from %A into %B/%C, then %A advanced by two double elements is
; stored through %ptr. SD checks (default llc) expect the advance folded into
; the load as a post-index immediate of #16; GI checks (-global-isel=1) expect
; a separate `add x8, x0, #16` and an unindexed load.
5988define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C) nounwind {
5989; CHECK-SD-LABEL: test_v2f64_post_imm_ld2lane:
5990; CHECK-SD:       ; %bb.0:
5991; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
5992; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
5993; CHECK-SD-NEXT:    ld2.d { v0, v1 }[0], [x0], #16
5994; CHECK-SD-NEXT:    str x0, [x1]
5995; CHECK-SD-NEXT:    ret
5996;
5997; CHECK-GI-LABEL: test_v2f64_post_imm_ld2lane:
5998; CHECK-GI:       ; %bb.0:
5999; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
6000; CHECK-GI-NEXT:    add x8, x0, #16
6001; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
6002; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
6003; CHECK-GI-NEXT:    str x8, [x1]
6004; CHECK-GI-NEXT:    ret
6005  %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0(<2 x double> %B, <2 x double> %C, i64 0, ptr %A)
6006  %tmp = getelementptr double, ptr %A, i32 2
6007  store ptr %tmp, ptr %ptr
6008  ret { <2 x double>, <2 x double> } %ld2
6009}
6010
; Lane-0 ld2lane from %A into %B/%C, then %A advanced by %inc double elements
; is stored through %ptr. SD checks (default llc) expect x2 scaled by `lsl #3`
; into x8 and used as the post-index register; GI checks (-global-isel=1)
; expect `add x8, x0, x2, lsl #3` and an unindexed load.
6011define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x double> %B, <2 x double> %C) nounwind {
6012; CHECK-SD-LABEL: test_v2f64_post_reg_ld2lane:
6013; CHECK-SD:       ; %bb.0:
6014; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
6015; CHECK-SD-NEXT:    lsl x8, x2, #3
6016; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
6017; CHECK-SD-NEXT:    ld2.d { v0, v1 }[0], [x0], x8
6018; CHECK-SD-NEXT:    str x0, [x1]
6019; CHECK-SD-NEXT:    ret
6020;
6021; CHECK-GI-LABEL: test_v2f64_post_reg_ld2lane:
6022; CHECK-GI:       ; %bb.0:
6023; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
6024; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
6025; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
6026; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
6027; CHECK-GI-NEXT:    str x8, [x1]
6028; CHECK-GI-NEXT:    ret
6029  %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0(<2 x double> %B, <2 x double> %C, i64 0, ptr %A)
6030  %tmp = getelementptr double, ptr %A, i64 %inc
6031  store ptr %tmp, ptr %ptr
6032  ret { <2 x double>, <2 x double> } %ld2
6033}
6034
6035declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0(<2 x double>, <2 x double>, i64, ptr) nounwind readonly
6036
6037
; Lane-0 ld2lane from %A into the single-element vectors %B/%C, then %A
; advanced by two double elements is stored through %ptr. SD checks (default
; llc) expect the advance folded into the load as a post-index immediate of
; #16; GI checks (-global-isel=1) expect a separate `add x8, x0, #16` and an
; unindexed load.
6038define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C) nounwind {
6039; CHECK-SD-LABEL: test_v1f64_post_imm_ld2lane:
6040; CHECK-SD:       ; %bb.0:
6041; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
6042; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
6043; CHECK-SD-NEXT:    ld2.d { v0, v1 }[0], [x0], #16
6044; CHECK-SD-NEXT:    str x0, [x1]
6045; CHECK-SD-NEXT:    ret
6046;
6047; CHECK-GI-LABEL: test_v1f64_post_imm_ld2lane:
6048; CHECK-GI:       ; %bb.0:
6049; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
6050; CHECK-GI-NEXT:    add x8, x0, #16
6051; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
6052; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
6053; CHECK-GI-NEXT:    str x8, [x1]
6054; CHECK-GI-NEXT:    ret
6055  %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0(<1 x double> %B, <1 x double> %C, i64 0, ptr %A)
6056  %tmp = getelementptr double, ptr %A, i32 2
6057  store ptr %tmp, ptr %ptr
6058  ret { <1 x double>, <1 x double> } %ld2
6059}
6060
; Lane-0 ld2lane from %A into the single-element vectors %B/%C, then %A
; advanced by %inc double elements is stored through %ptr. SD checks (default
; llc) expect x2 scaled by `lsl #3` into x8 and used as the post-index
; register; GI checks (-global-isel=1) expect `add x8, x0, x2, lsl #3` and an
; unindexed load.
6061define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <1 x double> %B, <1 x double> %C) nounwind {
6062; CHECK-SD-LABEL: test_v1f64_post_reg_ld2lane:
6063; CHECK-SD:       ; %bb.0:
6064; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
6065; CHECK-SD-NEXT:    lsl x8, x2, #3
6066; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
6067; CHECK-SD-NEXT:    ld2.d { v0, v1 }[0], [x0], x8
6068; CHECK-SD-NEXT:    str x0, [x1]
6069; CHECK-SD-NEXT:    ret
6070;
6071; CHECK-GI-LABEL: test_v1f64_post_reg_ld2lane:
6072; CHECK-GI:       ; %bb.0:
6073; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
6074; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
6075; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
6076; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
6077; CHECK-GI-NEXT:    str x8, [x1]
6078; CHECK-GI-NEXT:    ret
6079  %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0(<1 x double> %B, <1 x double> %C, i64 0, ptr %A)
6080  %tmp = getelementptr double, ptr %A, i64 %inc
6081  store ptr %tmp, ptr %ptr
6082  ret { <1 x double>, <1 x double> } %ld2
6083}
6084
6085declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0(<1 x double>, <1 x double>, i64, ptr) nounwind readonly
6086
6087
; Lane-0 ld3lane from %A into %B/%C/%D, then %A advanced by three i8 elements
; is stored through %ptr. SD checks (default llc) expect the advance folded
; into the load as a post-index immediate of #3; GI checks (-global-isel=1)
; expect a separate `add x8, x0, #3` and an unindexed load.
6088define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
6089; CHECK-SD-LABEL: test_v16i8_post_imm_ld3lane:
6090; CHECK-SD:       ; %bb.0:
6091; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
6092; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
6093; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
6094; CHECK-SD-NEXT:    ld3.b { v0, v1, v2 }[0], [x0], #3
6095; CHECK-SD-NEXT:    str x0, [x1]
6096; CHECK-SD-NEXT:    ret
6097;
6098; CHECK-GI-LABEL: test_v16i8_post_imm_ld3lane:
6099; CHECK-GI:       ; %bb.0:
6100; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
6101; CHECK-GI-NEXT:    add x8, x0, #3
6102; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
6103; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
6104; CHECK-GI-NEXT:    ld3.b { v0, v1, v2 }[0], [x0]
6105; CHECK-GI-NEXT:    str x8, [x1]
6106; CHECK-GI-NEXT:    ret
6107  %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, ptr %A)
6108  %tmp = getelementptr i8, ptr %A, i32 3
6109  store ptr %tmp, ptr %ptr
6110  ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
6111}
6112
; Lane-0 ld3lane from %A into %B/%C/%D, then %A advanced by %inc i8 elements is
; stored through %ptr. SD checks (default llc) expect x2 used directly as the
; post-index register; GI checks (-global-isel=1) expect a separate
; `add x8, x0, x2` and an unindexed load.
6113define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
6114; CHECK-SD-LABEL: test_v16i8_post_reg_ld3lane:
6115; CHECK-SD:       ; %bb.0:
6116; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
6117; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
6118; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
6119; CHECK-SD-NEXT:    ld3.b { v0, v1, v2 }[0], [x0], x2
6120; CHECK-SD-NEXT:    str x0, [x1]
6121; CHECK-SD-NEXT:    ret
6122;
6123; CHECK-GI-LABEL: test_v16i8_post_reg_ld3lane:
6124; CHECK-GI:       ; %bb.0:
6125; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
6126; CHECK-GI-NEXT:    add x8, x0, x2
6127; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
6128; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
6129; CHECK-GI-NEXT:    ld3.b { v0, v1, v2 }[0], [x0]
6130; CHECK-GI-NEXT:    str x8, [x1]
6131; CHECK-GI-NEXT:    ret
6132  %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, ptr %A)
6133  %tmp = getelementptr i8, ptr %A, i64 %inc
6134  store ptr %tmp, ptr %ptr
6135  ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
6136}
6137
6138declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, i64, ptr) nounwind readonly
6139
6140
; Lane-0 ld3lane from %A into the 64-bit vectors %B/%C/%D, then %A advanced by
; three i8 elements is stored through %ptr. SD checks (default llc) expect the
; advance folded into the load as a post-index immediate of #3; GI checks
; (-global-isel=1) expect a separate `add x8, x0, #3` and an unindexed load.
6141define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
6142; CHECK-SD-LABEL: test_v8i8_post_imm_ld3lane:
6143; CHECK-SD:       ; %bb.0:
6144; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
6145; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
6146; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
6147; CHECK-SD-NEXT:    ld3.b { v0, v1, v2 }[0], [x0], #3
6148; CHECK-SD-NEXT:    str x0, [x1]
6149; CHECK-SD-NEXT:    ret
6150;
6151; CHECK-GI-LABEL: test_v8i8_post_imm_ld3lane:
6152; CHECK-GI:       ; %bb.0:
6153; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
6154; CHECK-GI-NEXT:    add x8, x0, #3
6155; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
6156; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
6157; CHECK-GI-NEXT:    ld3.b { v0, v1, v2 }[0], [x0]
6158; CHECK-GI-NEXT:    str x8, [x1]
6159; CHECK-GI-NEXT:    ret
6160  %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, ptr %A)
6161  %tmp = getelementptr i8, ptr %A, i32 3
6162  store ptr %tmp, ptr %ptr
6163  ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
6164}
6165
; Lane-0 ld3lane from %A into the 64-bit vectors %B/%C/%D, then %A advanced by
; %inc i8 elements is stored through %ptr. SD checks (default llc) expect x2
; used directly as the post-index register; GI checks (-global-isel=1) expect a
; separate `add x8, x0, x2` and an unindexed load.
6166define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
6167; CHECK-SD-LABEL: test_v8i8_post_reg_ld3lane:
6168; CHECK-SD:       ; %bb.0:
6169; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
6170; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
6171; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
6172; CHECK-SD-NEXT:    ld3.b { v0, v1, v2 }[0], [x0], x2
6173; CHECK-SD-NEXT:    str x0, [x1]
6174; CHECK-SD-NEXT:    ret
6175;
6176; CHECK-GI-LABEL: test_v8i8_post_reg_ld3lane:
6177; CHECK-GI:       ; %bb.0:
6178; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
6179; CHECK-GI-NEXT:    add x8, x0, x2
6180; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
6181; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
6182; CHECK-GI-NEXT:    ld3.b { v0, v1, v2 }[0], [x0]
6183; CHECK-GI-NEXT:    str x8, [x1]
6184; CHECK-GI-NEXT:    ret
6185  %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, ptr %A)
6186  %tmp = getelementptr i8, ptr %A, i64 %inc
6187  store ptr %tmp, ptr %ptr
6188  ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
6189}
6190
6191declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, i64, ptr) nounwind readonly
6192
6193
; Lane-0 ld3lane from %A into %B/%C/%D, then %A advanced by three i16 elements
; is stored through %ptr. SD checks (default llc) expect the advance folded
; into the load as a post-index immediate of #6; GI checks (-global-isel=1)
; expect a separate `add x8, x0, #6` and an unindexed load.
6194define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
6195; CHECK-SD-LABEL: test_v8i16_post_imm_ld3lane:
6196; CHECK-SD:       ; %bb.0:
6197; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
6198; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
6199; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
6200; CHECK-SD-NEXT:    ld3.h { v0, v1, v2 }[0], [x0], #6
6201; CHECK-SD-NEXT:    str x0, [x1]
6202; CHECK-SD-NEXT:    ret
6203;
6204; CHECK-GI-LABEL: test_v8i16_post_imm_ld3lane:
6205; CHECK-GI:       ; %bb.0:
6206; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
6207; CHECK-GI-NEXT:    add x8, x0, #6
6208; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
6209; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
6210; CHECK-GI-NEXT:    ld3.h { v0, v1, v2 }[0], [x0]
6211; CHECK-GI-NEXT:    str x8, [x1]
6212; CHECK-GI-NEXT:    ret
6213  %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, ptr %A)
6214  %tmp = getelementptr i16, ptr %A, i32 3
6215  store ptr %tmp, ptr %ptr
6216  ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
6217}
6218
; Lane-0 ld3lane on <8 x i16> followed by a variable pointer bump.
; The IR loads through %A, then stores %A + %inc x i16 to %ptr.
; SelectionDAG is expected to scale x2 with lsl #1 and use the register
; post-indexed ld3.h; GlobalISel is expected to emit a separate add with a
; shifted operand and a plain ld3.h.
6219define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
6220; CHECK-SD-LABEL: test_v8i16_post_reg_ld3lane:
6221; CHECK-SD:       ; %bb.0:
6222; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
6223; CHECK-SD-NEXT:    lsl x8, x2, #1
6224; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
6225; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
6226; CHECK-SD-NEXT:    ld3.h { v0, v1, v2 }[0], [x0], x8
6227; CHECK-SD-NEXT:    str x0, [x1]
6228; CHECK-SD-NEXT:    ret
6229;
6230; CHECK-GI-LABEL: test_v8i16_post_reg_ld3lane:
6231; CHECK-GI:       ; %bb.0:
6232; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
6233; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
6234; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
6235; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
6236; CHECK-GI-NEXT:    ld3.h { v0, v1, v2 }[0], [x0]
6237; CHECK-GI-NEXT:    str x8, [x1]
6238; CHECK-GI-NEXT:    ret
6239  %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, ptr %A)
6240  %tmp = getelementptr i16, ptr %A, i64 %inc
6241  store ptr %tmp, ptr %ptr
6242  ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
6243}
6244
6245declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, i64, ptr) nounwind readonly
6246
6247
; Lane-0 ld3lane on <4 x i16> followed by a constant pointer bump.
; The IR loads through %A, then stores %A + 3 x i16 to %ptr.
; SelectionDAG is expected to fold the bump into a post-indexed ld3.h (#6);
; GlobalISel is expected to emit a separate add and a plain ld3.h.
6248define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
6249; CHECK-SD-LABEL: test_v4i16_post_imm_ld3lane:
6250; CHECK-SD:       ; %bb.0:
6251; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
6252; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
6253; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
6254; CHECK-SD-NEXT:    ld3.h { v0, v1, v2 }[0], [x0], #6
6255; CHECK-SD-NEXT:    str x0, [x1]
6256; CHECK-SD-NEXT:    ret
6257;
6258; CHECK-GI-LABEL: test_v4i16_post_imm_ld3lane:
6259; CHECK-GI:       ; %bb.0:
6260; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
6261; CHECK-GI-NEXT:    add x8, x0, #6
6262; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
6263; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
6264; CHECK-GI-NEXT:    ld3.h { v0, v1, v2 }[0], [x0]
6265; CHECK-GI-NEXT:    str x8, [x1]
6266; CHECK-GI-NEXT:    ret
6267  %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, ptr %A)
6268  %tmp = getelementptr i16, ptr %A, i32 3
6269  store ptr %tmp, ptr %ptr
6270  ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
6271}
6272
; Lane-0 ld3lane on <4 x i16> followed by a variable pointer bump.
; The IR loads through %A, then stores %A + %inc x i16 to %ptr.
; SelectionDAG is expected to scale x2 with lsl #1 and use the register
; post-indexed ld3.h; GlobalISel is expected to emit a separate add with a
; shifted operand and a plain ld3.h.
6273define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
6274; CHECK-SD-LABEL: test_v4i16_post_reg_ld3lane:
6275; CHECK-SD:       ; %bb.0:
6276; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
6277; CHECK-SD-NEXT:    lsl x8, x2, #1
6278; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
6279; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
6280; CHECK-SD-NEXT:    ld3.h { v0, v1, v2 }[0], [x0], x8
6281; CHECK-SD-NEXT:    str x0, [x1]
6282; CHECK-SD-NEXT:    ret
6283;
6284; CHECK-GI-LABEL: test_v4i16_post_reg_ld3lane:
6285; CHECK-GI:       ; %bb.0:
6286; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
6287; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
6288; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
6289; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
6290; CHECK-GI-NEXT:    ld3.h { v0, v1, v2 }[0], [x0]
6291; CHECK-GI-NEXT:    str x8, [x1]
6292; CHECK-GI-NEXT:    ret
6293  %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, ptr %A)
6294  %tmp = getelementptr i16, ptr %A, i64 %inc
6295  store ptr %tmp, ptr %ptr
6296  ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
6297}
6298
6299declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>, i64, ptr) nounwind readonly
6300
6301
; Lane-0 ld3lane on <4 x i32> followed by a constant pointer bump.
; The IR loads through %A, then stores %A + 3 x i32 to %ptr.
; SelectionDAG is expected to fold the bump into a post-indexed ld3.s (#12);
; GlobalISel is expected to emit a separate add and a plain ld3.s.
6302define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
6303; CHECK-SD-LABEL: test_v4i32_post_imm_ld3lane:
6304; CHECK-SD:       ; %bb.0:
6305; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
6306; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
6307; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
6308; CHECK-SD-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], #12
6309; CHECK-SD-NEXT:    str x0, [x1]
6310; CHECK-SD-NEXT:    ret
6311;
6312; CHECK-GI-LABEL: test_v4i32_post_imm_ld3lane:
6313; CHECK-GI:       ; %bb.0:
6314; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
6315; CHECK-GI-NEXT:    add x8, x0, #12
6316; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
6317; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
6318; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
6319; CHECK-GI-NEXT:    str x8, [x1]
6320; CHECK-GI-NEXT:    ret
6321  %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, ptr %A)
6322  %tmp = getelementptr i32, ptr %A, i32 3
6323  store ptr %tmp, ptr %ptr
6324  ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
6325}
6326
; Lane-0 ld3lane on <4 x i32> followed by a variable pointer bump.
; The IR loads through %A, then stores %A + %inc x i32 to %ptr.
; SelectionDAG is expected to scale x2 with lsl #2 and use the register
; post-indexed ld3.s; GlobalISel is expected to emit a separate add with a
; shifted operand and a plain ld3.s.
6327define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
6328; CHECK-SD-LABEL: test_v4i32_post_reg_ld3lane:
6329; CHECK-SD:       ; %bb.0:
6330; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
6331; CHECK-SD-NEXT:    lsl x8, x2, #2
6332; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
6333; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
6334; CHECK-SD-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], x8
6335; CHECK-SD-NEXT:    str x0, [x1]
6336; CHECK-SD-NEXT:    ret
6337;
6338; CHECK-GI-LABEL: test_v4i32_post_reg_ld3lane:
6339; CHECK-GI:       ; %bb.0:
6340; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
6341; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
6342; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
6343; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
6344; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
6345; CHECK-GI-NEXT:    str x8, [x1]
6346; CHECK-GI-NEXT:    ret
6347  %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, ptr %A)
6348  %tmp = getelementptr i32, ptr %A, i64 %inc
6349  store ptr %tmp, ptr %ptr
6350  ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
6351}
6352
6353declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, i64, ptr) nounwind readonly
6354
6355
; Lane-0 ld3lane on <2 x i32> followed by a constant pointer bump.
; The IR loads through %A, then stores %A + 3 x i32 to %ptr.
; SelectionDAG is expected to fold the bump into a post-indexed ld3.s (#12);
; GlobalISel is expected to emit a separate add and a plain ld3.s.
6356define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
6357; CHECK-SD-LABEL: test_v2i32_post_imm_ld3lane:
6358; CHECK-SD:       ; %bb.0:
6359; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
6360; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
6361; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
6362; CHECK-SD-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], #12
6363; CHECK-SD-NEXT:    str x0, [x1]
6364; CHECK-SD-NEXT:    ret
6365;
6366; CHECK-GI-LABEL: test_v2i32_post_imm_ld3lane:
6367; CHECK-GI:       ; %bb.0:
6368; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
6369; CHECK-GI-NEXT:    add x8, x0, #12
6370; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
6371; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
6372; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
6373; CHECK-GI-NEXT:    str x8, [x1]
6374; CHECK-GI-NEXT:    ret
6375  %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, ptr %A)
6376  %tmp = getelementptr i32, ptr %A, i32 3
6377  store ptr %tmp, ptr %ptr
6378  ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
6379}
6380
; Lane-0 ld3lane on <2 x i32> followed by a variable pointer bump.
; The IR loads through %A, then stores %A + %inc x i32 to %ptr.
; SelectionDAG is expected to scale x2 with lsl #2 and use the register
; post-indexed ld3.s; GlobalISel is expected to emit a separate add with a
; shifted operand and a plain ld3.s.
6381define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
6382; CHECK-SD-LABEL: test_v2i32_post_reg_ld3lane:
6383; CHECK-SD:       ; %bb.0:
6384; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
6385; CHECK-SD-NEXT:    lsl x8, x2, #2
6386; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
6387; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
6388; CHECK-SD-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], x8
6389; CHECK-SD-NEXT:    str x0, [x1]
6390; CHECK-SD-NEXT:    ret
6391;
6392; CHECK-GI-LABEL: test_v2i32_post_reg_ld3lane:
6393; CHECK-GI:       ; %bb.0:
6394; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
6395; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
6396; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
6397; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
6398; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
6399; CHECK-GI-NEXT:    str x8, [x1]
6400; CHECK-GI-NEXT:    ret
6401  %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, ptr %A)
6402  %tmp = getelementptr i32, ptr %A, i64 %inc
6403  store ptr %tmp, ptr %ptr
6404  ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
6405}
6406
6407declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, i64, ptr) nounwind readonly
6408
6409
; Lane-0 ld3lane on <2 x i64> followed by a constant pointer bump.
; The IR loads through %A, then stores %A + 3 x i64 to %ptr.
; SelectionDAG is expected to fold the bump into a post-indexed ld3.d (#24);
; GlobalISel is expected to emit a separate add and a plain ld3.d.
6410define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
6411; CHECK-SD-LABEL: test_v2i64_post_imm_ld3lane:
6412; CHECK-SD:       ; %bb.0:
6413; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
6414; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
6415; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
6416; CHECK-SD-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], #24
6417; CHECK-SD-NEXT:    str x0, [x1]
6418; CHECK-SD-NEXT:    ret
6419;
6420; CHECK-GI-LABEL: test_v2i64_post_imm_ld3lane:
6421; CHECK-GI:       ; %bb.0:
6422; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
6423; CHECK-GI-NEXT:    add x8, x0, #24
6424; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
6425; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
6426; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
6427; CHECK-GI-NEXT:    str x8, [x1]
6428; CHECK-GI-NEXT:    ret
6429  %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, ptr %A)
6430  %tmp = getelementptr i64, ptr %A, i32 3
6431  store ptr %tmp, ptr %ptr
6432  ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
6433}
6434
; Lane-0 ld3lane on <2 x i64> followed by a variable pointer bump.
; The IR loads through %A, then stores %A + %inc x i64 to %ptr.
; SelectionDAG is expected to scale x2 with lsl #3 and use the register
; post-indexed ld3.d; GlobalISel is expected to emit a separate add with a
; shifted operand and a plain ld3.d.
6435define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
6436; CHECK-SD-LABEL: test_v2i64_post_reg_ld3lane:
6437; CHECK-SD:       ; %bb.0:
6438; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
6439; CHECK-SD-NEXT:    lsl x8, x2, #3
6440; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
6441; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
6442; CHECK-SD-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], x8
6443; CHECK-SD-NEXT:    str x0, [x1]
6444; CHECK-SD-NEXT:    ret
6445;
6446; CHECK-GI-LABEL: test_v2i64_post_reg_ld3lane:
6447; CHECK-GI:       ; %bb.0:
6448; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
6449; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
6450; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
6451; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
6452; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
6453; CHECK-GI-NEXT:    str x8, [x1]
6454; CHECK-GI-NEXT:    ret
6455  %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, ptr %A)
6456  %tmp = getelementptr i64, ptr %A, i64 %inc
6457  store ptr %tmp, ptr %ptr
6458  ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
6459}
6460
6461declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, i64, ptr) nounwind readonly
6462
6463
; Lane-0 ld3lane on <1 x i64> followed by a constant pointer bump.
; The IR loads through %A, then stores %A + 3 x i64 to %ptr.
; SelectionDAG is expected to fold the bump into a post-indexed ld3.d (#24);
; GlobalISel is expected to emit a separate add and a plain ld3.d.
6464define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
6465; CHECK-SD-LABEL: test_v1i64_post_imm_ld3lane:
6466; CHECK-SD:       ; %bb.0:
6467; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
6468; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
6469; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
6470; CHECK-SD-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], #24
6471; CHECK-SD-NEXT:    str x0, [x1]
6472; CHECK-SD-NEXT:    ret
6473;
6474; CHECK-GI-LABEL: test_v1i64_post_imm_ld3lane:
6475; CHECK-GI:       ; %bb.0:
6476; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
6477; CHECK-GI-NEXT:    add x8, x0, #24
6478; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
6479; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
6480; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
6481; CHECK-GI-NEXT:    str x8, [x1]
6482; CHECK-GI-NEXT:    ret
6483  %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, ptr %A)
6484  %tmp = getelementptr i64, ptr %A, i32 3
6485  store ptr %tmp, ptr %ptr
6486  ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
6487}
6488
; Lane-0 ld3lane on <1 x i64> followed by a variable pointer bump.
; The IR loads through %A, then stores %A + %inc x i64 to %ptr.
; SelectionDAG is expected to scale x2 with lsl #3 and use the register
; post-indexed ld3.d; GlobalISel is expected to emit a separate add with a
; shifted operand and a plain ld3.d.
6489define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
6490; CHECK-SD-LABEL: test_v1i64_post_reg_ld3lane:
6491; CHECK-SD:       ; %bb.0:
6492; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
6493; CHECK-SD-NEXT:    lsl x8, x2, #3
6494; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
6495; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
6496; CHECK-SD-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], x8
6497; CHECK-SD-NEXT:    str x0, [x1]
6498; CHECK-SD-NEXT:    ret
6499;
6500; CHECK-GI-LABEL: test_v1i64_post_reg_ld3lane:
6501; CHECK-GI:       ; %bb.0:
6502; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
6503; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
6504; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
6505; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
6506; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
6507; CHECK-GI-NEXT:    str x8, [x1]
6508; CHECK-GI-NEXT:    ret
6509  %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, ptr %A)
6510  %tmp = getelementptr i64, ptr %A, i64 %inc
6511  store ptr %tmp, ptr %ptr
6512  ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
6513}
6514
6515declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>, i64, ptr) nounwind readonly
6516
6517
; Lane-0 ld3lane on <4 x float> followed by a constant pointer bump.
; The IR loads through %A, then stores %A + 3 x float to %ptr.
; SelectionDAG is expected to fold the bump into a post-indexed ld3.s (#12);
; GlobalISel is expected to emit a separate add and a plain ld3.s.
6518define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
6519; CHECK-SD-LABEL: test_v4f32_post_imm_ld3lane:
6520; CHECK-SD:       ; %bb.0:
6521; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
6522; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
6523; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
6524; CHECK-SD-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], #12
6525; CHECK-SD-NEXT:    str x0, [x1]
6526; CHECK-SD-NEXT:    ret
6527;
6528; CHECK-GI-LABEL: test_v4f32_post_imm_ld3lane:
6529; CHECK-GI:       ; %bb.0:
6530; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
6531; CHECK-GI-NEXT:    add x8, x0, #12
6532; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
6533; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
6534; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
6535; CHECK-GI-NEXT:    str x8, [x1]
6536; CHECK-GI-NEXT:    ret
6537  %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, ptr %A)
6538  %tmp = getelementptr float, ptr %A, i32 3
6539  store ptr %tmp, ptr %ptr
6540  ret { <4 x float>, <4 x float>, <4 x float> } %ld3
6541}
6542
; Lane-0 ld3lane on <4 x float> followed by a variable pointer bump.
; The IR loads through %A, then stores %A + %inc x float to %ptr.
; SelectionDAG is expected to scale x2 with lsl #2 and use the register
; post-indexed ld3.s; GlobalISel is expected to emit a separate add with a
; shifted operand and a plain ld3.s.
6543define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
6544; CHECK-SD-LABEL: test_v4f32_post_reg_ld3lane:
6545; CHECK-SD:       ; %bb.0:
6546; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
6547; CHECK-SD-NEXT:    lsl x8, x2, #2
6548; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
6549; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
6550; CHECK-SD-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], x8
6551; CHECK-SD-NEXT:    str x0, [x1]
6552; CHECK-SD-NEXT:    ret
6553;
6554; CHECK-GI-LABEL: test_v4f32_post_reg_ld3lane:
6555; CHECK-GI:       ; %bb.0:
6556; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
6557; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
6558; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
6559; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
6560; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
6561; CHECK-GI-NEXT:    str x8, [x1]
6562; CHECK-GI-NEXT:    ret
6563  %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, ptr %A)
6564  %tmp = getelementptr float, ptr %A, i64 %inc
6565  store ptr %tmp, ptr %ptr
6566  ret { <4 x float>, <4 x float>, <4 x float> } %ld3
6567}
6568
6569declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0(<4 x float>, <4 x float>, <4 x float>, i64, ptr) nounwind readonly
6570
6571
; Lane-0 ld3lane on <2 x float> followed by a constant pointer bump.
; The IR loads through %A, then stores %A + 3 x float to %ptr.
; SelectionDAG is expected to fold the bump into a post-indexed ld3.s (#12);
; GlobalISel is expected to emit a separate add and a plain ld3.s.
6572define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
6573; CHECK-SD-LABEL: test_v2f32_post_imm_ld3lane:
6574; CHECK-SD:       ; %bb.0:
6575; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
6576; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
6577; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
6578; CHECK-SD-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], #12
6579; CHECK-SD-NEXT:    str x0, [x1]
6580; CHECK-SD-NEXT:    ret
6581;
6582; CHECK-GI-LABEL: test_v2f32_post_imm_ld3lane:
6583; CHECK-GI:       ; %bb.0:
6584; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
6585; CHECK-GI-NEXT:    add x8, x0, #12
6586; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
6587; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
6588; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
6589; CHECK-GI-NEXT:    str x8, [x1]
6590; CHECK-GI-NEXT:    ret
6591  %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, ptr %A)
6592  %tmp = getelementptr float, ptr %A, i32 3
6593  store ptr %tmp, ptr %ptr
6594  ret { <2 x float>, <2 x float>, <2 x float> } %ld3
6595}
6596
; Lane-0 ld3lane on <2 x float> followed by a variable pointer bump.
; The IR loads through %A, then stores %A + %inc x float to %ptr.
; SelectionDAG is expected to scale x2 with lsl #2 and use the register
; post-indexed ld3.s; GlobalISel is expected to emit a separate add with a
; shifted operand and a plain ld3.s.
6597define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
6598; CHECK-SD-LABEL: test_v2f32_post_reg_ld3lane:
6599; CHECK-SD:       ; %bb.0:
6600; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
6601; CHECK-SD-NEXT:    lsl x8, x2, #2
6602; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
6603; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
6604; CHECK-SD-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], x8
6605; CHECK-SD-NEXT:    str x0, [x1]
6606; CHECK-SD-NEXT:    ret
6607;
6608; CHECK-GI-LABEL: test_v2f32_post_reg_ld3lane:
6609; CHECK-GI:       ; %bb.0:
6610; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
6611; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
6612; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
6613; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
6614; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
6615; CHECK-GI-NEXT:    str x8, [x1]
6616; CHECK-GI-NEXT:    ret
6617  %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, ptr %A)
6618  %tmp = getelementptr float, ptr %A, i64 %inc
6619  store ptr %tmp, ptr %ptr
6620  ret { <2 x float>, <2 x float>, <2 x float> } %ld3
6621}
6622
6623declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0(<2 x float>, <2 x float>, <2 x float>, i64, ptr) nounwind readonly
6624
6625
; Lane-0 ld3lane on <2 x double> followed by a constant pointer bump.
; The IR loads through %A, then stores %A + 3 x double to %ptr.
; SelectionDAG is expected to fold the bump into a post-indexed ld3.d (#24);
; GlobalISel is expected to emit a separate add and a plain ld3.d.
6626define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
6627; CHECK-SD-LABEL: test_v2f64_post_imm_ld3lane:
6628; CHECK-SD:       ; %bb.0:
6629; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
6630; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
6631; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
6632; CHECK-SD-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], #24
6633; CHECK-SD-NEXT:    str x0, [x1]
6634; CHECK-SD-NEXT:    ret
6635;
6636; CHECK-GI-LABEL: test_v2f64_post_imm_ld3lane:
6637; CHECK-GI:       ; %bb.0:
6638; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
6639; CHECK-GI-NEXT:    add x8, x0, #24
6640; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
6641; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
6642; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
6643; CHECK-GI-NEXT:    str x8, [x1]
6644; CHECK-GI-NEXT:    ret
6645  %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, ptr %A)
6646  %tmp = getelementptr double, ptr %A, i32 3
6647  store ptr %tmp, ptr %ptr
6648  ret { <2 x double>, <2 x double>, <2 x double> } %ld3
6649}
6650
; Lane-0 ld3lane on <2 x double> followed by a variable pointer bump.
; The IR loads through %A, then stores %A + %inc x double to %ptr.
; SelectionDAG is expected to scale x2 with lsl #3 and use the register
; post-indexed ld3.d; GlobalISel is expected to emit a separate add with a
; shifted operand and a plain ld3.d.
6651define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
6652; CHECK-SD-LABEL: test_v2f64_post_reg_ld3lane:
6653; CHECK-SD:       ; %bb.0:
6654; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
6655; CHECK-SD-NEXT:    lsl x8, x2, #3
6656; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
6657; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
6658; CHECK-SD-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], x8
6659; CHECK-SD-NEXT:    str x0, [x1]
6660; CHECK-SD-NEXT:    ret
6661;
6662; CHECK-GI-LABEL: test_v2f64_post_reg_ld3lane:
6663; CHECK-GI:       ; %bb.0:
6664; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
6665; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
6666; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
6667; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
6668; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
6669; CHECK-GI-NEXT:    str x8, [x1]
6670; CHECK-GI-NEXT:    ret
6671  %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, ptr %A)
6672  %tmp = getelementptr double, ptr %A, i64 %inc
6673  store ptr %tmp, ptr %ptr
6674  ret { <2 x double>, <2 x double>, <2 x double> } %ld3
6675}
6676
6677declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0(<2 x double>, <2 x double>, <2 x double>, i64, ptr) nounwind readonly
6678
6679
; Lane-0 ld3lane on <1 x double> followed by a constant pointer bump.
; The IR loads through %A, then stores %A + 3 x double to %ptr.
; SelectionDAG is expected to fold the bump into a post-indexed ld3.d (#24);
; GlobalISel is expected to emit a separate add and a plain ld3.d.
6680define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
6681; CHECK-SD-LABEL: test_v1f64_post_imm_ld3lane:
6682; CHECK-SD:       ; %bb.0:
6683; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
6684; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
6685; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
6686; CHECK-SD-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], #24
6687; CHECK-SD-NEXT:    str x0, [x1]
6688; CHECK-SD-NEXT:    ret
6689;
6690; CHECK-GI-LABEL: test_v1f64_post_imm_ld3lane:
6691; CHECK-GI:       ; %bb.0:
6692; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
6693; CHECK-GI-NEXT:    add x8, x0, #24
6694; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
6695; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
6696; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
6697; CHECK-GI-NEXT:    str x8, [x1]
6698; CHECK-GI-NEXT:    ret
6699  %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, ptr %A)
6700  %tmp = getelementptr double, ptr %A, i32 3
6701  store ptr %tmp, ptr %ptr
6702  ret { <1 x double>, <1 x double>, <1 x double> } %ld3
6703}
6704
; Lane-0 ld3lane on <1 x double> followed by a variable pointer bump.
; The IR loads through %A, then stores %A + %inc x double to %ptr.
; SelectionDAG is expected to scale x2 with lsl #3 and use the register
; post-indexed ld3.d; GlobalISel is expected to emit a separate add with a
; shifted operand and a plain ld3.d.
6705define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
6706; CHECK-SD-LABEL: test_v1f64_post_reg_ld3lane:
6707; CHECK-SD:       ; %bb.0:
6708; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
6709; CHECK-SD-NEXT:    lsl x8, x2, #3
6710; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
6711; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
6712; CHECK-SD-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], x8
6713; CHECK-SD-NEXT:    str x0, [x1]
6714; CHECK-SD-NEXT:    ret
6715;
6716; CHECK-GI-LABEL: test_v1f64_post_reg_ld3lane:
6717; CHECK-GI:       ; %bb.0:
6718; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
6719; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
6720; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
6721; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
6722; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
6723; CHECK-GI-NEXT:    str x8, [x1]
6724; CHECK-GI-NEXT:    ret
6725  %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, ptr %A)
6726  %tmp = getelementptr double, ptr %A, i64 %inc
6727  store ptr %tmp, ptr %ptr
6728  ret { <1 x double>, <1 x double>, <1 x double> } %ld3
6729}
6730
6731declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0(<1 x double>, <1 x double>, <1 x double>, i64, ptr) nounwind readonly
6732
6733
; Lane-0 ld4lane on <16 x i8> followed by a constant pointer bump.
; The IR loads through %A, then stores %A + 4 x i8 to %ptr.
; SelectionDAG is expected to fold the bump into a post-indexed ld4.b (#4);
; GlobalISel is expected to emit a separate add and a plain ld4.b.
6734define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
6735; CHECK-SD-LABEL: test_v16i8_post_imm_ld4lane:
6736; CHECK-SD:       ; %bb.0:
6737; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6738; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6739; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6740; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6741; CHECK-SD-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0], #4
6742; CHECK-SD-NEXT:    str x0, [x1]
6743; CHECK-SD-NEXT:    ret
6744;
6745; CHECK-GI-LABEL: test_v16i8_post_imm_ld4lane:
6746; CHECK-GI:       ; %bb.0:
6747; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6748; CHECK-GI-NEXT:    add x8, x0, #4
6749; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6750; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6751; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6752; CHECK-GI-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0]
6753; CHECK-GI-NEXT:    str x8, [x1]
6754; CHECK-GI-NEXT:    ret
6755  %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, ptr %A)
6756  %tmp = getelementptr i8, ptr %A, i32 4
6757  store ptr %tmp, ptr %ptr
6758  ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
6759}
6760
; Lane-0 ld4lane on <16 x i8> followed by a variable pointer bump.
; The IR loads through %A, then stores %A + %inc x i8 to %ptr.
; SelectionDAG is expected to use x2 directly (no scaling shift) as the
; register post-index of ld4.b; GlobalISel is expected to emit a separate
; add and a plain ld4.b.
6761define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
6762; CHECK-SD-LABEL: test_v16i8_post_reg_ld4lane:
6763; CHECK-SD:       ; %bb.0:
6764; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6765; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6766; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6767; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6768; CHECK-SD-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0], x2
6769; CHECK-SD-NEXT:    str x0, [x1]
6770; CHECK-SD-NEXT:    ret
6771;
6772; CHECK-GI-LABEL: test_v16i8_post_reg_ld4lane:
6773; CHECK-GI:       ; %bb.0:
6774; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6775; CHECK-GI-NEXT:    add x8, x0, x2
6776; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6777; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6778; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6779; CHECK-GI-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0]
6780; CHECK-GI-NEXT:    str x8, [x1]
6781; CHECK-GI-NEXT:    ret
6782  %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, ptr %A)
6783  %tmp = getelementptr i8, ptr %A, i64 %inc
6784  store ptr %tmp, ptr %ptr
6785  ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
6786}
6787
6788declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, ptr) nounwind readonly
6789
6790
; Post-increment by immediate for the ld4lane intrinsic on <8 x i8>: one
; element is loaded into lane 0 of each of %B..%E from %A, and %A advanced by
; 4 x i8 (4 bytes) is stored to %ptr.
; SD checks (llc without -global-isel) expect the increment folded into a
; post-indexed "ld4.b ..., [x0], #4"; GI checks (llc -global-isel=1) expect a
; separate "add x8, x0, #4" followed by a plain "[x0]" load.
6791define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
6792; CHECK-SD-LABEL: test_v8i8_post_imm_ld4lane:
6793; CHECK-SD:       ; %bb.0:
6794; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6795; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6796; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6797; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6798; CHECK-SD-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0], #4
6799; CHECK-SD-NEXT:    str x0, [x1]
6800; CHECK-SD-NEXT:    ret
6801;
6802; CHECK-GI-LABEL: test_v8i8_post_imm_ld4lane:
6803; CHECK-GI:       ; %bb.0:
6804; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6805; CHECK-GI-NEXT:    add x8, x0, #4
6806; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6807; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6808; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6809; CHECK-GI-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0]
6810; CHECK-GI-NEXT:    str x8, [x1]
6811; CHECK-GI-NEXT:    ret
6812  %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, ptr %A)
6813  %tmp = getelementptr i8, ptr %A, i32 4
6814  store ptr %tmp, ptr %ptr
6815  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
6816}
6817
; Post-increment by register for the ld4lane intrinsic on <8 x i8>: one
; element is loaded into lane 0 of each of %B..%E from %A, and %A advanced by
; %inc i8 elements is stored to %ptr.
; SD checks (llc without -global-isel) expect a post-indexed
; "ld4.b ..., [x0], x2"; GI checks (llc -global-isel=1) expect a separate
; "add x8, x0, x2" followed by a plain "[x0]" load.
6818define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
6819; CHECK-SD-LABEL: test_v8i8_post_reg_ld4lane:
6820; CHECK-SD:       ; %bb.0:
6821; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6822; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6823; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6824; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6825; CHECK-SD-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0], x2
6826; CHECK-SD-NEXT:    str x0, [x1]
6827; CHECK-SD-NEXT:    ret
6828;
6829; CHECK-GI-LABEL: test_v8i8_post_reg_ld4lane:
6830; CHECK-GI:       ; %bb.0:
6831; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6832; CHECK-GI-NEXT:    add x8, x0, x2
6833; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6834; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6835; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6836; CHECK-GI-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0]
6837; CHECK-GI-NEXT:    str x8, [x1]
6838; CHECK-GI-NEXT:    ret
6839  %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, ptr %A)
6840  %tmp = getelementptr i8, ptr %A, i64 %inc
6841  store ptr %tmp, ptr %ptr
6842  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
6843}
6844
6845declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i64, ptr) nounwind readonly
6846
6847
; Post-increment by immediate for the ld4lane intrinsic on <8 x i16>: one
; element is loaded into lane 0 of each of %B..%E from %A, and %A advanced by
; 4 x i16 (8 bytes) is stored to %ptr.
; SD checks (llc without -global-isel) expect the increment folded into a
; post-indexed "ld4.h ..., [x0], #8"; GI checks (llc -global-isel=1) expect a
; separate "add x8, x0, #8" followed by a plain "[x0]" load.
6848define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
6849; CHECK-SD-LABEL: test_v8i16_post_imm_ld4lane:
6850; CHECK-SD:       ; %bb.0:
6851; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6852; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6853; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6854; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6855; CHECK-SD-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0], #8
6856; CHECK-SD-NEXT:    str x0, [x1]
6857; CHECK-SD-NEXT:    ret
6858;
6859; CHECK-GI-LABEL: test_v8i16_post_imm_ld4lane:
6860; CHECK-GI:       ; %bb.0:
6861; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6862; CHECK-GI-NEXT:    add x8, x0, #8
6863; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6864; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6865; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6866; CHECK-GI-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0]
6867; CHECK-GI-NEXT:    str x8, [x1]
6868; CHECK-GI-NEXT:    ret
6869  %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, ptr %A)
6870  %tmp = getelementptr i16, ptr %A, i32 4
6871  store ptr %tmp, ptr %ptr
6872  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
6873}
6874
; Post-increment by register for the ld4lane intrinsic on <8 x i16>: one
; element is loaded into lane 0 of each of %B..%E from %A, and %A advanced by
; %inc i16 elements is stored to %ptr.
; SD checks (llc without -global-isel) expect %inc scaled to bytes with
; "lsl x8, x2, #1" and then a post-indexed "ld4.h ..., [x0], x8"; GI checks
; (llc -global-isel=1) expect "add x8, x0, x2, lsl #1" followed by a plain
; "[x0]" load.
6875define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
6876; CHECK-SD-LABEL: test_v8i16_post_reg_ld4lane:
6877; CHECK-SD:       ; %bb.0:
6878; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6879; CHECK-SD-NEXT:    lsl x8, x2, #1
6880; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6881; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6882; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6883; CHECK-SD-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0], x8
6884; CHECK-SD-NEXT:    str x0, [x1]
6885; CHECK-SD-NEXT:    ret
6886;
6887; CHECK-GI-LABEL: test_v8i16_post_reg_ld4lane:
6888; CHECK-GI:       ; %bb.0:
6889; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6890; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
6891; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6892; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6893; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6894; CHECK-GI-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0]
6895; CHECK-GI-NEXT:    str x8, [x1]
6896; CHECK-GI-NEXT:    ret
6897  %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, ptr %A)
6898  %tmp = getelementptr i16, ptr %A, i64 %inc
6899  store ptr %tmp, ptr %ptr
6900  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
6901}
6902
6903declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, ptr) nounwind readonly
6904
6905
; Post-increment by immediate for the ld4lane intrinsic on <4 x i16>: one
; element is loaded into lane 0 of each of %B..%E from %A, and %A advanced by
; 4 x i16 (8 bytes) is stored to %ptr.
; SD checks (llc without -global-isel) expect the increment folded into a
; post-indexed "ld4.h ..., [x0], #8"; GI checks (llc -global-isel=1) expect a
; separate "add x8, x0, #8" followed by a plain "[x0]" load.
6906define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
6907; CHECK-SD-LABEL: test_v4i16_post_imm_ld4lane:
6908; CHECK-SD:       ; %bb.0:
6909; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6910; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6911; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6912; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6913; CHECK-SD-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0], #8
6914; CHECK-SD-NEXT:    str x0, [x1]
6915; CHECK-SD-NEXT:    ret
6916;
6917; CHECK-GI-LABEL: test_v4i16_post_imm_ld4lane:
6918; CHECK-GI:       ; %bb.0:
6919; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6920; CHECK-GI-NEXT:    add x8, x0, #8
6921; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6922; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6923; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6924; CHECK-GI-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0]
6925; CHECK-GI-NEXT:    str x8, [x1]
6926; CHECK-GI-NEXT:    ret
6927  %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, ptr %A)
6928  %tmp = getelementptr i16, ptr %A, i32 4
6929  store ptr %tmp, ptr %ptr
6930  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
6931}
6932
; Post-increment by register for the ld4lane intrinsic on <4 x i16>: one
; element is loaded into lane 0 of each of %B..%E from %A, and %A advanced by
; %inc i16 elements is stored to %ptr.
; SD checks (llc without -global-isel) expect %inc scaled to bytes with
; "lsl x8, x2, #1" and then a post-indexed "ld4.h ..., [x0], x8"; GI checks
; (llc -global-isel=1) expect "add x8, x0, x2, lsl #1" followed by a plain
; "[x0]" load.
6933define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
6934; CHECK-SD-LABEL: test_v4i16_post_reg_ld4lane:
6935; CHECK-SD:       ; %bb.0:
6936; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6937; CHECK-SD-NEXT:    lsl x8, x2, #1
6938; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6939; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6940; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6941; CHECK-SD-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0], x8
6942; CHECK-SD-NEXT:    str x0, [x1]
6943; CHECK-SD-NEXT:    ret
6944;
6945; CHECK-GI-LABEL: test_v4i16_post_reg_ld4lane:
6946; CHECK-GI:       ; %bb.0:
6947; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6948; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
6949; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6950; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6951; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6952; CHECK-GI-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0]
6953; CHECK-GI-NEXT:    str x8, [x1]
6954; CHECK-GI-NEXT:    ret
6955  %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, ptr %A)
6956  %tmp = getelementptr i16, ptr %A, i64 %inc
6957  store ptr %tmp, ptr %ptr
6958  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
6959}
6960
6961declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i64, ptr) nounwind readonly
6962
6963
; Post-increment by immediate for the ld4lane intrinsic on <4 x i32>: one
; element is loaded into lane 0 of each of %B..%E from %A, and %A advanced by
; 4 x i32 (16 bytes) is stored to %ptr.
; SD checks (llc without -global-isel) expect the increment folded into a
; post-indexed "ld4.s ..., [x0], #16"; GI checks (llc -global-isel=1) expect a
; separate "add x8, x0, #16" followed by a plain "[x0]" load.
6964define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
6965; CHECK-SD-LABEL: test_v4i32_post_imm_ld4lane:
6966; CHECK-SD:       ; %bb.0:
6967; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6968; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6969; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6970; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6971; CHECK-SD-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], #16
6972; CHECK-SD-NEXT:    str x0, [x1]
6973; CHECK-SD-NEXT:    ret
6974;
6975; CHECK-GI-LABEL: test_v4i32_post_imm_ld4lane:
6976; CHECK-GI:       ; %bb.0:
6977; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6978; CHECK-GI-NEXT:    add x8, x0, #16
6979; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6980; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6981; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6982; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
6983; CHECK-GI-NEXT:    str x8, [x1]
6984; CHECK-GI-NEXT:    ret
6985  %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, ptr %A)
6986  %tmp = getelementptr i32, ptr %A, i32 4
6987  store ptr %tmp, ptr %ptr
6988  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
6989}
6990
; Post-increment by register for the ld4lane intrinsic on <4 x i32>: one
; element is loaded into lane 0 of each of %B..%E from %A, and %A advanced by
; %inc i32 elements is stored to %ptr.
; SD checks (llc without -global-isel) expect %inc scaled to bytes with
; "lsl x8, x2, #2" and then a post-indexed "ld4.s ..., [x0], x8"; GI checks
; (llc -global-isel=1) expect "add x8, x0, x2, lsl #2" followed by a plain
; "[x0]" load.
6991define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
6992; CHECK-SD-LABEL: test_v4i32_post_reg_ld4lane:
6993; CHECK-SD:       ; %bb.0:
6994; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6995; CHECK-SD-NEXT:    lsl x8, x2, #2
6996; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6997; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6998; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
6999; CHECK-SD-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], x8
7000; CHECK-SD-NEXT:    str x0, [x1]
7001; CHECK-SD-NEXT:    ret
7002;
7003; CHECK-GI-LABEL: test_v4i32_post_reg_ld4lane:
7004; CHECK-GI:       ; %bb.0:
7005; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7006; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
7007; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7008; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7009; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7010; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
7011; CHECK-GI-NEXT:    str x8, [x1]
7012; CHECK-GI-NEXT:    ret
7013  %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, ptr %A)
7014  %tmp = getelementptr i32, ptr %A, i64 %inc
7015  store ptr %tmp, ptr %ptr
7016  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
7017}
7018
7019declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, ptr) nounwind readonly
7020
7021
; Post-increment by immediate for the ld4lane intrinsic on <2 x i32>: one
; element is loaded into lane 0 of each of %B..%E from %A, and %A advanced by
; 4 x i32 (16 bytes) is stored to %ptr.
; SD checks (llc without -global-isel) expect the increment folded into a
; post-indexed "ld4.s ..., [x0], #16"; GI checks (llc -global-isel=1) expect a
; separate "add x8, x0, #16" followed by a plain "[x0]" load.
7022define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
7023; CHECK-SD-LABEL: test_v2i32_post_imm_ld4lane:
7024; CHECK-SD:       ; %bb.0:
7025; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7026; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7027; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7028; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7029; CHECK-SD-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], #16
7030; CHECK-SD-NEXT:    str x0, [x1]
7031; CHECK-SD-NEXT:    ret
7032;
7033; CHECK-GI-LABEL: test_v2i32_post_imm_ld4lane:
7034; CHECK-GI:       ; %bb.0:
7035; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7036; CHECK-GI-NEXT:    add x8, x0, #16
7037; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7038; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7039; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7040; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
7041; CHECK-GI-NEXT:    str x8, [x1]
7042; CHECK-GI-NEXT:    ret
7043  %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, ptr %A)
7044  %tmp = getelementptr i32, ptr %A, i32 4
7045  store ptr %tmp, ptr %ptr
7046  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
7047}
7048
; Post-increment by register for the ld4lane intrinsic on <2 x i32>: one
; element is loaded into lane 0 of each of %B..%E from %A, and %A advanced by
; %inc i32 elements is stored to %ptr.
; SD checks (llc without -global-isel) expect %inc scaled to bytes with
; "lsl x8, x2, #2" and then a post-indexed "ld4.s ..., [x0], x8"; GI checks
; (llc -global-isel=1) expect "add x8, x0, x2, lsl #2" followed by a plain
; "[x0]" load.
7049define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
7050; CHECK-SD-LABEL: test_v2i32_post_reg_ld4lane:
7051; CHECK-SD:       ; %bb.0:
7052; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7053; CHECK-SD-NEXT:    lsl x8, x2, #2
7054; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7055; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7056; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7057; CHECK-SD-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], x8
7058; CHECK-SD-NEXT:    str x0, [x1]
7059; CHECK-SD-NEXT:    ret
7060;
7061; CHECK-GI-LABEL: test_v2i32_post_reg_ld4lane:
7062; CHECK-GI:       ; %bb.0:
7063; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7064; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
7065; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7066; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7067; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7068; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
7069; CHECK-GI-NEXT:    str x8, [x1]
7070; CHECK-GI-NEXT:    ret
7071  %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, ptr %A)
7072  %tmp = getelementptr i32, ptr %A, i64 %inc
7073  store ptr %tmp, ptr %ptr
7074  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
7075}
7076
7077declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i64, ptr) nounwind readonly
7078
7079
; Post-increment by immediate for the ld4lane intrinsic on <2 x i64>: one
; element is loaded into lane 0 of each of %B..%E from %A, and %A advanced by
; 4 x i64 (32 bytes) is stored to %ptr.
; SD checks (llc without -global-isel) expect the increment folded into a
; post-indexed "ld4.d ..., [x0], #32"; GI checks (llc -global-isel=1) expect a
; separate "add x8, x0, #32" followed by a plain "[x0]" load.
7080define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
7081; CHECK-SD-LABEL: test_v2i64_post_imm_ld4lane:
7082; CHECK-SD:       ; %bb.0:
7083; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7084; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7085; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7086; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7087; CHECK-SD-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], #32
7088; CHECK-SD-NEXT:    str x0, [x1]
7089; CHECK-SD-NEXT:    ret
7090;
7091; CHECK-GI-LABEL: test_v2i64_post_imm_ld4lane:
7092; CHECK-GI:       ; %bb.0:
7093; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7094; CHECK-GI-NEXT:    add x8, x0, #32
7095; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7096; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7097; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7098; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
7099; CHECK-GI-NEXT:    str x8, [x1]
7100; CHECK-GI-NEXT:    ret
7101  %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, ptr %A)
7102  %tmp = getelementptr i64, ptr %A, i32 4
7103  store ptr %tmp, ptr %ptr
7104  ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
7105}
7106
; Post-increment by register for the ld4lane intrinsic on <2 x i64>: one
; element is loaded into lane 0 of each of %B..%E from %A, and %A advanced by
; %inc i64 elements is stored to %ptr.
; SD checks (llc without -global-isel) expect %inc scaled to bytes with
; "lsl x8, x2, #3" and then a post-indexed "ld4.d ..., [x0], x8"; GI checks
; (llc -global-isel=1) expect "add x8, x0, x2, lsl #3" followed by a plain
; "[x0]" load.
7107define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
7108; CHECK-SD-LABEL: test_v2i64_post_reg_ld4lane:
7109; CHECK-SD:       ; %bb.0:
7110; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7111; CHECK-SD-NEXT:    lsl x8, x2, #3
7112; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7113; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7114; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7115; CHECK-SD-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], x8
7116; CHECK-SD-NEXT:    str x0, [x1]
7117; CHECK-SD-NEXT:    ret
7118;
7119; CHECK-GI-LABEL: test_v2i64_post_reg_ld4lane:
7120; CHECK-GI:       ; %bb.0:
7121; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7122; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
7123; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7124; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7125; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7126; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
7127; CHECK-GI-NEXT:    str x8, [x1]
7128; CHECK-GI-NEXT:    ret
7129  %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, ptr %A)
7130  %tmp = getelementptr i64, ptr %A, i64 %inc
7131  store ptr %tmp, ptr %ptr
7132  ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
7133}
7134
7135declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, ptr) nounwind readonly
7136
7137
; Post-increment by immediate for the ld4lane intrinsic on <1 x i64>: one
; element is loaded into lane 0 of each of %B..%E from %A, and %A advanced by
; 4 x i64 (32 bytes) is stored to %ptr.
; SD checks (llc without -global-isel) expect the increment folded into a
; post-indexed "ld4.d ..., [x0], #32"; GI checks (llc -global-isel=1) expect a
; separate "add x8, x0, #32" followed by a plain "[x0]" load.
7138define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
7139; CHECK-SD-LABEL: test_v1i64_post_imm_ld4lane:
7140; CHECK-SD:       ; %bb.0:
7141; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7142; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7143; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7144; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7145; CHECK-SD-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], #32
7146; CHECK-SD-NEXT:    str x0, [x1]
7147; CHECK-SD-NEXT:    ret
7148;
7149; CHECK-GI-LABEL: test_v1i64_post_imm_ld4lane:
7150; CHECK-GI:       ; %bb.0:
7151; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7152; CHECK-GI-NEXT:    add x8, x0, #32
7153; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7154; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7155; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7156; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
7157; CHECK-GI-NEXT:    str x8, [x1]
7158; CHECK-GI-NEXT:    ret
7159  %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, ptr %A)
7160  %tmp = getelementptr i64, ptr %A, i32 4
7161  store ptr %tmp, ptr %ptr
7162  ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
7163}
7164
; Post-increment by register for the ld4lane intrinsic on <1 x i64>: one
; element is loaded into lane 0 of each of %B..%E from %A, and %A advanced by
; %inc i64 elements is stored to %ptr.
; SD checks (llc without -global-isel) expect %inc scaled to bytes with
; "lsl x8, x2, #3" and then a post-indexed "ld4.d ..., [x0], x8"; GI checks
; (llc -global-isel=1) expect "add x8, x0, x2, lsl #3" followed by a plain
; "[x0]" load.
7165define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
7166; CHECK-SD-LABEL: test_v1i64_post_reg_ld4lane:
7167; CHECK-SD:       ; %bb.0:
7168; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7169; CHECK-SD-NEXT:    lsl x8, x2, #3
7170; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7171; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7172; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7173; CHECK-SD-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], x8
7174; CHECK-SD-NEXT:    str x0, [x1]
7175; CHECK-SD-NEXT:    ret
7176;
7177; CHECK-GI-LABEL: test_v1i64_post_reg_ld4lane:
7178; CHECK-GI:       ; %bb.0:
7179; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7180; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
7181; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7182; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7183; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7184; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
7185; CHECK-GI-NEXT:    str x8, [x1]
7186; CHECK-GI-NEXT:    ret
7187  %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, ptr %A)
7188  %tmp = getelementptr i64, ptr %A, i64 %inc
7189  store ptr %tmp, ptr %ptr
7190  ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
7191}
7192
7193declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64, ptr) nounwind readonly
7194
7195
; Post-increment by immediate for the ld4lane intrinsic on <4 x float>: one
; element is loaded into lane 0 of each of %B..%E from %A, and %A advanced by
; 4 x float (16 bytes) is stored to %ptr.
; SD checks (llc without -global-isel) expect the increment folded into a
; post-indexed "ld4.s ..., [x0], #16"; GI checks (llc -global-isel=1) expect a
; separate "add x8, x0, #16" followed by a plain "[x0]" load.
7196define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
7197; CHECK-SD-LABEL: test_v4f32_post_imm_ld4lane:
7198; CHECK-SD:       ; %bb.0:
7199; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7200; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7201; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7202; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7203; CHECK-SD-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], #16
7204; CHECK-SD-NEXT:    str x0, [x1]
7205; CHECK-SD-NEXT:    ret
7206;
7207; CHECK-GI-LABEL: test_v4f32_post_imm_ld4lane:
7208; CHECK-GI:       ; %bb.0:
7209; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7210; CHECK-GI-NEXT:    add x8, x0, #16
7211; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7212; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7213; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7214; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
7215; CHECK-GI-NEXT:    str x8, [x1]
7216; CHECK-GI-NEXT:    ret
7217  %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, ptr %A)
7218  %tmp = getelementptr float, ptr %A, i32 4
7219  store ptr %tmp, ptr %ptr
7220  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
7221}
7222
; Post-increment by register for the ld4lane intrinsic on <4 x float>: one
; element is loaded into lane 0 of each of %B..%E from %A, and %A advanced by
; %inc float elements is stored to %ptr.
; SD checks (llc without -global-isel) expect %inc scaled to bytes with
; "lsl x8, x2, #2" and then a post-indexed "ld4.s ..., [x0], x8"; GI checks
; (llc -global-isel=1) expect "add x8, x0, x2, lsl #2" followed by a plain
; "[x0]" load.
7223define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
7224; CHECK-SD-LABEL: test_v4f32_post_reg_ld4lane:
7225; CHECK-SD:       ; %bb.0:
7226; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7227; CHECK-SD-NEXT:    lsl x8, x2, #2
7228; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7229; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7230; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7231; CHECK-SD-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], x8
7232; CHECK-SD-NEXT:    str x0, [x1]
7233; CHECK-SD-NEXT:    ret
7234;
7235; CHECK-GI-LABEL: test_v4f32_post_reg_ld4lane:
7236; CHECK-GI:       ; %bb.0:
7237; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7238; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
7239; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7240; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7241; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7242; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
7243; CHECK-GI-NEXT:    str x8, [x1]
7244; CHECK-GI-NEXT:    ret
7245  %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, ptr %A)
7246  %tmp = getelementptr float, ptr %A, i64 %inc
7247  store ptr %tmp, ptr %ptr
7248  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
7249}
7250
7251declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0(<4 x float>, <4 x float>, <4 x float>, <4 x float>, i64, ptr) nounwind readonly
7252
7253
; Post-increment by immediate for the ld4lane intrinsic on <2 x float>: one
; element is loaded into lane 0 of each of %B..%E from %A, and %A advanced by
; 4 x float (16 bytes) is stored to %ptr.
; SD checks (llc without -global-isel) expect the increment folded into a
; post-indexed "ld4.s ..., [x0], #16"; GI checks (llc -global-isel=1) expect a
; separate "add x8, x0, #16" followed by a plain "[x0]" load.
7254define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
7255; CHECK-SD-LABEL: test_v2f32_post_imm_ld4lane:
7256; CHECK-SD:       ; %bb.0:
7257; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7258; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7259; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7260; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7261; CHECK-SD-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], #16
7262; CHECK-SD-NEXT:    str x0, [x1]
7263; CHECK-SD-NEXT:    ret
7264;
7265; CHECK-GI-LABEL: test_v2f32_post_imm_ld4lane:
7266; CHECK-GI:       ; %bb.0:
7267; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7268; CHECK-GI-NEXT:    add x8, x0, #16
7269; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7270; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7271; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7272; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
7273; CHECK-GI-NEXT:    str x8, [x1]
7274; CHECK-GI-NEXT:    ret
7275  %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, ptr %A)
7276  %tmp = getelementptr float, ptr %A, i32 4
7277  store ptr %tmp, ptr %ptr
7278  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
7279}
7280
; Post-increment by register for the ld4lane intrinsic on <2 x float>: one
; element is loaded into lane 0 of each of %B..%E from %A, and %A advanced by
; %inc float elements is stored to %ptr.
; SD checks (llc without -global-isel) expect %inc scaled to bytes with
; "lsl x8, x2, #2" and then a post-indexed "ld4.s ..., [x0], x8"; GI checks
; (llc -global-isel=1) expect "add x8, x0, x2, lsl #2" followed by a plain
; "[x0]" load.
7281define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
7282; CHECK-SD-LABEL: test_v2f32_post_reg_ld4lane:
7283; CHECK-SD:       ; %bb.0:
7284; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7285; CHECK-SD-NEXT:    lsl x8, x2, #2
7286; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7287; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7288; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7289; CHECK-SD-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], x8
7290; CHECK-SD-NEXT:    str x0, [x1]
7291; CHECK-SD-NEXT:    ret
7292;
7293; CHECK-GI-LABEL: test_v2f32_post_reg_ld4lane:
7294; CHECK-GI:       ; %bb.0:
7295; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7296; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
7297; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7298; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7299; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7300; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
7301; CHECK-GI-NEXT:    str x8, [x1]
7302; CHECK-GI-NEXT:    ret
7303  %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, ptr %A)
7304  %tmp = getelementptr float, ptr %A, i64 %inc
7305  store ptr %tmp, ptr %ptr
7306  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
7307}
7308
7309declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0(<2 x float>, <2 x float>, <2 x float>, <2 x float>, i64, ptr) nounwind readonly
7310
7311
; Post-increment (immediate) ld4lane on <2 x double>: loads lane 0 of four
; vectors from %A, then stores %A advanced by 4 doubles (32 bytes) to %ptr.
; Expected: SelectionDAG folds the advance into the post-indexed ld4.d with
; #32; GlobalISel computes the new pointer with a separate add.
7312define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
7313; CHECK-SD-LABEL: test_v2f64_post_imm_ld4lane:
7314; CHECK-SD:       ; %bb.0:
7315; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7316; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7317; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7318; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7319; CHECK-SD-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], #32
7320; CHECK-SD-NEXT:    str x0, [x1]
7321; CHECK-SD-NEXT:    ret
7322;
7323; CHECK-GI-LABEL: test_v2f64_post_imm_ld4lane:
7324; CHECK-GI:       ; %bb.0:
7325; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7326; CHECK-GI-NEXT:    add x8, x0, #32
7327; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7328; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7329; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7330; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
7331; CHECK-GI-NEXT:    str x8, [x1]
7332; CHECK-GI-NEXT:    ret
7333  %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, ptr %A)
7334  %tmp = getelementptr double, ptr %A, i32 4
7335  store ptr %tmp, ptr %ptr
7336  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
7337}
7338
; Post-increment (register) ld4lane on <2 x double>: loads lane 0 of four
; vectors from %A, then stores %A advanced by %inc doubles to %ptr.
; Expected: SelectionDAG scales %inc with lsl #3 and folds it into the
; post-indexed ld4.d; GlobalISel computes the new pointer with a separate add.
7339define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
7340; CHECK-SD-LABEL: test_v2f64_post_reg_ld4lane:
7341; CHECK-SD:       ; %bb.0:
7342; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7343; CHECK-SD-NEXT:    lsl x8, x2, #3
7344; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7345; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7346; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7347; CHECK-SD-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], x8
7348; CHECK-SD-NEXT:    str x0, [x1]
7349; CHECK-SD-NEXT:    ret
7350;
7351; CHECK-GI-LABEL: test_v2f64_post_reg_ld4lane:
7352; CHECK-GI:       ; %bb.0:
7353; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7354; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
7355; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7356; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7357; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7358; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
7359; CHECK-GI-NEXT:    str x8, [x1]
7360; CHECK-GI-NEXT:    ret
7361  %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, ptr %A)
7362  %tmp = getelementptr double, ptr %A, i64 %inc
7363  store ptr %tmp, ptr %ptr
7364  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
7365}
7366
7367declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0(<2 x double>, <2 x double>, <2 x double>, <2 x double>, i64, ptr) nounwind readonly
7368
7369
; Post-increment (immediate) ld4lane on <1 x double>: loads lane 0 of four
; vectors from %A, then stores %A advanced by 4 doubles (32 bytes) to %ptr.
; Expected: SelectionDAG folds the advance into the post-indexed ld4.d with
; #32; GlobalISel computes the new pointer with a separate add.
7370define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
7371; CHECK-SD-LABEL: test_v1f64_post_imm_ld4lane:
7372; CHECK-SD:       ; %bb.0:
7373; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7374; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7375; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7376; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7377; CHECK-SD-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], #32
7378; CHECK-SD-NEXT:    str x0, [x1]
7379; CHECK-SD-NEXT:    ret
7380;
7381; CHECK-GI-LABEL: test_v1f64_post_imm_ld4lane:
7382; CHECK-GI:       ; %bb.0:
7383; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7384; CHECK-GI-NEXT:    add x8, x0, #32
7385; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7386; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7387; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7388; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
7389; CHECK-GI-NEXT:    str x8, [x1]
7390; CHECK-GI-NEXT:    ret
7391  %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, ptr %A)
7392  %tmp = getelementptr double, ptr %A, i32 4
7393  store ptr %tmp, ptr %ptr
7394  ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
7395}
7396
; Post-increment (register) ld4lane on <1 x double>: loads lane 0 of four
; vectors from %A, then stores %A advanced by %inc doubles to %ptr.
; Expected: SelectionDAG scales %inc with lsl #3 and folds it into the
; post-indexed ld4.d; GlobalISel computes the new pointer with a separate add.
7397define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
7398; CHECK-SD-LABEL: test_v1f64_post_reg_ld4lane:
7399; CHECK-SD:       ; %bb.0:
7400; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7401; CHECK-SD-NEXT:    lsl x8, x2, #3
7402; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7403; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7404; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7405; CHECK-SD-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], x8
7406; CHECK-SD-NEXT:    str x0, [x1]
7407; CHECK-SD-NEXT:    ret
7408;
7409; CHECK-GI-LABEL: test_v1f64_post_reg_ld4lane:
7410; CHECK-GI:       ; %bb.0:
7411; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7412; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
7413; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7414; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7415; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
7416; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
7417; CHECK-GI-NEXT:    str x8, [x1]
7418; CHECK-GI-NEXT:    ret
7419  %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, ptr %A)
7420  %tmp = getelementptr double, ptr %A, i64 %inc
7421  store ptr %tmp, ptr %ptr
7422  ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
7423}
7424
7425declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0(<1 x double>, <1 x double>, <1 x double>, <1 x double>, i64, ptr) nounwind readonly
7426
7427
; Post-increment (immediate) st2 on <16 x i8>: stores %B and %C through %A and
; returns %A advanced by 32 bytes.
; Expected: SelectionDAG folds the advance into the post-indexed st2.16b with
; #32; GlobalISel copies %A to x8, adds separately, and stores through x8.
7428define ptr @test_v16i8_post_imm_st2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
7429; CHECK-SD-LABEL: test_v16i8_post_imm_st2:
7430; CHECK-SD:       ; %bb.0:
7431; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
7432; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
7433; CHECK-SD-NEXT:    st2.16b { v0, v1 }, [x0], #32
7434; CHECK-SD-NEXT:    ret
7435;
7436; CHECK-GI-LABEL: test_v16i8_post_imm_st2:
7437; CHECK-GI:       ; %bb.0:
7438; CHECK-GI-NEXT:    mov x8, x0
7439; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
7440; CHECK-GI-NEXT:    add x0, x0, #32
7441; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
7442; CHECK-GI-NEXT:    st2.16b { v0, v1 }, [x8]
7443; CHECK-GI-NEXT:    ret
7444  call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> %B, <16 x i8> %C, ptr %A)
7445  %tmp = getelementptr i8, ptr %A, i32 32
7446  ret ptr %tmp
7447}
7448
; Post-increment (register) st2 on <16 x i8>: stores %B and %C through %A and
; returns %A advanced by %inc bytes.
; Expected: SelectionDAG uses %inc (x2) directly as the post-index register of
; st2.16b; GlobalISel copies %A to x8, adds separately, and stores through x8.
7449define ptr @test_v16i8_post_reg_st2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
7450; CHECK-SD-LABEL: test_v16i8_post_reg_st2:
7451; CHECK-SD:       ; %bb.0:
7452; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
7453; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
7454; CHECK-SD-NEXT:    st2.16b { v0, v1 }, [x0], x2
7455; CHECK-SD-NEXT:    ret
7456;
7457; CHECK-GI-LABEL: test_v16i8_post_reg_st2:
7458; CHECK-GI:       ; %bb.0:
7459; CHECK-GI-NEXT:    mov x8, x0
7460; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
7461; CHECK-GI-NEXT:    add x0, x0, x2
7462; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
7463; CHECK-GI-NEXT:    st2.16b { v0, v1 }, [x8]
7464; CHECK-GI-NEXT:    ret
7465  call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> %B, <16 x i8> %C, ptr %A)
7466  %tmp = getelementptr i8, ptr %A, i64 %inc
7467  ret ptr %tmp
7468}
7469
7470declare void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8>, <16 x i8>, ptr)
7471
7472
; Post-increment (immediate) st2 on <8 x i8>: stores %B and %C through %A and
; returns %A advanced by 16 bytes.
; Expected: SelectionDAG folds the advance into the post-indexed st2.8b with
; #16; GlobalISel copies %A to x8, adds separately, and stores through x8.
7473define ptr @test_v8i8_post_imm_st2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
7474; CHECK-SD-LABEL: test_v8i8_post_imm_st2:
7475; CHECK-SD:       ; %bb.0:
7476; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
7477; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
7478; CHECK-SD-NEXT:    st2.8b { v0, v1 }, [x0], #16
7479; CHECK-SD-NEXT:    ret
7480;
7481; CHECK-GI-LABEL: test_v8i8_post_imm_st2:
7482; CHECK-GI:       ; %bb.0:
7483; CHECK-GI-NEXT:    mov x8, x0
7484; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
7485; CHECK-GI-NEXT:    add x0, x0, #16
7486; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
7487; CHECK-GI-NEXT:    st2.8b { v0, v1 }, [x8]
7488; CHECK-GI-NEXT:    ret
7489  call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> %B, <8 x i8> %C, ptr %A)
7490  %tmp = getelementptr i8, ptr %A, i32 16
7491  ret ptr %tmp
7492}
7493
; Post-increment (register) st2 on <8 x i8>: stores %B and %C through %A and
; returns %A advanced by %inc bytes.
; Expected: SelectionDAG uses %inc (x2) directly as the post-index register of
; st2.8b; GlobalISel copies %A to x8, adds separately, and stores through x8.
7494define ptr @test_v8i8_post_reg_st2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
7495; CHECK-SD-LABEL: test_v8i8_post_reg_st2:
7496; CHECK-SD:       ; %bb.0:
7497; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
7498; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
7499; CHECK-SD-NEXT:    st2.8b { v0, v1 }, [x0], x2
7500; CHECK-SD-NEXT:    ret
7501;
7502; CHECK-GI-LABEL: test_v8i8_post_reg_st2:
7503; CHECK-GI:       ; %bb.0:
7504; CHECK-GI-NEXT:    mov x8, x0
7505; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
7506; CHECK-GI-NEXT:    add x0, x0, x2
7507; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
7508; CHECK-GI-NEXT:    st2.8b { v0, v1 }, [x8]
7509; CHECK-GI-NEXT:    ret
7510  call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> %B, <8 x i8> %C, ptr %A)
7511  %tmp = getelementptr i8, ptr %A, i64 %inc
7512  ret ptr %tmp
7513}
7514
7515declare void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8>, <8 x i8>, ptr)
7516
7517
; Post-increment (immediate) st2 on <8 x i16>: stores %B and %C through %A and
; returns %A advanced by 16 i16 elements (32 bytes).
; Expected: SelectionDAG folds the advance into the post-indexed st2.8h with
; #32; GlobalISel copies %A to x8, adds separately, and stores through x8.
7518define ptr @test_v8i16_post_imm_st2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
7519; CHECK-SD-LABEL: test_v8i16_post_imm_st2:
7520; CHECK-SD:       ; %bb.0:
7521; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
7522; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
7523; CHECK-SD-NEXT:    st2.8h { v0, v1 }, [x0], #32
7524; CHECK-SD-NEXT:    ret
7525;
7526; CHECK-GI-LABEL: test_v8i16_post_imm_st2:
7527; CHECK-GI:       ; %bb.0:
7528; CHECK-GI-NEXT:    mov x8, x0
7529; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
7530; CHECK-GI-NEXT:    add x0, x0, #32
7531; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
7532; CHECK-GI-NEXT:    st2.8h { v0, v1 }, [x8]
7533; CHECK-GI-NEXT:    ret
7534  call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> %B, <8 x i16> %C, ptr %A)
7535  %tmp = getelementptr i16, ptr %A, i32 16
7536  ret ptr %tmp
7537}
7538
; Post-increment (register) st2 on <8 x i16>: stores %B and %C through %A and
; returns %A advanced by %inc i16 elements.
; Expected: SelectionDAG scales %inc with lsl #1 and uses it as the post-index
; register of st2.8h; GlobalISel copies %A to x8, adds separately, and stores
; through x8.
7539define ptr @test_v8i16_post_reg_st2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind {
7540; CHECK-SD-LABEL: test_v8i16_post_reg_st2:
7541; CHECK-SD:       ; %bb.0:
7542; CHECK-SD-NEXT:    lsl x8, x2, #1
7543; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
7544; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
7545; CHECK-SD-NEXT:    st2.8h { v0, v1 }, [x0], x8
7546; CHECK-SD-NEXT:    ret
7547;
7548; CHECK-GI-LABEL: test_v8i16_post_reg_st2:
7549; CHECK-GI:       ; %bb.0:
7550; CHECK-GI-NEXT:    mov x8, x0
7551; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
7552; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
7553; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
7554; CHECK-GI-NEXT:    st2.8h { v0, v1 }, [x8]
7555; CHECK-GI-NEXT:    ret
7556  call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> %B, <8 x i16> %C, ptr %A)
7557  %tmp = getelementptr i16, ptr %A, i64 %inc
7558  ret ptr %tmp
7559}
7560
7561declare void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16>, <8 x i16>, ptr)
7562
7563
; Post-increment (immediate) st2 on <4 x i16>: stores %B and %C through %A and
; returns %A advanced by 8 i16 elements (16 bytes).
; Expected: SelectionDAG folds the advance into the post-indexed st2.4h with
; #16; GlobalISel copies %A to x8, adds separately, and stores through x8.
7564define ptr @test_v4i16_post_imm_st2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
7565; CHECK-SD-LABEL: test_v4i16_post_imm_st2:
7566; CHECK-SD:       ; %bb.0:
7567; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
7568; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
7569; CHECK-SD-NEXT:    st2.4h { v0, v1 }, [x0], #16
7570; CHECK-SD-NEXT:    ret
7571;
7572; CHECK-GI-LABEL: test_v4i16_post_imm_st2:
7573; CHECK-GI:       ; %bb.0:
7574; CHECK-GI-NEXT:    mov x8, x0
7575; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
7576; CHECK-GI-NEXT:    add x0, x0, #16
7577; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
7578; CHECK-GI-NEXT:    st2.4h { v0, v1 }, [x8]
7579; CHECK-GI-NEXT:    ret
7580  call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> %B, <4 x i16> %C, ptr %A)
7581  %tmp = getelementptr i16, ptr %A, i32 8
7582  ret ptr %tmp
7583}
7584
; Post-increment (register) st2 on <4 x i16>: stores %B and %C through %A and
; returns %A advanced by %inc i16 elements.
; Expected: SelectionDAG scales %inc with lsl #1 and uses it as the post-index
; register of st2.4h; GlobalISel copies %A to x8, adds separately, and stores
; through x8.
7585define ptr @test_v4i16_post_reg_st2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind {
7586; CHECK-SD-LABEL: test_v4i16_post_reg_st2:
7587; CHECK-SD:       ; %bb.0:
7588; CHECK-SD-NEXT:    lsl x8, x2, #1
7589; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
7590; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
7591; CHECK-SD-NEXT:    st2.4h { v0, v1 }, [x0], x8
7592; CHECK-SD-NEXT:    ret
7593;
7594; CHECK-GI-LABEL: test_v4i16_post_reg_st2:
7595; CHECK-GI:       ; %bb.0:
7596; CHECK-GI-NEXT:    mov x8, x0
7597; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
7598; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
7599; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
7600; CHECK-GI-NEXT:    st2.4h { v0, v1 }, [x8]
7601; CHECK-GI-NEXT:    ret
7602  call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> %B, <4 x i16> %C, ptr %A)
7603  %tmp = getelementptr i16, ptr %A, i64 %inc
7604  ret ptr %tmp
7605}
7606
7607declare void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16>, <4 x i16>, ptr)
7608
7609
; Post-increment (immediate) st2 on <4 x i32>: stores %B and %C through %A and
; returns %A advanced by 8 i32 elements (32 bytes).
; Expected: SelectionDAG folds the advance into the post-indexed st2.4s with
; #32; GlobalISel copies %A to x8, adds separately, and stores through x8.
7610define ptr @test_v4i32_post_imm_st2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
7611; CHECK-SD-LABEL: test_v4i32_post_imm_st2:
7612; CHECK-SD:       ; %bb.0:
7613; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
7614; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
7615; CHECK-SD-NEXT:    st2.4s { v0, v1 }, [x0], #32
7616; CHECK-SD-NEXT:    ret
7617;
7618; CHECK-GI-LABEL: test_v4i32_post_imm_st2:
7619; CHECK-GI:       ; %bb.0:
7620; CHECK-GI-NEXT:    mov x8, x0
7621; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
7622; CHECK-GI-NEXT:    add x0, x0, #32
7623; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
7624; CHECK-GI-NEXT:    st2.4s { v0, v1 }, [x8]
7625; CHECK-GI-NEXT:    ret
7626  call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> %B, <4 x i32> %C, ptr %A)
7627  %tmp = getelementptr i32, ptr %A, i32 8
7628  ret ptr %tmp
7629}
7630
; Post-increment (register) st2 on <4 x i32>: stores %B and %C through %A and
; returns %A advanced by %inc i32 elements.
; Expected: SelectionDAG scales %inc with lsl #2 and uses it as the post-index
; register of st2.4s; GlobalISel copies %A to x8, adds separately, and stores
; through x8.
7631define ptr @test_v4i32_post_reg_st2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind {
7632; CHECK-SD-LABEL: test_v4i32_post_reg_st2:
7633; CHECK-SD:       ; %bb.0:
7634; CHECK-SD-NEXT:    lsl x8, x2, #2
7635; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
7636; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
7637; CHECK-SD-NEXT:    st2.4s { v0, v1 }, [x0], x8
7638; CHECK-SD-NEXT:    ret
7639;
7640; CHECK-GI-LABEL: test_v4i32_post_reg_st2:
7641; CHECK-GI:       ; %bb.0:
7642; CHECK-GI-NEXT:    mov x8, x0
7643; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
7644; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
7645; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
7646; CHECK-GI-NEXT:    st2.4s { v0, v1 }, [x8]
7647; CHECK-GI-NEXT:    ret
7648  call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> %B, <4 x i32> %C, ptr %A)
7649  %tmp = getelementptr i32, ptr %A, i64 %inc
7650  ret ptr %tmp
7651}
7652
7653declare void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32>, <4 x i32>, ptr)
7654
7655
; Post-increment (immediate) st2 on <2 x i32>: stores %B and %C through %A and
; returns %A advanced by 4 i32 elements (16 bytes).
; Expected: SelectionDAG folds the advance into the post-indexed st2.2s with
; #16; GlobalISel copies %A to x8, adds separately, and stores through x8.
7656define ptr @test_v2i32_post_imm_st2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
7657; CHECK-SD-LABEL: test_v2i32_post_imm_st2:
7658; CHECK-SD:       ; %bb.0:
7659; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
7660; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
7661; CHECK-SD-NEXT:    st2.2s { v0, v1 }, [x0], #16
7662; CHECK-SD-NEXT:    ret
7663;
7664; CHECK-GI-LABEL: test_v2i32_post_imm_st2:
7665; CHECK-GI:       ; %bb.0:
7666; CHECK-GI-NEXT:    mov x8, x0
7667; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
7668; CHECK-GI-NEXT:    add x0, x0, #16
7669; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
7670; CHECK-GI-NEXT:    st2.2s { v0, v1 }, [x8]
7671; CHECK-GI-NEXT:    ret
7672  call void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> %B, <2 x i32> %C, ptr %A)
7673  %tmp = getelementptr i32, ptr %A, i32 4
7674  ret ptr %tmp
7675}
7676
; Post-increment (register) st2 on <2 x i32>: stores %B and %C through %A and
; returns %A advanced by %inc i32 elements.
; Expected: SelectionDAG scales %inc with lsl #2 and uses it as the post-index
; register of st2.2s; GlobalISel copies %A to x8, adds separately, and stores
; through x8.
7677define ptr @test_v2i32_post_reg_st2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind {
7678; CHECK-SD-LABEL: test_v2i32_post_reg_st2:
7679; CHECK-SD:       ; %bb.0:
7680; CHECK-SD-NEXT:    lsl x8, x2, #2
7681; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
7682; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
7683; CHECK-SD-NEXT:    st2.2s { v0, v1 }, [x0], x8
7684; CHECK-SD-NEXT:    ret
7685;
7686; CHECK-GI-LABEL: test_v2i32_post_reg_st2:
7687; CHECK-GI:       ; %bb.0:
7688; CHECK-GI-NEXT:    mov x8, x0
7689; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
7690; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
7691; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
7692; CHECK-GI-NEXT:    st2.2s { v0, v1 }, [x8]
7693; CHECK-GI-NEXT:    ret
7694  call void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> %B, <2 x i32> %C, ptr %A)
7695  %tmp = getelementptr i32, ptr %A, i64 %inc
7696  ret ptr %tmp
7697}
7698
7699declare void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32>, <2 x i32>, ptr)
7700
7701
; Post-increment (immediate) st2 on <2 x i64>: stores %B and %C through %A and
; returns %A advanced by 4 i64 elements (32 bytes).
; Expected: SelectionDAG folds the advance into the post-indexed st2.2d with
; #32; GlobalISel copies %A to x8, adds separately, and stores through x8.
7702define ptr @test_v2i64_post_imm_st2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
7703; CHECK-SD-LABEL: test_v2i64_post_imm_st2:
7704; CHECK-SD:       ; %bb.0:
7705; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
7706; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
7707; CHECK-SD-NEXT:    st2.2d { v0, v1 }, [x0], #32
7708; CHECK-SD-NEXT:    ret
7709;
7710; CHECK-GI-LABEL: test_v2i64_post_imm_st2:
7711; CHECK-GI:       ; %bb.0:
7712; CHECK-GI-NEXT:    mov x8, x0
7713; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
7714; CHECK-GI-NEXT:    add x0, x0, #32
7715; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
7716; CHECK-GI-NEXT:    st2.2d { v0, v1 }, [x8]
7717; CHECK-GI-NEXT:    ret
7718  call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> %B, <2 x i64> %C, ptr %A)
7719  %tmp = getelementptr i64, ptr %A, i64 4
7720  ret ptr %tmp
7721}
7722
; Post-increment (register) st2 on <2 x i64>: stores %B and %C through %A and
; returns %A advanced by %inc i64 elements.
; Expected: SelectionDAG scales %inc with lsl #3 and uses it as the post-index
; register of st2.2d; GlobalISel copies %A to x8, adds separately, and stores
; through x8.
7723define ptr @test_v2i64_post_reg_st2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind {
7724; CHECK-SD-LABEL: test_v2i64_post_reg_st2:
7725; CHECK-SD:       ; %bb.0:
7726; CHECK-SD-NEXT:    lsl x8, x2, #3
7727; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
7728; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
7729; CHECK-SD-NEXT:    st2.2d { v0, v1 }, [x0], x8
7730; CHECK-SD-NEXT:    ret
7731;
7732; CHECK-GI-LABEL: test_v2i64_post_reg_st2:
7733; CHECK-GI:       ; %bb.0:
7734; CHECK-GI-NEXT:    mov x8, x0
7735; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
7736; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
7737; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
7738; CHECK-GI-NEXT:    st2.2d { v0, v1 }, [x8]
7739; CHECK-GI-NEXT:    ret
7740  call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> %B, <2 x i64> %C, ptr %A)
7741  %tmp = getelementptr i64, ptr %A, i64 %inc
7742  ret ptr %tmp
7743}
7744
7745declare void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64>, <2 x i64>, ptr)
7746
7747
; Post-increment (immediate) st2 on <1 x i64>: stores %B and %C through %A and
; returns %A advanced by 2 i64 elements (16 bytes).
; Expected: both selectors emit a two-register st1.1d rather than an st2 for
; this single-element type. SelectionDAG folds the advance into the
; post-indexed form with #16; GlobalISel adds separately and stores through x8.
7748define ptr @test_v1i64_post_imm_st2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
7749; CHECK-SD-LABEL: test_v1i64_post_imm_st2:
7750; CHECK-SD:       ; %bb.0:
7751; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
7752; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
7753; CHECK-SD-NEXT:    st1.1d { v0, v1 }, [x0], #16
7754; CHECK-SD-NEXT:    ret
7755;
7756; CHECK-GI-LABEL: test_v1i64_post_imm_st2:
7757; CHECK-GI:       ; %bb.0:
7758; CHECK-GI-NEXT:    mov x8, x0
7759; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
7760; CHECK-GI-NEXT:    add x0, x0, #16
7761; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
7762; CHECK-GI-NEXT:    st1.1d { v0, v1 }, [x8]
7763; CHECK-GI-NEXT:    ret
7764  call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> %B, <1 x i64> %C, ptr %A)
7765  %tmp = getelementptr i64, ptr %A, i64 2
7766  ret ptr %tmp
7767}
7768
; Post-increment (register) st2 on <1 x i64>: stores %B and %C through %A and
; returns %A advanced by %inc i64 elements.
; Expected: both selectors emit a two-register st1.1d rather than an st2 for
; this single-element type. SelectionDAG scales %inc with lsl #3 and uses it
; as the post-index register; GlobalISel adds separately and stores through x8.
7769define ptr @test_v1i64_post_reg_st2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind {
7770; CHECK-SD-LABEL: test_v1i64_post_reg_st2:
7771; CHECK-SD:       ; %bb.0:
7772; CHECK-SD-NEXT:    lsl x8, x2, #3
7773; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
7774; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
7775; CHECK-SD-NEXT:    st1.1d { v0, v1 }, [x0], x8
7776; CHECK-SD-NEXT:    ret
7777;
7778; CHECK-GI-LABEL: test_v1i64_post_reg_st2:
7779; CHECK-GI:       ; %bb.0:
7780; CHECK-GI-NEXT:    mov x8, x0
7781; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
7782; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
7783; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
7784; CHECK-GI-NEXT:    st1.1d { v0, v1 }, [x8]
7785; CHECK-GI-NEXT:    ret
7786  call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> %B, <1 x i64> %C, ptr %A)
7787  %tmp = getelementptr i64, ptr %A, i64 %inc
7788  ret ptr %tmp
7789}
7790
7791declare void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64>, <1 x i64>, ptr)
7792
7793
; Post-increment (immediate) st2 on <4 x float>: stores %B and %C through %A
; and returns %A advanced by 8 floats (32 bytes).
; Expected: SelectionDAG folds the advance into the post-indexed st2.4s with
; #32; GlobalISel copies %A to x8, adds separately, and stores through x8.
7794define ptr @test_v4f32_post_imm_st2(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C) nounwind {
7795; CHECK-SD-LABEL: test_v4f32_post_imm_st2:
7796; CHECK-SD:       ; %bb.0:
7797; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
7798; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
7799; CHECK-SD-NEXT:    st2.4s { v0, v1 }, [x0], #32
7800; CHECK-SD-NEXT:    ret
7801;
7802; CHECK-GI-LABEL: test_v4f32_post_imm_st2:
7803; CHECK-GI:       ; %bb.0:
7804; CHECK-GI-NEXT:    mov x8, x0
7805; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
7806; CHECK-GI-NEXT:    add x0, x0, #32
7807; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
7808; CHECK-GI-NEXT:    st2.4s { v0, v1 }, [x8]
7809; CHECK-GI-NEXT:    ret
7810  call void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float> %B, <4 x float> %C, ptr %A)
7811  %tmp = getelementptr float, ptr %A, i32 8
7812  ret ptr %tmp
7813}
7814
; Post-increment (register) st2 on <4 x float>: stores %B and %C through %A
; and returns %A advanced by %inc floats.
; Expected: SelectionDAG scales %inc with lsl #2 and uses it as the post-index
; register of st2.4s; GlobalISel copies %A to x8, adds separately, and stores
; through x8.
7815define ptr @test_v4f32_post_reg_st2(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind {
7816; CHECK-SD-LABEL: test_v4f32_post_reg_st2:
7817; CHECK-SD:       ; %bb.0:
7818; CHECK-SD-NEXT:    lsl x8, x2, #2
7819; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
7820; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
7821; CHECK-SD-NEXT:    st2.4s { v0, v1 }, [x0], x8
7822; CHECK-SD-NEXT:    ret
7823;
7824; CHECK-GI-LABEL: test_v4f32_post_reg_st2:
7825; CHECK-GI:       ; %bb.0:
7826; CHECK-GI-NEXT:    mov x8, x0
7827; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
7828; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
7829; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
7830; CHECK-GI-NEXT:    st2.4s { v0, v1 }, [x8]
7831; CHECK-GI-NEXT:    ret
7832  call void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float> %B, <4 x float> %C, ptr %A)
7833  %tmp = getelementptr float, ptr %A, i64 %inc
7834  ret ptr %tmp
7835}
7836
7837declare void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float>, <4 x float>, ptr)
7838
7839
; Post-increment (immediate) st2 on <2 x float>: stores %B and %C through %A
; and returns %A advanced by 4 floats (16 bytes).
; Expected: SelectionDAG folds the advance into the post-indexed st2.2s with
; #16; GlobalISel copies %A to x8, adds separately, and stores through x8.
7840define ptr @test_v2f32_post_imm_st2(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C) nounwind {
7841; CHECK-SD-LABEL: test_v2f32_post_imm_st2:
7842; CHECK-SD:       ; %bb.0:
7843; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
7844; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
7845; CHECK-SD-NEXT:    st2.2s { v0, v1 }, [x0], #16
7846; CHECK-SD-NEXT:    ret
7847;
7848; CHECK-GI-LABEL: test_v2f32_post_imm_st2:
7849; CHECK-GI:       ; %bb.0:
7850; CHECK-GI-NEXT:    mov x8, x0
7851; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
7852; CHECK-GI-NEXT:    add x0, x0, #16
7853; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
7854; CHECK-GI-NEXT:    st2.2s { v0, v1 }, [x8]
7855; CHECK-GI-NEXT:    ret
7856  call void @llvm.aarch64.neon.st2.v2f32.p0(<2 x float> %B, <2 x float> %C, ptr %A)
7857  %tmp = getelementptr float, ptr %A, i32 4
7858  ret ptr %tmp
7859}
7860
; Post-increment (register) st2 on <2 x float>: stores %B and %C through %A
; and returns %A advanced by %inc floats.
; Expected: SelectionDAG scales %inc with lsl #2 and uses it as the post-index
; register of st2.2s; GlobalISel copies %A to x8, adds separately, and stores
; through x8.
7861define ptr @test_v2f32_post_reg_st2(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind {
7862; CHECK-SD-LABEL: test_v2f32_post_reg_st2:
7863; CHECK-SD:       ; %bb.0:
7864; CHECK-SD-NEXT:    lsl x8, x2, #2
7865; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
7866; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
7867; CHECK-SD-NEXT:    st2.2s { v0, v1 }, [x0], x8
7868; CHECK-SD-NEXT:    ret
7869;
7870; CHECK-GI-LABEL: test_v2f32_post_reg_st2:
7871; CHECK-GI:       ; %bb.0:
7872; CHECK-GI-NEXT:    mov x8, x0
7873; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
7874; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
7875; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
7876; CHECK-GI-NEXT:    st2.2s { v0, v1 }, [x8]
7877; CHECK-GI-NEXT:    ret
7878  call void @llvm.aarch64.neon.st2.v2f32.p0(<2 x float> %B, <2 x float> %C, ptr %A)
7879  %tmp = getelementptr float, ptr %A, i64 %inc
7880  ret ptr %tmp
7881}
7882
7883declare void @llvm.aarch64.neon.st2.v2f32.p0(<2 x float>, <2 x float>, ptr)
7884
7885
; Post-increment (immediate) st2 on <2 x double>: stores %B and %C through %A
; and returns %A advanced by 4 doubles (32 bytes).
; Expected: SelectionDAG folds the advance into the post-indexed st2.2d with
; #32; GlobalISel copies %A to x8, adds separately, and stores through x8.
7886define ptr @test_v2f64_post_imm_st2(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C) nounwind {
7887; CHECK-SD-LABEL: test_v2f64_post_imm_st2:
7888; CHECK-SD:       ; %bb.0:
7889; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
7890; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
7891; CHECK-SD-NEXT:    st2.2d { v0, v1 }, [x0], #32
7892; CHECK-SD-NEXT:    ret
7893;
7894; CHECK-GI-LABEL: test_v2f64_post_imm_st2:
7895; CHECK-GI:       ; %bb.0:
7896; CHECK-GI-NEXT:    mov x8, x0
7897; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
7898; CHECK-GI-NEXT:    add x0, x0, #32
7899; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
7900; CHECK-GI-NEXT:    st2.2d { v0, v1 }, [x8]
7901; CHECK-GI-NEXT:    ret
7902  call void @llvm.aarch64.neon.st2.v2f64.p0(<2 x double> %B, <2 x double> %C, ptr %A)
7903  %tmp = getelementptr double, ptr %A, i64 4
7904  ret ptr %tmp
7905}
7906
; Post-increment (register) st2 on <2 x double>: stores %B and %C through %A
; and returns %A advanced by %inc doubles.
; Expected: SelectionDAG scales %inc with lsl #3 and uses it as the post-index
; register of st2.2d; GlobalISel copies %A to x8, adds separately, and stores
; through x8.
7907define ptr @test_v2f64_post_reg_st2(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind {
7908; CHECK-SD-LABEL: test_v2f64_post_reg_st2:
7909; CHECK-SD:       ; %bb.0:
7910; CHECK-SD-NEXT:    lsl x8, x2, #3
7911; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
7912; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
7913; CHECK-SD-NEXT:    st2.2d { v0, v1 }, [x0], x8
7914; CHECK-SD-NEXT:    ret
7915;
7916; CHECK-GI-LABEL: test_v2f64_post_reg_st2:
7917; CHECK-GI:       ; %bb.0:
7918; CHECK-GI-NEXT:    mov x8, x0
7919; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
7920; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
7921; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
7922; CHECK-GI-NEXT:    st2.2d { v0, v1 }, [x8]
7923; CHECK-GI-NEXT:    ret
7924  call void @llvm.aarch64.neon.st2.v2f64.p0(<2 x double> %B, <2 x double> %C, ptr %A)
7925  %tmp = getelementptr double, ptr %A, i64 %inc
7926  ret ptr %tmp
7927}
7928
7929declare void @llvm.aarch64.neon.st2.v2f64.p0(<2 x double>, <2 x double>, ptr)
7930
7931
; Post-increment (immediate) st2 on <1 x double>: stores %B and %C through %A
; and returns %A advanced by 2 doubles (16 bytes).
; Expected: both selectors emit a two-register st1.1d rather than an st2 for
; this single-element type. SelectionDAG folds the advance into the
; post-indexed form with #16; GlobalISel adds separately and stores through x8.
7932define ptr @test_v1f64_post_imm_st2(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C) nounwind {
7933; CHECK-SD-LABEL: test_v1f64_post_imm_st2:
7934; CHECK-SD:       ; %bb.0:
7935; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
7936; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
7937; CHECK-SD-NEXT:    st1.1d { v0, v1 }, [x0], #16
7938; CHECK-SD-NEXT:    ret
7939;
7940; CHECK-GI-LABEL: test_v1f64_post_imm_st2:
7941; CHECK-GI:       ; %bb.0:
7942; CHECK-GI-NEXT:    mov x8, x0
7943; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
7944; CHECK-GI-NEXT:    add x0, x0, #16
7945; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
7946; CHECK-GI-NEXT:    st1.1d { v0, v1 }, [x8]
7947; CHECK-GI-NEXT:    ret
7948  call void @llvm.aarch64.neon.st2.v1f64.p0(<1 x double> %B, <1 x double> %C, ptr %A)
7949  %tmp = getelementptr double, ptr %A, i64 2
7950  ret ptr %tmp
7951}
7952
; Post-increment (register) st2 on <1 x double>: stores %B and %C through %A
; and returns %A advanced by %inc doubles.
; Expected: both selectors emit a two-register st1.1d rather than an st2 for
; this single-element type. SelectionDAG scales %inc with lsl #3 and uses it
; as the post-index register; GlobalISel adds separately and stores through x8.
7953define ptr @test_v1f64_post_reg_st2(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind {
7954; CHECK-SD-LABEL: test_v1f64_post_reg_st2:
7955; CHECK-SD:       ; %bb.0:
7956; CHECK-SD-NEXT:    lsl x8, x2, #3
7957; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
7958; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
7959; CHECK-SD-NEXT:    st1.1d { v0, v1 }, [x0], x8
7960; CHECK-SD-NEXT:    ret
7961;
7962; CHECK-GI-LABEL: test_v1f64_post_reg_st2:
7963; CHECK-GI:       ; %bb.0:
7964; CHECK-GI-NEXT:    mov x8, x0
7965; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
7966; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
7967; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
7968; CHECK-GI-NEXT:    st1.1d { v0, v1 }, [x8]
7969; CHECK-GI-NEXT:    ret
7970  call void @llvm.aarch64.neon.st2.v1f64.p0(<1 x double> %B, <1 x double> %C, ptr %A)
7971  %tmp = getelementptr double, ptr %A, i64 %inc
7972  ret ptr %tmp
7973}
7974
7975declare void @llvm.aarch64.neon.st2.v1f64.p0(<1 x double>, <1 x double>, ptr)
7976
7977
; Post-increment st3 of three <16 x i8> values: stores through %A and returns
; %A advanced by 48 bytes. The SelectionDAG run is expected to emit one
; post-indexed st3.16b with immediate #48; the GlobalISel run is expected to
; emit a separate add of #48 and a plain st3.16b through x8. %ptr is unused.
7978define ptr @test_v16i8_post_imm_st3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
7979; CHECK-SD-LABEL: test_v16i8_post_imm_st3:
7980; CHECK-SD:       ; %bb.0:
7981; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
7982; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
7983; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
7984; CHECK-SD-NEXT:    st3.16b { v0, v1, v2 }, [x0], #48
7985; CHECK-SD-NEXT:    ret
7986;
7987; CHECK-GI-LABEL: test_v16i8_post_imm_st3:
7988; CHECK-GI:       ; %bb.0:
7989; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
7990; CHECK-GI-NEXT:    mov x8, x0
7991; CHECK-GI-NEXT:    add x0, x0, #48
7992; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
7993; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
7994; CHECK-GI-NEXT:    st3.16b { v0, v1, v2 }, [x8]
7995; CHECK-GI-NEXT:    ret
7996  call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %A)
7997  %tmp = getelementptr i8, ptr %A, i32 48
7998  ret ptr %tmp
7999}
8000
; st3 of three <16 x i8> values through %A, returning %A advanced by %inc
; bytes. The SelectionDAG run is expected to use %inc (x2) directly as the
; post-index register of st3.16b; the GlobalISel run is expected to emit
; add x0, x0, x2 and a plain st3.16b through x8. %ptr is unused.
8001define ptr @test_v16i8_post_reg_st3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
8002; CHECK-SD-LABEL: test_v16i8_post_reg_st3:
8003; CHECK-SD:       ; %bb.0:
8004; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
8005; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
8006; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
8007; CHECK-SD-NEXT:    st3.16b { v0, v1, v2 }, [x0], x2
8008; CHECK-SD-NEXT:    ret
8009;
8010; CHECK-GI-LABEL: test_v16i8_post_reg_st3:
8011; CHECK-GI:       ; %bb.0:
8012; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
8013; CHECK-GI-NEXT:    mov x8, x0
8014; CHECK-GI-NEXT:    add x0, x0, x2
8015; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
8016; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
8017; CHECK-GI-NEXT:    st3.16b { v0, v1, v2 }, [x8]
8018; CHECK-GI-NEXT:    ret
8019  call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %A)
8020  %tmp = getelementptr i8, ptr %A, i64 %inc
8021  ret ptr %tmp
8022}
8023
8024declare void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, ptr)
8025
8026
; Post-increment st3 of three <8 x i8> values: stores through %A and returns
; %A advanced by 24 bytes. The SelectionDAG run is expected to emit one
; post-indexed st3.8b with immediate #24; the GlobalISel run is expected to
; emit a separate add of #24 and a plain st3.8b through x8. %ptr is unused.
8027define ptr @test_v8i8_post_imm_st3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
8028; CHECK-SD-LABEL: test_v8i8_post_imm_st3:
8029; CHECK-SD:       ; %bb.0:
8030; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
8031; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
8032; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
8033; CHECK-SD-NEXT:    st3.8b { v0, v1, v2 }, [x0], #24
8034; CHECK-SD-NEXT:    ret
8035;
8036; CHECK-GI-LABEL: test_v8i8_post_imm_st3:
8037; CHECK-GI:       ; %bb.0:
8038; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
8039; CHECK-GI-NEXT:    mov x8, x0
8040; CHECK-GI-NEXT:    add x0, x0, #24
8041; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
8042; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
8043; CHECK-GI-NEXT:    st3.8b { v0, v1, v2 }, [x8]
8044; CHECK-GI-NEXT:    ret
8045  call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %A)
8046  %tmp = getelementptr i8, ptr %A, i32 24
8047  ret ptr %tmp
8048}
8049
; st3 of three <8 x i8> values through %A, returning %A advanced by %inc bytes.
; The SelectionDAG run is expected to use %inc (x2) directly as the post-index
; register of st3.8b; the GlobalISel run is expected to emit add x0, x0, x2 and
; a plain st3.8b through x8. %ptr is unused.
8050define ptr @test_v8i8_post_reg_st3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
8051; CHECK-SD-LABEL: test_v8i8_post_reg_st3:
8052; CHECK-SD:       ; %bb.0:
8053; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
8054; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
8055; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
8056; CHECK-SD-NEXT:    st3.8b { v0, v1, v2 }, [x0], x2
8057; CHECK-SD-NEXT:    ret
8058;
8059; CHECK-GI-LABEL: test_v8i8_post_reg_st3:
8060; CHECK-GI:       ; %bb.0:
8061; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
8062; CHECK-GI-NEXT:    mov x8, x0
8063; CHECK-GI-NEXT:    add x0, x0, x2
8064; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
8065; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
8066; CHECK-GI-NEXT:    st3.8b { v0, v1, v2 }, [x8]
8067; CHECK-GI-NEXT:    ret
8068  call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %A)
8069  %tmp = getelementptr i8, ptr %A, i64 %inc
8070  ret ptr %tmp
8071}
8072
8073declare void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, ptr)
8074
8075
; Post-increment st3 of three <8 x i16> values: stores through %A and returns
; %A advanced by 24 i16 elements (48 bytes). The SelectionDAG run is expected
; to emit one post-indexed st3.8h with immediate #48; the GlobalISel run is
; expected to emit a separate add of #48 and a plain st3.8h through x8.
; %ptr is unused.
8076define ptr @test_v8i16_post_imm_st3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
8077; CHECK-SD-LABEL: test_v8i16_post_imm_st3:
8078; CHECK-SD:       ; %bb.0:
8079; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
8080; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
8081; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
8082; CHECK-SD-NEXT:    st3.8h { v0, v1, v2 }, [x0], #48
8083; CHECK-SD-NEXT:    ret
8084;
8085; CHECK-GI-LABEL: test_v8i16_post_imm_st3:
8086; CHECK-GI:       ; %bb.0:
8087; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
8088; CHECK-GI-NEXT:    mov x8, x0
8089; CHECK-GI-NEXT:    add x0, x0, #48
8090; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
8091; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
8092; CHECK-GI-NEXT:    st3.8h { v0, v1, v2 }, [x8]
8093; CHECK-GI-NEXT:    ret
8094  call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %A)
8095  %tmp = getelementptr i16, ptr %A, i32 24
8096  ret ptr %tmp
8097}
8098
; st3 of three <8 x i16> values through %A, returning %A advanced by %inc i16
; elements. The SelectionDAG run is expected to scale %inc (x2) with lsl #1
; into x8 and use x8 as the post-index register of st3.8h; the GlobalISel run
; is expected to emit add x0, x0, x2, lsl #1 and a plain st3.8h through x8.
; %ptr is unused.
8099define ptr @test_v8i16_post_reg_st3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
8100; CHECK-SD-LABEL: test_v8i16_post_reg_st3:
8101; CHECK-SD:       ; %bb.0:
8102; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
8103; CHECK-SD-NEXT:    lsl x8, x2, #1
8104; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
8105; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
8106; CHECK-SD-NEXT:    st3.8h { v0, v1, v2 }, [x0], x8
8107; CHECK-SD-NEXT:    ret
8108;
8109; CHECK-GI-LABEL: test_v8i16_post_reg_st3:
8110; CHECK-GI:       ; %bb.0:
8111; CHECK-GI-NEXT:    mov x8, x0
8112; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
8113; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
8114; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
8115; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
8116; CHECK-GI-NEXT:    st3.8h { v0, v1, v2 }, [x8]
8117; CHECK-GI-NEXT:    ret
8118  call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %A)
8119  %tmp = getelementptr i16, ptr %A, i64 %inc
8120  ret ptr %tmp
8121}
8122
8123declare void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, ptr)
8124
8125
; Post-increment st3 of three <4 x i16> values: stores through %A and returns
; %A advanced by 12 i16 elements (24 bytes). The SelectionDAG run is expected
; to emit one post-indexed st3.4h with immediate #24; the GlobalISel run is
; expected to emit a separate add of #24 and a plain st3.4h through x8.
; %ptr is unused.
8126define ptr @test_v4i16_post_imm_st3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
8127; CHECK-SD-LABEL: test_v4i16_post_imm_st3:
8128; CHECK-SD:       ; %bb.0:
8129; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
8130; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
8131; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
8132; CHECK-SD-NEXT:    st3.4h { v0, v1, v2 }, [x0], #24
8133; CHECK-SD-NEXT:    ret
8134;
8135; CHECK-GI-LABEL: test_v4i16_post_imm_st3:
8136; CHECK-GI:       ; %bb.0:
8137; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
8138; CHECK-GI-NEXT:    mov x8, x0
8139; CHECK-GI-NEXT:    add x0, x0, #24
8140; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
8141; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
8142; CHECK-GI-NEXT:    st3.4h { v0, v1, v2 }, [x8]
8143; CHECK-GI-NEXT:    ret
8144  call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %A)
8145  %tmp = getelementptr i16, ptr %A, i32 12
8146  ret ptr %tmp
8147}
8148
; st3 of three <4 x i16> values through %A, returning %A advanced by %inc i16
; elements. The SelectionDAG run is expected to scale %inc (x2) with lsl #1
; into x8 and use x8 as the post-index register of st3.4h; the GlobalISel run
; is expected to emit add x0, x0, x2, lsl #1 and a plain st3.4h through x8.
; %ptr is unused.
8149define ptr @test_v4i16_post_reg_st3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
8150; CHECK-SD-LABEL: test_v4i16_post_reg_st3:
8151; CHECK-SD:       ; %bb.0:
8152; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
8153; CHECK-SD-NEXT:    lsl x8, x2, #1
8154; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
8155; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
8156; CHECK-SD-NEXT:    st3.4h { v0, v1, v2 }, [x0], x8
8157; CHECK-SD-NEXT:    ret
8158;
8159; CHECK-GI-LABEL: test_v4i16_post_reg_st3:
8160; CHECK-GI:       ; %bb.0:
8161; CHECK-GI-NEXT:    mov x8, x0
8162; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
8163; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
8164; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
8165; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
8166; CHECK-GI-NEXT:    st3.4h { v0, v1, v2 }, [x8]
8167; CHECK-GI-NEXT:    ret
8168  call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %A)
8169  %tmp = getelementptr i16, ptr %A, i64 %inc
8170  ret ptr %tmp
8171}
8172
8173declare void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>, ptr)
8174
8175
; Post-increment st3 of three <4 x i32> values: stores through %A and returns
; %A advanced by 12 i32 elements (48 bytes). The SelectionDAG run is expected
; to emit one post-indexed st3.4s with immediate #48; the GlobalISel run is
; expected to emit a separate add of #48 and a plain st3.4s through x8.
; %ptr is unused.
8176define ptr @test_v4i32_post_imm_st3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
8177; CHECK-SD-LABEL: test_v4i32_post_imm_st3:
8178; CHECK-SD:       ; %bb.0:
8179; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
8180; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
8181; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
8182; CHECK-SD-NEXT:    st3.4s { v0, v1, v2 }, [x0], #48
8183; CHECK-SD-NEXT:    ret
8184;
8185; CHECK-GI-LABEL: test_v4i32_post_imm_st3:
8186; CHECK-GI:       ; %bb.0:
8187; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
8188; CHECK-GI-NEXT:    mov x8, x0
8189; CHECK-GI-NEXT:    add x0, x0, #48
8190; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
8191; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
8192; CHECK-GI-NEXT:    st3.4s { v0, v1, v2 }, [x8]
8193; CHECK-GI-NEXT:    ret
8194  call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %A)
8195  %tmp = getelementptr i32, ptr %A, i32 12
8196  ret ptr %tmp
8197}
8198
; st3 of three <4 x i32> values through %A, returning %A advanced by %inc i32
; elements. The SelectionDAG run is expected to scale %inc (x2) with lsl #2
; into x8 and use x8 as the post-index register of st3.4s; the GlobalISel run
; is expected to emit add x0, x0, x2, lsl #2 and a plain st3.4s through x8.
; %ptr is unused.
8199define ptr @test_v4i32_post_reg_st3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
8200; CHECK-SD-LABEL: test_v4i32_post_reg_st3:
8201; CHECK-SD:       ; %bb.0:
8202; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
8203; CHECK-SD-NEXT:    lsl x8, x2, #2
8204; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
8205; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
8206; CHECK-SD-NEXT:    st3.4s { v0, v1, v2 }, [x0], x8
8207; CHECK-SD-NEXT:    ret
8208;
8209; CHECK-GI-LABEL: test_v4i32_post_reg_st3:
8210; CHECK-GI:       ; %bb.0:
8211; CHECK-GI-NEXT:    mov x8, x0
8212; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
8213; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
8214; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
8215; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
8216; CHECK-GI-NEXT:    st3.4s { v0, v1, v2 }, [x8]
8217; CHECK-GI-NEXT:    ret
8218  call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %A)
8219  %tmp = getelementptr i32, ptr %A, i64 %inc
8220  ret ptr %tmp
8221}
8222
8223declare void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, ptr)
8224
8225
; Post-increment st3 of three <2 x i32> values: stores through %A and returns
; %A advanced by 6 i32 elements (24 bytes). The SelectionDAG run is expected
; to emit one post-indexed st3.2s with immediate #24; the GlobalISel run is
; expected to emit a separate add of #24 and a plain st3.2s through x8.
; %ptr is unused.
8226define ptr @test_v2i32_post_imm_st3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
8227; CHECK-SD-LABEL: test_v2i32_post_imm_st3:
8228; CHECK-SD:       ; %bb.0:
8229; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
8230; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
8231; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
8232; CHECK-SD-NEXT:    st3.2s { v0, v1, v2 }, [x0], #24
8233; CHECK-SD-NEXT:    ret
8234;
8235; CHECK-GI-LABEL: test_v2i32_post_imm_st3:
8236; CHECK-GI:       ; %bb.0:
8237; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
8238; CHECK-GI-NEXT:    mov x8, x0
8239; CHECK-GI-NEXT:    add x0, x0, #24
8240; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
8241; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
8242; CHECK-GI-NEXT:    st3.2s { v0, v1, v2 }, [x8]
8243; CHECK-GI-NEXT:    ret
8244  call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %A)
8245  %tmp = getelementptr i32, ptr %A, i32 6
8246  ret ptr %tmp
8247}
8248
; st3 of three <2 x i32> values through %A, returning %A advanced by %inc i32
; elements. The SelectionDAG run is expected to scale %inc (x2) with lsl #2
; into x8 and use x8 as the post-index register of st3.2s; the GlobalISel run
; is expected to emit add x0, x0, x2, lsl #2 and a plain st3.2s through x8.
; %ptr is unused.
8249define ptr @test_v2i32_post_reg_st3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
8250; CHECK-SD-LABEL: test_v2i32_post_reg_st3:
8251; CHECK-SD:       ; %bb.0:
8252; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
8253; CHECK-SD-NEXT:    lsl x8, x2, #2
8254; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
8255; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
8256; CHECK-SD-NEXT:    st3.2s { v0, v1, v2 }, [x0], x8
8257; CHECK-SD-NEXT:    ret
8258;
8259; CHECK-GI-LABEL: test_v2i32_post_reg_st3:
8260; CHECK-GI:       ; %bb.0:
8261; CHECK-GI-NEXT:    mov x8, x0
8262; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
8263; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
8264; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
8265; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
8266; CHECK-GI-NEXT:    st3.2s { v0, v1, v2 }, [x8]
8267; CHECK-GI-NEXT:    ret
8268  call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %A)
8269  %tmp = getelementptr i32, ptr %A, i64 %inc
8270  ret ptr %tmp
8271}
8272
8273declare void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, ptr)
8274
8275
; Post-increment st3 of three <2 x i64> values: stores through %A and returns
; %A advanced by 6 i64 elements (48 bytes). The SelectionDAG run is expected
; to emit one post-indexed st3.2d with immediate #48; the GlobalISel run is
; expected to emit a separate add of #48 and a plain st3.2d through x8.
; %ptr is unused.
8276define ptr @test_v2i64_post_imm_st3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
8277; CHECK-SD-LABEL: test_v2i64_post_imm_st3:
8278; CHECK-SD:       ; %bb.0:
8279; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
8280; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
8281; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
8282; CHECK-SD-NEXT:    st3.2d { v0, v1, v2 }, [x0], #48
8283; CHECK-SD-NEXT:    ret
8284;
8285; CHECK-GI-LABEL: test_v2i64_post_imm_st3:
8286; CHECK-GI:       ; %bb.0:
8287; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
8288; CHECK-GI-NEXT:    mov x8, x0
8289; CHECK-GI-NEXT:    add x0, x0, #48
8290; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
8291; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
8292; CHECK-GI-NEXT:    st3.2d { v0, v1, v2 }, [x8]
8293; CHECK-GI-NEXT:    ret
8294  call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %A)
8295  %tmp = getelementptr i64, ptr %A, i64 6
8296  ret ptr %tmp
8297}
8298
; st3 of three <2 x i64> values through %A, returning %A advanced by %inc i64
; elements. The SelectionDAG run is expected to scale %inc (x2) with lsl #3
; into x8 and use x8 as the post-index register of st3.2d; the GlobalISel run
; is expected to emit add x0, x0, x2, lsl #3 and a plain st3.2d through x8.
; %ptr is unused.
8299define ptr @test_v2i64_post_reg_st3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
8300; CHECK-SD-LABEL: test_v2i64_post_reg_st3:
8301; CHECK-SD:       ; %bb.0:
8302; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
8303; CHECK-SD-NEXT:    lsl x8, x2, #3
8304; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
8305; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
8306; CHECK-SD-NEXT:    st3.2d { v0, v1, v2 }, [x0], x8
8307; CHECK-SD-NEXT:    ret
8308;
8309; CHECK-GI-LABEL: test_v2i64_post_reg_st3:
8310; CHECK-GI:       ; %bb.0:
8311; CHECK-GI-NEXT:    mov x8, x0
8312; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
8313; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
8314; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
8315; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
8316; CHECK-GI-NEXT:    st3.2d { v0, v1, v2 }, [x8]
8317; CHECK-GI-NEXT:    ret
8318  call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %A)
8319  %tmp = getelementptr i64, ptr %A, i64 %inc
8320  ret ptr %tmp
8321}
8322
8323declare void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, ptr)
8324
8325
; Post-increment st3 of three <1 x i64> values: stores through %A and returns
; %A advanced by 3 i64 elements (24 bytes). The SelectionDAG run is expected
; to emit one post-indexed three-register st1.1d with immediate #24; the
; GlobalISel run is expected to emit a separate add of #24 and a plain st1.1d
; through x8. %ptr is unused.
8326define ptr @test_v1i64_post_imm_st3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
8327; CHECK-SD-LABEL: test_v1i64_post_imm_st3:
8328; CHECK-SD:       ; %bb.0:
8329; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
8330; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
8331; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
8332; CHECK-SD-NEXT:    st1.1d { v0, v1, v2 }, [x0], #24
8333; CHECK-SD-NEXT:    ret
8334;
8335; CHECK-GI-LABEL: test_v1i64_post_imm_st3:
8336; CHECK-GI:       ; %bb.0:
8337; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
8338; CHECK-GI-NEXT:    mov x8, x0
8339; CHECK-GI-NEXT:    add x0, x0, #24
8340; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
8341; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
8342; CHECK-GI-NEXT:    st1.1d { v0, v1, v2 }, [x8]
8343; CHECK-GI-NEXT:    ret
8344  call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %A)
8345  %tmp = getelementptr i64, ptr %A, i64 3
8346  ret ptr %tmp
8347}
8348
; st3 of three <1 x i64> values through %A, returning %A advanced by %inc i64
; elements. The SelectionDAG run is expected to scale %inc (x2) with lsl #3
; into x8 and use x8 as the post-index register of a three-register st1.1d;
; the GlobalISel run is expected to emit add x0, x0, x2, lsl #3 and a plain
; st1.1d through x8. %ptr is unused.
8349define ptr @test_v1i64_post_reg_st3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
8350; CHECK-SD-LABEL: test_v1i64_post_reg_st3:
8351; CHECK-SD:       ; %bb.0:
8352; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
8353; CHECK-SD-NEXT:    lsl x8, x2, #3
8354; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
8355; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
8356; CHECK-SD-NEXT:    st1.1d { v0, v1, v2 }, [x0], x8
8357; CHECK-SD-NEXT:    ret
8358;
8359; CHECK-GI-LABEL: test_v1i64_post_reg_st3:
8360; CHECK-GI:       ; %bb.0:
8361; CHECK-GI-NEXT:    mov x8, x0
8362; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
8363; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
8364; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
8365; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
8366; CHECK-GI-NEXT:    st1.1d { v0, v1, v2 }, [x8]
8367; CHECK-GI-NEXT:    ret
8368  call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %A)
8369  %tmp = getelementptr i64, ptr %A, i64 %inc
8370  ret ptr %tmp
8371}
8372
8373declare void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>, ptr)
8374
8375
; Post-increment st3 of three <4 x float> values: stores through %A and returns
; %A advanced by 12 floats (48 bytes). The SelectionDAG run is expected to emit
; one post-indexed st3.4s with immediate #48; the GlobalISel run is expected to
; emit a separate add of #48 and a plain st3.4s through x8. %ptr is unused.
8376define ptr @test_v4f32_post_imm_st3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
8377; CHECK-SD-LABEL: test_v4f32_post_imm_st3:
8378; CHECK-SD:       ; %bb.0:
8379; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
8380; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
8381; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
8382; CHECK-SD-NEXT:    st3.4s { v0, v1, v2 }, [x0], #48
8383; CHECK-SD-NEXT:    ret
8384;
8385; CHECK-GI-LABEL: test_v4f32_post_imm_st3:
8386; CHECK-GI:       ; %bb.0:
8387; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
8388; CHECK-GI-NEXT:    mov x8, x0
8389; CHECK-GI-NEXT:    add x0, x0, #48
8390; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
8391; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
8392; CHECK-GI-NEXT:    st3.4s { v0, v1, v2 }, [x8]
8393; CHECK-GI-NEXT:    ret
8394  call void @llvm.aarch64.neon.st3.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %A)
8395  %tmp = getelementptr float, ptr %A, i32 12
8396  ret ptr %tmp
8397}
8398
; st3 of three <4 x float> values through %A, returning %A advanced by %inc
; floats. The SelectionDAG run is expected to scale %inc (x2) with lsl #2 into
; x8 and use x8 as the post-index register of st3.4s; the GlobalISel run is
; expected to emit add x0, x0, x2, lsl #2 and a plain st3.4s through x8.
; %ptr is unused.
8399define ptr @test_v4f32_post_reg_st3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
8400; CHECK-SD-LABEL: test_v4f32_post_reg_st3:
8401; CHECK-SD:       ; %bb.0:
8402; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
8403; CHECK-SD-NEXT:    lsl x8, x2, #2
8404; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
8405; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
8406; CHECK-SD-NEXT:    st3.4s { v0, v1, v2 }, [x0], x8
8407; CHECK-SD-NEXT:    ret
8408;
8409; CHECK-GI-LABEL: test_v4f32_post_reg_st3:
8410; CHECK-GI:       ; %bb.0:
8411; CHECK-GI-NEXT:    mov x8, x0
8412; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
8413; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
8414; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
8415; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
8416; CHECK-GI-NEXT:    st3.4s { v0, v1, v2 }, [x8]
8417; CHECK-GI-NEXT:    ret
8418  call void @llvm.aarch64.neon.st3.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %A)
8419  %tmp = getelementptr float, ptr %A, i64 %inc
8420  ret ptr %tmp
8421}
8422
8423declare void @llvm.aarch64.neon.st3.v4f32.p0(<4 x float>, <4 x float>, <4 x float>, ptr)
8424
8425
; Post-increment st3 of three <2 x float> values: stores through %A and returns
; %A advanced by 6 floats (24 bytes). The SelectionDAG run is expected to emit
; one post-indexed st3.2s with immediate #24; the GlobalISel run is expected to
; emit a separate add of #24 and a plain st3.2s through x8. %ptr is unused.
8426define ptr @test_v2f32_post_imm_st3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
8427; CHECK-SD-LABEL: test_v2f32_post_imm_st3:
8428; CHECK-SD:       ; %bb.0:
8429; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
8430; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
8431; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
8432; CHECK-SD-NEXT:    st3.2s { v0, v1, v2 }, [x0], #24
8433; CHECK-SD-NEXT:    ret
8434;
8435; CHECK-GI-LABEL: test_v2f32_post_imm_st3:
8436; CHECK-GI:       ; %bb.0:
8437; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
8438; CHECK-GI-NEXT:    mov x8, x0
8439; CHECK-GI-NEXT:    add x0, x0, #24
8440; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
8441; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
8442; CHECK-GI-NEXT:    st3.2s { v0, v1, v2 }, [x8]
8443; CHECK-GI-NEXT:    ret
8444  call void @llvm.aarch64.neon.st3.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %A)
8445  %tmp = getelementptr float, ptr %A, i32 6
8446  ret ptr %tmp
8447}
8448
; st3 of three <2 x float> values through %A, returning %A advanced by %inc
; floats. The SelectionDAG run is expected to scale %inc (x2) with lsl #2 into
; x8 and use x8 as the post-index register of st3.2s; the GlobalISel run is
; expected to emit add x0, x0, x2, lsl #2 and a plain st3.2s through x8.
; %ptr is unused.
8449define ptr @test_v2f32_post_reg_st3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
8450; CHECK-SD-LABEL: test_v2f32_post_reg_st3:
8451; CHECK-SD:       ; %bb.0:
8452; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
8453; CHECK-SD-NEXT:    lsl x8, x2, #2
8454; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
8455; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
8456; CHECK-SD-NEXT:    st3.2s { v0, v1, v2 }, [x0], x8
8457; CHECK-SD-NEXT:    ret
8458;
8459; CHECK-GI-LABEL: test_v2f32_post_reg_st3:
8460; CHECK-GI:       ; %bb.0:
8461; CHECK-GI-NEXT:    mov x8, x0
8462; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
8463; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
8464; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
8465; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
8466; CHECK-GI-NEXT:    st3.2s { v0, v1, v2 }, [x8]
8467; CHECK-GI-NEXT:    ret
8468  call void @llvm.aarch64.neon.st3.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %A)
8469  %tmp = getelementptr float, ptr %A, i64 %inc
8470  ret ptr %tmp
8471}
8472
8473declare void @llvm.aarch64.neon.st3.v2f32.p0(<2 x float>, <2 x float>, <2 x float>, ptr)
8474
8475
; Post-increment st3 of three <2 x double> values: stores through %A and
; returns %A advanced by 6 doubles (48 bytes). The SelectionDAG run is expected
; to emit one post-indexed st3.2d with immediate #48; the GlobalISel run is
; expected to emit a separate add of #48 and a plain st3.2d through x8.
; %ptr is unused.
8476define ptr @test_v2f64_post_imm_st3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
8477; CHECK-SD-LABEL: test_v2f64_post_imm_st3:
8478; CHECK-SD:       ; %bb.0:
8479; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
8480; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
8481; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
8482; CHECK-SD-NEXT:    st3.2d { v0, v1, v2 }, [x0], #48
8483; CHECK-SD-NEXT:    ret
8484;
8485; CHECK-GI-LABEL: test_v2f64_post_imm_st3:
8486; CHECK-GI:       ; %bb.0:
8487; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
8488; CHECK-GI-NEXT:    mov x8, x0
8489; CHECK-GI-NEXT:    add x0, x0, #48
8490; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
8491; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
8492; CHECK-GI-NEXT:    st3.2d { v0, v1, v2 }, [x8]
8493; CHECK-GI-NEXT:    ret
8494  call void @llvm.aarch64.neon.st3.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %A)
8495  %tmp = getelementptr double, ptr %A, i64 6
8496  ret ptr %tmp
8497}
8498
; st3 of three <2 x double> values through %A, returning %A advanced by %inc
; doubles. The SelectionDAG run is expected to scale %inc (x2) with lsl #3 into
; x8 and use x8 as the post-index register of st3.2d; the GlobalISel run is
; expected to emit add x0, x0, x2, lsl #3 and a plain st3.2d through x8.
; %ptr is unused.
8499define ptr @test_v2f64_post_reg_st3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
8500; CHECK-SD-LABEL: test_v2f64_post_reg_st3:
8501; CHECK-SD:       ; %bb.0:
8502; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
8503; CHECK-SD-NEXT:    lsl x8, x2, #3
8504; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
8505; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
8506; CHECK-SD-NEXT:    st3.2d { v0, v1, v2 }, [x0], x8
8507; CHECK-SD-NEXT:    ret
8508;
8509; CHECK-GI-LABEL: test_v2f64_post_reg_st3:
8510; CHECK-GI:       ; %bb.0:
8511; CHECK-GI-NEXT:    mov x8, x0
8512; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
8513; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
8514; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
8515; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
8516; CHECK-GI-NEXT:    st3.2d { v0, v1, v2 }, [x8]
8517; CHECK-GI-NEXT:    ret
8518  call void @llvm.aarch64.neon.st3.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %A)
8519  %tmp = getelementptr double, ptr %A, i64 %inc
8520  ret ptr %tmp
8521}
8522
8523declare void @llvm.aarch64.neon.st3.v2f64.p0(<2 x double>, <2 x double>, <2 x double>, ptr)
8524
8525
; Post-increment st3 of three <1 x double> values: stores through %A and
; returns %A advanced by 3 doubles (24 bytes). The SelectionDAG run is expected
; to emit one post-indexed three-register st1.1d with immediate #24; the
; GlobalISel run is expected to emit a separate add of #24 and a plain st1.1d
; through x8. %ptr is unused.
8526define ptr @test_v1f64_post_imm_st3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
8527; CHECK-SD-LABEL: test_v1f64_post_imm_st3:
8528; CHECK-SD:       ; %bb.0:
8529; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
8530; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
8531; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
8532; CHECK-SD-NEXT:    st1.1d { v0, v1, v2 }, [x0], #24
8533; CHECK-SD-NEXT:    ret
8534;
8535; CHECK-GI-LABEL: test_v1f64_post_imm_st3:
8536; CHECK-GI:       ; %bb.0:
8537; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
8538; CHECK-GI-NEXT:    mov x8, x0
8539; CHECK-GI-NEXT:    add x0, x0, #24
8540; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
8541; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
8542; CHECK-GI-NEXT:    st1.1d { v0, v1, v2 }, [x8]
8543; CHECK-GI-NEXT:    ret
8544  call void @llvm.aarch64.neon.st3.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %A)
8545  %tmp = getelementptr double, ptr %A, i64 3
8546  ret ptr %tmp
8547}
8548
; st3 of three <1 x double> values through %A, returning %A advanced by %inc
; doubles. The SelectionDAG run is expected to scale %inc (x2) with lsl #3 into
; x8 and use x8 as the post-index register of a three-register st1.1d; the
; GlobalISel run is expected to emit add x0, x0, x2, lsl #3 and a plain st1.1d
; through x8. %ptr is unused.
8549define ptr @test_v1f64_post_reg_st3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
8550; CHECK-SD-LABEL: test_v1f64_post_reg_st3:
8551; CHECK-SD:       ; %bb.0:
8552; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
8553; CHECK-SD-NEXT:    lsl x8, x2, #3
8554; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
8555; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
8556; CHECK-SD-NEXT:    st1.1d { v0, v1, v2 }, [x0], x8
8557; CHECK-SD-NEXT:    ret
8558;
8559; CHECK-GI-LABEL: test_v1f64_post_reg_st3:
8560; CHECK-GI:       ; %bb.0:
8561; CHECK-GI-NEXT:    mov x8, x0
8562; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
8563; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
8564; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
8565; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
8566; CHECK-GI-NEXT:    st1.1d { v0, v1, v2 }, [x8]
8567; CHECK-GI-NEXT:    ret
8568  call void @llvm.aarch64.neon.st3.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %A)
8569  %tmp = getelementptr double, ptr %A, i64 %inc
8570  ret ptr %tmp
8571}
8572
8573declare void @llvm.aarch64.neon.st3.v1f64.p0(<1 x double>, <1 x double>, <1 x double>, ptr)
8574
8575
; Post-increment st4 of four <16 x i8> values: stores through %A and returns
; %A advanced by 64 bytes. The SelectionDAG run is expected to emit one
; post-indexed st4.16b with immediate #64; the GlobalISel run is expected to
; emit a separate add of #64 and a plain st4.16b through x8. %ptr is unused.
8576define ptr @test_v16i8_post_imm_st4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
8577; CHECK-SD-LABEL: test_v16i8_post_imm_st4:
8578; CHECK-SD:       ; %bb.0:
8579; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8580; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8581; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8582; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8583; CHECK-SD-NEXT:    st4.16b { v0, v1, v2, v3 }, [x0], #64
8584; CHECK-SD-NEXT:    ret
8585;
8586; CHECK-GI-LABEL: test_v16i8_post_imm_st4:
8587; CHECK-GI:       ; %bb.0:
8588; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8589; CHECK-GI-NEXT:    mov x8, x0
8590; CHECK-GI-NEXT:    add x0, x0, #64
8591; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8592; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8593; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8594; CHECK-GI-NEXT:    st4.16b { v0, v1, v2, v3 }, [x8]
8595; CHECK-GI-NEXT:    ret
8596  call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, ptr %A)
8597  %tmp = getelementptr i8, ptr %A, i32 64
8598  ret ptr %tmp
8599}
8600
; Post-increment st4 of four <16 x i8> vectors with a run-time step.
; Stores %B..%E through %A and returns %A advanced by %inc i8 elements, so the
; byte step equals %inc and no scaling is expected.
; Expected output: the llc run without -global-isel uses the register
; post-indexed form `st4.16b ..., [x0], x2`; the -global-isel=1 run emits
; `add x0, x0, x2` and stores through a copy of the pointer in x8.
; %ptr is not referenced by the body.
8601define ptr @test_v16i8_post_reg_st4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
8602; CHECK-SD-LABEL: test_v16i8_post_reg_st4:
8603; CHECK-SD:       ; %bb.0:
8604; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8605; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8606; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8607; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8608; CHECK-SD-NEXT:    st4.16b { v0, v1, v2, v3 }, [x0], x2
8609; CHECK-SD-NEXT:    ret
8610;
8611; CHECK-GI-LABEL: test_v16i8_post_reg_st4:
8612; CHECK-GI:       ; %bb.0:
8613; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8614; CHECK-GI-NEXT:    mov x8, x0
8615; CHECK-GI-NEXT:    add x0, x0, x2
8616; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8617; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8618; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8619; CHECK-GI-NEXT:    st4.16b { v0, v1, v2, v3 }, [x8]
8620; CHECK-GI-NEXT:    ret
8621  call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, ptr %A)
8622  %tmp = getelementptr i8, ptr %A, i64 %inc
8623  ret ptr %tmp
8624}
8625
8626declare void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, ptr)
8627
8628
; Post-increment st4 of four <8 x i8> vectors with a constant step.
; Stores %B..%E through %A and returns %A advanced by 32 i8 elements (32 bytes).
; Expected output: the llc run without -global-isel folds the step into the
; post-indexed form `st4.8b ..., [x0], #32`; the -global-isel=1 run keeps a
; separate `add x0, x0, #32` and stores through a copy of the pointer in x8.
; %ptr is not referenced by the body.
8629define ptr @test_v8i8_post_imm_st4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
8630; CHECK-SD-LABEL: test_v8i8_post_imm_st4:
8631; CHECK-SD:       ; %bb.0:
8632; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8633; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8634; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8635; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8636; CHECK-SD-NEXT:    st4.8b { v0, v1, v2, v3 }, [x0], #32
8637; CHECK-SD-NEXT:    ret
8638;
8639; CHECK-GI-LABEL: test_v8i8_post_imm_st4:
8640; CHECK-GI:       ; %bb.0:
8641; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8642; CHECK-GI-NEXT:    mov x8, x0
8643; CHECK-GI-NEXT:    add x0, x0, #32
8644; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8645; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8646; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8647; CHECK-GI-NEXT:    st4.8b { v0, v1, v2, v3 }, [x8]
8648; CHECK-GI-NEXT:    ret
8649  call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, ptr %A)
8650  %tmp = getelementptr i8, ptr %A, i32 32
8651  ret ptr %tmp
8652}
8653
; Post-increment st4 of four <8 x i8> vectors with a run-time step.
; Stores %B..%E through %A and returns %A advanced by %inc i8 elements, so the
; byte step equals %inc and no scaling is expected.
; Expected output: the llc run without -global-isel uses the register
; post-indexed form `st4.8b ..., [x0], x2`; the -global-isel=1 run emits
; `add x0, x0, x2` and stores through a copy of the pointer in x8.
; %ptr is not referenced by the body.
8654define ptr @test_v8i8_post_reg_st4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
8655; CHECK-SD-LABEL: test_v8i8_post_reg_st4:
8656; CHECK-SD:       ; %bb.0:
8657; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8658; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8659; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8660; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8661; CHECK-SD-NEXT:    st4.8b { v0, v1, v2, v3 }, [x0], x2
8662; CHECK-SD-NEXT:    ret
8663;
8664; CHECK-GI-LABEL: test_v8i8_post_reg_st4:
8665; CHECK-GI:       ; %bb.0:
8666; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8667; CHECK-GI-NEXT:    mov x8, x0
8668; CHECK-GI-NEXT:    add x0, x0, x2
8669; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8670; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8671; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8672; CHECK-GI-NEXT:    st4.8b { v0, v1, v2, v3 }, [x8]
8673; CHECK-GI-NEXT:    ret
8674  call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, ptr %A)
8675  %tmp = getelementptr i8, ptr %A, i64 %inc
8676  ret ptr %tmp
8677}
8678
8679declare void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, ptr)
8680
8681
; Post-increment st4 of four <8 x i16> vectors with a constant step.
; Stores %B..%E through %A and returns %A advanced by 32 i16 elements (64 bytes).
; Expected output: the llc run without -global-isel folds the step into the
; post-indexed form `st4.8h ..., [x0], #64`; the -global-isel=1 run keeps a
; separate `add x0, x0, #64` and stores through a copy of the pointer in x8.
; %ptr is not referenced by the body.
8682define ptr @test_v8i16_post_imm_st4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
8683; CHECK-SD-LABEL: test_v8i16_post_imm_st4:
8684; CHECK-SD:       ; %bb.0:
8685; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8686; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8687; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8688; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8689; CHECK-SD-NEXT:    st4.8h { v0, v1, v2, v3 }, [x0], #64
8690; CHECK-SD-NEXT:    ret
8691;
8692; CHECK-GI-LABEL: test_v8i16_post_imm_st4:
8693; CHECK-GI:       ; %bb.0:
8694; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8695; CHECK-GI-NEXT:    mov x8, x0
8696; CHECK-GI-NEXT:    add x0, x0, #64
8697; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8698; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8699; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8700; CHECK-GI-NEXT:    st4.8h { v0, v1, v2, v3 }, [x8]
8701; CHECK-GI-NEXT:    ret
8702  call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, ptr %A)
8703  %tmp = getelementptr i16, ptr %A, i32 32
8704  ret ptr %tmp
8705}
8706
; Post-increment st4 of four <8 x i16> vectors with a run-time step.
; Stores %B..%E through %A and returns %A advanced by %inc i16 elements, i.e.
; %inc * 2 bytes.
; Expected output: the llc run without -global-isel scales the step with
; `lsl x8, x2, #1` and uses the register post-indexed form `[x0], x8`; the
; -global-isel=1 run emits `add x0, x0, x2, lsl #1` and stores through a copy
; of the pointer in x8. %ptr is not referenced by the body.
8707define ptr @test_v8i16_post_reg_st4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
8708; CHECK-SD-LABEL: test_v8i16_post_reg_st4:
8709; CHECK-SD:       ; %bb.0:
8710; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8711; CHECK-SD-NEXT:    lsl x8, x2, #1
8712; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8713; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8714; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8715; CHECK-SD-NEXT:    st4.8h { v0, v1, v2, v3 }, [x0], x8
8716; CHECK-SD-NEXT:    ret
8717;
8718; CHECK-GI-LABEL: test_v8i16_post_reg_st4:
8719; CHECK-GI:       ; %bb.0:
8720; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8721; CHECK-GI-NEXT:    mov x8, x0
8722; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
8723; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8724; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8725; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8726; CHECK-GI-NEXT:    st4.8h { v0, v1, v2, v3 }, [x8]
8727; CHECK-GI-NEXT:    ret
8728  call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, ptr %A)
8729  %tmp = getelementptr i16, ptr %A, i64 %inc
8730  ret ptr %tmp
8731}
8732
8733declare void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, ptr)
8734
8735
; Post-increment st4 of four <4 x i16> vectors with a constant step.
; Stores %B..%E through %A and returns %A advanced by 16 i16 elements (32 bytes).
; Expected output: the llc run without -global-isel folds the step into the
; post-indexed form `st4.4h ..., [x0], #32`; the -global-isel=1 run keeps a
; separate `add x0, x0, #32` and stores through a copy of the pointer in x8.
; %ptr is not referenced by the body.
8736define ptr @test_v4i16_post_imm_st4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
8737; CHECK-SD-LABEL: test_v4i16_post_imm_st4:
8738; CHECK-SD:       ; %bb.0:
8739; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8740; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8741; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8742; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8743; CHECK-SD-NEXT:    st4.4h { v0, v1, v2, v3 }, [x0], #32
8744; CHECK-SD-NEXT:    ret
8745;
8746; CHECK-GI-LABEL: test_v4i16_post_imm_st4:
8747; CHECK-GI:       ; %bb.0:
8748; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8749; CHECK-GI-NEXT:    mov x8, x0
8750; CHECK-GI-NEXT:    add x0, x0, #32
8751; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8752; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8753; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8754; CHECK-GI-NEXT:    st4.4h { v0, v1, v2, v3 }, [x8]
8755; CHECK-GI-NEXT:    ret
8756  call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, ptr %A)
8757  %tmp = getelementptr i16, ptr %A, i32 16
8758  ret ptr %tmp
8759}
8760
; Post-increment st4 of four <4 x i16> vectors with a run-time step.
; Stores %B..%E through %A and returns %A advanced by %inc i16 elements, i.e.
; %inc * 2 bytes.
; Expected output: the llc run without -global-isel scales the step with
; `lsl x8, x2, #1` and uses the register post-indexed form `[x0], x8`; the
; -global-isel=1 run emits `add x0, x0, x2, lsl #1` and stores through a copy
; of the pointer in x8. %ptr is not referenced by the body.
8761define ptr @test_v4i16_post_reg_st4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
8762; CHECK-SD-LABEL: test_v4i16_post_reg_st4:
8763; CHECK-SD:       ; %bb.0:
8764; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8765; CHECK-SD-NEXT:    lsl x8, x2, #1
8766; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8767; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8768; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8769; CHECK-SD-NEXT:    st4.4h { v0, v1, v2, v3 }, [x0], x8
8770; CHECK-SD-NEXT:    ret
8771;
8772; CHECK-GI-LABEL: test_v4i16_post_reg_st4:
8773; CHECK-GI:       ; %bb.0:
8774; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8775; CHECK-GI-NEXT:    mov x8, x0
8776; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
8777; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8778; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8779; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8780; CHECK-GI-NEXT:    st4.4h { v0, v1, v2, v3 }, [x8]
8781; CHECK-GI-NEXT:    ret
8782  call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, ptr %A)
8783  %tmp = getelementptr i16, ptr %A, i64 %inc
8784  ret ptr %tmp
8785}
8786
8787declare void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>,<4 x i16>,  ptr)
8788
8789
; Post-increment st4 of four <4 x i32> vectors with a constant step.
; Stores %B..%E through %A and returns %A advanced by 16 i32 elements (64 bytes).
; Expected output: the llc run without -global-isel folds the step into the
; post-indexed form `st4.4s ..., [x0], #64`; the -global-isel=1 run keeps a
; separate `add x0, x0, #64` and stores through a copy of the pointer in x8.
; %ptr is not referenced by the body.
8790define ptr @test_v4i32_post_imm_st4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
8791; CHECK-SD-LABEL: test_v4i32_post_imm_st4:
8792; CHECK-SD:       ; %bb.0:
8793; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8794; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8795; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8796; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8797; CHECK-SD-NEXT:    st4.4s { v0, v1, v2, v3 }, [x0], #64
8798; CHECK-SD-NEXT:    ret
8799;
8800; CHECK-GI-LABEL: test_v4i32_post_imm_st4:
8801; CHECK-GI:       ; %bb.0:
8802; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8803; CHECK-GI-NEXT:    mov x8, x0
8804; CHECK-GI-NEXT:    add x0, x0, #64
8805; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8806; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8807; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8808; CHECK-GI-NEXT:    st4.4s { v0, v1, v2, v3 }, [x8]
8809; CHECK-GI-NEXT:    ret
8810  call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, ptr %A)
8811  %tmp = getelementptr i32, ptr %A, i32 16
8812  ret ptr %tmp
8813}
8814
; Post-increment st4 of four <4 x i32> vectors with a run-time step.
; Stores %B..%E through %A and returns %A advanced by %inc i32 elements, i.e.
; %inc * 4 bytes.
; Expected output: the llc run without -global-isel scales the step with
; `lsl x8, x2, #2` and uses the register post-indexed form `[x0], x8`; the
; -global-isel=1 run emits `add x0, x0, x2, lsl #2` and stores through a copy
; of the pointer in x8. %ptr is not referenced by the body.
8815define ptr @test_v4i32_post_reg_st4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
8816; CHECK-SD-LABEL: test_v4i32_post_reg_st4:
8817; CHECK-SD:       ; %bb.0:
8818; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8819; CHECK-SD-NEXT:    lsl x8, x2, #2
8820; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8821; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8822; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8823; CHECK-SD-NEXT:    st4.4s { v0, v1, v2, v3 }, [x0], x8
8824; CHECK-SD-NEXT:    ret
8825;
8826; CHECK-GI-LABEL: test_v4i32_post_reg_st4:
8827; CHECK-GI:       ; %bb.0:
8828; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8829; CHECK-GI-NEXT:    mov x8, x0
8830; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
8831; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8832; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8833; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8834; CHECK-GI-NEXT:    st4.4s { v0, v1, v2, v3 }, [x8]
8835; CHECK-GI-NEXT:    ret
8836  call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, ptr %A)
8837  %tmp = getelementptr i32, ptr %A, i64 %inc
8838  ret ptr %tmp
8839}
8840
8841declare void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>,<4 x i32>,  ptr)
8842
8843
; Post-increment st4 of four <2 x i32> vectors with a constant step.
; Stores %B..%E through %A and returns %A advanced by 8 i32 elements (32 bytes).
; Expected output: the llc run without -global-isel folds the step into the
; post-indexed form `st4.2s ..., [x0], #32`; the -global-isel=1 run keeps a
; separate `add x0, x0, #32` and stores through a copy of the pointer in x8.
; %ptr is not referenced by the body.
8844define ptr @test_v2i32_post_imm_st4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
8845; CHECK-SD-LABEL: test_v2i32_post_imm_st4:
8846; CHECK-SD:       ; %bb.0:
8847; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8848; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8849; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8850; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8851; CHECK-SD-NEXT:    st4.2s { v0, v1, v2, v3 }, [x0], #32
8852; CHECK-SD-NEXT:    ret
8853;
8854; CHECK-GI-LABEL: test_v2i32_post_imm_st4:
8855; CHECK-GI:       ; %bb.0:
8856; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8857; CHECK-GI-NEXT:    mov x8, x0
8858; CHECK-GI-NEXT:    add x0, x0, #32
8859; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8860; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8861; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8862; CHECK-GI-NEXT:    st4.2s { v0, v1, v2, v3 }, [x8]
8863; CHECK-GI-NEXT:    ret
8864  call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, ptr %A)
8865  %tmp = getelementptr i32, ptr %A, i32 8
8866  ret ptr %tmp
8867}
8868
; Post-increment st4 of four <2 x i32> vectors with a run-time step.
; Stores %B..%E through %A and returns %A advanced by %inc i32 elements, i.e.
; %inc * 4 bytes.
; Expected output: the llc run without -global-isel scales the step with
; `lsl x8, x2, #2` and uses the register post-indexed form `[x0], x8`; the
; -global-isel=1 run emits `add x0, x0, x2, lsl #2` and stores through a copy
; of the pointer in x8. %ptr is not referenced by the body.
8869define ptr @test_v2i32_post_reg_st4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
8870; CHECK-SD-LABEL: test_v2i32_post_reg_st4:
8871; CHECK-SD:       ; %bb.0:
8872; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8873; CHECK-SD-NEXT:    lsl x8, x2, #2
8874; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8875; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8876; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8877; CHECK-SD-NEXT:    st4.2s { v0, v1, v2, v3 }, [x0], x8
8878; CHECK-SD-NEXT:    ret
8879;
8880; CHECK-GI-LABEL: test_v2i32_post_reg_st4:
8881; CHECK-GI:       ; %bb.0:
8882; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8883; CHECK-GI-NEXT:    mov x8, x0
8884; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
8885; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8886; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8887; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8888; CHECK-GI-NEXT:    st4.2s { v0, v1, v2, v3 }, [x8]
8889; CHECK-GI-NEXT:    ret
8890  call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, ptr %A)
8891  %tmp = getelementptr i32, ptr %A, i64 %inc
8892  ret ptr %tmp
8893}
8894
8895declare void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, ptr)
8896
8897
; Post-increment st4 of four <2 x i64> vectors with a constant step.
; Stores %B..%E through %A and returns %A advanced by 8 i64 elements (64 bytes).
; Expected output: the llc run without -global-isel folds the step into the
; post-indexed form `st4.2d ..., [x0], #64`; the -global-isel=1 run keeps a
; separate `add x0, x0, #64` and stores through a copy of the pointer in x8.
; %ptr is not referenced by the body.
8898define ptr @test_v2i64_post_imm_st4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
8899; CHECK-SD-LABEL: test_v2i64_post_imm_st4:
8900; CHECK-SD:       ; %bb.0:
8901; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8902; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8903; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8904; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8905; CHECK-SD-NEXT:    st4.2d { v0, v1, v2, v3 }, [x0], #64
8906; CHECK-SD-NEXT:    ret
8907;
8908; CHECK-GI-LABEL: test_v2i64_post_imm_st4:
8909; CHECK-GI:       ; %bb.0:
8910; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8911; CHECK-GI-NEXT:    mov x8, x0
8912; CHECK-GI-NEXT:    add x0, x0, #64
8913; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8914; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8915; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8916; CHECK-GI-NEXT:    st4.2d { v0, v1, v2, v3 }, [x8]
8917; CHECK-GI-NEXT:    ret
8918  call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, ptr %A)
8919  %tmp = getelementptr i64, ptr %A, i64 8
8920  ret ptr %tmp
8921}
8922
; Post-increment st4 of four <2 x i64> vectors with a run-time step.
; Stores %B..%E through %A and returns %A advanced by %inc i64 elements, i.e.
; %inc * 8 bytes.
; Expected output: the llc run without -global-isel scales the step with
; `lsl x8, x2, #3` and uses the register post-indexed form `[x0], x8`; the
; -global-isel=1 run emits `add x0, x0, x2, lsl #3` and stores through a copy
; of the pointer in x8. %ptr is not referenced by the body.
8923define ptr @test_v2i64_post_reg_st4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
8924; CHECK-SD-LABEL: test_v2i64_post_reg_st4:
8925; CHECK-SD:       ; %bb.0:
8926; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8927; CHECK-SD-NEXT:    lsl x8, x2, #3
8928; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8929; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8930; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8931; CHECK-SD-NEXT:    st4.2d { v0, v1, v2, v3 }, [x0], x8
8932; CHECK-SD-NEXT:    ret
8933;
8934; CHECK-GI-LABEL: test_v2i64_post_reg_st4:
8935; CHECK-GI:       ; %bb.0:
8936; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8937; CHECK-GI-NEXT:    mov x8, x0
8938; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
8939; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8940; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8941; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
8942; CHECK-GI-NEXT:    st4.2d { v0, v1, v2, v3 }, [x8]
8943; CHECK-GI-NEXT:    ret
8944  call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, ptr %A)
8945  %tmp = getelementptr i64, ptr %A, i64 %inc
8946  ret ptr %tmp
8947}
8948
8949declare void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>,<2 x i64>,  ptr)
8950
8951
; Post-increment st4 of four <1 x i64> vectors with a constant step.
; Stores %B..%E through %A and returns %A advanced by 4 i64 elements (32 bytes).
; Note that the expected instruction for this single-element type is a
; four-register `st1.1d`, not an `st4`.
; Expected output: the llc run without -global-isel folds the step into the
; post-indexed form `st1.1d ..., [x0], #32`; the -global-isel=1 run keeps a
; separate `add x0, x0, #32` and stores through a copy of the pointer in x8.
; %ptr is not referenced by the body.
8952define ptr @test_v1i64_post_imm_st4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
8953; CHECK-SD-LABEL: test_v1i64_post_imm_st4:
8954; CHECK-SD:       ; %bb.0:
8955; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8956; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8957; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8958; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8959; CHECK-SD-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], #32
8960; CHECK-SD-NEXT:    ret
8961;
8962; CHECK-GI-LABEL: test_v1i64_post_imm_st4:
8963; CHECK-GI:       ; %bb.0:
8964; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8965; CHECK-GI-NEXT:    mov x8, x0
8966; CHECK-GI-NEXT:    add x0, x0, #32
8967; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8968; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8969; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8970; CHECK-GI-NEXT:    st1.1d { v0, v1, v2, v3 }, [x8]
8971; CHECK-GI-NEXT:    ret
8972  call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, ptr %A)
8973  %tmp = getelementptr i64, ptr %A, i64 4
8974  ret ptr %tmp
8975}
8976
; Post-increment st4 of four <1 x i64> vectors with a run-time step.
; Stores %B..%E through %A and returns %A advanced by %inc i64 elements, i.e.
; %inc * 8 bytes.
; Note that the expected instruction for this single-element type is a
; four-register `st1.1d`, not an `st4`.
; Expected output: the llc run without -global-isel scales the step with
; `lsl x8, x2, #3` and uses the register post-indexed form `[x0], x8`; the
; -global-isel=1 run emits `add x0, x0, x2, lsl #3` and stores through a copy
; of the pointer in x8. %ptr is not referenced by the body.
8977define ptr @test_v1i64_post_reg_st4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
8978; CHECK-SD-LABEL: test_v1i64_post_reg_st4:
8979; CHECK-SD:       ; %bb.0:
8980; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8981; CHECK-SD-NEXT:    lsl x8, x2, #3
8982; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8983; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8984; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8985; CHECK-SD-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], x8
8986; CHECK-SD-NEXT:    ret
8987;
8988; CHECK-GI-LABEL: test_v1i64_post_reg_st4:
8989; CHECK-GI:       ; %bb.0:
8990; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8991; CHECK-GI-NEXT:    mov x8, x0
8992; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
8993; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8994; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8995; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
8996; CHECK-GI-NEXT:    st1.1d { v0, v1, v2, v3 }, [x8]
8997; CHECK-GI-NEXT:    ret
8998  call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, ptr %A)
8999  %tmp = getelementptr i64, ptr %A, i64 %inc
9000  ret ptr %tmp
9001}
9002
9003declare void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>,<1 x i64>,  ptr)
9004
9005
; Post-increment st4 of four <4 x float> vectors with a constant step.
; Stores %B..%E through %A and returns %A advanced by 16 float elements
; (64 bytes).
; Expected output: the llc run without -global-isel folds the step into the
; post-indexed form `st4.4s ..., [x0], #64`; the -global-isel=1 run keeps a
; separate `add x0, x0, #64` and stores through a copy of the pointer in x8.
; %ptr is not referenced by the body.
9006define ptr @test_v4f32_post_imm_st4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
9007; CHECK-SD-LABEL: test_v4f32_post_imm_st4:
9008; CHECK-SD:       ; %bb.0:
9009; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
9010; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
9011; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
9012; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
9013; CHECK-SD-NEXT:    st4.4s { v0, v1, v2, v3 }, [x0], #64
9014; CHECK-SD-NEXT:    ret
9015;
9016; CHECK-GI-LABEL: test_v4f32_post_imm_st4:
9017; CHECK-GI:       ; %bb.0:
9018; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
9019; CHECK-GI-NEXT:    mov x8, x0
9020; CHECK-GI-NEXT:    add x0, x0, #64
9021; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
9022; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
9023; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
9024; CHECK-GI-NEXT:    st4.4s { v0, v1, v2, v3 }, [x8]
9025; CHECK-GI-NEXT:    ret
9026  call void @llvm.aarch64.neon.st4.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, ptr %A)
9027  %tmp = getelementptr float, ptr %A, i32 16
9028  ret ptr %tmp
9029}
9030
; Post-increment st4 of four <4 x float> vectors with a run-time step.
; Stores %B..%E through %A and returns %A advanced by %inc float elements,
; i.e. %inc * 4 bytes.
; Expected output: the llc run without -global-isel scales the step with
; `lsl x8, x2, #2` and uses the register post-indexed form `[x0], x8`; the
; -global-isel=1 run emits `add x0, x0, x2, lsl #2` and stores through a copy
; of the pointer in x8. %ptr is not referenced by the body.
9031define ptr @test_v4f32_post_reg_st4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
9032; CHECK-SD-LABEL: test_v4f32_post_reg_st4:
9033; CHECK-SD:       ; %bb.0:
9034; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
9035; CHECK-SD-NEXT:    lsl x8, x2, #2
9036; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
9037; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
9038; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
9039; CHECK-SD-NEXT:    st4.4s { v0, v1, v2, v3 }, [x0], x8
9040; CHECK-SD-NEXT:    ret
9041;
9042; CHECK-GI-LABEL: test_v4f32_post_reg_st4:
9043; CHECK-GI:       ; %bb.0:
9044; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
9045; CHECK-GI-NEXT:    mov x8, x0
9046; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
9047; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
9048; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
9049; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
9050; CHECK-GI-NEXT:    st4.4s { v0, v1, v2, v3 }, [x8]
9051; CHECK-GI-NEXT:    ret
9052  call void @llvm.aarch64.neon.st4.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, ptr %A)
9053  %tmp = getelementptr float, ptr %A, i64 %inc
9054  ret ptr %tmp
9055}
9056
9057declare void @llvm.aarch64.neon.st4.v4f32.p0(<4 x float>, <4 x float>, <4 x float>, <4 x float>, ptr)
9058
9059
; Post-increment st4 of four <2 x float> vectors with a constant step.
; Stores %B..%E through %A and returns %A advanced by 8 float elements
; (32 bytes).
; Expected output: the llc run without -global-isel folds the step into the
; post-indexed form `st4.2s ..., [x0], #32`; the -global-isel=1 run keeps a
; separate `add x0, x0, #32` and stores through a copy of the pointer in x8.
; %ptr is not referenced by the body.
9060define ptr @test_v2f32_post_imm_st4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
9061; CHECK-SD-LABEL: test_v2f32_post_imm_st4:
9062; CHECK-SD:       ; %bb.0:
9063; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
9064; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
9065; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
9066; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
9067; CHECK-SD-NEXT:    st4.2s { v0, v1, v2, v3 }, [x0], #32
9068; CHECK-SD-NEXT:    ret
9069;
9070; CHECK-GI-LABEL: test_v2f32_post_imm_st4:
9071; CHECK-GI:       ; %bb.0:
9072; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
9073; CHECK-GI-NEXT:    mov x8, x0
9074; CHECK-GI-NEXT:    add x0, x0, #32
9075; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
9076; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
9077; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
9078; CHECK-GI-NEXT:    st4.2s { v0, v1, v2, v3 }, [x8]
9079; CHECK-GI-NEXT:    ret
9080  call void @llvm.aarch64.neon.st4.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, ptr %A)
9081  %tmp = getelementptr float, ptr %A, i32 8
9082  ret ptr %tmp
9083}
9084
; Post-increment st4 of four <2 x float> vectors with a run-time step.
; Stores %B..%E through %A and returns %A advanced by %inc float elements,
; i.e. %inc * 4 bytes.
; Expected output: the llc run without -global-isel scales the step with
; `lsl x8, x2, #2` and uses the register post-indexed form `[x0], x8`; the
; -global-isel=1 run emits `add x0, x0, x2, lsl #2` and stores through a copy
; of the pointer in x8. %ptr is not referenced by the body.
9085define ptr @test_v2f32_post_reg_st4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
9086; CHECK-SD-LABEL: test_v2f32_post_reg_st4:
9087; CHECK-SD:       ; %bb.0:
9088; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
9089; CHECK-SD-NEXT:    lsl x8, x2, #2
9090; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
9091; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
9092; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
9093; CHECK-SD-NEXT:    st4.2s { v0, v1, v2, v3 }, [x0], x8
9094; CHECK-SD-NEXT:    ret
9095;
9096; CHECK-GI-LABEL: test_v2f32_post_reg_st4:
9097; CHECK-GI:       ; %bb.0:
9098; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
9099; CHECK-GI-NEXT:    mov x8, x0
9100; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
9101; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
9102; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
9103; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
9104; CHECK-GI-NEXT:    st4.2s { v0, v1, v2, v3 }, [x8]
9105; CHECK-GI-NEXT:    ret
9106  call void @llvm.aarch64.neon.st4.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, ptr %A)
9107  %tmp = getelementptr float, ptr %A, i64 %inc
9108  ret ptr %tmp
9109}
9110
9111declare void @llvm.aarch64.neon.st4.v2f32.p0(<2 x float>, <2 x float>, <2 x float>, <2 x float>, ptr)
9112
9113
; Post-increment st4 of four <2 x double> vectors with a constant step.
; Stores %B..%E through %A and returns %A advanced by 8 double elements
; (64 bytes).
; Expected output: the llc run without -global-isel folds the step into the
; post-indexed form `st4.2d ..., [x0], #64`; the -global-isel=1 run keeps a
; separate `add x0, x0, #64` and stores through a copy of the pointer in x8.
; %ptr is not referenced by the body.
9114define ptr @test_v2f64_post_imm_st4(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
9115; CHECK-SD-LABEL: test_v2f64_post_imm_st4:
9116; CHECK-SD:       ; %bb.0:
9117; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
9118; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
9119; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
9120; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
9121; CHECK-SD-NEXT:    st4.2d { v0, v1, v2, v3 }, [x0], #64
9122; CHECK-SD-NEXT:    ret
9123;
9124; CHECK-GI-LABEL: test_v2f64_post_imm_st4:
9125; CHECK-GI:       ; %bb.0:
9126; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
9127; CHECK-GI-NEXT:    mov x8, x0
9128; CHECK-GI-NEXT:    add x0, x0, #64
9129; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
9130; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
9131; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
9132; CHECK-GI-NEXT:    st4.2d { v0, v1, v2, v3 }, [x8]
9133; CHECK-GI-NEXT:    ret
9134  call void @llvm.aarch64.neon.st4.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, ptr %A)
9135  %tmp = getelementptr double, ptr %A, i64 8
9136  ret ptr %tmp
9137}
9138
; Post-increment st4 of four <2 x double> vectors with a run-time step.
; Stores %B..%E through %A and returns %A advanced by %inc double elements,
; i.e. %inc * 8 bytes.
; Expected output: the llc run without -global-isel scales the step with
; `lsl x8, x2, #3` and uses the register post-indexed form `[x0], x8`; the
; -global-isel=1 run emits `add x0, x0, x2, lsl #3` and stores through a copy
; of the pointer in x8. %ptr is not referenced by the body.
9139define ptr @test_v2f64_post_reg_st4(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
9140; CHECK-SD-LABEL: test_v2f64_post_reg_st4:
9141; CHECK-SD:       ; %bb.0:
9142; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
9143; CHECK-SD-NEXT:    lsl x8, x2, #3
9144; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
9145; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
9146; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
9147; CHECK-SD-NEXT:    st4.2d { v0, v1, v2, v3 }, [x0], x8
9148; CHECK-SD-NEXT:    ret
9149;
9150; CHECK-GI-LABEL: test_v2f64_post_reg_st4:
9151; CHECK-GI:       ; %bb.0:
9152; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
9153; CHECK-GI-NEXT:    mov x8, x0
9154; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
9155; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
9156; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
9157; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
9158; CHECK-GI-NEXT:    st4.2d { v0, v1, v2, v3 }, [x8]
9159; CHECK-GI-NEXT:    ret
9160  call void @llvm.aarch64.neon.st4.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, ptr %A)
9161  %tmp = getelementptr double, ptr %A, i64 %inc
9162  ret ptr %tmp
9163}
9164
9165declare void @llvm.aarch64.neon.st4.v2f64.p0(<2 x double>, <2 x double>, <2 x double>,<2 x double>,  ptr)
9166
9167
; Post-increment st4 of four <1 x double> vectors with a constant step.
; Stores %B..%E through %A and returns %A advanced by 4 double elements
; (32 bytes).
; Note that the expected instruction for this single-element type is a
; four-register `st1.1d`, not an `st4`.
; Expected output: the llc run without -global-isel folds the step into the
; post-indexed form `st1.1d ..., [x0], #32`; the -global-isel=1 run keeps a
; separate `add x0, x0, #32` and stores through a copy of the pointer in x8.
; %ptr is not referenced by the body.
9168define ptr @test_v1f64_post_imm_st4(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
9169; CHECK-SD-LABEL: test_v1f64_post_imm_st4:
9170; CHECK-SD:       ; %bb.0:
9171; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
9172; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
9173; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
9174; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
9175; CHECK-SD-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], #32
9176; CHECK-SD-NEXT:    ret
9177;
9178; CHECK-GI-LABEL: test_v1f64_post_imm_st4:
9179; CHECK-GI:       ; %bb.0:
9180; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
9181; CHECK-GI-NEXT:    mov x8, x0
9182; CHECK-GI-NEXT:    add x0, x0, #32
9183; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
9184; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
9185; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
9186; CHECK-GI-NEXT:    st1.1d { v0, v1, v2, v3 }, [x8]
9187; CHECK-GI-NEXT:    ret
9188  call void @llvm.aarch64.neon.st4.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, ptr %A)
9189  %tmp = getelementptr double, ptr %A, i64 4
9190  ret ptr %tmp
9191}
9192
; Register post-increment test for st4 on <1 x double>.
; Stores %B, %C, %D and %E at %A via the st4 intrinsic and returns %A advanced
; by %inc double elements.
; Both runs expect the four-register st1.1d form rather than an st4 mnemonic.
; Default llc run: expects %inc scaled to bytes (lsl #3) and used as the
; register offset of a post-indexed st1.1d.
; -global-isel=1 run: expects a separate add (x2, lsl #3) plus an unindexed
; st1.1d through a copy of the original pointer in x8.
; %ptr is not referenced by the body.
9193define ptr @test_v1f64_post_reg_st4(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
9194; CHECK-SD-LABEL: test_v1f64_post_reg_st4:
9195; CHECK-SD:       ; %bb.0:
9196; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
9197; CHECK-SD-NEXT:    lsl x8, x2, #3
9198; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
9199; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
9200; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
9201; CHECK-SD-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], x8
9202; CHECK-SD-NEXT:    ret
9203;
9204; CHECK-GI-LABEL: test_v1f64_post_reg_st4:
9205; CHECK-GI:       ; %bb.0:
9206; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
9207; CHECK-GI-NEXT:    mov x8, x0
9208; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
9209; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
9210; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
9211; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
9212; CHECK-GI-NEXT:    st1.1d { v0, v1, v2, v3 }, [x8]
9213; CHECK-GI-NEXT:    ret
9214  call void @llvm.aarch64.neon.st4.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, ptr %A)
9215  %tmp = getelementptr double, ptr %A, i64 %inc
9216  ret ptr %tmp
9217}
9218
9219declare void @llvm.aarch64.neon.st4.v1f64.p0(<1 x double>, <1 x double>, <1 x double>, <1 x double>, ptr)
9220
9221
; Immediate post-increment test for st1x2 on <16 x i8>.
; Stores %B and %C at %A via the st1x2 intrinsic and returns %A advanced by
; 32 i8 elements (32 bytes, matching the #32 in the expected output).
; Default llc run: expects the bump folded into a post-indexed st1.16b.
; -global-isel=1 run: expects a separate add plus an unindexed st1.16b through
; a copy of the original pointer in x8.
; %ptr is not referenced by the body.
9222define ptr @test_v16i8_post_imm_st1x2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
9223; CHECK-SD-LABEL: test_v16i8_post_imm_st1x2:
9224; CHECK-SD:       ; %bb.0:
9225; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
9226; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
9227; CHECK-SD-NEXT:    st1.16b { v0, v1 }, [x0], #32
9228; CHECK-SD-NEXT:    ret
9229;
9230; CHECK-GI-LABEL: test_v16i8_post_imm_st1x2:
9231; CHECK-GI:       ; %bb.0:
9232; CHECK-GI-NEXT:    mov x8, x0
9233; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
9234; CHECK-GI-NEXT:    add x0, x0, #32
9235; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
9236; CHECK-GI-NEXT:    st1.16b { v0, v1 }, [x8]
9237; CHECK-GI-NEXT:    ret
9238  call void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8> %B, <16 x i8> %C, ptr %A)
9239  %tmp = getelementptr i8, ptr %A, i32 32
9240  ret ptr %tmp
9241}
9242
; Register post-increment test for st1x2 on <16 x i8>.
; Stores %B and %C at %A via the st1x2 intrinsic and returns %A advanced by
; %inc i8 elements (no scaling is expected, since the element is one byte).
; Default llc run: expects x2 used directly as the register offset of a
; post-indexed st1.16b.
; -global-isel=1 run: expects a separate add (x0, x2) plus an unindexed st1.16b
; through a copy of the original pointer in x8.
; %ptr is not referenced by the body.
9243define ptr @test_v16i8_post_reg_st1x2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
9244; CHECK-SD-LABEL: test_v16i8_post_reg_st1x2:
9245; CHECK-SD:       ; %bb.0:
9246; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
9247; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
9248; CHECK-SD-NEXT:    st1.16b { v0, v1 }, [x0], x2
9249; CHECK-SD-NEXT:    ret
9250;
9251; CHECK-GI-LABEL: test_v16i8_post_reg_st1x2:
9252; CHECK-GI:       ; %bb.0:
9253; CHECK-GI-NEXT:    mov x8, x0
9254; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
9255; CHECK-GI-NEXT:    add x0, x0, x2
9256; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
9257; CHECK-GI-NEXT:    st1.16b { v0, v1 }, [x8]
9258; CHECK-GI-NEXT:    ret
9259  call void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8> %B, <16 x i8> %C, ptr %A)
9260  %tmp = getelementptr i8, ptr %A, i64 %inc
9261  ret ptr %tmp
9262}
9263
9264declare void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8>, <16 x i8>, ptr)
9265
9266
; Immediate post-increment test for st1x2 on <8 x i8>.
; Stores %B and %C at %A via the st1x2 intrinsic and returns %A advanced by
; 16 i8 elements (16 bytes, matching the #16 in the expected output).
; Default llc run: expects the bump folded into a post-indexed st1.8b.
; -global-isel=1 run: expects a separate add plus an unindexed st1.8b through
; a copy of the original pointer in x8.
; %ptr is not referenced by the body.
9267define ptr @test_v8i8_post_imm_st1x2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
9268; CHECK-SD-LABEL: test_v8i8_post_imm_st1x2:
9269; CHECK-SD:       ; %bb.0:
9270; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
9271; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
9272; CHECK-SD-NEXT:    st1.8b { v0, v1 }, [x0], #16
9273; CHECK-SD-NEXT:    ret
9274;
9275; CHECK-GI-LABEL: test_v8i8_post_imm_st1x2:
9276; CHECK-GI:       ; %bb.0:
9277; CHECK-GI-NEXT:    mov x8, x0
9278; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
9279; CHECK-GI-NEXT:    add x0, x0, #16
9280; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
9281; CHECK-GI-NEXT:    st1.8b { v0, v1 }, [x8]
9282; CHECK-GI-NEXT:    ret
9283  call void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8> %B, <8 x i8> %C, ptr %A)
9284  %tmp = getelementptr i8, ptr %A, i32 16
9285  ret ptr %tmp
9286}
9287
; Register post-increment test for st1x2 on <8 x i8>.
; Stores %B and %C at %A via the st1x2 intrinsic and returns %A advanced by
; %inc i8 elements (no scaling is expected, since the element is one byte).
; Default llc run: expects x2 used directly as the register offset of a
; post-indexed st1.8b.
; -global-isel=1 run: expects a separate add (x0, x2) plus an unindexed st1.8b
; through a copy of the original pointer in x8.
; %ptr is not referenced by the body.
9288define ptr @test_v8i8_post_reg_st1x2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
9289; CHECK-SD-LABEL: test_v8i8_post_reg_st1x2:
9290; CHECK-SD:       ; %bb.0:
9291; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
9292; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
9293; CHECK-SD-NEXT:    st1.8b { v0, v1 }, [x0], x2
9294; CHECK-SD-NEXT:    ret
9295;
9296; CHECK-GI-LABEL: test_v8i8_post_reg_st1x2:
9297; CHECK-GI:       ; %bb.0:
9298; CHECK-GI-NEXT:    mov x8, x0
9299; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
9300; CHECK-GI-NEXT:    add x0, x0, x2
9301; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
9302; CHECK-GI-NEXT:    st1.8b { v0, v1 }, [x8]
9303; CHECK-GI-NEXT:    ret
9304  call void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8> %B, <8 x i8> %C, ptr %A)
9305  %tmp = getelementptr i8, ptr %A, i64 %inc
9306  ret ptr %tmp
9307}
9308
9309declare void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8>, <8 x i8>, ptr)
9310
9311
; Immediate post-increment test for st1x2 on <8 x i16>.
; Stores %B and %C at %A via the st1x2 intrinsic and returns %A advanced by
; 16 i16 elements (32 bytes, matching the #32 in the expected output).
; Default llc run: expects the bump folded into a post-indexed st1.8h.
; -global-isel=1 run: expects a separate add plus an unindexed st1.8h through
; a copy of the original pointer in x8.
; %ptr is not referenced by the body.
9312define ptr @test_v8i16_post_imm_st1x2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
9313; CHECK-SD-LABEL: test_v8i16_post_imm_st1x2:
9314; CHECK-SD:       ; %bb.0:
9315; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
9316; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
9317; CHECK-SD-NEXT:    st1.8h { v0, v1 }, [x0], #32
9318; CHECK-SD-NEXT:    ret
9319;
9320; CHECK-GI-LABEL: test_v8i16_post_imm_st1x2:
9321; CHECK-GI:       ; %bb.0:
9322; CHECK-GI-NEXT:    mov x8, x0
9323; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
9324; CHECK-GI-NEXT:    add x0, x0, #32
9325; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
9326; CHECK-GI-NEXT:    st1.8h { v0, v1 }, [x8]
9327; CHECK-GI-NEXT:    ret
9328  call void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16> %B, <8 x i16> %C, ptr %A)
9329  %tmp = getelementptr i16, ptr %A, i32 16
9330  ret ptr %tmp
9331}
9332
; Register post-increment test for st1x2 on <8 x i16>.
; Stores %B and %C at %A via the st1x2 intrinsic and returns %A advanced by
; %inc i16 elements.
; Default llc run: expects %inc scaled to bytes (lsl #1) and used as the
; register offset of a post-indexed st1.8h.
; -global-isel=1 run: expects a separate add (x2, lsl #1) plus an unindexed
; st1.8h through a copy of the original pointer in x8.
; %ptr is not referenced by the body.
9333define ptr @test_v8i16_post_reg_st1x2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind {
9334; CHECK-SD-LABEL: test_v8i16_post_reg_st1x2:
9335; CHECK-SD:       ; %bb.0:
9336; CHECK-SD-NEXT:    lsl x8, x2, #1
9337; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
9338; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
9339; CHECK-SD-NEXT:    st1.8h { v0, v1 }, [x0], x8
9340; CHECK-SD-NEXT:    ret
9341;
9342; CHECK-GI-LABEL: test_v8i16_post_reg_st1x2:
9343; CHECK-GI:       ; %bb.0:
9344; CHECK-GI-NEXT:    mov x8, x0
9345; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
9346; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
9347; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
9348; CHECK-GI-NEXT:    st1.8h { v0, v1 }, [x8]
9349; CHECK-GI-NEXT:    ret
9350  call void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16> %B, <8 x i16> %C, ptr %A)
9351  %tmp = getelementptr i16, ptr %A, i64 %inc
9352  ret ptr %tmp
9353}
9354
9355declare void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16>, <8 x i16>, ptr)
9356
9357
; Immediate post-increment test for st1x2 on <4 x i16>.
; Stores %B and %C at %A via the st1x2 intrinsic and returns %A advanced by
; 8 i16 elements (16 bytes, matching the #16 in the expected output).
; Default llc run: expects the bump folded into a post-indexed st1.4h.
; -global-isel=1 run: expects a separate add plus an unindexed st1.4h through
; a copy of the original pointer in x8.
; %ptr is not referenced by the body.
9358define ptr @test_v4i16_post_imm_st1x2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
9359; CHECK-SD-LABEL: test_v4i16_post_imm_st1x2:
9360; CHECK-SD:       ; %bb.0:
9361; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
9362; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
9363; CHECK-SD-NEXT:    st1.4h { v0, v1 }, [x0], #16
9364; CHECK-SD-NEXT:    ret
9365;
9366; CHECK-GI-LABEL: test_v4i16_post_imm_st1x2:
9367; CHECK-GI:       ; %bb.0:
9368; CHECK-GI-NEXT:    mov x8, x0
9369; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
9370; CHECK-GI-NEXT:    add x0, x0, #16
9371; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
9372; CHECK-GI-NEXT:    st1.4h { v0, v1 }, [x8]
9373; CHECK-GI-NEXT:    ret
9374  call void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16> %B, <4 x i16> %C, ptr %A)
9375  %tmp = getelementptr i16, ptr %A, i32 8
9376  ret ptr %tmp
9377}
9378
; Register post-increment test for st1x2 on <4 x i16>.
; Stores %B and %C at %A via the st1x2 intrinsic and returns %A advanced by
; %inc i16 elements.
; Default llc run: expects %inc scaled to bytes (lsl #1) and used as the
; register offset of a post-indexed st1.4h.
; -global-isel=1 run: expects a separate add (x2, lsl #1) plus an unindexed
; st1.4h through a copy of the original pointer in x8.
; %ptr is not referenced by the body.
9379define ptr @test_v4i16_post_reg_st1x2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind {
9380; CHECK-SD-LABEL: test_v4i16_post_reg_st1x2:
9381; CHECK-SD:       ; %bb.0:
9382; CHECK-SD-NEXT:    lsl x8, x2, #1
9383; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
9384; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
9385; CHECK-SD-NEXT:    st1.4h { v0, v1 }, [x0], x8
9386; CHECK-SD-NEXT:    ret
9387;
9388; CHECK-GI-LABEL: test_v4i16_post_reg_st1x2:
9389; CHECK-GI:       ; %bb.0:
9390; CHECK-GI-NEXT:    mov x8, x0
9391; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
9392; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
9393; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
9394; CHECK-GI-NEXT:    st1.4h { v0, v1 }, [x8]
9395; CHECK-GI-NEXT:    ret
9396  call void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16> %B, <4 x i16> %C, ptr %A)
9397  %tmp = getelementptr i16, ptr %A, i64 %inc
9398  ret ptr %tmp
9399}
9400
9401declare void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16>, <4 x i16>, ptr)
9402
9403
; Immediate post-increment test for st1x2 on <4 x i32>.
; Stores %B and %C at %A via the st1x2 intrinsic and returns %A advanced by
; 8 i32 elements (32 bytes, matching the #32 in the expected output).
; Default llc run: expects the bump folded into a post-indexed st1.4s.
; -global-isel=1 run: expects a separate add plus an unindexed st1.4s through
; a copy of the original pointer in x8.
; %ptr is not referenced by the body.
9404define ptr @test_v4i32_post_imm_st1x2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
9405; CHECK-SD-LABEL: test_v4i32_post_imm_st1x2:
9406; CHECK-SD:       ; %bb.0:
9407; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
9408; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
9409; CHECK-SD-NEXT:    st1.4s { v0, v1 }, [x0], #32
9410; CHECK-SD-NEXT:    ret
9411;
9412; CHECK-GI-LABEL: test_v4i32_post_imm_st1x2:
9413; CHECK-GI:       ; %bb.0:
9414; CHECK-GI-NEXT:    mov x8, x0
9415; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
9416; CHECK-GI-NEXT:    add x0, x0, #32
9417; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
9418; CHECK-GI-NEXT:    st1.4s { v0, v1 }, [x8]
9419; CHECK-GI-NEXT:    ret
9420  call void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32> %B, <4 x i32> %C, ptr %A)
9421  %tmp = getelementptr i32, ptr %A, i32 8
9422  ret ptr %tmp
9423}
9424
; Register post-increment test for st1x2 on <4 x i32>.
; Stores %B and %C at %A via the st1x2 intrinsic and returns %A advanced by
; %inc i32 elements.
; Default llc run: expects %inc scaled to bytes (lsl #2) and used as the
; register offset of a post-indexed st1.4s.
; -global-isel=1 run: expects a separate add (x2, lsl #2) plus an unindexed
; st1.4s through a copy of the original pointer in x8.
; %ptr is not referenced by the body.
9425define ptr @test_v4i32_post_reg_st1x2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind {
9426; CHECK-SD-LABEL: test_v4i32_post_reg_st1x2:
9427; CHECK-SD:       ; %bb.0:
9428; CHECK-SD-NEXT:    lsl x8, x2, #2
9429; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
9430; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
9431; CHECK-SD-NEXT:    st1.4s { v0, v1 }, [x0], x8
9432; CHECK-SD-NEXT:    ret
9433;
9434; CHECK-GI-LABEL: test_v4i32_post_reg_st1x2:
9435; CHECK-GI:       ; %bb.0:
9436; CHECK-GI-NEXT:    mov x8, x0
9437; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
9438; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
9439; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
9440; CHECK-GI-NEXT:    st1.4s { v0, v1 }, [x8]
9441; CHECK-GI-NEXT:    ret
9442  call void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32> %B, <4 x i32> %C, ptr %A)
9443  %tmp = getelementptr i32, ptr %A, i64 %inc
9444  ret ptr %tmp
9445}
9446
9447declare void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32>, <4 x i32>, ptr)
9448
9449
; Immediate post-increment test for st1x2 on <2 x i32>.
; Stores %B and %C at %A via the st1x2 intrinsic and returns %A advanced by
; 4 i32 elements (16 bytes, matching the #16 in the expected output).
; Default llc run: expects the bump folded into a post-indexed st1.2s.
; -global-isel=1 run: expects a separate add plus an unindexed st1.2s through
; a copy of the original pointer in x8.
; %ptr is not referenced by the body.
9450define ptr @test_v2i32_post_imm_st1x2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
9451; CHECK-SD-LABEL: test_v2i32_post_imm_st1x2:
9452; CHECK-SD:       ; %bb.0:
9453; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
9454; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
9455; CHECK-SD-NEXT:    st1.2s { v0, v1 }, [x0], #16
9456; CHECK-SD-NEXT:    ret
9457;
9458; CHECK-GI-LABEL: test_v2i32_post_imm_st1x2:
9459; CHECK-GI:       ; %bb.0:
9460; CHECK-GI-NEXT:    mov x8, x0
9461; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
9462; CHECK-GI-NEXT:    add x0, x0, #16
9463; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
9464; CHECK-GI-NEXT:    st1.2s { v0, v1 }, [x8]
9465; CHECK-GI-NEXT:    ret
9466  call void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32> %B, <2 x i32> %C, ptr %A)
9467  %tmp = getelementptr i32, ptr %A, i32 4
9468  ret ptr %tmp
9469}
9470
; Register post-increment test for st1x2 on <2 x i32>.
; Stores %B and %C at %A via the st1x2 intrinsic and returns %A advanced by
; %inc i32 elements.
; Default llc run: expects %inc scaled to bytes (lsl #2) and used as the
; register offset of a post-indexed st1.2s.
; -global-isel=1 run: expects a separate add (x2, lsl #2) plus an unindexed
; st1.2s through a copy of the original pointer in x8.
; %ptr is not referenced by the body.
9471define ptr @test_v2i32_post_reg_st1x2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind {
9472; CHECK-SD-LABEL: test_v2i32_post_reg_st1x2:
9473; CHECK-SD:       ; %bb.0:
9474; CHECK-SD-NEXT:    lsl x8, x2, #2
9475; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
9476; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
9477; CHECK-SD-NEXT:    st1.2s { v0, v1 }, [x0], x8
9478; CHECK-SD-NEXT:    ret
9479;
9480; CHECK-GI-LABEL: test_v2i32_post_reg_st1x2:
9481; CHECK-GI:       ; %bb.0:
9482; CHECK-GI-NEXT:    mov x8, x0
9483; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
9484; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
9485; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
9486; CHECK-GI-NEXT:    st1.2s { v0, v1 }, [x8]
9487; CHECK-GI-NEXT:    ret
9488  call void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32> %B, <2 x i32> %C, ptr %A)
9489  %tmp = getelementptr i32, ptr %A, i64 %inc
9490  ret ptr %tmp
9491}
9492
9493declare void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32>, <2 x i32>, ptr)
9494
9495
; Immediate post-increment test for st1x2 on <2 x i64>.
; Stores %B and %C at %A via the st1x2 intrinsic and returns %A advanced by
; 4 i64 elements (32 bytes, matching the #32 in the expected output).
; Default llc run: expects the bump folded into a post-indexed st1.2d.
; -global-isel=1 run: expects a separate add plus an unindexed st1.2d through
; a copy of the original pointer in x8.
; %ptr is not referenced by the body.
9496define ptr @test_v2i64_post_imm_st1x2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
9497; CHECK-SD-LABEL: test_v2i64_post_imm_st1x2:
9498; CHECK-SD:       ; %bb.0:
9499; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
9500; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
9501; CHECK-SD-NEXT:    st1.2d { v0, v1 }, [x0], #32
9502; CHECK-SD-NEXT:    ret
9503;
9504; CHECK-GI-LABEL: test_v2i64_post_imm_st1x2:
9505; CHECK-GI:       ; %bb.0:
9506; CHECK-GI-NEXT:    mov x8, x0
9507; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
9508; CHECK-GI-NEXT:    add x0, x0, #32
9509; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
9510; CHECK-GI-NEXT:    st1.2d { v0, v1 }, [x8]
9511; CHECK-GI-NEXT:    ret
9512  call void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64> %B, <2 x i64> %C, ptr %A)
9513  %tmp = getelementptr i64, ptr %A, i64 4
9514  ret ptr %tmp
9515}
9516
; Register post-increment test for st1x2 on <2 x i64>.
; Stores %B and %C at %A via the st1x2 intrinsic and returns %A advanced by
; %inc i64 elements.
; Default llc run: expects %inc scaled to bytes (lsl #3) and used as the
; register offset of a post-indexed st1.2d.
; -global-isel=1 run: expects a separate add (x2, lsl #3) plus an unindexed
; st1.2d through a copy of the original pointer in x8.
; %ptr is not referenced by the body.
9517define ptr @test_v2i64_post_reg_st1x2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind {
9518; CHECK-SD-LABEL: test_v2i64_post_reg_st1x2:
9519; CHECK-SD:       ; %bb.0:
9520; CHECK-SD-NEXT:    lsl x8, x2, #3
9521; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
9522; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
9523; CHECK-SD-NEXT:    st1.2d { v0, v1 }, [x0], x8
9524; CHECK-SD-NEXT:    ret
9525;
9526; CHECK-GI-LABEL: test_v2i64_post_reg_st1x2:
9527; CHECK-GI:       ; %bb.0:
9528; CHECK-GI-NEXT:    mov x8, x0
9529; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
9530; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
9531; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
9532; CHECK-GI-NEXT:    st1.2d { v0, v1 }, [x8]
9533; CHECK-GI-NEXT:    ret
9534  call void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64> %B, <2 x i64> %C, ptr %A)
9535  %tmp = getelementptr i64, ptr %A, i64 %inc
9536  ret ptr %tmp
9537}
9538
9539declare void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64>, <2 x i64>, ptr)
9540
9541
; Immediate post-increment test for st1x2 on <1 x i64>.
; Stores %B and %C at %A via the st1x2 intrinsic and returns %A advanced by
; 2 i64 elements (16 bytes, matching the #16 in the expected output).
; Default llc run: expects the bump folded into a post-indexed st1.1d.
; -global-isel=1 run: expects a separate add plus an unindexed st1.1d through
; a copy of the original pointer in x8.
; %ptr is not referenced by the body.
9542define ptr @test_v1i64_post_imm_st1x2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
9543; CHECK-SD-LABEL: test_v1i64_post_imm_st1x2:
9544; CHECK-SD:       ; %bb.0:
9545; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
9546; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
9547; CHECK-SD-NEXT:    st1.1d { v0, v1 }, [x0], #16
9548; CHECK-SD-NEXT:    ret
9549;
9550; CHECK-GI-LABEL: test_v1i64_post_imm_st1x2:
9551; CHECK-GI:       ; %bb.0:
9552; CHECK-GI-NEXT:    mov x8, x0
9553; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
9554; CHECK-GI-NEXT:    add x0, x0, #16
9555; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
9556; CHECK-GI-NEXT:    st1.1d { v0, v1 }, [x8]
9557; CHECK-GI-NEXT:    ret
9558  call void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64> %B, <1 x i64> %C, ptr %A)
9559  %tmp = getelementptr i64, ptr %A, i64 2
9560  ret ptr %tmp
9561}
9562
; Register post-increment test for st1x2 on <1 x i64>.
; Stores %B and %C at %A via the st1x2 intrinsic and returns %A advanced by
; %inc i64 elements.
; Default llc run: expects %inc scaled to bytes (lsl #3) and used as the
; register offset of a post-indexed st1.1d.
; -global-isel=1 run: expects a separate add (x2, lsl #3) plus an unindexed
; st1.1d through a copy of the original pointer in x8.
; %ptr is not referenced by the body.
9563define ptr @test_v1i64_post_reg_st1x2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind {
9564; CHECK-SD-LABEL: test_v1i64_post_reg_st1x2:
9565; CHECK-SD:       ; %bb.0:
9566; CHECK-SD-NEXT:    lsl x8, x2, #3
9567; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
9568; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
9569; CHECK-SD-NEXT:    st1.1d { v0, v1 }, [x0], x8
9570; CHECK-SD-NEXT:    ret
9571;
9572; CHECK-GI-LABEL: test_v1i64_post_reg_st1x2:
9573; CHECK-GI:       ; %bb.0:
9574; CHECK-GI-NEXT:    mov x8, x0
9575; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
9576; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
9577; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
9578; CHECK-GI-NEXT:    st1.1d { v0, v1 }, [x8]
9579; CHECK-GI-NEXT:    ret
9580  call void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64> %B, <1 x i64> %C, ptr %A)
9581  %tmp = getelementptr i64, ptr %A, i64 %inc
9582  ret ptr %tmp
9583}
9584
9585declare void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64>, <1 x i64>, ptr)
9586
9587
; Immediate post-increment test for st1x2 on <4 x float>.
; Stores %B and %C at %A via the st1x2 intrinsic and returns %A advanced by
; 8 float elements (32 bytes, matching the #32 in the expected output).
; Default llc run: expects the bump folded into a post-indexed st1.4s.
; -global-isel=1 run: expects a separate add plus an unindexed st1.4s through
; a copy of the original pointer in x8.
; %ptr is not referenced by the body.
9588define ptr @test_v4f32_post_imm_st1x2(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C) nounwind {
9589; CHECK-SD-LABEL: test_v4f32_post_imm_st1x2:
9590; CHECK-SD:       ; %bb.0:
9591; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
9592; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
9593; CHECK-SD-NEXT:    st1.4s { v0, v1 }, [x0], #32
9594; CHECK-SD-NEXT:    ret
9595;
9596; CHECK-GI-LABEL: test_v4f32_post_imm_st1x2:
9597; CHECK-GI:       ; %bb.0:
9598; CHECK-GI-NEXT:    mov x8, x0
9599; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
9600; CHECK-GI-NEXT:    add x0, x0, #32
9601; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
9602; CHECK-GI-NEXT:    st1.4s { v0, v1 }, [x8]
9603; CHECK-GI-NEXT:    ret
9604  call void @llvm.aarch64.neon.st1x2.v4f32.p0(<4 x float> %B, <4 x float> %C, ptr %A)
9605  %tmp = getelementptr float, ptr %A, i32 8
9606  ret ptr %tmp
9607}
9608
; Register post-increment test for st1x2 on <4 x float>.
; Stores %B and %C at %A via the st1x2 intrinsic and returns %A advanced by
; %inc float elements.
; Default llc run: expects %inc scaled to bytes (lsl #2) and used as the
; register offset of a post-indexed st1.4s.
; -global-isel=1 run: expects a separate add (x2, lsl #2) plus an unindexed
; st1.4s through a copy of the original pointer in x8.
; %ptr is not referenced by the body.
9609define ptr @test_v4f32_post_reg_st1x2(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind {
9610; CHECK-SD-LABEL: test_v4f32_post_reg_st1x2:
9611; CHECK-SD:       ; %bb.0:
9612; CHECK-SD-NEXT:    lsl x8, x2, #2
9613; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
9614; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
9615; CHECK-SD-NEXT:    st1.4s { v0, v1 }, [x0], x8
9616; CHECK-SD-NEXT:    ret
9617;
9618; CHECK-GI-LABEL: test_v4f32_post_reg_st1x2:
9619; CHECK-GI:       ; %bb.0:
9620; CHECK-GI-NEXT:    mov x8, x0
9621; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
9622; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
9623; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
9624; CHECK-GI-NEXT:    st1.4s { v0, v1 }, [x8]
9625; CHECK-GI-NEXT:    ret
9626  call void @llvm.aarch64.neon.st1x2.v4f32.p0(<4 x float> %B, <4 x float> %C, ptr %A)
9627  %tmp = getelementptr float, ptr %A, i64 %inc
9628  ret ptr %tmp
9629}
9630
9631declare void @llvm.aarch64.neon.st1x2.v4f32.p0(<4 x float>, <4 x float>, ptr)
9632
9633
; Immediate post-increment test for st1x2 on <2 x float>.
; Stores %B and %C at %A via the st1x2 intrinsic and returns %A advanced by
; 4 float elements (16 bytes, matching the #16 in the expected output).
; Default llc run: expects the bump folded into a post-indexed st1.2s.
; -global-isel=1 run: expects a separate add plus an unindexed st1.2s through
; a copy of the original pointer in x8.
; %ptr is not referenced by the body.
9634define ptr @test_v2f32_post_imm_st1x2(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C) nounwind {
9635; CHECK-SD-LABEL: test_v2f32_post_imm_st1x2:
9636; CHECK-SD:       ; %bb.0:
9637; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
9638; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
9639; CHECK-SD-NEXT:    st1.2s { v0, v1 }, [x0], #16
9640; CHECK-SD-NEXT:    ret
9641;
9642; CHECK-GI-LABEL: test_v2f32_post_imm_st1x2:
9643; CHECK-GI:       ; %bb.0:
9644; CHECK-GI-NEXT:    mov x8, x0
9645; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
9646; CHECK-GI-NEXT:    add x0, x0, #16
9647; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
9648; CHECK-GI-NEXT:    st1.2s { v0, v1 }, [x8]
9649; CHECK-GI-NEXT:    ret
9650  call void @llvm.aarch64.neon.st1x2.v2f32.p0(<2 x float> %B, <2 x float> %C, ptr %A)
9651  %tmp = getelementptr float, ptr %A, i32 4
9652  ret ptr %tmp
9653}
9654
; Register post-increment test for st1x2 on <2 x float>.
; Stores %B and %C at %A via the st1x2 intrinsic and returns %A advanced by
; %inc float elements.
; Default llc run: expects %inc scaled to bytes (lsl #2) and used as the
; register offset of a post-indexed st1.2s.
; -global-isel=1 run: expects a separate add (x2, lsl #2) plus an unindexed
; st1.2s through a copy of the original pointer in x8.
; %ptr is not referenced by the body.
9655define ptr @test_v2f32_post_reg_st1x2(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind {
9656; CHECK-SD-LABEL: test_v2f32_post_reg_st1x2:
9657; CHECK-SD:       ; %bb.0:
9658; CHECK-SD-NEXT:    lsl x8, x2, #2
9659; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
9660; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
9661; CHECK-SD-NEXT:    st1.2s { v0, v1 }, [x0], x8
9662; CHECK-SD-NEXT:    ret
9663;
9664; CHECK-GI-LABEL: test_v2f32_post_reg_st1x2:
9665; CHECK-GI:       ; %bb.0:
9666; CHECK-GI-NEXT:    mov x8, x0
9667; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
9668; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
9669; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
9670; CHECK-GI-NEXT:    st1.2s { v0, v1 }, [x8]
9671; CHECK-GI-NEXT:    ret
9672  call void @llvm.aarch64.neon.st1x2.v2f32.p0(<2 x float> %B, <2 x float> %C, ptr %A)
9673  %tmp = getelementptr float, ptr %A, i64 %inc
9674  ret ptr %tmp
9675}
9676
9677declare void @llvm.aarch64.neon.st1x2.v2f32.p0(<2 x float>, <2 x float>, ptr)
9678
9679
; Immediate post-increment test for st1x2 on <2 x double>.
; Stores %B and %C at %A via the st1x2 intrinsic and returns %A advanced by
; 4 double elements (32 bytes, matching the #32 in the expected output).
; Default llc run: expects the bump folded into a post-indexed st1.2d.
; -global-isel=1 run: expects a separate add plus an unindexed st1.2d through
; a copy of the original pointer in x8.
; %ptr is not referenced by the body.
9680define ptr @test_v2f64_post_imm_st1x2(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C) nounwind {
9681; CHECK-SD-LABEL: test_v2f64_post_imm_st1x2:
9682; CHECK-SD:       ; %bb.0:
9683; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
9684; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
9685; CHECK-SD-NEXT:    st1.2d { v0, v1 }, [x0], #32
9686; CHECK-SD-NEXT:    ret
9687;
9688; CHECK-GI-LABEL: test_v2f64_post_imm_st1x2:
9689; CHECK-GI:       ; %bb.0:
9690; CHECK-GI-NEXT:    mov x8, x0
9691; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
9692; CHECK-GI-NEXT:    add x0, x0, #32
9693; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
9694; CHECK-GI-NEXT:    st1.2d { v0, v1 }, [x8]
9695; CHECK-GI-NEXT:    ret
9696  call void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double> %B, <2 x double> %C, ptr %A)
9697  %tmp = getelementptr double, ptr %A, i64 4
9698  ret ptr %tmp
9699}
9700
; Register post-increment test for st1x2 on <2 x double>.
; Stores %B and %C at %A via the st1x2 intrinsic and returns %A advanced by
; %inc double elements.
; Default llc run: expects %inc scaled to bytes (lsl #3) and used as the
; register offset of a post-indexed st1.2d.
; -global-isel=1 run: expects a separate add (x2, lsl #3) plus an unindexed
; st1.2d through a copy of the original pointer in x8.
; %ptr is not referenced by the body.
9701define ptr @test_v2f64_post_reg_st1x2(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind {
9702; CHECK-SD-LABEL: test_v2f64_post_reg_st1x2:
9703; CHECK-SD:       ; %bb.0:
9704; CHECK-SD-NEXT:    lsl x8, x2, #3
9705; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
9706; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
9707; CHECK-SD-NEXT:    st1.2d { v0, v1 }, [x0], x8
9708; CHECK-SD-NEXT:    ret
9709;
9710; CHECK-GI-LABEL: test_v2f64_post_reg_st1x2:
9711; CHECK-GI:       ; %bb.0:
9712; CHECK-GI-NEXT:    mov x8, x0
9713; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
9714; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
9715; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
9716; CHECK-GI-NEXT:    st1.2d { v0, v1 }, [x8]
9717; CHECK-GI-NEXT:    ret
9718  call void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double> %B, <2 x double> %C, ptr %A)
9719  %tmp = getelementptr double, ptr %A, i64 %inc
9720  ret ptr %tmp
9721}
9722
9723declare void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double>, <2 x double>, ptr)
9724
9725
; Immediate post-increment test for st1x2 on <1 x double>.
; Stores %B and %C at %A via the st1x2 intrinsic and returns %A advanced by
; 2 double elements (16 bytes, matching the #16 in the expected output).
; Default llc run: expects the bump folded into a post-indexed st1.1d.
; -global-isel=1 run: expects a separate add plus an unindexed st1.1d through
; a copy of the original pointer in x8.
; %ptr is not referenced by the body.
9726define ptr @test_v1f64_post_imm_st1x2(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C) nounwind {
9727; CHECK-SD-LABEL: test_v1f64_post_imm_st1x2:
9728; CHECK-SD:       ; %bb.0:
9729; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
9730; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
9731; CHECK-SD-NEXT:    st1.1d { v0, v1 }, [x0], #16
9732; CHECK-SD-NEXT:    ret
9733;
9734; CHECK-GI-LABEL: test_v1f64_post_imm_st1x2:
9735; CHECK-GI:       ; %bb.0:
9736; CHECK-GI-NEXT:    mov x8, x0
9737; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
9738; CHECK-GI-NEXT:    add x0, x0, #16
9739; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
9740; CHECK-GI-NEXT:    st1.1d { v0, v1 }, [x8]
9741; CHECK-GI-NEXT:    ret
9742  call void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double> %B, <1 x double> %C, ptr %A)
9743  %tmp = getelementptr double, ptr %A, i64 2
9744  ret ptr %tmp
9745}
9746
; Register post-increment test for st1x2 on <1 x double>.
; Stores %B and %C at %A via the st1x2 intrinsic and returns %A advanced by
; %inc double elements.
; Default llc run: expects %inc scaled to bytes (lsl #3) and used as the
; register offset of a post-indexed st1.1d.
; -global-isel=1 run: expects a separate add (x2, lsl #3) plus an unindexed
; st1.1d through a copy of the original pointer in x8.
; %ptr is not referenced by the body.
9747define ptr @test_v1f64_post_reg_st1x2(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind {
9748; CHECK-SD-LABEL: test_v1f64_post_reg_st1x2:
9749; CHECK-SD:       ; %bb.0:
9750; CHECK-SD-NEXT:    lsl x8, x2, #3
9751; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
9752; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
9753; CHECK-SD-NEXT:    st1.1d { v0, v1 }, [x0], x8
9754; CHECK-SD-NEXT:    ret
9755;
9756; CHECK-GI-LABEL: test_v1f64_post_reg_st1x2:
9757; CHECK-GI:       ; %bb.0:
9758; CHECK-GI-NEXT:    mov x8, x0
9759; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
9760; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
9761; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
9762; CHECK-GI-NEXT:    st1.1d { v0, v1 }, [x8]
9763; CHECK-GI-NEXT:    ret
9764  call void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double> %B, <1 x double> %C, ptr %A)
9765  %tmp = getelementptr double, ptr %A, i64 %inc
9766  ret ptr %tmp
9767}
9768
9769declare void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double>, <1 x double>, ptr)
9770
9771
; Immediate post-increment test for st1x3 on <16 x i8>.
; Stores %B, %C and %D at %A via the st1x3 intrinsic and returns %A advanced by
; 48 i8 elements (48 bytes, matching the #48 in the expected output).
; Default llc run: expects the bump folded into a post-indexed st1.16b.
; -global-isel=1 run: expects a separate add plus an unindexed st1.16b through
; a copy of the original pointer in x8.
; %ptr is not referenced by the body.
9772define ptr @test_v16i8_post_imm_st1x3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
9773; CHECK-SD-LABEL: test_v16i8_post_imm_st1x3:
9774; CHECK-SD:       ; %bb.0:
9775; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
9776; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
9777; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
9778; CHECK-SD-NEXT:    st1.16b { v0, v1, v2 }, [x0], #48
9779; CHECK-SD-NEXT:    ret
9780;
9781; CHECK-GI-LABEL: test_v16i8_post_imm_st1x3:
9782; CHECK-GI:       ; %bb.0:
9783; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
9784; CHECK-GI-NEXT:    mov x8, x0
9785; CHECK-GI-NEXT:    add x0, x0, #48
9786; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
9787; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
9788; CHECK-GI-NEXT:    st1.16b { v0, v1, v2 }, [x8]
9789; CHECK-GI-NEXT:    ret
9790  call void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %A)
9791  %tmp = getelementptr i8, ptr %A, i32 48
9792  ret ptr %tmp
9793}
9794
; Register post-increment test for st1x3 on <16 x i8>.
; Stores %B, %C and %D at %A via the st1x3 intrinsic and returns %A advanced by
; %inc i8 elements (no scaling is expected, since the element is one byte).
; Default llc run: expects x2 used directly as the register offset of a
; post-indexed st1.16b.
; -global-isel=1 run: expects a separate add (x0, x2) plus an unindexed st1.16b
; through a copy of the original pointer in x8.
; %ptr is not referenced by the body.
9795define ptr @test_v16i8_post_reg_st1x3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
9796; CHECK-SD-LABEL: test_v16i8_post_reg_st1x3:
9797; CHECK-SD:       ; %bb.0:
9798; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
9799; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
9800; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
9801; CHECK-SD-NEXT:    st1.16b { v0, v1, v2 }, [x0], x2
9802; CHECK-SD-NEXT:    ret
9803;
9804; CHECK-GI-LABEL: test_v16i8_post_reg_st1x3:
9805; CHECK-GI:       ; %bb.0:
9806; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
9807; CHECK-GI-NEXT:    mov x8, x0
9808; CHECK-GI-NEXT:    add x0, x0, x2
9809; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
9810; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
9811; CHECK-GI-NEXT:    st1.16b { v0, v1, v2 }, [x8]
9812; CHECK-GI-NEXT:    ret
9813  call void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %A)
9814  %tmp = getelementptr i8, ptr %A, i64 %inc
9815  ret ptr %tmp
9816}
9817
9818declare void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, ptr)
9819
9820
; Immediate post-increment test for st1x3 on <8 x i8>.
; Stores %B, %C and %D at %A via the st1x3 intrinsic and returns %A advanced by
; 24 i8 elements (24 bytes, matching the #24 in the expected output).
; Default llc run: expects the bump folded into a post-indexed st1.8b.
; -global-isel=1 run: expects a separate add plus an unindexed st1.8b through
; a copy of the original pointer in x8.
; %ptr is not referenced by the body.
9821define ptr @test_v8i8_post_imm_st1x3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
9822; CHECK-SD-LABEL: test_v8i8_post_imm_st1x3:
9823; CHECK-SD:       ; %bb.0:
9824; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
9825; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
9826; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
9827; CHECK-SD-NEXT:    st1.8b { v0, v1, v2 }, [x0], #24
9828; CHECK-SD-NEXT:    ret
9829;
9830; CHECK-GI-LABEL: test_v8i8_post_imm_st1x3:
9831; CHECK-GI:       ; %bb.0:
9832; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
9833; CHECK-GI-NEXT:    mov x8, x0
9834; CHECK-GI-NEXT:    add x0, x0, #24
9835; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
9836; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
9837; CHECK-GI-NEXT:    st1.8b { v0, v1, v2 }, [x8]
9838; CHECK-GI-NEXT:    ret
9839  call void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %A)
9840  %tmp = getelementptr i8, ptr %A, i32 24
9841  ret ptr %tmp
9842}
9843
; Register post-increment test for st1x3 on <8 x i8>.
; Stores %B, %C and %D at %A via the st1x3 intrinsic and returns %A advanced by
; %inc i8 elements (no scaling is expected, since the element is one byte).
; Default llc run: expects x2 used directly as the register offset of a
; post-indexed st1.8b.
; -global-isel=1 run: expects a separate add (x0, x2) plus an unindexed st1.8b
; through a copy of the original pointer in x8.
; %ptr is not referenced by the body.
9844define ptr @test_v8i8_post_reg_st1x3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
9845; CHECK-SD-LABEL: test_v8i8_post_reg_st1x3:
9846; CHECK-SD:       ; %bb.0:
9847; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
9848; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
9849; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
9850; CHECK-SD-NEXT:    st1.8b { v0, v1, v2 }, [x0], x2
9851; CHECK-SD-NEXT:    ret
9852;
9853; CHECK-GI-LABEL: test_v8i8_post_reg_st1x3:
9854; CHECK-GI:       ; %bb.0:
9855; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
9856; CHECK-GI-NEXT:    mov x8, x0
9857; CHECK-GI-NEXT:    add x0, x0, x2
9858; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
9859; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
9860; CHECK-GI-NEXT:    st1.8b { v0, v1, v2 }, [x8]
9861; CHECK-GI-NEXT:    ret
9862  call void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %A)
9863  %tmp = getelementptr i8, ptr %A, i64 %inc
9864  ret ptr %tmp
9865}
9866
9867declare void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, ptr)
9868
9869
; Post-increment store, immediate form. Passes %B, %C and %D to the st1x3
; intrinsic with address %A, then returns %A advanced by 24 i16 elements
; (48 bytes), which equals the combined size of the three <8 x i16> operands.
; %ptr is not used. Without -global-isel the expected output is one st1.8h
; post-indexed by #48; with -global-isel it is a separate add plus a plain
; st1.8h via a copy of x0.
9870define ptr @test_v8i16_post_imm_st1x3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
9871; CHECK-SD-LABEL: test_v8i16_post_imm_st1x3:
9872; CHECK-SD:       ; %bb.0:
9873; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
9874; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
9875; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
9876; CHECK-SD-NEXT:    st1.8h { v0, v1, v2 }, [x0], #48
9877; CHECK-SD-NEXT:    ret
9878;
9879; CHECK-GI-LABEL: test_v8i16_post_imm_st1x3:
9880; CHECK-GI:       ; %bb.0:
9881; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
9882; CHECK-GI-NEXT:    mov x8, x0
9883; CHECK-GI-NEXT:    add x0, x0, #48
9884; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
9885; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
9886; CHECK-GI-NEXT:    st1.8h { v0, v1, v2 }, [x8]
9887; CHECK-GI-NEXT:    ret
9888  call void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %A)
9889  %tmp = getelementptr i16, ptr %A, i32 24
9890  ret ptr %tmp
9891}
9892
; Post-increment store, register form. Passes %B, %C and %D to the st1x3
; intrinsic with address %A, then returns %A advanced by %inc i16 elements.
; %ptr is not used. Without -global-isel the expected output scales x2 with
; lsl #1 into x8 and post-indexes st1.8h by x8; with -global-isel it is an add
; of x2 shifted left by 1 plus a plain st1.8h via a copy of x0.
9893define ptr @test_v8i16_post_reg_st1x3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
9894; CHECK-SD-LABEL: test_v8i16_post_reg_st1x3:
9895; CHECK-SD:       ; %bb.0:
9896; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
9897; CHECK-SD-NEXT:    lsl x8, x2, #1
9898; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
9899; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
9900; CHECK-SD-NEXT:    st1.8h { v0, v1, v2 }, [x0], x8
9901; CHECK-SD-NEXT:    ret
9902;
9903; CHECK-GI-LABEL: test_v8i16_post_reg_st1x3:
9904; CHECK-GI:       ; %bb.0:
9905; CHECK-GI-NEXT:    mov x8, x0
9906; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
9907; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
9908; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
9909; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
9910; CHECK-GI-NEXT:    st1.8h { v0, v1, v2 }, [x8]
9911; CHECK-GI-NEXT:    ret
9912  call void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %A)
9913  %tmp = getelementptr i16, ptr %A, i64 %inc
9914  ret ptr %tmp
9915}
9916
9917declare void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, ptr)
9918
9919
; Post-increment store, immediate form. Passes %B, %C and %D to the st1x3
; intrinsic with address %A, then returns %A advanced by 12 i16 elements
; (24 bytes), which equals the combined size of the three <4 x i16> operands.
; %ptr is not used. Without -global-isel the expected output is one st1.4h
; post-indexed by #24; with -global-isel it is a separate add plus a plain
; st1.4h via a copy of x0.
9920define ptr @test_v4i16_post_imm_st1x3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
9921; CHECK-SD-LABEL: test_v4i16_post_imm_st1x3:
9922; CHECK-SD:       ; %bb.0:
9923; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
9924; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
9925; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
9926; CHECK-SD-NEXT:    st1.4h { v0, v1, v2 }, [x0], #24
9927; CHECK-SD-NEXT:    ret
9928;
9929; CHECK-GI-LABEL: test_v4i16_post_imm_st1x3:
9930; CHECK-GI:       ; %bb.0:
9931; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
9932; CHECK-GI-NEXT:    mov x8, x0
9933; CHECK-GI-NEXT:    add x0, x0, #24
9934; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
9935; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
9936; CHECK-GI-NEXT:    st1.4h { v0, v1, v2 }, [x8]
9937; CHECK-GI-NEXT:    ret
9938  call void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %A)
9939  %tmp = getelementptr i16, ptr %A, i32 12
9940  ret ptr %tmp
9941}
9942
; Post-increment store, register form. Passes %B, %C and %D to the st1x3
; intrinsic with address %A, then returns %A advanced by %inc i16 elements.
; %ptr is not used. Without -global-isel the expected output scales x2 with
; lsl #1 into x8 and post-indexes st1.4h by x8; with -global-isel it is an add
; of x2 shifted left by 1 plus a plain st1.4h via a copy of x0.
9943define ptr @test_v4i16_post_reg_st1x3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
9944; CHECK-SD-LABEL: test_v4i16_post_reg_st1x3:
9945; CHECK-SD:       ; %bb.0:
9946; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
9947; CHECK-SD-NEXT:    lsl x8, x2, #1
9948; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
9949; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
9950; CHECK-SD-NEXT:    st1.4h { v0, v1, v2 }, [x0], x8
9951; CHECK-SD-NEXT:    ret
9952;
9953; CHECK-GI-LABEL: test_v4i16_post_reg_st1x3:
9954; CHECK-GI:       ; %bb.0:
9955; CHECK-GI-NEXT:    mov x8, x0
9956; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
9957; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
9958; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
9959; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
9960; CHECK-GI-NEXT:    st1.4h { v0, v1, v2 }, [x8]
9961; CHECK-GI-NEXT:    ret
9962  call void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %A)
9963  %tmp = getelementptr i16, ptr %A, i64 %inc
9964  ret ptr %tmp
9965}
9966
9967declare void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>, ptr)
9968
9969
; Post-increment store, immediate form. Passes %B, %C and %D to the st1x3
; intrinsic with address %A, then returns %A advanced by 12 i32 elements
; (48 bytes), which equals the combined size of the three <4 x i32> operands.
; %ptr is not used. Without -global-isel the expected output is one st1.4s
; post-indexed by #48; with -global-isel it is a separate add plus a plain
; st1.4s via a copy of x0.
9970define ptr @test_v4i32_post_imm_st1x3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
9971; CHECK-SD-LABEL: test_v4i32_post_imm_st1x3:
9972; CHECK-SD:       ; %bb.0:
9973; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
9974; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
9975; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
9976; CHECK-SD-NEXT:    st1.4s { v0, v1, v2 }, [x0], #48
9977; CHECK-SD-NEXT:    ret
9978;
9979; CHECK-GI-LABEL: test_v4i32_post_imm_st1x3:
9980; CHECK-GI:       ; %bb.0:
9981; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
9982; CHECK-GI-NEXT:    mov x8, x0
9983; CHECK-GI-NEXT:    add x0, x0, #48
9984; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
9985; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
9986; CHECK-GI-NEXT:    st1.4s { v0, v1, v2 }, [x8]
9987; CHECK-GI-NEXT:    ret
9988  call void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %A)
9989  %tmp = getelementptr i32, ptr %A, i32 12
9990  ret ptr %tmp
9991}
9992
; Post-increment store, register form. Passes %B, %C and %D to the st1x3
; intrinsic with address %A, then returns %A advanced by %inc i32 elements.
; %ptr is not used. Without -global-isel the expected output scales x2 with
; lsl #2 into x8 and post-indexes st1.4s by x8; with -global-isel it is an add
; of x2 shifted left by 2 plus a plain st1.4s via a copy of x0.
9993define ptr @test_v4i32_post_reg_st1x3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
9994; CHECK-SD-LABEL: test_v4i32_post_reg_st1x3:
9995; CHECK-SD:       ; %bb.0:
9996; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
9997; CHECK-SD-NEXT:    lsl x8, x2, #2
9998; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
9999; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
10000; CHECK-SD-NEXT:    st1.4s { v0, v1, v2 }, [x0], x8
10001; CHECK-SD-NEXT:    ret
10002;
10003; CHECK-GI-LABEL: test_v4i32_post_reg_st1x3:
10004; CHECK-GI:       ; %bb.0:
10005; CHECK-GI-NEXT:    mov x8, x0
10006; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
10007; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
10008; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
10009; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
10010; CHECK-GI-NEXT:    st1.4s { v0, v1, v2 }, [x8]
10011; CHECK-GI-NEXT:    ret
10012  call void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %A)
10013  %tmp = getelementptr i32, ptr %A, i64 %inc
10014  ret ptr %tmp
10015}
10016
10017declare void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, ptr)
10018
10019
; Post-increment store, immediate form. Passes %B, %C and %D to the st1x3
; intrinsic with address %A, then returns %A advanced by 6 i32 elements
; (24 bytes), which equals the combined size of the three <2 x i32> operands.
; %ptr is not used. Without -global-isel the expected output is one st1.2s
; post-indexed by #24; with -global-isel it is a separate add plus a plain
; st1.2s via a copy of x0.
10020define ptr @test_v2i32_post_imm_st1x3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
10021; CHECK-SD-LABEL: test_v2i32_post_imm_st1x3:
10022; CHECK-SD:       ; %bb.0:
10023; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
10024; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
10025; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
10026; CHECK-SD-NEXT:    st1.2s { v0, v1, v2 }, [x0], #24
10027; CHECK-SD-NEXT:    ret
10028;
10029; CHECK-GI-LABEL: test_v2i32_post_imm_st1x3:
10030; CHECK-GI:       ; %bb.0:
10031; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
10032; CHECK-GI-NEXT:    mov x8, x0
10033; CHECK-GI-NEXT:    add x0, x0, #24
10034; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
10035; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
10036; CHECK-GI-NEXT:    st1.2s { v0, v1, v2 }, [x8]
10037; CHECK-GI-NEXT:    ret
10038  call void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %A)
10039  %tmp = getelementptr i32, ptr %A, i32 6
10040  ret ptr %tmp
10041}
10042
; Post-increment store, register form. Passes %B, %C and %D to the st1x3
; intrinsic with address %A, then returns %A advanced by %inc i32 elements.
; %ptr is not used. Without -global-isel the expected output scales x2 with
; lsl #2 into x8 and post-indexes st1.2s by x8; with -global-isel it is an add
; of x2 shifted left by 2 plus a plain st1.2s via a copy of x0.
10043define ptr @test_v2i32_post_reg_st1x3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
10044; CHECK-SD-LABEL: test_v2i32_post_reg_st1x3:
10045; CHECK-SD:       ; %bb.0:
10046; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
10047; CHECK-SD-NEXT:    lsl x8, x2, #2
10048; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
10049; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
10050; CHECK-SD-NEXT:    st1.2s { v0, v1, v2 }, [x0], x8
10051; CHECK-SD-NEXT:    ret
10052;
10053; CHECK-GI-LABEL: test_v2i32_post_reg_st1x3:
10054; CHECK-GI:       ; %bb.0:
10055; CHECK-GI-NEXT:    mov x8, x0
10056; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
10057; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
10058; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
10059; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
10060; CHECK-GI-NEXT:    st1.2s { v0, v1, v2 }, [x8]
10061; CHECK-GI-NEXT:    ret
10062  call void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %A)
10063  %tmp = getelementptr i32, ptr %A, i64 %inc
10064  ret ptr %tmp
10065}
10066
10067declare void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, ptr)
10068
10069
; Post-increment store, immediate form. Passes %B, %C and %D to the st1x3
; intrinsic with address %A, then returns %A advanced by 6 i64 elements
; (48 bytes), which equals the combined size of the three <2 x i64> operands.
; %ptr is not used. Without -global-isel the expected output is one st1.2d
; post-indexed by #48; with -global-isel it is a separate add plus a plain
; st1.2d via a copy of x0.
10070define ptr @test_v2i64_post_imm_st1x3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
10071; CHECK-SD-LABEL: test_v2i64_post_imm_st1x3:
10072; CHECK-SD:       ; %bb.0:
10073; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
10074; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
10075; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
10076; CHECK-SD-NEXT:    st1.2d { v0, v1, v2 }, [x0], #48
10077; CHECK-SD-NEXT:    ret
10078;
10079; CHECK-GI-LABEL: test_v2i64_post_imm_st1x3:
10080; CHECK-GI:       ; %bb.0:
10081; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
10082; CHECK-GI-NEXT:    mov x8, x0
10083; CHECK-GI-NEXT:    add x0, x0, #48
10084; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
10085; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
10086; CHECK-GI-NEXT:    st1.2d { v0, v1, v2 }, [x8]
10087; CHECK-GI-NEXT:    ret
10088  call void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %A)
10089  %tmp = getelementptr i64, ptr %A, i64 6
10090  ret ptr %tmp
10091}
10092
; Post-increment store, register form. Passes %B, %C and %D to the st1x3
; intrinsic with address %A, then returns %A advanced by %inc i64 elements.
; %ptr is not used. Without -global-isel the expected output scales x2 with
; lsl #3 into x8 and post-indexes st1.2d by x8; with -global-isel it is an add
; of x2 shifted left by 3 plus a plain st1.2d via a copy of x0.
10093define ptr @test_v2i64_post_reg_st1x3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
10094; CHECK-SD-LABEL: test_v2i64_post_reg_st1x3:
10095; CHECK-SD:       ; %bb.0:
10096; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
10097; CHECK-SD-NEXT:    lsl x8, x2, #3
10098; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
10099; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
10100; CHECK-SD-NEXT:    st1.2d { v0, v1, v2 }, [x0], x8
10101; CHECK-SD-NEXT:    ret
10102;
10103; CHECK-GI-LABEL: test_v2i64_post_reg_st1x3:
10104; CHECK-GI:       ; %bb.0:
10105; CHECK-GI-NEXT:    mov x8, x0
10106; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
10107; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
10108; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
10109; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
10110; CHECK-GI-NEXT:    st1.2d { v0, v1, v2 }, [x8]
10111; CHECK-GI-NEXT:    ret
10112  call void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %A)
10113  %tmp = getelementptr i64, ptr %A, i64 %inc
10114  ret ptr %tmp
10115}
10116
10117declare void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, ptr)
10118
10119
; Post-increment store, immediate form. Passes %B, %C and %D to the st1x3
; intrinsic with address %A, then returns %A advanced by 3 i64 elements
; (24 bytes), which equals the combined size of the three <1 x i64> operands.
; %ptr is not used. Without -global-isel the expected output is one st1.1d
; post-indexed by #24; with -global-isel it is a separate add plus a plain
; st1.1d via a copy of x0.
10120define ptr @test_v1i64_post_imm_st1x3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
10121; CHECK-SD-LABEL: test_v1i64_post_imm_st1x3:
10122; CHECK-SD:       ; %bb.0:
10123; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
10124; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
10125; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
10126; CHECK-SD-NEXT:    st1.1d { v0, v1, v2 }, [x0], #24
10127; CHECK-SD-NEXT:    ret
10128;
10129; CHECK-GI-LABEL: test_v1i64_post_imm_st1x3:
10130; CHECK-GI:       ; %bb.0:
10131; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
10132; CHECK-GI-NEXT:    mov x8, x0
10133; CHECK-GI-NEXT:    add x0, x0, #24
10134; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
10135; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
10136; CHECK-GI-NEXT:    st1.1d { v0, v1, v2 }, [x8]
10137; CHECK-GI-NEXT:    ret
10138  call void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %A)
10139  %tmp = getelementptr i64, ptr %A, i64 3
10140  ret ptr %tmp
10141}
10142
; Post-increment store, register form. Passes %B, %C and %D to the st1x3
; intrinsic with address %A, then returns %A advanced by %inc i64 elements.
; %ptr is not used. Without -global-isel the expected output scales x2 with
; lsl #3 into x8 and post-indexes st1.1d by x8; with -global-isel it is an add
; of x2 shifted left by 3 plus a plain st1.1d via a copy of x0.
10143define ptr @test_v1i64_post_reg_st1x3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
10144; CHECK-SD-LABEL: test_v1i64_post_reg_st1x3:
10145; CHECK-SD:       ; %bb.0:
10146; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
10147; CHECK-SD-NEXT:    lsl x8, x2, #3
10148; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
10149; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
10150; CHECK-SD-NEXT:    st1.1d { v0, v1, v2 }, [x0], x8
10151; CHECK-SD-NEXT:    ret
10152;
10153; CHECK-GI-LABEL: test_v1i64_post_reg_st1x3:
10154; CHECK-GI:       ; %bb.0:
10155; CHECK-GI-NEXT:    mov x8, x0
10156; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
10157; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
10158; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
10159; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
10160; CHECK-GI-NEXT:    st1.1d { v0, v1, v2 }, [x8]
10161; CHECK-GI-NEXT:    ret
10162  call void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %A)
10163  %tmp = getelementptr i64, ptr %A, i64 %inc
10164  ret ptr %tmp
10165}
10166
10167declare void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>, ptr)
10168
10169
; Post-increment store, immediate form. Passes %B, %C and %D to the st1x3
; intrinsic with address %A, then returns %A advanced by 12 float elements
; (48 bytes), which equals the combined size of the three <4 x float>
; operands. %ptr is not used. Without -global-isel the expected output is one
; st1.4s post-indexed by #48; with -global-isel it is a separate add plus a
; plain st1.4s via a copy of x0.
10170define ptr @test_v4f32_post_imm_st1x3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
10171; CHECK-SD-LABEL: test_v4f32_post_imm_st1x3:
10172; CHECK-SD:       ; %bb.0:
10173; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
10174; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
10175; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
10176; CHECK-SD-NEXT:    st1.4s { v0, v1, v2 }, [x0], #48
10177; CHECK-SD-NEXT:    ret
10178;
10179; CHECK-GI-LABEL: test_v4f32_post_imm_st1x3:
10180; CHECK-GI:       ; %bb.0:
10181; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
10182; CHECK-GI-NEXT:    mov x8, x0
10183; CHECK-GI-NEXT:    add x0, x0, #48
10184; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
10185; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
10186; CHECK-GI-NEXT:    st1.4s { v0, v1, v2 }, [x8]
10187; CHECK-GI-NEXT:    ret
10188  call void @llvm.aarch64.neon.st1x3.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %A)
10189  %tmp = getelementptr float, ptr %A, i32 12
10190  ret ptr %tmp
10191}
10192
; Post-increment store, register form. Passes %B, %C and %D to the st1x3
; intrinsic with address %A, then returns %A advanced by %inc float elements.
; %ptr is not used. Without -global-isel the expected output scales x2 with
; lsl #2 into x8 and post-indexes st1.4s by x8; with -global-isel it is an add
; of x2 shifted left by 2 plus a plain st1.4s via a copy of x0.
10193define ptr @test_v4f32_post_reg_st1x3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
10194; CHECK-SD-LABEL: test_v4f32_post_reg_st1x3:
10195; CHECK-SD:       ; %bb.0:
10196; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
10197; CHECK-SD-NEXT:    lsl x8, x2, #2
10198; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
10199; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
10200; CHECK-SD-NEXT:    st1.4s { v0, v1, v2 }, [x0], x8
10201; CHECK-SD-NEXT:    ret
10202;
10203; CHECK-GI-LABEL: test_v4f32_post_reg_st1x3:
10204; CHECK-GI:       ; %bb.0:
10205; CHECK-GI-NEXT:    mov x8, x0
10206; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
10207; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
10208; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
10209; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
10210; CHECK-GI-NEXT:    st1.4s { v0, v1, v2 }, [x8]
10211; CHECK-GI-NEXT:    ret
10212  call void @llvm.aarch64.neon.st1x3.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %A)
10213  %tmp = getelementptr float, ptr %A, i64 %inc
10214  ret ptr %tmp
10215}
10216
10217declare void @llvm.aarch64.neon.st1x3.v4f32.p0(<4 x float>, <4 x float>, <4 x float>, ptr)
10218
10219
; Post-increment store, immediate form. Passes %B, %C and %D to the st1x3
; intrinsic with address %A, then returns %A advanced by 6 float elements
; (24 bytes), which equals the combined size of the three <2 x float>
; operands. %ptr is not used. Without -global-isel the expected output is one
; st1.2s post-indexed by #24; with -global-isel it is a separate add plus a
; plain st1.2s via a copy of x0.
10220define ptr @test_v2f32_post_imm_st1x3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
10221; CHECK-SD-LABEL: test_v2f32_post_imm_st1x3:
10222; CHECK-SD:       ; %bb.0:
10223; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
10224; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
10225; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
10226; CHECK-SD-NEXT:    st1.2s { v0, v1, v2 }, [x0], #24
10227; CHECK-SD-NEXT:    ret
10228;
10229; CHECK-GI-LABEL: test_v2f32_post_imm_st1x3:
10230; CHECK-GI:       ; %bb.0:
10231; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
10232; CHECK-GI-NEXT:    mov x8, x0
10233; CHECK-GI-NEXT:    add x0, x0, #24
10234; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
10235; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
10236; CHECK-GI-NEXT:    st1.2s { v0, v1, v2 }, [x8]
10237; CHECK-GI-NEXT:    ret
10238  call void @llvm.aarch64.neon.st1x3.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %A)
10239  %tmp = getelementptr float, ptr %A, i32 6
10240  ret ptr %tmp
10241}
10242
; Post-increment store, register form. Passes %B, %C and %D to the st1x3
; intrinsic with address %A, then returns %A advanced by %inc float elements.
; %ptr is not used. Without -global-isel the expected output scales x2 with
; lsl #2 into x8 and post-indexes st1.2s by x8; with -global-isel it is an add
; of x2 shifted left by 2 plus a plain st1.2s via a copy of x0.
10243define ptr @test_v2f32_post_reg_st1x3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
10244; CHECK-SD-LABEL: test_v2f32_post_reg_st1x3:
10245; CHECK-SD:       ; %bb.0:
10246; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
10247; CHECK-SD-NEXT:    lsl x8, x2, #2
10248; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
10249; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
10250; CHECK-SD-NEXT:    st1.2s { v0, v1, v2 }, [x0], x8
10251; CHECK-SD-NEXT:    ret
10252;
10253; CHECK-GI-LABEL: test_v2f32_post_reg_st1x3:
10254; CHECK-GI:       ; %bb.0:
10255; CHECK-GI-NEXT:    mov x8, x0
10256; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
10257; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
10258; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
10259; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
10260; CHECK-GI-NEXT:    st1.2s { v0, v1, v2 }, [x8]
10261; CHECK-GI-NEXT:    ret
10262  call void @llvm.aarch64.neon.st1x3.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %A)
10263  %tmp = getelementptr float, ptr %A, i64 %inc
10264  ret ptr %tmp
10265}
10266
10267declare void @llvm.aarch64.neon.st1x3.v2f32.p0(<2 x float>, <2 x float>, <2 x float>, ptr)
10268
10269
; Post-increment store, immediate form. Passes %B, %C and %D to the st1x3
; intrinsic with address %A, then returns %A advanced by 6 double elements
; (48 bytes), which equals the combined size of the three <2 x double>
; operands. %ptr is not used. Without -global-isel the expected output is one
; st1.2d post-indexed by #48; with -global-isel it is a separate add plus a
; plain st1.2d via a copy of x0.
10270define ptr @test_v2f64_post_imm_st1x3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
10271; CHECK-SD-LABEL: test_v2f64_post_imm_st1x3:
10272; CHECK-SD:       ; %bb.0:
10273; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
10274; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
10275; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
10276; CHECK-SD-NEXT:    st1.2d { v0, v1, v2 }, [x0], #48
10277; CHECK-SD-NEXT:    ret
10278;
10279; CHECK-GI-LABEL: test_v2f64_post_imm_st1x3:
10280; CHECK-GI:       ; %bb.0:
10281; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
10282; CHECK-GI-NEXT:    mov x8, x0
10283; CHECK-GI-NEXT:    add x0, x0, #48
10284; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
10285; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
10286; CHECK-GI-NEXT:    st1.2d { v0, v1, v2 }, [x8]
10287; CHECK-GI-NEXT:    ret
10288  call void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %A)
10289  %tmp = getelementptr double, ptr %A, i64 6
10290  ret ptr %tmp
10291}
10292
; Post-increment store, register form. Passes %B, %C and %D to the st1x3
; intrinsic with address %A, then returns %A advanced by %inc double elements.
; %ptr is not used. Without -global-isel the expected output scales x2 with
; lsl #3 into x8 and post-indexes st1.2d by x8; with -global-isel it is an add
; of x2 shifted left by 3 plus a plain st1.2d via a copy of x0.
10293define ptr @test_v2f64_post_reg_st1x3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
10294; CHECK-SD-LABEL: test_v2f64_post_reg_st1x3:
10295; CHECK-SD:       ; %bb.0:
10296; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
10297; CHECK-SD-NEXT:    lsl x8, x2, #3
10298; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
10299; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
10300; CHECK-SD-NEXT:    st1.2d { v0, v1, v2 }, [x0], x8
10301; CHECK-SD-NEXT:    ret
10302;
10303; CHECK-GI-LABEL: test_v2f64_post_reg_st1x3:
10304; CHECK-GI:       ; %bb.0:
10305; CHECK-GI-NEXT:    mov x8, x0
10306; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
10307; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
10308; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
10309; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
10310; CHECK-GI-NEXT:    st1.2d { v0, v1, v2 }, [x8]
10311; CHECK-GI-NEXT:    ret
10312  call void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %A)
10313  %tmp = getelementptr double, ptr %A, i64 %inc
10314  ret ptr %tmp
10315}
10316
10317declare void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double>, <2 x double>, <2 x double>, ptr)
10318
10319
; Post-increment store, immediate form. Passes %B, %C and %D to the st1x3
; intrinsic with address %A, then returns %A advanced by 3 double elements
; (24 bytes), which equals the combined size of the three <1 x double>
; operands. %ptr is not used. Without -global-isel the expected output is one
; st1.1d post-indexed by #24; with -global-isel it is a separate add plus a
; plain st1.1d via a copy of x0.
10320define ptr @test_v1f64_post_imm_st1x3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
10321; CHECK-SD-LABEL: test_v1f64_post_imm_st1x3:
10322; CHECK-SD:       ; %bb.0:
10323; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
10324; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
10325; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
10326; CHECK-SD-NEXT:    st1.1d { v0, v1, v2 }, [x0], #24
10327; CHECK-SD-NEXT:    ret
10328;
10329; CHECK-GI-LABEL: test_v1f64_post_imm_st1x3:
10330; CHECK-GI:       ; %bb.0:
10331; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
10332; CHECK-GI-NEXT:    mov x8, x0
10333; CHECK-GI-NEXT:    add x0, x0, #24
10334; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
10335; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
10336; CHECK-GI-NEXT:    st1.1d { v0, v1, v2 }, [x8]
10337; CHECK-GI-NEXT:    ret
10338  call void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %A)
10339  %tmp = getelementptr double, ptr %A, i64 3
10340  ret ptr %tmp
10341}
10342
; Post-increment store, register form. Passes %B, %C and %D to the st1x3
; intrinsic with address %A, then returns %A advanced by %inc double elements.
; %ptr is not used. Without -global-isel the expected output scales x2 with
; lsl #3 into x8 and post-indexes st1.1d by x8; with -global-isel it is an add
; of x2 shifted left by 3 plus a plain st1.1d via a copy of x0.
10343define ptr @test_v1f64_post_reg_st1x3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
10344; CHECK-SD-LABEL: test_v1f64_post_reg_st1x3:
10345; CHECK-SD:       ; %bb.0:
10346; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
10347; CHECK-SD-NEXT:    lsl x8, x2, #3
10348; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
10349; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
10350; CHECK-SD-NEXT:    st1.1d { v0, v1, v2 }, [x0], x8
10351; CHECK-SD-NEXT:    ret
10352;
10353; CHECK-GI-LABEL: test_v1f64_post_reg_st1x3:
10354; CHECK-GI:       ; %bb.0:
10355; CHECK-GI-NEXT:    mov x8, x0
10356; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
10357; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
10358; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
10359; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
10360; CHECK-GI-NEXT:    st1.1d { v0, v1, v2 }, [x8]
10361; CHECK-GI-NEXT:    ret
10362  call void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %A)
10363  %tmp = getelementptr double, ptr %A, i64 %inc
10364  ret ptr %tmp
10365}
10366
10367declare void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double>, <1 x double>, <1 x double>, ptr)
10368
10369
; Post-increment store, immediate form. Passes %B, %C, %D and %E to the st1x4
; intrinsic with address %A, then returns %A advanced by 64 i8 elements, which
; equals the combined size of the four <16 x i8> operands. %ptr is not used.
; Without -global-isel the expected output is one st1.16b post-indexed by #64;
; with -global-isel it is a separate add plus a plain st1.16b via a copy of x0.
10370define ptr @test_v16i8_post_imm_st1x4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
10371; CHECK-SD-LABEL: test_v16i8_post_imm_st1x4:
10372; CHECK-SD:       ; %bb.0:
10373; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10374; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10375; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10376; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10377; CHECK-SD-NEXT:    st1.16b { v0, v1, v2, v3 }, [x0], #64
10378; CHECK-SD-NEXT:    ret
10379;
10380; CHECK-GI-LABEL: test_v16i8_post_imm_st1x4:
10381; CHECK-GI:       ; %bb.0:
10382; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10383; CHECK-GI-NEXT:    mov x8, x0
10384; CHECK-GI-NEXT:    add x0, x0, #64
10385; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10386; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10387; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10388; CHECK-GI-NEXT:    st1.16b { v0, v1, v2, v3 }, [x8]
10389; CHECK-GI-NEXT:    ret
10390  call void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, ptr %A)
10391  %tmp = getelementptr i8, ptr %A, i32 64
10392  ret ptr %tmp
10393}
10394
; Post-increment store, register form. Passes %B, %C, %D and %E to the st1x4
; intrinsic with address %A, then returns %A advanced by %inc i8 elements.
; %ptr is not used. Without -global-isel the expected output is one st1.16b
; post-indexed by x2; with -global-isel it is an add of x2 plus a plain
; st1.16b via a copy of x0.
10395define ptr @test_v16i8_post_reg_st1x4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
10396; CHECK-SD-LABEL: test_v16i8_post_reg_st1x4:
10397; CHECK-SD:       ; %bb.0:
10398; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10399; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10400; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10401; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10402; CHECK-SD-NEXT:    st1.16b { v0, v1, v2, v3 }, [x0], x2
10403; CHECK-SD-NEXT:    ret
10404;
10405; CHECK-GI-LABEL: test_v16i8_post_reg_st1x4:
10406; CHECK-GI:       ; %bb.0:
10407; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10408; CHECK-GI-NEXT:    mov x8, x0
10409; CHECK-GI-NEXT:    add x0, x0, x2
10410; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10411; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10412; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10413; CHECK-GI-NEXT:    st1.16b { v0, v1, v2, v3 }, [x8]
10414; CHECK-GI-NEXT:    ret
10415  call void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, ptr %A)
10416  %tmp = getelementptr i8, ptr %A, i64 %inc
10417  ret ptr %tmp
10418}
10419
10420declare void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, ptr)
10421
10422
; Post-increment store, immediate form. Passes %B, %C, %D and %E to the st1x4
; intrinsic with address %A, then returns %A advanced by 32 i8 elements, which
; equals the combined size of the four <8 x i8> operands. %ptr is not used.
; Without -global-isel the expected output is one st1.8b post-indexed by #32;
; with -global-isel it is a separate add plus a plain st1.8b via a copy of x0.
10423define ptr @test_v8i8_post_imm_st1x4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
10424; CHECK-SD-LABEL: test_v8i8_post_imm_st1x4:
10425; CHECK-SD:       ; %bb.0:
10426; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10427; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10428; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10429; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10430; CHECK-SD-NEXT:    st1.8b { v0, v1, v2, v3 }, [x0], #32
10431; CHECK-SD-NEXT:    ret
10432;
10433; CHECK-GI-LABEL: test_v8i8_post_imm_st1x4:
10434; CHECK-GI:       ; %bb.0:
10435; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10436; CHECK-GI-NEXT:    mov x8, x0
10437; CHECK-GI-NEXT:    add x0, x0, #32
10438; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10439; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10440; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10441; CHECK-GI-NEXT:    st1.8b { v0, v1, v2, v3 }, [x8]
10442; CHECK-GI-NEXT:    ret
10443  call void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, ptr %A)
10444  %tmp = getelementptr i8, ptr %A, i32 32
10445  ret ptr %tmp
10446}
10447
; Post-increment store, register form. Passes %B, %C, %D and %E to the st1x4
; intrinsic with address %A, then returns %A advanced by %inc i8 elements.
; %ptr is not used. Without -global-isel the expected output is one st1.8b
; post-indexed by x2; with -global-isel it is an add of x2 plus a plain st1.8b
; via a copy of x0.
10448define ptr @test_v8i8_post_reg_st1x4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
10449; CHECK-SD-LABEL: test_v8i8_post_reg_st1x4:
10450; CHECK-SD:       ; %bb.0:
10451; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10452; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10453; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10454; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10455; CHECK-SD-NEXT:    st1.8b { v0, v1, v2, v3 }, [x0], x2
10456; CHECK-SD-NEXT:    ret
10457;
10458; CHECK-GI-LABEL: test_v8i8_post_reg_st1x4:
10459; CHECK-GI:       ; %bb.0:
10460; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10461; CHECK-GI-NEXT:    mov x8, x0
10462; CHECK-GI-NEXT:    add x0, x0, x2
10463; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10464; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10465; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10466; CHECK-GI-NEXT:    st1.8b { v0, v1, v2, v3 }, [x8]
10467; CHECK-GI-NEXT:    ret
10468  call void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, ptr %A)
10469  %tmp = getelementptr i8, ptr %A, i64 %inc
10470  ret ptr %tmp
10471}
10472
10473declare void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, ptr)
10474
10475
; Stores four <8 x i16> vectors to %A with the st1x4 intrinsic and returns %A
; advanced by 32 i16 elements (64 bytes, the total size of the four vectors).
; Expected output of the default llc invocation: the increment is folded into
; the store as an immediate writeback (`[x0], #64`).
; Expected output with -global-isel=1: a separate `add` followed by a plain
; `st1` through a copy of the original pointer. %ptr is not referenced.
10476define ptr @test_v8i16_post_imm_st1x4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
10477; CHECK-SD-LABEL: test_v8i16_post_imm_st1x4:
10478; CHECK-SD:       ; %bb.0:
10479; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10480; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10481; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10482; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10483; CHECK-SD-NEXT:    st1.8h { v0, v1, v2, v3 }, [x0], #64
10484; CHECK-SD-NEXT:    ret
10485;
10486; CHECK-GI-LABEL: test_v8i16_post_imm_st1x4:
10487; CHECK-GI:       ; %bb.0:
10488; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10489; CHECK-GI-NEXT:    mov x8, x0
10490; CHECK-GI-NEXT:    add x0, x0, #64
10491; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10492; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10493; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10494; CHECK-GI-NEXT:    st1.8h { v0, v1, v2, v3 }, [x8]
10495; CHECK-GI-NEXT:    ret
10496  call void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, ptr %A)
10497  %tmp = getelementptr i16, ptr %A, i32 32
10498  ret ptr %tmp
10499}
10500
; Stores four <8 x i16> vectors to %A with the st1x4 intrinsic and returns %A
; advanced by %inc i16 elements (%inc * 2 bytes).
; Expected output of the default llc invocation: the increment is scaled with
; `lsl x8, x2, #1` and folded into the store as a register writeback.
; Expected output with -global-isel=1: a separate `add` with a shifted register
; followed by a plain `st1` through a copy of the original pointer.
; %ptr is not referenced.
10501define ptr @test_v8i16_post_reg_st1x4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
10502; CHECK-SD-LABEL: test_v8i16_post_reg_st1x4:
10503; CHECK-SD:       ; %bb.0:
10504; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10505; CHECK-SD-NEXT:    lsl x8, x2, #1
10506; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10507; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10508; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10509; CHECK-SD-NEXT:    st1.8h { v0, v1, v2, v3 }, [x0], x8
10510; CHECK-SD-NEXT:    ret
10511;
10512; CHECK-GI-LABEL: test_v8i16_post_reg_st1x4:
10513; CHECK-GI:       ; %bb.0:
10514; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10515; CHECK-GI-NEXT:    mov x8, x0
10516; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
10517; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10518; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10519; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10520; CHECK-GI-NEXT:    st1.8h { v0, v1, v2, v3 }, [x8]
10521; CHECK-GI-NEXT:    ret
10522  call void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, ptr %A)
10523  %tmp = getelementptr i16, ptr %A, i64 %inc
10524  ret ptr %tmp
10525}
10526
10527declare void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, ptr)
10528
10529
; Stores four <4 x i16> vectors to %A with the st1x4 intrinsic and returns %A
; advanced by 16 i16 elements (32 bytes, the total size of the four vectors).
; Expected output of the default llc invocation: the increment is folded into
; the store as an immediate writeback (`[x0], #32`).
; Expected output with -global-isel=1: a separate `add` followed by a plain
; `st1` through a copy of the original pointer. %ptr is not referenced.
10530define ptr @test_v4i16_post_imm_st1x4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
10531; CHECK-SD-LABEL: test_v4i16_post_imm_st1x4:
10532; CHECK-SD:       ; %bb.0:
10533; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10534; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10535; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10536; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10537; CHECK-SD-NEXT:    st1.4h { v0, v1, v2, v3 }, [x0], #32
10538; CHECK-SD-NEXT:    ret
10539;
10540; CHECK-GI-LABEL: test_v4i16_post_imm_st1x4:
10541; CHECK-GI:       ; %bb.0:
10542; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10543; CHECK-GI-NEXT:    mov x8, x0
10544; CHECK-GI-NEXT:    add x0, x0, #32
10545; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10546; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10547; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10548; CHECK-GI-NEXT:    st1.4h { v0, v1, v2, v3 }, [x8]
10549; CHECK-GI-NEXT:    ret
10550  call void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, ptr %A)
10551  %tmp = getelementptr i16, ptr %A, i32 16
10552  ret ptr %tmp
10553}
10554
; Stores four <4 x i16> vectors to %A with the st1x4 intrinsic and returns %A
; advanced by %inc i16 elements (%inc * 2 bytes).
; Expected output of the default llc invocation: the increment is scaled with
; `lsl x8, x2, #1` and folded into the store as a register writeback.
; Expected output with -global-isel=1: a separate `add` with a shifted register
; followed by a plain `st1` through a copy of the original pointer.
; %ptr is not referenced.
10555define ptr @test_v4i16_post_reg_st1x4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
10556; CHECK-SD-LABEL: test_v4i16_post_reg_st1x4:
10557; CHECK-SD:       ; %bb.0:
10558; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10559; CHECK-SD-NEXT:    lsl x8, x2, #1
10560; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10561; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10562; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10563; CHECK-SD-NEXT:    st1.4h { v0, v1, v2, v3 }, [x0], x8
10564; CHECK-SD-NEXT:    ret
10565;
10566; CHECK-GI-LABEL: test_v4i16_post_reg_st1x4:
10567; CHECK-GI:       ; %bb.0:
10568; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10569; CHECK-GI-NEXT:    mov x8, x0
10570; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
10571; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10572; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10573; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10574; CHECK-GI-NEXT:    st1.4h { v0, v1, v2, v3 }, [x8]
10575; CHECK-GI-NEXT:    ret
10576  call void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, ptr %A)
10577  %tmp = getelementptr i16, ptr %A, i64 %inc
10578  ret ptr %tmp
10579}
10580
10581declare void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>,<4 x i16>,  ptr)
10582
10583
; Stores four <4 x i32> vectors to %A with the st1x4 intrinsic and returns %A
; advanced by 16 i32 elements (64 bytes, the total size of the four vectors).
; Expected output of the default llc invocation: the increment is folded into
; the store as an immediate writeback (`[x0], #64`).
; Expected output with -global-isel=1: a separate `add` followed by a plain
; `st1` through a copy of the original pointer. %ptr is not referenced.
10584define ptr @test_v4i32_post_imm_st1x4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
10585; CHECK-SD-LABEL: test_v4i32_post_imm_st1x4:
10586; CHECK-SD:       ; %bb.0:
10587; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10588; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10589; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10590; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10591; CHECK-SD-NEXT:    st1.4s { v0, v1, v2, v3 }, [x0], #64
10592; CHECK-SD-NEXT:    ret
10593;
10594; CHECK-GI-LABEL: test_v4i32_post_imm_st1x4:
10595; CHECK-GI:       ; %bb.0:
10596; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10597; CHECK-GI-NEXT:    mov x8, x0
10598; CHECK-GI-NEXT:    add x0, x0, #64
10599; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10600; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10601; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10602; CHECK-GI-NEXT:    st1.4s { v0, v1, v2, v3 }, [x8]
10603; CHECK-GI-NEXT:    ret
10604  call void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, ptr %A)
10605  %tmp = getelementptr i32, ptr %A, i32 16
10606  ret ptr %tmp
10607}
10608
; Stores four <4 x i32> vectors to %A with the st1x4 intrinsic and returns %A
; advanced by %inc i32 elements (%inc * 4 bytes).
; Expected output of the default llc invocation: the increment is scaled with
; `lsl x8, x2, #2` and folded into the store as a register writeback.
; Expected output with -global-isel=1: a separate `add` with a shifted register
; followed by a plain `st1` through a copy of the original pointer.
; %ptr is not referenced.
10609define ptr @test_v4i32_post_reg_st1x4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
10610; CHECK-SD-LABEL: test_v4i32_post_reg_st1x4:
10611; CHECK-SD:       ; %bb.0:
10612; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10613; CHECK-SD-NEXT:    lsl x8, x2, #2
10614; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10615; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10616; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10617; CHECK-SD-NEXT:    st1.4s { v0, v1, v2, v3 }, [x0], x8
10618; CHECK-SD-NEXT:    ret
10619;
10620; CHECK-GI-LABEL: test_v4i32_post_reg_st1x4:
10621; CHECK-GI:       ; %bb.0:
10622; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10623; CHECK-GI-NEXT:    mov x8, x0
10624; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
10625; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10626; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10627; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10628; CHECK-GI-NEXT:    st1.4s { v0, v1, v2, v3 }, [x8]
10629; CHECK-GI-NEXT:    ret
10630  call void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, ptr %A)
10631  %tmp = getelementptr i32, ptr %A, i64 %inc
10632  ret ptr %tmp
10633}
10634
10635declare void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>,<4 x i32>,  ptr)
10636
10637
; Stores four <2 x i32> vectors to %A with the st1x4 intrinsic and returns %A
; advanced by 8 i32 elements (32 bytes, the total size of the four vectors).
; Expected output of the default llc invocation: the increment is folded into
; the store as an immediate writeback (`[x0], #32`).
; Expected output with -global-isel=1: a separate `add` followed by a plain
; `st1` through a copy of the original pointer. %ptr is not referenced.
10638define ptr @test_v2i32_post_imm_st1x4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
10639; CHECK-SD-LABEL: test_v2i32_post_imm_st1x4:
10640; CHECK-SD:       ; %bb.0:
10641; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10642; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10643; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10644; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10645; CHECK-SD-NEXT:    st1.2s { v0, v1, v2, v3 }, [x0], #32
10646; CHECK-SD-NEXT:    ret
10647;
10648; CHECK-GI-LABEL: test_v2i32_post_imm_st1x4:
10649; CHECK-GI:       ; %bb.0:
10650; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10651; CHECK-GI-NEXT:    mov x8, x0
10652; CHECK-GI-NEXT:    add x0, x0, #32
10653; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10654; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10655; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10656; CHECK-GI-NEXT:    st1.2s { v0, v1, v2, v3 }, [x8]
10657; CHECK-GI-NEXT:    ret
10658  call void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, ptr %A)
10659  %tmp = getelementptr i32, ptr %A, i32 8
10660  ret ptr %tmp
10661}
10662
; Stores four <2 x i32> vectors to %A with the st1x4 intrinsic and returns %A
; advanced by %inc i32 elements (%inc * 4 bytes).
; Expected output of the default llc invocation: the increment is scaled with
; `lsl x8, x2, #2` and folded into the store as a register writeback.
; Expected output with -global-isel=1: a separate `add` with a shifted register
; followed by a plain `st1` through a copy of the original pointer.
; %ptr is not referenced.
10663define ptr @test_v2i32_post_reg_st1x4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
10664; CHECK-SD-LABEL: test_v2i32_post_reg_st1x4:
10665; CHECK-SD:       ; %bb.0:
10666; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10667; CHECK-SD-NEXT:    lsl x8, x2, #2
10668; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10669; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10670; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10671; CHECK-SD-NEXT:    st1.2s { v0, v1, v2, v3 }, [x0], x8
10672; CHECK-SD-NEXT:    ret
10673;
10674; CHECK-GI-LABEL: test_v2i32_post_reg_st1x4:
10675; CHECK-GI:       ; %bb.0:
10676; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10677; CHECK-GI-NEXT:    mov x8, x0
10678; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
10679; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10680; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10681; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10682; CHECK-GI-NEXT:    st1.2s { v0, v1, v2, v3 }, [x8]
10683; CHECK-GI-NEXT:    ret
10684  call void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, ptr %A)
10685  %tmp = getelementptr i32, ptr %A, i64 %inc
10686  ret ptr %tmp
10687}
10688
10689declare void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, ptr)
10690
10691
; Stores four <2 x i64> vectors to %A with the st1x4 intrinsic and returns %A
; advanced by 8 i64 elements (64 bytes, the total size of the four vectors).
; Expected output of the default llc invocation: the increment is folded into
; the store as an immediate writeback (`[x0], #64`).
; Expected output with -global-isel=1: a separate `add` followed by a plain
; `st1` through a copy of the original pointer. %ptr is not referenced.
10692define ptr @test_v2i64_post_imm_st1x4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
10693; CHECK-SD-LABEL: test_v2i64_post_imm_st1x4:
10694; CHECK-SD:       ; %bb.0:
10695; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10696; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10697; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10698; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10699; CHECK-SD-NEXT:    st1.2d { v0, v1, v2, v3 }, [x0], #64
10700; CHECK-SD-NEXT:    ret
10701;
10702; CHECK-GI-LABEL: test_v2i64_post_imm_st1x4:
10703; CHECK-GI:       ; %bb.0:
10704; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10705; CHECK-GI-NEXT:    mov x8, x0
10706; CHECK-GI-NEXT:    add x0, x0, #64
10707; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10708; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10709; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10710; CHECK-GI-NEXT:    st1.2d { v0, v1, v2, v3 }, [x8]
10711; CHECK-GI-NEXT:    ret
10712  call void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, ptr %A)
10713  %tmp = getelementptr i64, ptr %A, i64 8
10714  ret ptr %tmp
10715}
10716
; Stores four <2 x i64> vectors to %A with the st1x4 intrinsic and returns %A
; advanced by %inc i64 elements (%inc * 8 bytes).
; Expected output of the default llc invocation: the increment is scaled with
; `lsl x8, x2, #3` and folded into the store as a register writeback.
; Expected output with -global-isel=1: a separate `add` with a shifted register
; followed by a plain `st1` through a copy of the original pointer.
; %ptr is not referenced.
10717define ptr @test_v2i64_post_reg_st1x4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
10718; CHECK-SD-LABEL: test_v2i64_post_reg_st1x4:
10719; CHECK-SD:       ; %bb.0:
10720; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10721; CHECK-SD-NEXT:    lsl x8, x2, #3
10722; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10723; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10724; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10725; CHECK-SD-NEXT:    st1.2d { v0, v1, v2, v3 }, [x0], x8
10726; CHECK-SD-NEXT:    ret
10727;
10728; CHECK-GI-LABEL: test_v2i64_post_reg_st1x4:
10729; CHECK-GI:       ; %bb.0:
10730; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10731; CHECK-GI-NEXT:    mov x8, x0
10732; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
10733; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10734; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10735; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10736; CHECK-GI-NEXT:    st1.2d { v0, v1, v2, v3 }, [x8]
10737; CHECK-GI-NEXT:    ret
10738  call void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, ptr %A)
10739  %tmp = getelementptr i64, ptr %A, i64 %inc
10740  ret ptr %tmp
10741}
10742
10743declare void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>,<2 x i64>,  ptr)
10744
10745
; Stores four <1 x i64> vectors to %A with the st1x4 intrinsic and returns %A
; advanced by 4 i64 elements (32 bytes, the total size of the four vectors).
; Expected output of the default llc invocation: the increment is folded into
; the store as an immediate writeback (`[x0], #32`).
; Expected output with -global-isel=1: a separate `add` followed by a plain
; `st1` through a copy of the original pointer. %ptr is not referenced.
10746define ptr @test_v1i64_post_imm_st1x4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
10747; CHECK-SD-LABEL: test_v1i64_post_imm_st1x4:
10748; CHECK-SD:       ; %bb.0:
10749; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10750; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10751; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10752; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10753; CHECK-SD-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], #32
10754; CHECK-SD-NEXT:    ret
10755;
10756; CHECK-GI-LABEL: test_v1i64_post_imm_st1x4:
10757; CHECK-GI:       ; %bb.0:
10758; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10759; CHECK-GI-NEXT:    mov x8, x0
10760; CHECK-GI-NEXT:    add x0, x0, #32
10761; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10762; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10763; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10764; CHECK-GI-NEXT:    st1.1d { v0, v1, v2, v3 }, [x8]
10765; CHECK-GI-NEXT:    ret
10766  call void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, ptr %A)
10767  %tmp = getelementptr i64, ptr %A, i64 4
10768  ret ptr %tmp
10769}
10770
; Stores four <1 x i64> vectors to %A with the st1x4 intrinsic and returns %A
; advanced by %inc i64 elements (%inc * 8 bytes).
; Expected output of the default llc invocation: the increment is scaled with
; `lsl x8, x2, #3` and folded into the store as a register writeback.
; Expected output with -global-isel=1: a separate `add` with a shifted register
; followed by a plain `st1` through a copy of the original pointer.
; %ptr is not referenced.
10771define ptr @test_v1i64_post_reg_st1x4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
10772; CHECK-SD-LABEL: test_v1i64_post_reg_st1x4:
10773; CHECK-SD:       ; %bb.0:
10774; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10775; CHECK-SD-NEXT:    lsl x8, x2, #3
10776; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10777; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10778; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10779; CHECK-SD-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], x8
10780; CHECK-SD-NEXT:    ret
10781;
10782; CHECK-GI-LABEL: test_v1i64_post_reg_st1x4:
10783; CHECK-GI:       ; %bb.0:
10784; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10785; CHECK-GI-NEXT:    mov x8, x0
10786; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
10787; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10788; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10789; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10790; CHECK-GI-NEXT:    st1.1d { v0, v1, v2, v3 }, [x8]
10791; CHECK-GI-NEXT:    ret
10792  call void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, ptr %A)
10793  %tmp = getelementptr i64, ptr %A, i64 %inc
10794  ret ptr %tmp
10795}
10796
10797declare void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>,<1 x i64>,  ptr)
10798
10799
; Stores four <4 x float> vectors to %A with the st1x4 intrinsic and returns %A
; advanced by 16 float elements (64 bytes, the total size of the four vectors).
; Expected output of the default llc invocation: the increment is folded into
; the store as an immediate writeback (`[x0], #64`).
; Expected output with -global-isel=1: a separate `add` followed by a plain
; `st1` through a copy of the original pointer. %ptr is not referenced.
10800define ptr @test_v4f32_post_imm_st1x4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
10801; CHECK-SD-LABEL: test_v4f32_post_imm_st1x4:
10802; CHECK-SD:       ; %bb.0:
10803; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10804; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10805; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10806; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10807; CHECK-SD-NEXT:    st1.4s { v0, v1, v2, v3 }, [x0], #64
10808; CHECK-SD-NEXT:    ret
10809;
10810; CHECK-GI-LABEL: test_v4f32_post_imm_st1x4:
10811; CHECK-GI:       ; %bb.0:
10812; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10813; CHECK-GI-NEXT:    mov x8, x0
10814; CHECK-GI-NEXT:    add x0, x0, #64
10815; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10816; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10817; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10818; CHECK-GI-NEXT:    st1.4s { v0, v1, v2, v3 }, [x8]
10819; CHECK-GI-NEXT:    ret
10820  call void @llvm.aarch64.neon.st1x4.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, ptr %A)
10821  %tmp = getelementptr float, ptr %A, i32 16
10822  ret ptr %tmp
10823}
10824
; Stores four <4 x float> vectors to %A with the st1x4 intrinsic and returns %A
; advanced by %inc float elements (%inc * 4 bytes).
; Expected output of the default llc invocation: the increment is scaled with
; `lsl x8, x2, #2` and folded into the store as a register writeback.
; Expected output with -global-isel=1: a separate `add` with a shifted register
; followed by a plain `st1` through a copy of the original pointer.
; %ptr is not referenced.
10825define ptr @test_v4f32_post_reg_st1x4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
10826; CHECK-SD-LABEL: test_v4f32_post_reg_st1x4:
10827; CHECK-SD:       ; %bb.0:
10828; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10829; CHECK-SD-NEXT:    lsl x8, x2, #2
10830; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10831; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10832; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10833; CHECK-SD-NEXT:    st1.4s { v0, v1, v2, v3 }, [x0], x8
10834; CHECK-SD-NEXT:    ret
10835;
10836; CHECK-GI-LABEL: test_v4f32_post_reg_st1x4:
10837; CHECK-GI:       ; %bb.0:
10838; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10839; CHECK-GI-NEXT:    mov x8, x0
10840; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
10841; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10842; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10843; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10844; CHECK-GI-NEXT:    st1.4s { v0, v1, v2, v3 }, [x8]
10845; CHECK-GI-NEXT:    ret
10846  call void @llvm.aarch64.neon.st1x4.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, ptr %A)
10847  %tmp = getelementptr float, ptr %A, i64 %inc
10848  ret ptr %tmp
10849}
10850
10851declare void @llvm.aarch64.neon.st1x4.v4f32.p0(<4 x float>, <4 x float>, <4 x float>, <4 x float>, ptr)
10852
10853
; Stores four <2 x float> vectors to %A with the st1x4 intrinsic and returns %A
; advanced by 8 float elements (32 bytes, the total size of the four vectors).
; Expected output of the default llc invocation: the increment is folded into
; the store as an immediate writeback (`[x0], #32`).
; Expected output with -global-isel=1: a separate `add` followed by a plain
; `st1` through a copy of the original pointer. %ptr is not referenced.
10854define ptr @test_v2f32_post_imm_st1x4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
10855; CHECK-SD-LABEL: test_v2f32_post_imm_st1x4:
10856; CHECK-SD:       ; %bb.0:
10857; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10858; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10859; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10860; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10861; CHECK-SD-NEXT:    st1.2s { v0, v1, v2, v3 }, [x0], #32
10862; CHECK-SD-NEXT:    ret
10863;
10864; CHECK-GI-LABEL: test_v2f32_post_imm_st1x4:
10865; CHECK-GI:       ; %bb.0:
10866; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10867; CHECK-GI-NEXT:    mov x8, x0
10868; CHECK-GI-NEXT:    add x0, x0, #32
10869; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10870; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10871; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10872; CHECK-GI-NEXT:    st1.2s { v0, v1, v2, v3 }, [x8]
10873; CHECK-GI-NEXT:    ret
10874  call void @llvm.aarch64.neon.st1x4.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, ptr %A)
10875  %tmp = getelementptr float, ptr %A, i32 8
10876  ret ptr %tmp
10877}
10878
; Stores four <2 x float> vectors to %A with the st1x4 intrinsic and returns %A
; advanced by %inc float elements (%inc * 4 bytes).
; Expected output of the default llc invocation: the increment is scaled with
; `lsl x8, x2, #2` and folded into the store as a register writeback.
; Expected output with -global-isel=1: a separate `add` with a shifted register
; followed by a plain `st1` through a copy of the original pointer.
; %ptr is not referenced.
10879define ptr @test_v2f32_post_reg_st1x4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
10880; CHECK-SD-LABEL: test_v2f32_post_reg_st1x4:
10881; CHECK-SD:       ; %bb.0:
10882; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10883; CHECK-SD-NEXT:    lsl x8, x2, #2
10884; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10885; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10886; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10887; CHECK-SD-NEXT:    st1.2s { v0, v1, v2, v3 }, [x0], x8
10888; CHECK-SD-NEXT:    ret
10889;
10890; CHECK-GI-LABEL: test_v2f32_post_reg_st1x4:
10891; CHECK-GI:       ; %bb.0:
10892; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10893; CHECK-GI-NEXT:    mov x8, x0
10894; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
10895; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10896; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10897; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10898; CHECK-GI-NEXT:    st1.2s { v0, v1, v2, v3 }, [x8]
10899; CHECK-GI-NEXT:    ret
10900  call void @llvm.aarch64.neon.st1x4.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, ptr %A)
10901  %tmp = getelementptr float, ptr %A, i64 %inc
10902  ret ptr %tmp
10903}
10904
10905declare void @llvm.aarch64.neon.st1x4.v2f32.p0(<2 x float>, <2 x float>, <2 x float>, <2 x float>, ptr)
10906
10907
; Stores four <2 x double> vectors to %A with the st1x4 intrinsic and returns
; %A advanced by 8 double elements (64 bytes, the total size of the four
; vectors).
; Expected output of the default llc invocation: the increment is folded into
; the store as an immediate writeback (`[x0], #64`).
; Expected output with -global-isel=1: a separate `add` followed by a plain
; `st1` through a copy of the original pointer. %ptr is not referenced.
10908define ptr @test_v2f64_post_imm_st1x4(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
10909; CHECK-SD-LABEL: test_v2f64_post_imm_st1x4:
10910; CHECK-SD:       ; %bb.0:
10911; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10912; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10913; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10914; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10915; CHECK-SD-NEXT:    st1.2d { v0, v1, v2, v3 }, [x0], #64
10916; CHECK-SD-NEXT:    ret
10917;
10918; CHECK-GI-LABEL: test_v2f64_post_imm_st1x4:
10919; CHECK-GI:       ; %bb.0:
10920; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10921; CHECK-GI-NEXT:    mov x8, x0
10922; CHECK-GI-NEXT:    add x0, x0, #64
10923; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10924; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10925; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10926; CHECK-GI-NEXT:    st1.2d { v0, v1, v2, v3 }, [x8]
10927; CHECK-GI-NEXT:    ret
10928  call void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, ptr %A)
10929  %tmp = getelementptr double, ptr %A, i64 8
10930  ret ptr %tmp
10931}
10932
; Stores four <2 x double> vectors to %A with the st1x4 intrinsic and returns
; %A advanced by %inc double elements (%inc * 8 bytes).
; Expected output of the default llc invocation: the increment is scaled with
; `lsl x8, x2, #3` and folded into the store as a register writeback.
; Expected output with -global-isel=1: a separate `add` with a shifted register
; followed by a plain `st1` through a copy of the original pointer.
; %ptr is not referenced.
10933define ptr @test_v2f64_post_reg_st1x4(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
10934; CHECK-SD-LABEL: test_v2f64_post_reg_st1x4:
10935; CHECK-SD:       ; %bb.0:
10936; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10937; CHECK-SD-NEXT:    lsl x8, x2, #3
10938; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10939; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10940; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10941; CHECK-SD-NEXT:    st1.2d { v0, v1, v2, v3 }, [x0], x8
10942; CHECK-SD-NEXT:    ret
10943;
10944; CHECK-GI-LABEL: test_v2f64_post_reg_st1x4:
10945; CHECK-GI:       ; %bb.0:
10946; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10947; CHECK-GI-NEXT:    mov x8, x0
10948; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
10949; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10950; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10951; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
10952; CHECK-GI-NEXT:    st1.2d { v0, v1, v2, v3 }, [x8]
10953; CHECK-GI-NEXT:    ret
10954  call void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, ptr %A)
10955  %tmp = getelementptr double, ptr %A, i64 %inc
10956  ret ptr %tmp
10957}
10958
10959declare void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double>, <2 x double>, <2 x double>,<2 x double>,  ptr)
10960
10961
; Stores four <1 x double> vectors to %A with the st1x4 intrinsic and returns
; %A advanced by 4 double elements (32 bytes, the total size of the four
; vectors).
; Expected output of the default llc invocation: the increment is folded into
; the store as an immediate writeback (`[x0], #32`).
; Expected output with -global-isel=1: a separate `add` followed by a plain
; `st1` through a copy of the original pointer. %ptr is not referenced.
10962define ptr @test_v1f64_post_imm_st1x4(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
10963; CHECK-SD-LABEL: test_v1f64_post_imm_st1x4:
10964; CHECK-SD:       ; %bb.0:
10965; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10966; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10967; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10968; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10969; CHECK-SD-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], #32
10970; CHECK-SD-NEXT:    ret
10971;
10972; CHECK-GI-LABEL: test_v1f64_post_imm_st1x4:
10973; CHECK-GI:       ; %bb.0:
10974; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10975; CHECK-GI-NEXT:    mov x8, x0
10976; CHECK-GI-NEXT:    add x0, x0, #32
10977; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10978; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10979; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10980; CHECK-GI-NEXT:    st1.1d { v0, v1, v2, v3 }, [x8]
10981; CHECK-GI-NEXT:    ret
10982  call void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, ptr %A)
10983  %tmp = getelementptr double, ptr %A, i64 4
10984  ret ptr %tmp
10985}
10986
; Stores four <1 x double> vectors to %A with the st1x4 intrinsic and returns
; %A advanced by %inc double elements (%inc * 8 bytes).
; Expected output of the default llc invocation: the increment is scaled with
; `lsl x8, x2, #3` and folded into the store as a register writeback.
; Expected output with -global-isel=1: a separate `add` with a shifted register
; followed by a plain `st1` through a copy of the original pointer.
; %ptr is not referenced.
10987define ptr @test_v1f64_post_reg_st1x4(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
10988; CHECK-SD-LABEL: test_v1f64_post_reg_st1x4:
10989; CHECK-SD:       ; %bb.0:
10990; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10991; CHECK-SD-NEXT:    lsl x8, x2, #3
10992; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10993; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10994; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
10995; CHECK-SD-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], x8
10996; CHECK-SD-NEXT:    ret
10997;
10998; CHECK-GI-LABEL: test_v1f64_post_reg_st1x4:
10999; CHECK-GI:       ; %bb.0:
11000; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
11001; CHECK-GI-NEXT:    mov x8, x0
11002; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
11003; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
11004; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
11005; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
11006; CHECK-GI-NEXT:    st1.1d { v0, v1, v2, v3 }, [x8]
11007; CHECK-GI-NEXT:    ret
11008  call void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, ptr %A)
11009  %tmp = getelementptr double, ptr %A, i64 %inc
11010  ret ptr %tmp
11011}
11012
11013declare void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double>, <1 x double>, <1 x double>, <1 x double>, ptr)
11014
; Stores lane 0 of two <16 x i8> vectors to %A with the st2lane intrinsic and
; returns %A advanced by 2 bytes (one i8 element from each of the two vectors).
; Expected output of the default llc invocation: the increment is folded into
; the lane store as an immediate writeback (`[x0], #2`).
; Expected output with -global-isel=1: a separate `add` followed by a plain
; `st2` through a copy of the original pointer. %ptr is not referenced.
11015define ptr @test_v16i8_post_imm_st2lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
11016; CHECK-SD-LABEL: test_v16i8_post_imm_st2lane:
11017; CHECK-SD:       ; %bb.0:
11018; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
11019; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
11020; CHECK-SD-NEXT:    st2.b { v0, v1 }[0], [x0], #2
11021; CHECK-SD-NEXT:    ret
11022;
11023; CHECK-GI-LABEL: test_v16i8_post_imm_st2lane:
11024; CHECK-GI:       ; %bb.0:
11025; CHECK-GI-NEXT:    mov x8, x0
11026; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
11027; CHECK-GI-NEXT:    add x0, x0, #2
11028; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
11029; CHECK-GI-NEXT:    st2.b { v0, v1 }[0], [x8]
11030; CHECK-GI-NEXT:    ret
11031  call void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, i64 0, ptr %A)
11032  %tmp = getelementptr i8, ptr %A, i32 2
11033  ret ptr %tmp
11034}
11035
; Lane-0 st2lane of the <16 x i8> pair %B/%C through %A, returning %A advanced
; by %inc i8 elements. Expected codegen: the default llc run uses the increment
; register directly (no scaling) as the post-index of st2.b; the -global-isel=1
; run stores through a copy of the base and keeps a separate add.
; %ptr is not referenced by the body.
11036define ptr @test_v16i8_post_reg_st2lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
11037; CHECK-SD-LABEL: test_v16i8_post_reg_st2lane:
11038; CHECK-SD:       ; %bb.0:
11039; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
11040; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
11041; CHECK-SD-NEXT:    st2.b { v0, v1 }[0], [x0], x2
11042; CHECK-SD-NEXT:    ret
11043;
11044; CHECK-GI-LABEL: test_v16i8_post_reg_st2lane:
11045; CHECK-GI:       ; %bb.0:
11046; CHECK-GI-NEXT:    mov x8, x0
11047; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
11048; CHECK-GI-NEXT:    add x0, x0, x2
11049; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
11050; CHECK-GI-NEXT:    st2.b { v0, v1 }[0], [x8]
11051; CHECK-GI-NEXT:    ret
11052  call void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, i64 0, ptr %A)
11053  %tmp = getelementptr i8, ptr %A, i64 %inc
11054  ret ptr %tmp
11055}
11056
11057declare void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8>, <16 x i8>, i64, ptr)
11058
11059
; Lane-0 st2lane of the <8 x i8> pair %B/%C through %A, returning %A advanced
; by two i8 elements. Expected codegen: the default llc run folds the advance
; into a post-indexed st2.b with immediate #2; the -global-isel=1 run stores
; through a copy of the base and keeps a separate add.
; %ptr is not referenced by the body.
11060define ptr @test_v8i8_post_imm_st2lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
11061; CHECK-SD-LABEL: test_v8i8_post_imm_st2lane:
11062; CHECK-SD:       ; %bb.0:
11063; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
11064; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
11065; CHECK-SD-NEXT:    st2.b { v0, v1 }[0], [x0], #2
11066; CHECK-SD-NEXT:    ret
11067;
11068; CHECK-GI-LABEL: test_v8i8_post_imm_st2lane:
11069; CHECK-GI:       ; %bb.0:
11070; CHECK-GI-NEXT:    mov x8, x0
11071; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
11072; CHECK-GI-NEXT:    add x0, x0, #2
11073; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
11074; CHECK-GI-NEXT:    st2.b { v0, v1 }[0], [x8]
11075; CHECK-GI-NEXT:    ret
11076  call void @llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, i64 0, ptr %A)
11077  %tmp = getelementptr i8, ptr %A, i32 2
11078  ret ptr %tmp
11079}
11080
; Lane-0 st2lane of the <8 x i8> pair %B/%C through %A, returning %A advanced
; by %inc i8 elements. Expected codegen: the default llc run uses the increment
; register directly (no scaling) as the post-index of st2.b; the -global-isel=1
; run stores through a copy of the base and keeps a separate add.
; %ptr is not referenced by the body.
11081define ptr @test_v8i8_post_reg_st2lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
11082; CHECK-SD-LABEL: test_v8i8_post_reg_st2lane:
11083; CHECK-SD:       ; %bb.0:
11084; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
11085; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
11086; CHECK-SD-NEXT:    st2.b { v0, v1 }[0], [x0], x2
11087; CHECK-SD-NEXT:    ret
11088;
11089; CHECK-GI-LABEL: test_v8i8_post_reg_st2lane:
11090; CHECK-GI:       ; %bb.0:
11091; CHECK-GI-NEXT:    mov x8, x0
11092; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
11093; CHECK-GI-NEXT:    add x0, x0, x2
11094; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
11095; CHECK-GI-NEXT:    st2.b { v0, v1 }[0], [x8]
11096; CHECK-GI-NEXT:    ret
11097  call void @llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, i64 0, ptr %A)
11098  %tmp = getelementptr i8, ptr %A, i64 %inc
11099  ret ptr %tmp
11100}
11101
11102declare void @llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8>, <8 x i8>, i64, ptr)
11103
11104
; Lane-0 st2lane of the <8 x i16> pair %B/%C through %A, returning %A advanced
; by two i16 elements. Expected codegen: the default llc run folds the advance
; into a post-indexed st2.h with immediate #4; the -global-isel=1 run stores
; through a copy of the base and keeps a separate add.
; %ptr is not referenced by the body.
11105define ptr @test_v8i16_post_imm_st2lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
11106; CHECK-SD-LABEL: test_v8i16_post_imm_st2lane:
11107; CHECK-SD:       ; %bb.0:
11108; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
11109; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
11110; CHECK-SD-NEXT:    st2.h { v0, v1 }[0], [x0], #4
11111; CHECK-SD-NEXT:    ret
11112;
11113; CHECK-GI-LABEL: test_v8i16_post_imm_st2lane:
11114; CHECK-GI:       ; %bb.0:
11115; CHECK-GI-NEXT:    mov x8, x0
11116; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
11117; CHECK-GI-NEXT:    add x0, x0, #4
11118; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
11119; CHECK-GI-NEXT:    st2.h { v0, v1 }[0], [x8]
11120; CHECK-GI-NEXT:    ret
11121  call void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, i64 0, ptr %A)
11122  %tmp = getelementptr i16, ptr %A, i32 2
11123  ret ptr %tmp
11124}
11125
; Lane-0 st2lane of the <8 x i16> pair %B/%C through %A, returning %A advanced
; by %inc i16 elements. Expected codegen: the default llc run scales the
; increment with lsl #1 and uses it as the post-index register of st2.h; the
; -global-isel=1 run stores through a copy of the base and keeps a separate
; shifted add.
; %ptr is not referenced by the body.
11126define ptr @test_v8i16_post_reg_st2lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind {
11127; CHECK-SD-LABEL: test_v8i16_post_reg_st2lane:
11128; CHECK-SD:       ; %bb.0:
11129; CHECK-SD-NEXT:    lsl x8, x2, #1
11130; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
11131; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
11132; CHECK-SD-NEXT:    st2.h { v0, v1 }[0], [x0], x8
11133; CHECK-SD-NEXT:    ret
11134;
11135; CHECK-GI-LABEL: test_v8i16_post_reg_st2lane:
11136; CHECK-GI:       ; %bb.0:
11137; CHECK-GI-NEXT:    mov x8, x0
11138; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
11139; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
11140; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
11141; CHECK-GI-NEXT:    st2.h { v0, v1 }[0], [x8]
11142; CHECK-GI-NEXT:    ret
11143  call void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, i64 0, ptr %A)
11144  %tmp = getelementptr i16, ptr %A, i64 %inc
11145  ret ptr %tmp
11146}
11147
11148declare void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16>, <8 x i16>, i64, ptr)
11149
11150
; Lane-0 st2lane of the <4 x i16> pair %B/%C through %A, returning %A advanced
; by two i16 elements. Expected codegen: the default llc run folds the advance
; into a post-indexed st2.h with immediate #4; the -global-isel=1 run stores
; through a copy of the base and keeps a separate add.
; %ptr is not referenced by the body.
11151define ptr @test_v4i16_post_imm_st2lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
11152; CHECK-SD-LABEL: test_v4i16_post_imm_st2lane:
11153; CHECK-SD:       ; %bb.0:
11154; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
11155; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
11156; CHECK-SD-NEXT:    st2.h { v0, v1 }[0], [x0], #4
11157; CHECK-SD-NEXT:    ret
11158;
11159; CHECK-GI-LABEL: test_v4i16_post_imm_st2lane:
11160; CHECK-GI:       ; %bb.0:
11161; CHECK-GI-NEXT:    mov x8, x0
11162; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
11163; CHECK-GI-NEXT:    add x0, x0, #4
11164; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
11165; CHECK-GI-NEXT:    st2.h { v0, v1 }[0], [x8]
11166; CHECK-GI-NEXT:    ret
11167  call void @llvm.aarch64.neon.st2lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, i64 0, ptr %A)
11168  %tmp = getelementptr i16, ptr %A, i32 2
11169  ret ptr %tmp
11170}
11171
; Lane-0 st2lane of the <4 x i16> pair %B/%C through %A, returning %A advanced
; by %inc i16 elements. Expected codegen: the default llc run scales the
; increment with lsl #1 and uses it as the post-index register of st2.h; the
; -global-isel=1 run stores through a copy of the base and keeps a separate
; shifted add.
; %ptr is not referenced by the body.
11172define ptr @test_v4i16_post_reg_st2lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind {
11173; CHECK-SD-LABEL: test_v4i16_post_reg_st2lane:
11174; CHECK-SD:       ; %bb.0:
11175; CHECK-SD-NEXT:    lsl x8, x2, #1
11176; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
11177; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
11178; CHECK-SD-NEXT:    st2.h { v0, v1 }[0], [x0], x8
11179; CHECK-SD-NEXT:    ret
11180;
11181; CHECK-GI-LABEL: test_v4i16_post_reg_st2lane:
11182; CHECK-GI:       ; %bb.0:
11183; CHECK-GI-NEXT:    mov x8, x0
11184; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
11185; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
11186; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
11187; CHECK-GI-NEXT:    st2.h { v0, v1 }[0], [x8]
11188; CHECK-GI-NEXT:    ret
11189  call void @llvm.aarch64.neon.st2lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, i64 0, ptr %A)
11190  %tmp = getelementptr i16, ptr %A, i64 %inc
11191  ret ptr %tmp
11192}
11193
11194declare void @llvm.aarch64.neon.st2lane.v4i16.p0(<4 x i16>, <4 x i16>, i64, ptr)
11195
11196
; Lane-0 st2lane of the <4 x i32> pair %B/%C through %A, returning %A advanced
; by two i32 elements. Expected codegen: the default llc run folds the advance
; into a post-indexed st2.s with immediate #8; the -global-isel=1 run stores
; through a copy of the base and keeps a separate add.
; %ptr is not referenced by the body.
11197define ptr @test_v4i32_post_imm_st2lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
11198; CHECK-SD-LABEL: test_v4i32_post_imm_st2lane:
11199; CHECK-SD:       ; %bb.0:
11200; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
11201; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
11202; CHECK-SD-NEXT:    st2.s { v0, v1 }[0], [x0], #8
11203; CHECK-SD-NEXT:    ret
11204;
11205; CHECK-GI-LABEL: test_v4i32_post_imm_st2lane:
11206; CHECK-GI:       ; %bb.0:
11207; CHECK-GI-NEXT:    mov x8, x0
11208; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
11209; CHECK-GI-NEXT:    add x0, x0, #8
11210; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
11211; CHECK-GI-NEXT:    st2.s { v0, v1 }[0], [x8]
11212; CHECK-GI-NEXT:    ret
11213  call void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, i64 0, ptr %A)
11214  %tmp = getelementptr i32, ptr %A, i32 2
11215  ret ptr %tmp
11216}
11217
; Lane-0 st2lane of the <4 x i32> pair %B/%C through %A, returning %A advanced
; by %inc i32 elements. Expected codegen: the default llc run scales the
; increment with lsl #2 and uses it as the post-index register of st2.s; the
; -global-isel=1 run stores through a copy of the base and keeps a separate
; shifted add.
; %ptr is not referenced by the body.
11218define ptr @test_v4i32_post_reg_st2lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind {
11219; CHECK-SD-LABEL: test_v4i32_post_reg_st2lane:
11220; CHECK-SD:       ; %bb.0:
11221; CHECK-SD-NEXT:    lsl x8, x2, #2
11222; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
11223; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
11224; CHECK-SD-NEXT:    st2.s { v0, v1 }[0], [x0], x8
11225; CHECK-SD-NEXT:    ret
11226;
11227; CHECK-GI-LABEL: test_v4i32_post_reg_st2lane:
11228; CHECK-GI:       ; %bb.0:
11229; CHECK-GI-NEXT:    mov x8, x0
11230; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
11231; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
11232; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
11233; CHECK-GI-NEXT:    st2.s { v0, v1 }[0], [x8]
11234; CHECK-GI-NEXT:    ret
11235  call void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, i64 0, ptr %A)
11236  %tmp = getelementptr i32, ptr %A, i64 %inc
11237  ret ptr %tmp
11238}
11239
11240declare void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32>, <4 x i32>, i64, ptr)
11241
11242
; Lane-0 st2lane of the <2 x i32> pair %B/%C through %A, returning %A advanced
; by two i32 elements. Expected codegen: the default llc run folds the advance
; into a post-indexed st2.s with immediate #8; the -global-isel=1 run stores
; through a copy of the base and keeps a separate add.
; %ptr is not referenced by the body.
11243define ptr @test_v2i32_post_imm_st2lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
11244; CHECK-SD-LABEL: test_v2i32_post_imm_st2lane:
11245; CHECK-SD:       ; %bb.0:
11246; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
11247; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
11248; CHECK-SD-NEXT:    st2.s { v0, v1 }[0], [x0], #8
11249; CHECK-SD-NEXT:    ret
11250;
11251; CHECK-GI-LABEL: test_v2i32_post_imm_st2lane:
11252; CHECK-GI:       ; %bb.0:
11253; CHECK-GI-NEXT:    mov x8, x0
11254; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
11255; CHECK-GI-NEXT:    add x0, x0, #8
11256; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
11257; CHECK-GI-NEXT:    st2.s { v0, v1 }[0], [x8]
11258; CHECK-GI-NEXT:    ret
11259  call void @llvm.aarch64.neon.st2lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, i64 0, ptr %A)
11260  %tmp = getelementptr i32, ptr %A, i32 2
11261  ret ptr %tmp
11262}
11263
; Lane-0 st2lane of the <2 x i32> pair %B/%C through %A, returning %A advanced
; by %inc i32 elements. Expected codegen: the default llc run scales the
; increment with lsl #2 and uses it as the post-index register of st2.s; the
; -global-isel=1 run stores through a copy of the base and keeps a separate
; shifted add.
; %ptr is not referenced by the body.
11264define ptr @test_v2i32_post_reg_st2lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind {
11265; CHECK-SD-LABEL: test_v2i32_post_reg_st2lane:
11266; CHECK-SD:       ; %bb.0:
11267; CHECK-SD-NEXT:    lsl x8, x2, #2
11268; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
11269; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
11270; CHECK-SD-NEXT:    st2.s { v0, v1 }[0], [x0], x8
11271; CHECK-SD-NEXT:    ret
11272;
11273; CHECK-GI-LABEL: test_v2i32_post_reg_st2lane:
11274; CHECK-GI:       ; %bb.0:
11275; CHECK-GI-NEXT:    mov x8, x0
11276; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
11277; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
11278; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
11279; CHECK-GI-NEXT:    st2.s { v0, v1 }[0], [x8]
11280; CHECK-GI-NEXT:    ret
11281  call void @llvm.aarch64.neon.st2lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, i64 0, ptr %A)
11282  %tmp = getelementptr i32, ptr %A, i64 %inc
11283  ret ptr %tmp
11284}
11285
11286declare void @llvm.aarch64.neon.st2lane.v2i32.p0(<2 x i32>, <2 x i32>, i64, ptr)
11287
11288
; Lane-0 st2lane of the <2 x i64> pair %B/%C through %A, returning %A advanced
; by two i64 elements. Expected codegen: the default llc run folds the advance
; into a post-indexed st2.d with immediate #16; the -global-isel=1 run stores
; through a copy of the base and keeps a separate add.
; %ptr is not referenced by the body.
11289define ptr @test_v2i64_post_imm_st2lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
11290; CHECK-SD-LABEL: test_v2i64_post_imm_st2lane:
11291; CHECK-SD:       ; %bb.0:
11292; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
11293; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
11294; CHECK-SD-NEXT:    st2.d { v0, v1 }[0], [x0], #16
11295; CHECK-SD-NEXT:    ret
11296;
11297; CHECK-GI-LABEL: test_v2i64_post_imm_st2lane:
11298; CHECK-GI:       ; %bb.0:
11299; CHECK-GI-NEXT:    mov x8, x0
11300; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
11301; CHECK-GI-NEXT:    add x0, x0, #16
11302; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
11303; CHECK-GI-NEXT:    st2.d { v0, v1 }[0], [x8]
11304; CHECK-GI-NEXT:    ret
11305  call void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, i64 0, ptr %A)
11306  %tmp = getelementptr i64, ptr %A, i64 2
11307  ret ptr %tmp
11308}
11309
; Lane-0 st2lane of the <2 x i64> pair %B/%C through %A, returning %A advanced
; by %inc i64 elements. Expected codegen: the default llc run scales the
; increment with lsl #3 and uses it as the post-index register of st2.d; the
; -global-isel=1 run stores through a copy of the base and keeps a separate
; shifted add.
; %ptr is not referenced by the body.
11310define ptr @test_v2i64_post_reg_st2lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind {
11311; CHECK-SD-LABEL: test_v2i64_post_reg_st2lane:
11312; CHECK-SD:       ; %bb.0:
11313; CHECK-SD-NEXT:    lsl x8, x2, #3
11314; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
11315; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
11316; CHECK-SD-NEXT:    st2.d { v0, v1 }[0], [x0], x8
11317; CHECK-SD-NEXT:    ret
11318;
11319; CHECK-GI-LABEL: test_v2i64_post_reg_st2lane:
11320; CHECK-GI:       ; %bb.0:
11321; CHECK-GI-NEXT:    mov x8, x0
11322; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
11323; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
11324; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
11325; CHECK-GI-NEXT:    st2.d { v0, v1 }[0], [x8]
11326; CHECK-GI-NEXT:    ret
11327  call void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, i64 0, ptr %A)
11328  %tmp = getelementptr i64, ptr %A, i64 %inc
11329  ret ptr %tmp
11330}
11331
11332declare void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64>, <2 x i64>, i64, ptr)
11333
11334
; Lane-0 st2lane of the <1 x i64> pair %B/%C through %A, returning %A advanced
; by two i64 elements. Expected codegen: the default llc run folds the advance
; into a post-indexed st2.d with immediate #16; the -global-isel=1 run stores
; through a copy of the base and keeps a separate add.
; %ptr is not referenced by the body.
11335define ptr @test_v1i64_post_imm_st2lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
11336; CHECK-SD-LABEL: test_v1i64_post_imm_st2lane:
11337; CHECK-SD:       ; %bb.0:
11338; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
11339; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
11340; CHECK-SD-NEXT:    st2.d { v0, v1 }[0], [x0], #16
11341; CHECK-SD-NEXT:    ret
11342;
11343; CHECK-GI-LABEL: test_v1i64_post_imm_st2lane:
11344; CHECK-GI:       ; %bb.0:
11345; CHECK-GI-NEXT:    mov x8, x0
11346; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
11347; CHECK-GI-NEXT:    add x0, x0, #16
11348; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
11349; CHECK-GI-NEXT:    st2.d { v0, v1 }[0], [x8]
11350; CHECK-GI-NEXT:    ret
11351  call void @llvm.aarch64.neon.st2lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, i64 0, ptr %A)
11352  %tmp = getelementptr i64, ptr %A, i64 2
11353  ret ptr %tmp
11354}
11355
; Lane-0 st2lane of the <1 x i64> pair %B/%C through %A, returning %A advanced
; by %inc i64 elements. Expected codegen: the default llc run scales the
; increment with lsl #3 and uses it as the post-index register of st2.d; the
; -global-isel=1 run stores through a copy of the base and keeps a separate
; shifted add.
; %ptr is not referenced by the body.
11356define ptr @test_v1i64_post_reg_st2lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind {
11357; CHECK-SD-LABEL: test_v1i64_post_reg_st2lane:
11358; CHECK-SD:       ; %bb.0:
11359; CHECK-SD-NEXT:    lsl x8, x2, #3
11360; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
11361; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
11362; CHECK-SD-NEXT:    st2.d { v0, v1 }[0], [x0], x8
11363; CHECK-SD-NEXT:    ret
11364;
11365; CHECK-GI-LABEL: test_v1i64_post_reg_st2lane:
11366; CHECK-GI:       ; %bb.0:
11367; CHECK-GI-NEXT:    mov x8, x0
11368; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
11369; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
11370; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
11371; CHECK-GI-NEXT:    st2.d { v0, v1 }[0], [x8]
11372; CHECK-GI-NEXT:    ret
11373  call void @llvm.aarch64.neon.st2lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, i64 0, ptr %A)
11374  %tmp = getelementptr i64, ptr %A, i64 %inc
11375  ret ptr %tmp
11376}
11377
11378declare void @llvm.aarch64.neon.st2lane.v1i64.p0(<1 x i64>, <1 x i64>, i64, ptr)
11379
11380
; Lane-0 st2lane of the <4 x float> pair %B/%C through %A, returning %A advanced
; by two float elements. Expected codegen: the default llc run folds the advance
; into a post-indexed st2.s with immediate #8; the -global-isel=1 run stores
; through a copy of the base and keeps a separate add.
; %ptr is not referenced by the body.
11381define ptr @test_v4f32_post_imm_st2lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C) nounwind {
11382; CHECK-SD-LABEL: test_v4f32_post_imm_st2lane:
11383; CHECK-SD:       ; %bb.0:
11384; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
11385; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
11386; CHECK-SD-NEXT:    st2.s { v0, v1 }[0], [x0], #8
11387; CHECK-SD-NEXT:    ret
11388;
11389; CHECK-GI-LABEL: test_v4f32_post_imm_st2lane:
11390; CHECK-GI:       ; %bb.0:
11391; CHECK-GI-NEXT:    mov x8, x0
11392; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
11393; CHECK-GI-NEXT:    add x0, x0, #8
11394; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
11395; CHECK-GI-NEXT:    st2.s { v0, v1 }[0], [x8]
11396; CHECK-GI-NEXT:    ret
11397  call void @llvm.aarch64.neon.st2lane.v4f32.p0(<4 x float> %B, <4 x float> %C, i64 0, ptr %A)
11398  %tmp = getelementptr float, ptr %A, i32 2
11399  ret ptr %tmp
11400}
11401
; Lane-0 st2lane of the <4 x float> pair %B/%C through %A, returning %A advanced
; by %inc float elements. Expected codegen: the default llc run scales the
; increment with lsl #2 and uses it as the post-index register of st2.s; the
; -global-isel=1 run stores through a copy of the base and keeps a separate
; shifted add.
; %ptr is not referenced by the body.
11402define ptr @test_v4f32_post_reg_st2lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind {
11403; CHECK-SD-LABEL: test_v4f32_post_reg_st2lane:
11404; CHECK-SD:       ; %bb.0:
11405; CHECK-SD-NEXT:    lsl x8, x2, #2
11406; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
11407; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
11408; CHECK-SD-NEXT:    st2.s { v0, v1 }[0], [x0], x8
11409; CHECK-SD-NEXT:    ret
11410;
11411; CHECK-GI-LABEL: test_v4f32_post_reg_st2lane:
11412; CHECK-GI:       ; %bb.0:
11413; CHECK-GI-NEXT:    mov x8, x0
11414; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
11415; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
11416; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
11417; CHECK-GI-NEXT:    st2.s { v0, v1 }[0], [x8]
11418; CHECK-GI-NEXT:    ret
11419  call void @llvm.aarch64.neon.st2lane.v4f32.p0(<4 x float> %B, <4 x float> %C, i64 0, ptr %A)
11420  %tmp = getelementptr float, ptr %A, i64 %inc
11421  ret ptr %tmp
11422}
11423
11424declare void @llvm.aarch64.neon.st2lane.v4f32.p0(<4 x float>, <4 x float>, i64, ptr)
11425
11426
; Lane-0 st2lane of the <2 x float> pair %B/%C through %A, returning %A advanced
; by two float elements. Expected codegen: the default llc run folds the advance
; into a post-indexed st2.s with immediate #8; the -global-isel=1 run stores
; through a copy of the base and keeps a separate add.
; %ptr is not referenced by the body.
11427define ptr @test_v2f32_post_imm_st2lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C) nounwind {
11428; CHECK-SD-LABEL: test_v2f32_post_imm_st2lane:
11429; CHECK-SD:       ; %bb.0:
11430; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
11431; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
11432; CHECK-SD-NEXT:    st2.s { v0, v1 }[0], [x0], #8
11433; CHECK-SD-NEXT:    ret
11434;
11435; CHECK-GI-LABEL: test_v2f32_post_imm_st2lane:
11436; CHECK-GI:       ; %bb.0:
11437; CHECK-GI-NEXT:    mov x8, x0
11438; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
11439; CHECK-GI-NEXT:    add x0, x0, #8
11440; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
11441; CHECK-GI-NEXT:    st2.s { v0, v1 }[0], [x8]
11442; CHECK-GI-NEXT:    ret
11443  call void @llvm.aarch64.neon.st2lane.v2f32.p0(<2 x float> %B, <2 x float> %C, i64 0, ptr %A)
11444  %tmp = getelementptr float, ptr %A, i32 2
11445  ret ptr %tmp
11446}
11447
; Lane-0 st2lane of the <2 x float> pair %B/%C through %A, returning %A advanced
; by %inc float elements. Expected codegen: the default llc run scales the
; increment with lsl #2 and uses it as the post-index register of st2.s; the
; -global-isel=1 run stores through a copy of the base and keeps a separate
; shifted add.
; %ptr is not referenced by the body.
11448define ptr @test_v2f32_post_reg_st2lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind {
11449; CHECK-SD-LABEL: test_v2f32_post_reg_st2lane:
11450; CHECK-SD:       ; %bb.0:
11451; CHECK-SD-NEXT:    lsl x8, x2, #2
11452; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
11453; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
11454; CHECK-SD-NEXT:    st2.s { v0, v1 }[0], [x0], x8
11455; CHECK-SD-NEXT:    ret
11456;
11457; CHECK-GI-LABEL: test_v2f32_post_reg_st2lane:
11458; CHECK-GI:       ; %bb.0:
11459; CHECK-GI-NEXT:    mov x8, x0
11460; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
11461; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
11462; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
11463; CHECK-GI-NEXT:    st2.s { v0, v1 }[0], [x8]
11464; CHECK-GI-NEXT:    ret
11465  call void @llvm.aarch64.neon.st2lane.v2f32.p0(<2 x float> %B, <2 x float> %C, i64 0, ptr %A)
11466  %tmp = getelementptr float, ptr %A, i64 %inc
11467  ret ptr %tmp
11468}
11469
11470declare void @llvm.aarch64.neon.st2lane.v2f32.p0(<2 x float>, <2 x float>, i64, ptr)
11471
11472
; Lane-0 st2lane of the <2 x double> pair %B/%C through %A, returning %A
; advanced by two double elements. Expected codegen: the default llc run folds
; the advance into a post-indexed st2.d with immediate #16; the -global-isel=1
; run stores through a copy of the base and keeps a separate add.
; %ptr is not referenced by the body.
11473define ptr @test_v2f64_post_imm_st2lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C) nounwind {
11474; CHECK-SD-LABEL: test_v2f64_post_imm_st2lane:
11475; CHECK-SD:       ; %bb.0:
11476; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
11477; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
11478; CHECK-SD-NEXT:    st2.d { v0, v1 }[0], [x0], #16
11479; CHECK-SD-NEXT:    ret
11480;
11481; CHECK-GI-LABEL: test_v2f64_post_imm_st2lane:
11482; CHECK-GI:       ; %bb.0:
11483; CHECK-GI-NEXT:    mov x8, x0
11484; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
11485; CHECK-GI-NEXT:    add x0, x0, #16
11486; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
11487; CHECK-GI-NEXT:    st2.d { v0, v1 }[0], [x8]
11488; CHECK-GI-NEXT:    ret
11489  call void @llvm.aarch64.neon.st2lane.v2f64.p0(<2 x double> %B, <2 x double> %C, i64 0, ptr %A)
11490  %tmp = getelementptr double, ptr %A, i64 2
11491  ret ptr %tmp
11492}
11493
; Lane-0 st2lane of the <2 x double> pair %B/%C through %A, returning %A
; advanced by %inc double elements. Expected codegen: the default llc run scales
; the increment with lsl #3 and uses it as the post-index register of st2.d; the
; -global-isel=1 run stores through a copy of the base and keeps a separate
; shifted add.
; %ptr is not referenced by the body.
11494define ptr @test_v2f64_post_reg_st2lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind {
11495; CHECK-SD-LABEL: test_v2f64_post_reg_st2lane:
11496; CHECK-SD:       ; %bb.0:
11497; CHECK-SD-NEXT:    lsl x8, x2, #3
11498; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
11499; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
11500; CHECK-SD-NEXT:    st2.d { v0, v1 }[0], [x0], x8
11501; CHECK-SD-NEXT:    ret
11502;
11503; CHECK-GI-LABEL: test_v2f64_post_reg_st2lane:
11504; CHECK-GI:       ; %bb.0:
11505; CHECK-GI-NEXT:    mov x8, x0
11506; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
11507; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
11508; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
11509; CHECK-GI-NEXT:    st2.d { v0, v1 }[0], [x8]
11510; CHECK-GI-NEXT:    ret
11511  call void @llvm.aarch64.neon.st2lane.v2f64.p0(<2 x double> %B, <2 x double> %C, i64 0, ptr %A)
11512  %tmp = getelementptr double, ptr %A, i64 %inc
11513  ret ptr %tmp
11514}
11515
11516declare void @llvm.aarch64.neon.st2lane.v2f64.p0(<2 x double>, <2 x double>, i64, ptr)
11517
11518
; Lane-0 st2lane of the <1 x double> pair %B/%C through %A, returning %A
; advanced by two double elements. Expected codegen: the default llc run folds
; the advance into a post-indexed st2.d with immediate #16; the -global-isel=1
; run stores through a copy of the base and keeps a separate add.
; %ptr is not referenced by the body.
11519define ptr @test_v1f64_post_imm_st2lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C) nounwind {
11520; CHECK-SD-LABEL: test_v1f64_post_imm_st2lane:
11521; CHECK-SD:       ; %bb.0:
11522; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
11523; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
11524; CHECK-SD-NEXT:    st2.d { v0, v1 }[0], [x0], #16
11525; CHECK-SD-NEXT:    ret
11526;
11527; CHECK-GI-LABEL: test_v1f64_post_imm_st2lane:
11528; CHECK-GI:       ; %bb.0:
11529; CHECK-GI-NEXT:    mov x8, x0
11530; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
11531; CHECK-GI-NEXT:    add x0, x0, #16
11532; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
11533; CHECK-GI-NEXT:    st2.d { v0, v1 }[0], [x8]
11534; CHECK-GI-NEXT:    ret
11535  call void @llvm.aarch64.neon.st2lane.v1f64.p0(<1 x double> %B, <1 x double> %C, i64 0, ptr %A)
11536  %tmp = getelementptr double, ptr %A, i64 2
11537  ret ptr %tmp
11538}
11539
; Lane-0 st2lane of the <1 x double> pair %B/%C through %A, returning %A
; advanced by %inc double elements. Expected codegen: the default llc run scales
; the increment with lsl #3 and uses it as the post-index register of st2.d; the
; -global-isel=1 run stores through a copy of the base and keeps a separate
; shifted add.
; %ptr is not referenced by the body.
11540define ptr @test_v1f64_post_reg_st2lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind {
11541; CHECK-SD-LABEL: test_v1f64_post_reg_st2lane:
11542; CHECK-SD:       ; %bb.0:
11543; CHECK-SD-NEXT:    lsl x8, x2, #3
11544; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
11545; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
11546; CHECK-SD-NEXT:    st2.d { v0, v1 }[0], [x0], x8
11547; CHECK-SD-NEXT:    ret
11548;
11549; CHECK-GI-LABEL: test_v1f64_post_reg_st2lane:
11550; CHECK-GI:       ; %bb.0:
11551; CHECK-GI-NEXT:    mov x8, x0
11552; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
11553; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
11554; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
11555; CHECK-GI-NEXT:    st2.d { v0, v1 }[0], [x8]
11556; CHECK-GI-NEXT:    ret
11557  call void @llvm.aarch64.neon.st2lane.v1f64.p0(<1 x double> %B, <1 x double> %C, i64 0, ptr %A)
11558  %tmp = getelementptr double, ptr %A, i64 %inc
11559  ret ptr %tmp
11560}
11561
11562declare void @llvm.aarch64.neon.st2lane.v1f64.p0(<1 x double>, <1 x double>, i64, ptr)
11563
11564
; Lane-0 st3lane of the <16 x i8> triple %B/%C/%D through %A, returning %A
; advanced by three i8 elements. Expected codegen: the default llc run folds the
; advance into a post-indexed st3.b with immediate #3; the -global-isel=1 run
; stores through a copy of the base and keeps a separate add.
; %ptr is not referenced by the body.
11565define ptr @test_v16i8_post_imm_st3lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
11566; CHECK-SD-LABEL: test_v16i8_post_imm_st3lane:
11567; CHECK-SD:       ; %bb.0:
11568; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
11569; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
11570; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
11571; CHECK-SD-NEXT:    st3.b { v0, v1, v2 }[0], [x0], #3
11572; CHECK-SD-NEXT:    ret
11573;
11574; CHECK-GI-LABEL: test_v16i8_post_imm_st3lane:
11575; CHECK-GI:       ; %bb.0:
11576; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
11577; CHECK-GI-NEXT:    mov x8, x0
11578; CHECK-GI-NEXT:    add x0, x0, #3
11579; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
11580; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
11581; CHECK-GI-NEXT:    st3.b { v0, v1, v2 }[0], [x8]
11582; CHECK-GI-NEXT:    ret
11583  call void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, ptr %A)
11584  %tmp = getelementptr i8, ptr %A, i32 3
11585  ret ptr %tmp
11586}
11587
; Lane-0 st3lane of the <16 x i8> triple %B/%C/%D through %A, returning %A
; advanced by %inc i8 elements. Expected codegen: the default llc run uses the
; increment register directly (no scaling) as the post-index of st3.b; the
; -global-isel=1 run stores through a copy of the base and keeps a separate add.
; %ptr is not referenced by the body.
11588define ptr @test_v16i8_post_reg_st3lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
11589; CHECK-SD-LABEL: test_v16i8_post_reg_st3lane:
11590; CHECK-SD:       ; %bb.0:
11591; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
11592; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
11593; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
11594; CHECK-SD-NEXT:    st3.b { v0, v1, v2 }[0], [x0], x2
11595; CHECK-SD-NEXT:    ret
11596;
11597; CHECK-GI-LABEL: test_v16i8_post_reg_st3lane:
11598; CHECK-GI:       ; %bb.0:
11599; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
11600; CHECK-GI-NEXT:    mov x8, x0
11601; CHECK-GI-NEXT:    add x0, x0, x2
11602; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
11603; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
11604; CHECK-GI-NEXT:    st3.b { v0, v1, v2 }[0], [x8]
11605; CHECK-GI-NEXT:    ret
11606  call void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, ptr %A)
11607  %tmp = getelementptr i8, ptr %A, i64 %inc
11608  ret ptr %tmp
11609}
11610
11611declare void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, i64, ptr)
11612
11613
; Lane-0 st3lane of the <8 x i8> triple %B/%C/%D through %A, returning %A
; advanced by three i8 elements. Expected codegen: the default llc run folds the
; advance into a post-indexed st3.b with immediate #3; the -global-isel=1 run
; stores through a copy of the base and keeps a separate add.
; %ptr is not referenced by the body.
11614define ptr @test_v8i8_post_imm_st3lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
11615; CHECK-SD-LABEL: test_v8i8_post_imm_st3lane:
11616; CHECK-SD:       ; %bb.0:
11617; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
11618; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
11619; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
11620; CHECK-SD-NEXT:    st3.b { v0, v1, v2 }[0], [x0], #3
11621; CHECK-SD-NEXT:    ret
11622;
11623; CHECK-GI-LABEL: test_v8i8_post_imm_st3lane:
11624; CHECK-GI:       ; %bb.0:
11625; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
11626; CHECK-GI-NEXT:    mov x8, x0
11627; CHECK-GI-NEXT:    add x0, x0, #3
11628; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
11629; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
11630; CHECK-GI-NEXT:    st3.b { v0, v1, v2 }[0], [x8]
11631; CHECK-GI-NEXT:    ret
11632  call void @llvm.aarch64.neon.st3lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, ptr %A)
11633  %tmp = getelementptr i8, ptr %A, i32 3
11634  ret ptr %tmp
11635}
11636
; Lane-0 st3lane of the <8 x i8> triple %B/%C/%D through %A, returning %A
; advanced by %inc i8 elements. Expected codegen: the default llc run uses the
; increment register directly (no scaling) as the post-index of st3.b; the
; -global-isel=1 run stores through a copy of the base and keeps a separate add.
; %ptr is not referenced by the body.
11637define ptr @test_v8i8_post_reg_st3lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
11638; CHECK-SD-LABEL: test_v8i8_post_reg_st3lane:
11639; CHECK-SD:       ; %bb.0:
11640; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
11641; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
11642; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
11643; CHECK-SD-NEXT:    st3.b { v0, v1, v2 }[0], [x0], x2
11644; CHECK-SD-NEXT:    ret
11645;
11646; CHECK-GI-LABEL: test_v8i8_post_reg_st3lane:
11647; CHECK-GI:       ; %bb.0:
11648; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
11649; CHECK-GI-NEXT:    mov x8, x0
11650; CHECK-GI-NEXT:    add x0, x0, x2
11651; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
11652; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
11653; CHECK-GI-NEXT:    st3.b { v0, v1, v2 }[0], [x8]
11654; CHECK-GI-NEXT:    ret
11655  call void @llvm.aarch64.neon.st3lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, ptr %A)
11656  %tmp = getelementptr i8, ptr %A, i64 %inc
11657  ret ptr %tmp
11658}
11659
11660declare void @llvm.aarch64.neon.st3lane.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, i64, ptr)
11661
11662
; Lane-0 st3lane of the <8 x i16> triple %B/%C/%D through %A, returning %A
; advanced by three i16 elements. Expected codegen: the default llc run folds
; the advance into a post-indexed st3.h with immediate #6; the -global-isel=1
; run stores through a copy of the base and keeps a separate add.
; %ptr is not referenced by the body.
11663define ptr @test_v8i16_post_imm_st3lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
11664; CHECK-SD-LABEL: test_v8i16_post_imm_st3lane:
11665; CHECK-SD:       ; %bb.0:
11666; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
11667; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
11668; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
11669; CHECK-SD-NEXT:    st3.h { v0, v1, v2 }[0], [x0], #6
11670; CHECK-SD-NEXT:    ret
11671;
11672; CHECK-GI-LABEL: test_v8i16_post_imm_st3lane:
11673; CHECK-GI:       ; %bb.0:
11674; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
11675; CHECK-GI-NEXT:    mov x8, x0
11676; CHECK-GI-NEXT:    add x0, x0, #6
11677; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
11678; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
11679; CHECK-GI-NEXT:    st3.h { v0, v1, v2 }[0], [x8]
11680; CHECK-GI-NEXT:    ret
11681  call void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, ptr %A)
11682  %tmp = getelementptr i16, ptr %A, i32 3
11683  ret ptr %tmp
11684}
11685
; Lane-0 st3lane of the <8 x i16> triple %B/%C/%D through %A, returning %A
; advanced by %inc i16 elements. Expected codegen: the default llc run scales
; the increment with lsl #1 and uses it as the post-index register of st3.h; the
; -global-isel=1 run stores through a copy of the base and keeps a separate
; shifted add.
; %ptr is not referenced by the body.
11686define ptr @test_v8i16_post_reg_st3lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
11687; CHECK-SD-LABEL: test_v8i16_post_reg_st3lane:
11688; CHECK-SD:       ; %bb.0:
11689; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
11690; CHECK-SD-NEXT:    lsl x8, x2, #1
11691; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
11692; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
11693; CHECK-SD-NEXT:    st3.h { v0, v1, v2 }[0], [x0], x8
11694; CHECK-SD-NEXT:    ret
11695;
11696; CHECK-GI-LABEL: test_v8i16_post_reg_st3lane:
11697; CHECK-GI:       ; %bb.0:
11698; CHECK-GI-NEXT:    mov x8, x0
11699; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
11700; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
11701; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
11702; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
11703; CHECK-GI-NEXT:    st3.h { v0, v1, v2 }[0], [x8]
11704; CHECK-GI-NEXT:    ret
11705  call void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, ptr %A)
11706  %tmp = getelementptr i16, ptr %A, i64 %inc
11707  ret ptr %tmp
11708}
11709
11710declare void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, i64, ptr)
11711
11712
; Post-increment (immediate) test for st3lane on <4 x i16>.
; Stores lane 0 of %B, %C and %D through %A and returns %A advanced by three
; i16 elements (6 bytes). %ptr is not referenced by the body.
; SD checks (llc without -global-isel) expect the increment folded into a
; post-indexed st3.h with #6.
; GI checks (llc -global-isel=1) expect the base copied to x8, a separate
; add of #6 into x0, and a plain st3.h through x8.
; The kill annotations in the checks refer to d0, d1 and d2 within the
; q0_q1_q2 register tuple.
11713define ptr @test_v4i16_post_imm_st3lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
11714; CHECK-SD-LABEL: test_v4i16_post_imm_st3lane:
11715; CHECK-SD:       ; %bb.0:
11716; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
11717; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
11718; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
11719; CHECK-SD-NEXT:    st3.h { v0, v1, v2 }[0], [x0], #6
11720; CHECK-SD-NEXT:    ret
11721;
11722; CHECK-GI-LABEL: test_v4i16_post_imm_st3lane:
11723; CHECK-GI:       ; %bb.0:
11724; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
11725; CHECK-GI-NEXT:    mov x8, x0
11726; CHECK-GI-NEXT:    add x0, x0, #6
11727; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
11728; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
11729; CHECK-GI-NEXT:    st3.h { v0, v1, v2 }[0], [x8]
11730; CHECK-GI-NEXT:    ret
11731  call void @llvm.aarch64.neon.st3lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, ptr %A)
  ; Returned pointer is %A plus 3 i16 elements, matching the bytes written by the store.
11732  %tmp = getelementptr i16, ptr %A, i32 3
11733  ret ptr %tmp
11734}
11735
; Post-increment (register) test for st3lane on <4 x i16>.
; Stores lane 0 of %B, %C and %D through %A and returns %A advanced by %inc
; i16 elements. %ptr is not referenced by the body.
; SD checks (llc without -global-isel) expect x2 scaled by lsl #1 into x8 and a
; post-indexed st3.h that uses x8 as the increment register.
; GI checks (llc -global-isel=1) expect the base copied to x8, a separate
; add of x2 shifted left by 1 into x0, and a plain st3.h through x8.
11736define ptr @test_v4i16_post_reg_st3lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
11737; CHECK-SD-LABEL: test_v4i16_post_reg_st3lane:
11738; CHECK-SD:       ; %bb.0:
11739; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
11740; CHECK-SD-NEXT:    lsl x8, x2, #1
11741; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
11742; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
11743; CHECK-SD-NEXT:    st3.h { v0, v1, v2 }[0], [x0], x8
11744; CHECK-SD-NEXT:    ret
11745;
11746; CHECK-GI-LABEL: test_v4i16_post_reg_st3lane:
11747; CHECK-GI:       ; %bb.0:
11748; CHECK-GI-NEXT:    mov x8, x0
11749; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
11750; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
11751; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
11752; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
11753; CHECK-GI-NEXT:    st3.h { v0, v1, v2 }[0], [x8]
11754; CHECK-GI-NEXT:    ret
11755  call void @llvm.aarch64.neon.st3lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, ptr %A)
  ; Returned pointer is %A plus a runtime count of i16 elements, hence the shift by 1 in the checks.
11756  %tmp = getelementptr i16, ptr %A, i64 %inc
11757  ret ptr %tmp
11758}
11759
11760declare void @llvm.aarch64.neon.st3lane.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>, i64, ptr)
11761
11762
; Post-increment (immediate) test for st3lane on <4 x i32>.
; Stores lane 0 of %B, %C and %D through %A and returns %A advanced by three
; i32 elements (12 bytes). %ptr is not referenced by the body.
; SD checks (llc without -global-isel) expect the increment folded into a
; post-indexed st3.s with #12.
; GI checks (llc -global-isel=1) expect the base copied to x8, a separate
; add of #12 into x0, and a plain st3.s through x8.
11763define ptr @test_v4i32_post_imm_st3lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
11764; CHECK-SD-LABEL: test_v4i32_post_imm_st3lane:
11765; CHECK-SD:       ; %bb.0:
11766; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
11767; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
11768; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
11769; CHECK-SD-NEXT:    st3.s { v0, v1, v2 }[0], [x0], #12
11770; CHECK-SD-NEXT:    ret
11771;
11772; CHECK-GI-LABEL: test_v4i32_post_imm_st3lane:
11773; CHECK-GI:       ; %bb.0:
11774; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
11775; CHECK-GI-NEXT:    mov x8, x0
11776; CHECK-GI-NEXT:    add x0, x0, #12
11777; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
11778; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
11779; CHECK-GI-NEXT:    st3.s { v0, v1, v2 }[0], [x8]
11780; CHECK-GI-NEXT:    ret
11781  call void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, ptr %A)
  ; Returned pointer is %A plus 3 i32 elements, matching the bytes written by the store.
11782  %tmp = getelementptr i32, ptr %A, i32 3
11783  ret ptr %tmp
11784}
11785
; Post-increment (register) test for st3lane on <4 x i32>.
; Stores lane 0 of %B, %C and %D through %A and returns %A advanced by %inc
; i32 elements. %ptr is not referenced by the body.
; SD checks (llc without -global-isel) expect x2 scaled by lsl #2 into x8 and a
; post-indexed st3.s that uses x8 as the increment register.
; GI checks (llc -global-isel=1) expect the base copied to x8, a separate
; add of x2 shifted left by 2 into x0, and a plain st3.s through x8.
11786define ptr @test_v4i32_post_reg_st3lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
11787; CHECK-SD-LABEL: test_v4i32_post_reg_st3lane:
11788; CHECK-SD:       ; %bb.0:
11789; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
11790; CHECK-SD-NEXT:    lsl x8, x2, #2
11791; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
11792; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
11793; CHECK-SD-NEXT:    st3.s { v0, v1, v2 }[0], [x0], x8
11794; CHECK-SD-NEXT:    ret
11795;
11796; CHECK-GI-LABEL: test_v4i32_post_reg_st3lane:
11797; CHECK-GI:       ; %bb.0:
11798; CHECK-GI-NEXT:    mov x8, x0
11799; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
11800; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
11801; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
11802; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
11803; CHECK-GI-NEXT:    st3.s { v0, v1, v2 }[0], [x8]
11804; CHECK-GI-NEXT:    ret
11805  call void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, ptr %A)
  ; Returned pointer is %A plus a runtime count of i32 elements, hence the shift by 2 in the checks.
11806  %tmp = getelementptr i32, ptr %A, i64 %inc
11807  ret ptr %tmp
11808}
11809
11810declare void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, i64, ptr)
11811
11812
; Post-increment (immediate) test for st3lane on <2 x i32>.
; Stores lane 0 of %B, %C and %D through %A and returns %A advanced by three
; i32 elements (12 bytes). %ptr is not referenced by the body.
; SD checks (llc without -global-isel) expect the increment folded into a
; post-indexed st3.s with #12.
; GI checks (llc -global-isel=1) expect the base copied to x8, a separate
; add of #12 into x0, and a plain st3.s through x8.
; The kill annotations in the checks refer to d0, d1 and d2 within the
; q0_q1_q2 register tuple.
11813define ptr @test_v2i32_post_imm_st3lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
11814; CHECK-SD-LABEL: test_v2i32_post_imm_st3lane:
11815; CHECK-SD:       ; %bb.0:
11816; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
11817; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
11818; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
11819; CHECK-SD-NEXT:    st3.s { v0, v1, v2 }[0], [x0], #12
11820; CHECK-SD-NEXT:    ret
11821;
11822; CHECK-GI-LABEL: test_v2i32_post_imm_st3lane:
11823; CHECK-GI:       ; %bb.0:
11824; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
11825; CHECK-GI-NEXT:    mov x8, x0
11826; CHECK-GI-NEXT:    add x0, x0, #12
11827; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
11828; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
11829; CHECK-GI-NEXT:    st3.s { v0, v1, v2 }[0], [x8]
11830; CHECK-GI-NEXT:    ret
11831  call void @llvm.aarch64.neon.st3lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, ptr %A)
  ; Returned pointer is %A plus 3 i32 elements, matching the bytes written by the store.
11832  %tmp = getelementptr i32, ptr %A, i32 3
11833  ret ptr %tmp
11834}
11835
; Post-increment (register) test for st3lane on <2 x i32>.
; Stores lane 0 of %B, %C and %D through %A and returns %A advanced by %inc
; i32 elements. %ptr is not referenced by the body.
; SD checks (llc without -global-isel) expect x2 scaled by lsl #2 into x8 and a
; post-indexed st3.s that uses x8 as the increment register.
; GI checks (llc -global-isel=1) expect the base copied to x8, a separate
; add of x2 shifted left by 2 into x0, and a plain st3.s through x8.
11836define ptr @test_v2i32_post_reg_st3lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
11837; CHECK-SD-LABEL: test_v2i32_post_reg_st3lane:
11838; CHECK-SD:       ; %bb.0:
11839; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
11840; CHECK-SD-NEXT:    lsl x8, x2, #2
11841; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
11842; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
11843; CHECK-SD-NEXT:    st3.s { v0, v1, v2 }[0], [x0], x8
11844; CHECK-SD-NEXT:    ret
11845;
11846; CHECK-GI-LABEL: test_v2i32_post_reg_st3lane:
11847; CHECK-GI:       ; %bb.0:
11848; CHECK-GI-NEXT:    mov x8, x0
11849; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
11850; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
11851; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
11852; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
11853; CHECK-GI-NEXT:    st3.s { v0, v1, v2 }[0], [x8]
11854; CHECK-GI-NEXT:    ret
11855  call void @llvm.aarch64.neon.st3lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, ptr %A)
  ; Returned pointer is %A plus a runtime count of i32 elements, hence the shift by 2 in the checks.
11856  %tmp = getelementptr i32, ptr %A, i64 %inc
11857  ret ptr %tmp
11858}
11859
11860declare void @llvm.aarch64.neon.st3lane.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, i64, ptr)
11861
11862
; Post-increment (immediate) test for st3lane on <2 x i64>.
; Stores lane 0 of %B, %C and %D through %A and returns %A advanced by three
; i64 elements (24 bytes). %ptr is not referenced by the body.
; SD checks (llc without -global-isel) expect the increment folded into a
; post-indexed st3.d with #24.
; GI checks (llc -global-isel=1) expect the base copied to x8, a separate
; add of #24 into x0, and a plain st3.d through x8.
11863define ptr @test_v2i64_post_imm_st3lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
11864; CHECK-SD-LABEL: test_v2i64_post_imm_st3lane:
11865; CHECK-SD:       ; %bb.0:
11866; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
11867; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
11868; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
11869; CHECK-SD-NEXT:    st3.d { v0, v1, v2 }[0], [x0], #24
11870; CHECK-SD-NEXT:    ret
11871;
11872; CHECK-GI-LABEL: test_v2i64_post_imm_st3lane:
11873; CHECK-GI:       ; %bb.0:
11874; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
11875; CHECK-GI-NEXT:    mov x8, x0
11876; CHECK-GI-NEXT:    add x0, x0, #24
11877; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
11878; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
11879; CHECK-GI-NEXT:    st3.d { v0, v1, v2 }[0], [x8]
11880; CHECK-GI-NEXT:    ret
11881  call void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, ptr %A)
  ; Returned pointer is %A plus 3 i64 elements, matching the bytes written by the store.
11882  %tmp = getelementptr i64, ptr %A, i64 3
11883  ret ptr %tmp
11884}
11885
; Post-increment (register) test for st3lane on <2 x i64>.
; Stores lane 0 of %B, %C and %D through %A and returns %A advanced by %inc
; i64 elements. %ptr is not referenced by the body.
; SD checks (llc without -global-isel) expect x2 scaled by lsl #3 into x8 and a
; post-indexed st3.d that uses x8 as the increment register.
; GI checks (llc -global-isel=1) expect the base copied to x8, a separate
; add of x2 shifted left by 3 into x0, and a plain st3.d through x8.
11886define ptr @test_v2i64_post_reg_st3lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
11887; CHECK-SD-LABEL: test_v2i64_post_reg_st3lane:
11888; CHECK-SD:       ; %bb.0:
11889; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
11890; CHECK-SD-NEXT:    lsl x8, x2, #3
11891; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
11892; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
11893; CHECK-SD-NEXT:    st3.d { v0, v1, v2 }[0], [x0], x8
11894; CHECK-SD-NEXT:    ret
11895;
11896; CHECK-GI-LABEL: test_v2i64_post_reg_st3lane:
11897; CHECK-GI:       ; %bb.0:
11898; CHECK-GI-NEXT:    mov x8, x0
11899; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
11900; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
11901; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
11902; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
11903; CHECK-GI-NEXT:    st3.d { v0, v1, v2 }[0], [x8]
11904; CHECK-GI-NEXT:    ret
11905  call void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, ptr %A)
  ; Returned pointer is %A plus a runtime count of i64 elements, hence the shift by 3 in the checks.
11906  %tmp = getelementptr i64, ptr %A, i64 %inc
11907  ret ptr %tmp
11908}
11909
11910declare void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, i64, ptr)
11911
11912
; Post-increment (immediate) test for st3lane on <1 x i64>.
; Stores lane 0 of %B, %C and %D through %A and returns %A advanced by three
; i64 elements (24 bytes). %ptr is not referenced by the body.
; SD checks (llc without -global-isel) expect the increment folded into a
; post-indexed st3.d with #24.
; GI checks (llc -global-isel=1) expect the base copied to x8, a separate
; add of #24 into x0, and a plain st3.d through x8.
; The kill annotations in the checks refer to d0, d1 and d2 within the
; q0_q1_q2 register tuple.
11913define ptr @test_v1i64_post_imm_st3lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
11914; CHECK-SD-LABEL: test_v1i64_post_imm_st3lane:
11915; CHECK-SD:       ; %bb.0:
11916; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
11917; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
11918; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
11919; CHECK-SD-NEXT:    st3.d { v0, v1, v2 }[0], [x0], #24
11920; CHECK-SD-NEXT:    ret
11921;
11922; CHECK-GI-LABEL: test_v1i64_post_imm_st3lane:
11923; CHECK-GI:       ; %bb.0:
11924; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
11925; CHECK-GI-NEXT:    mov x8, x0
11926; CHECK-GI-NEXT:    add x0, x0, #24
11927; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
11928; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
11929; CHECK-GI-NEXT:    st3.d { v0, v1, v2 }[0], [x8]
11930; CHECK-GI-NEXT:    ret
11931  call void @llvm.aarch64.neon.st3lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, ptr %A)
  ; Returned pointer is %A plus 3 i64 elements, matching the bytes written by the store.
11932  %tmp = getelementptr i64, ptr %A, i64 3
11933  ret ptr %tmp
11934}
11935
; Post-increment (register) test for st3lane on <1 x i64>.
; Stores lane 0 of %B, %C and %D through %A and returns %A advanced by %inc
; i64 elements. %ptr is not referenced by the body.
; SD checks (llc without -global-isel) expect x2 scaled by lsl #3 into x8 and a
; post-indexed st3.d that uses x8 as the increment register.
; GI checks (llc -global-isel=1) expect the base copied to x8, a separate
; add of x2 shifted left by 3 into x0, and a plain st3.d through x8.
11936define ptr @test_v1i64_post_reg_st3lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
11937; CHECK-SD-LABEL: test_v1i64_post_reg_st3lane:
11938; CHECK-SD:       ; %bb.0:
11939; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
11940; CHECK-SD-NEXT:    lsl x8, x2, #3
11941; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
11942; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
11943; CHECK-SD-NEXT:    st3.d { v0, v1, v2 }[0], [x0], x8
11944; CHECK-SD-NEXT:    ret
11945;
11946; CHECK-GI-LABEL: test_v1i64_post_reg_st3lane:
11947; CHECK-GI:       ; %bb.0:
11948; CHECK-GI-NEXT:    mov x8, x0
11949; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
11950; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
11951; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
11952; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
11953; CHECK-GI-NEXT:    st3.d { v0, v1, v2 }[0], [x8]
11954; CHECK-GI-NEXT:    ret
11955  call void @llvm.aarch64.neon.st3lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, ptr %A)
  ; Returned pointer is %A plus a runtime count of i64 elements, hence the shift by 3 in the checks.
11956  %tmp = getelementptr i64, ptr %A, i64 %inc
11957  ret ptr %tmp
11958}
11959
11960declare void @llvm.aarch64.neon.st3lane.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>, i64, ptr)
11961
11962
; Post-increment (immediate) test for st3lane on <4 x float>.
; Stores lane 0 of %B, %C and %D through %A and returns %A advanced by three
; float elements (12 bytes). %ptr is not referenced by the body.
; SD checks (llc without -global-isel) expect the increment folded into a
; post-indexed st3.s with #12.
; GI checks (llc -global-isel=1) expect the base copied to x8, a separate
; add of #12 into x0, and a plain st3.s through x8.
11963define ptr @test_v4f32_post_imm_st3lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
11964; CHECK-SD-LABEL: test_v4f32_post_imm_st3lane:
11965; CHECK-SD:       ; %bb.0:
11966; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
11967; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
11968; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
11969; CHECK-SD-NEXT:    st3.s { v0, v1, v2 }[0], [x0], #12
11970; CHECK-SD-NEXT:    ret
11971;
11972; CHECK-GI-LABEL: test_v4f32_post_imm_st3lane:
11973; CHECK-GI:       ; %bb.0:
11974; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
11975; CHECK-GI-NEXT:    mov x8, x0
11976; CHECK-GI-NEXT:    add x0, x0, #12
11977; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
11978; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
11979; CHECK-GI-NEXT:    st3.s { v0, v1, v2 }[0], [x8]
11980; CHECK-GI-NEXT:    ret
11981  call void @llvm.aarch64.neon.st3lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, ptr %A)
  ; Returned pointer is %A plus 3 float elements, matching the bytes written by the store.
11982  %tmp = getelementptr float, ptr %A, i32 3
11983  ret ptr %tmp
11984}
11985
; Post-increment (register) test for st3lane on <4 x float>.
; Stores lane 0 of %B, %C and %D through %A and returns %A advanced by %inc
; float elements. %ptr is not referenced by the body.
; SD checks (llc without -global-isel) expect x2 scaled by lsl #2 into x8 and a
; post-indexed st3.s that uses x8 as the increment register.
; GI checks (llc -global-isel=1) expect the base copied to x8, a separate
; add of x2 shifted left by 2 into x0, and a plain st3.s through x8.
11986define ptr @test_v4f32_post_reg_st3lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
11987; CHECK-SD-LABEL: test_v4f32_post_reg_st3lane:
11988; CHECK-SD:       ; %bb.0:
11989; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
11990; CHECK-SD-NEXT:    lsl x8, x2, #2
11991; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
11992; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
11993; CHECK-SD-NEXT:    st3.s { v0, v1, v2 }[0], [x0], x8
11994; CHECK-SD-NEXT:    ret
11995;
11996; CHECK-GI-LABEL: test_v4f32_post_reg_st3lane:
11997; CHECK-GI:       ; %bb.0:
11998; CHECK-GI-NEXT:    mov x8, x0
11999; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
12000; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
12001; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
12002; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
12003; CHECK-GI-NEXT:    st3.s { v0, v1, v2 }[0], [x8]
12004; CHECK-GI-NEXT:    ret
12005  call void @llvm.aarch64.neon.st3lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, ptr %A)
  ; Returned pointer is %A plus a runtime count of float elements, hence the shift by 2 in the checks.
12006  %tmp = getelementptr float, ptr %A, i64 %inc
12007  ret ptr %tmp
12008}
12009
12010declare void @llvm.aarch64.neon.st3lane.v4f32.p0(<4 x float>, <4 x float>, <4 x float>, i64, ptr)
12011
12012
; Post-increment (immediate) test for st3lane on <2 x float>.
; Stores lane 0 of %B, %C and %D through %A and returns %A advanced by three
; float elements (12 bytes). %ptr is not referenced by the body.
; SD checks (llc without -global-isel) expect the increment folded into a
; post-indexed st3.s with #12.
; GI checks (llc -global-isel=1) expect the base copied to x8, a separate
; add of #12 into x0, and a plain st3.s through x8.
; The kill annotations in the checks refer to d0, d1 and d2 within the
; q0_q1_q2 register tuple.
12013define ptr @test_v2f32_post_imm_st3lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
12014; CHECK-SD-LABEL: test_v2f32_post_imm_st3lane:
12015; CHECK-SD:       ; %bb.0:
12016; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
12017; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
12018; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
12019; CHECK-SD-NEXT:    st3.s { v0, v1, v2 }[0], [x0], #12
12020; CHECK-SD-NEXT:    ret
12021;
12022; CHECK-GI-LABEL: test_v2f32_post_imm_st3lane:
12023; CHECK-GI:       ; %bb.0:
12024; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
12025; CHECK-GI-NEXT:    mov x8, x0
12026; CHECK-GI-NEXT:    add x0, x0, #12
12027; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
12028; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
12029; CHECK-GI-NEXT:    st3.s { v0, v1, v2 }[0], [x8]
12030; CHECK-GI-NEXT:    ret
12031  call void @llvm.aarch64.neon.st3lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, ptr %A)
  ; Returned pointer is %A plus 3 float elements, matching the bytes written by the store.
12032  %tmp = getelementptr float, ptr %A, i32 3
12033  ret ptr %tmp
12034}
12035
; Post-increment (register) test for st3lane on <2 x float>.
; Stores lane 0 of %B, %C and %D through %A and returns %A advanced by %inc
; float elements. %ptr is not referenced by the body.
; SD checks (llc without -global-isel) expect x2 scaled by lsl #2 into x8 and a
; post-indexed st3.s that uses x8 as the increment register.
; GI checks (llc -global-isel=1) expect the base copied to x8, a separate
; add of x2 shifted left by 2 into x0, and a plain st3.s through x8.
12036define ptr @test_v2f32_post_reg_st3lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
12037; CHECK-SD-LABEL: test_v2f32_post_reg_st3lane:
12038; CHECK-SD:       ; %bb.0:
12039; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
12040; CHECK-SD-NEXT:    lsl x8, x2, #2
12041; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
12042; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
12043; CHECK-SD-NEXT:    st3.s { v0, v1, v2 }[0], [x0], x8
12044; CHECK-SD-NEXT:    ret
12045;
12046; CHECK-GI-LABEL: test_v2f32_post_reg_st3lane:
12047; CHECK-GI:       ; %bb.0:
12048; CHECK-GI-NEXT:    mov x8, x0
12049; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
12050; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
12051; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
12052; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
12053; CHECK-GI-NEXT:    st3.s { v0, v1, v2 }[0], [x8]
12054; CHECK-GI-NEXT:    ret
12055  call void @llvm.aarch64.neon.st3lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, ptr %A)
  ; Returned pointer is %A plus a runtime count of float elements, hence the shift by 2 in the checks.
12056  %tmp = getelementptr float, ptr %A, i64 %inc
12057  ret ptr %tmp
12058}
12059
12060declare void @llvm.aarch64.neon.st3lane.v2f32.p0(<2 x float>, <2 x float>, <2 x float>, i64, ptr)
12061
12062
; Post-increment (immediate) test for st3lane on <2 x double>.
; Stores lane 0 of %B, %C and %D through %A and returns %A advanced by three
; double elements (24 bytes). %ptr is not referenced by the body.
; SD checks (llc without -global-isel) expect the increment folded into a
; post-indexed st3.d with #24.
; GI checks (llc -global-isel=1) expect the base copied to x8, a separate
; add of #24 into x0, and a plain st3.d through x8.
12063define ptr @test_v2f64_post_imm_st3lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
12064; CHECK-SD-LABEL: test_v2f64_post_imm_st3lane:
12065; CHECK-SD:       ; %bb.0:
12066; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
12067; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
12068; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
12069; CHECK-SD-NEXT:    st3.d { v0, v1, v2 }[0], [x0], #24
12070; CHECK-SD-NEXT:    ret
12071;
12072; CHECK-GI-LABEL: test_v2f64_post_imm_st3lane:
12073; CHECK-GI:       ; %bb.0:
12074; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
12075; CHECK-GI-NEXT:    mov x8, x0
12076; CHECK-GI-NEXT:    add x0, x0, #24
12077; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
12078; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
12079; CHECK-GI-NEXT:    st3.d { v0, v1, v2 }[0], [x8]
12080; CHECK-GI-NEXT:    ret
12081  call void @llvm.aarch64.neon.st3lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, ptr %A)
  ; Returned pointer is %A plus 3 double elements, matching the bytes written by the store.
12082  %tmp = getelementptr double, ptr %A, i64 3
12083  ret ptr %tmp
12084}
12085
; Post-increment (register) test for st3lane on <2 x double>.
; Stores lane 0 of %B, %C and %D through %A and returns %A advanced by %inc
; double elements. %ptr is not referenced by the body.
; SD checks (llc without -global-isel) expect x2 scaled by lsl #3 into x8 and a
; post-indexed st3.d that uses x8 as the increment register.
; GI checks (llc -global-isel=1) expect the base copied to x8, a separate
; add of x2 shifted left by 3 into x0, and a plain st3.d through x8.
12086define ptr @test_v2f64_post_reg_st3lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
12087; CHECK-SD-LABEL: test_v2f64_post_reg_st3lane:
12088; CHECK-SD:       ; %bb.0:
12089; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
12090; CHECK-SD-NEXT:    lsl x8, x2, #3
12091; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
12092; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
12093; CHECK-SD-NEXT:    st3.d { v0, v1, v2 }[0], [x0], x8
12094; CHECK-SD-NEXT:    ret
12095;
12096; CHECK-GI-LABEL: test_v2f64_post_reg_st3lane:
12097; CHECK-GI:       ; %bb.0:
12098; CHECK-GI-NEXT:    mov x8, x0
12099; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
12100; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
12101; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
12102; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
12103; CHECK-GI-NEXT:    st3.d { v0, v1, v2 }[0], [x8]
12104; CHECK-GI-NEXT:    ret
12105  call void @llvm.aarch64.neon.st3lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, ptr %A)
  ; Returned pointer is %A plus a runtime count of double elements, hence the shift by 3 in the checks.
12106  %tmp = getelementptr double, ptr %A, i64 %inc
12107  ret ptr %tmp
12108}
12109
12110declare void @llvm.aarch64.neon.st3lane.v2f64.p0(<2 x double>, <2 x double>, <2 x double>, i64, ptr)
12111
12112
; Post-increment (immediate) test for st3lane on <1 x double>.
; Stores lane 0 of %B, %C and %D through %A and returns %A advanced by three
; double elements (24 bytes). %ptr is not referenced by the body.
; SD checks (llc without -global-isel) expect the increment folded into a
; post-indexed st3.d with #24.
; GI checks (llc -global-isel=1) expect the base copied to x8, a separate
; add of #24 into x0, and a plain st3.d through x8.
; The kill annotations in the checks refer to d0, d1 and d2 within the
; q0_q1_q2 register tuple.
12113define ptr @test_v1f64_post_imm_st3lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
12114; CHECK-SD-LABEL: test_v1f64_post_imm_st3lane:
12115; CHECK-SD:       ; %bb.0:
12116; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
12117; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
12118; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
12119; CHECK-SD-NEXT:    st3.d { v0, v1, v2 }[0], [x0], #24
12120; CHECK-SD-NEXT:    ret
12121;
12122; CHECK-GI-LABEL: test_v1f64_post_imm_st3lane:
12123; CHECK-GI:       ; %bb.0:
12124; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
12125; CHECK-GI-NEXT:    mov x8, x0
12126; CHECK-GI-NEXT:    add x0, x0, #24
12127; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
12128; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
12129; CHECK-GI-NEXT:    st3.d { v0, v1, v2 }[0], [x8]
12130; CHECK-GI-NEXT:    ret
12131  call void @llvm.aarch64.neon.st3lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, ptr %A)
  ; Returned pointer is %A plus 3 double elements, matching the bytes written by the store.
12132  %tmp = getelementptr double, ptr %A, i64 3
12133  ret ptr %tmp
12134}
12135
; Post-increment (register) test for st3lane on <1 x double>.
; Stores lane 0 of %B, %C and %D through %A and returns %A advanced by %inc
; double elements. %ptr is not referenced by the body.
; SD checks (llc without -global-isel) expect x2 scaled by lsl #3 into x8 and a
; post-indexed st3.d that uses x8 as the increment register.
; GI checks (llc -global-isel=1) expect the base copied to x8, a separate
; add of x2 shifted left by 3 into x0, and a plain st3.d through x8.
12136define ptr @test_v1f64_post_reg_st3lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
12137; CHECK-SD-LABEL: test_v1f64_post_reg_st3lane:
12138; CHECK-SD:       ; %bb.0:
12139; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
12140; CHECK-SD-NEXT:    lsl x8, x2, #3
12141; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
12142; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
12143; CHECK-SD-NEXT:    st3.d { v0, v1, v2 }[0], [x0], x8
12144; CHECK-SD-NEXT:    ret
12145;
12146; CHECK-GI-LABEL: test_v1f64_post_reg_st3lane:
12147; CHECK-GI:       ; %bb.0:
12148; CHECK-GI-NEXT:    mov x8, x0
12149; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
12150; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
12151; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
12152; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
12153; CHECK-GI-NEXT:    st3.d { v0, v1, v2 }[0], [x8]
12154; CHECK-GI-NEXT:    ret
12155  call void @llvm.aarch64.neon.st3lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, ptr %A)
  ; Returned pointer is %A plus a runtime count of double elements, hence the shift by 3 in the checks.
12156  %tmp = getelementptr double, ptr %A, i64 %inc
12157  ret ptr %tmp
12158}
12159
12160declare void @llvm.aarch64.neon.st3lane.v1f64.p0(<1 x double>, <1 x double>, <1 x double>, i64, ptr)
12161
12162
; Post-increment (immediate) test for st4lane on <16 x i8>.
; Stores lane 0 of %B, %C, %D and %E through %A and returns %A advanced by four
; i8 elements (4 bytes). %ptr is not referenced by the body.
; SD checks (llc without -global-isel) expect the increment folded into a
; post-indexed st4.b with #4.
; GI checks (llc -global-isel=1) expect the base copied to x8, a separate
; add of #4 into x0, and a plain st4.b through x8.
12163define ptr @test_v16i8_post_imm_st4lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
12164; CHECK-SD-LABEL: test_v16i8_post_imm_st4lane:
12165; CHECK-SD:       ; %bb.0:
12166; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12167; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12168; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12169; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12170; CHECK-SD-NEXT:    st4.b { v0, v1, v2, v3 }[0], [x0], #4
12171; CHECK-SD-NEXT:    ret
12172;
12173; CHECK-GI-LABEL: test_v16i8_post_imm_st4lane:
12174; CHECK-GI:       ; %bb.0:
12175; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12176; CHECK-GI-NEXT:    mov x8, x0
12177; CHECK-GI-NEXT:    add x0, x0, #4
12178; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12179; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12180; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12181; CHECK-GI-NEXT:    st4.b { v0, v1, v2, v3 }[0], [x8]
12182; CHECK-GI-NEXT:    ret
12183  call void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, ptr %A)
  ; Returned pointer is %A plus 4 i8 elements, matching the bytes written by the store.
12184  %tmp = getelementptr i8, ptr %A, i32 4
12185  ret ptr %tmp
12186}
12187
; Post-increment (register) test for st4lane on <16 x i8>.
; Stores lane 0 of %B, %C, %D and %E through %A and returns %A advanced by %inc
; bytes. %ptr is not referenced by the body.
; SD checks (llc without -global-isel) expect a post-indexed st4.b that uses x2
; directly as the increment register, with no scaling instruction.
; GI checks (llc -global-isel=1) expect the base copied to x8, a separate
; add of x2 into x0, and a plain st4.b through x8.
12188define ptr @test_v16i8_post_reg_st4lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
12189; CHECK-SD-LABEL: test_v16i8_post_reg_st4lane:
12190; CHECK-SD:       ; %bb.0:
12191; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12192; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12193; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12194; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12195; CHECK-SD-NEXT:    st4.b { v0, v1, v2, v3 }[0], [x0], x2
12196; CHECK-SD-NEXT:    ret
12197;
12198; CHECK-GI-LABEL: test_v16i8_post_reg_st4lane:
12199; CHECK-GI:       ; %bb.0:
12200; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12201; CHECK-GI-NEXT:    mov x8, x0
12202; CHECK-GI-NEXT:    add x0, x0, x2
12203; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12204; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12205; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12206; CHECK-GI-NEXT:    st4.b { v0, v1, v2, v3 }[0], [x8]
12207; CHECK-GI-NEXT:    ret
12208  call void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, ptr %A)
  ; Returned pointer is %A plus a runtime count of i8 elements, so the checks show no shift.
12209  %tmp = getelementptr i8, ptr %A, i64 %inc
12210  ret ptr %tmp
12211}
12212
12213declare void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, ptr)
12214
12215
; Post-increment (immediate) test for st4lane on <8 x i8>.
; Stores lane 0 of %B, %C, %D and %E through %A and returns %A advanced by four
; i8 elements (4 bytes). %ptr is not referenced by the body.
; SD checks (llc without -global-isel) expect the increment folded into a
; post-indexed st4.b with #4.
; GI checks (llc -global-isel=1) expect the base copied to x8, a separate
; add of #4 into x0, and a plain st4.b through x8.
; The kill annotations in the checks refer to d0 through d3 within the
; q0_q1_q2_q3 register tuple.
12216define ptr @test_v8i8_post_imm_st4lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
12217; CHECK-SD-LABEL: test_v8i8_post_imm_st4lane:
12218; CHECK-SD:       ; %bb.0:
12219; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12220; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12221; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12222; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12223; CHECK-SD-NEXT:    st4.b { v0, v1, v2, v3 }[0], [x0], #4
12224; CHECK-SD-NEXT:    ret
12225;
12226; CHECK-GI-LABEL: test_v8i8_post_imm_st4lane:
12227; CHECK-GI:       ; %bb.0:
12228; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12229; CHECK-GI-NEXT:    mov x8, x0
12230; CHECK-GI-NEXT:    add x0, x0, #4
12231; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12232; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12233; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12234; CHECK-GI-NEXT:    st4.b { v0, v1, v2, v3 }[0], [x8]
12235; CHECK-GI-NEXT:    ret
12236  call void @llvm.aarch64.neon.st4lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, ptr %A)
  ; Returned pointer is %A plus 4 i8 elements, matching the bytes written by the store.
12237  %tmp = getelementptr i8, ptr %A, i32 4
12238  ret ptr %tmp
12239}
12240
; Post-increment (register) test for st4lane on <8 x i8>.
; Stores lane 0 of %B, %C, %D and %E through %A and returns %A advanced by %inc
; bytes. %ptr is not referenced by the body.
; SD checks (llc without -global-isel) expect a post-indexed st4.b that uses x2
; directly as the increment register, with no scaling instruction.
; GI checks (llc -global-isel=1) expect the base copied to x8, a separate
; add of x2 into x0, and a plain st4.b through x8.
12241define ptr @test_v8i8_post_reg_st4lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
12242; CHECK-SD-LABEL: test_v8i8_post_reg_st4lane:
12243; CHECK-SD:       ; %bb.0:
12244; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12245; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12246; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12247; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12248; CHECK-SD-NEXT:    st4.b { v0, v1, v2, v3 }[0], [x0], x2
12249; CHECK-SD-NEXT:    ret
12250;
12251; CHECK-GI-LABEL: test_v8i8_post_reg_st4lane:
12252; CHECK-GI:       ; %bb.0:
12253; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12254; CHECK-GI-NEXT:    mov x8, x0
12255; CHECK-GI-NEXT:    add x0, x0, x2
12256; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12257; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12258; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12259; CHECK-GI-NEXT:    st4.b { v0, v1, v2, v3 }[0], [x8]
12260; CHECK-GI-NEXT:    ret
12261  call void @llvm.aarch64.neon.st4lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, ptr %A)
  ; Returned pointer is %A plus a runtime count of i8 elements, so the checks show no shift.
12262  %tmp = getelementptr i8, ptr %A, i64 %inc
12263  ret ptr %tmp
12264}
12265
12266declare void @llvm.aarch64.neon.st4lane.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i64, ptr)
12267
12268
; Post-increment (immediate) test for st4lane on <8 x i16>.
; Stores lane 0 of %B, %C, %D and %E through %A and returns %A advanced by four
; i16 elements (8 bytes). %ptr is not referenced by the body.
; SD checks (llc without -global-isel) expect the increment folded into a
; post-indexed st4.h with #8.
; GI checks (llc -global-isel=1) expect the base copied to x8, a separate
; add of #8 into x0, and a plain st4.h through x8.
12269define ptr @test_v8i16_post_imm_st4lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
12270; CHECK-SD-LABEL: test_v8i16_post_imm_st4lane:
12271; CHECK-SD:       ; %bb.0:
12272; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12273; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12274; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12275; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12276; CHECK-SD-NEXT:    st4.h { v0, v1, v2, v3 }[0], [x0], #8
12277; CHECK-SD-NEXT:    ret
12278;
12279; CHECK-GI-LABEL: test_v8i16_post_imm_st4lane:
12280; CHECK-GI:       ; %bb.0:
12281; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12282; CHECK-GI-NEXT:    mov x8, x0
12283; CHECK-GI-NEXT:    add x0, x0, #8
12284; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12285; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12286; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12287; CHECK-GI-NEXT:    st4.h { v0, v1, v2, v3 }[0], [x8]
12288; CHECK-GI-NEXT:    ret
12289  call void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, ptr %A)
  ; Returned pointer is %A plus 4 i16 elements, matching the bytes written by the store.
12290  %tmp = getelementptr i16, ptr %A, i32 4
12291  ret ptr %tmp
12292}
12293
; Post-increment st4lane test for <8 x i16> with a run-time step.
; Stores one lane from each of %B, %C, %D and %E at %A, then returns %A
; advanced by %inc i16 elements. The default llc invocation is expected to
; scale %inc into a scratch register and use it as the register post-index of
; the store; the -global-isel=1 invocation is expected to emit a separate
; shifted add and a plain store.
; %ptr is not referenced in the body; the expected output reads %inc from x2.
12294define ptr @test_v8i16_post_reg_st4lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
12295; CHECK-SD-LABEL: test_v8i16_post_reg_st4lane:
12296; CHECK-SD:       ; %bb.0:
12297; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12298; CHECK-SD-NEXT:    lsl x8, x2, #1
12299; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12300; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12301; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12302; CHECK-SD-NEXT:    st4.h { v0, v1, v2, v3 }[0], [x0], x8
12303; CHECK-SD-NEXT:    ret
12304;
12305; CHECK-GI-LABEL: test_v8i16_post_reg_st4lane:
12306; CHECK-GI:       ; %bb.0:
12307; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12308; CHECK-GI-NEXT:    mov x8, x0
12309; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
12310; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12311; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12312; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12313; CHECK-GI-NEXT:    st4.h { v0, v1, v2, v3 }[0], [x8]
12314; CHECK-GI-NEXT:    ret
  ; The i64 0 operand corresponds to the [0] lane in the expected st4 above;
  ; the store itself uses the unmodified %A.
12315  call void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, ptr %A)
  ; Element-typed GEP: the byte step is %inc * 2, hence the shift by 1 above.
12316  %tmp = getelementptr i16, ptr %A, i64 %inc
12317  ret ptr %tmp
12318}
12319
12320declare void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, ptr)
12321
12322
; Post-increment st4lane test for <4 x i16> with a constant step.
; Stores one lane from each of %B, %C, %D and %E at %A, then returns %A
; advanced by four i16 elements. The default llc invocation is expected to fold
; the step into the store as an immediate post-index; the -global-isel=1
; invocation is expected to emit a separate add and a plain store.
; %ptr is not referenced in the body.
12323define ptr @test_v4i16_post_imm_st4lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
12324; CHECK-SD-LABEL: test_v4i16_post_imm_st4lane:
12325; CHECK-SD:       ; %bb.0:
12326; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12327; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12328; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12329; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12330; CHECK-SD-NEXT:    st4.h { v0, v1, v2, v3 }[0], [x0], #8
12331; CHECK-SD-NEXT:    ret
12332;
12333; CHECK-GI-LABEL: test_v4i16_post_imm_st4lane:
12334; CHECK-GI:       ; %bb.0:
12335; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12336; CHECK-GI-NEXT:    mov x8, x0
12337; CHECK-GI-NEXT:    add x0, x0, #8
12338; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12339; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12340; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12341; CHECK-GI-NEXT:    st4.h { v0, v1, v2, v3 }[0], [x8]
12342; CHECK-GI-NEXT:    ret
  ; The i64 0 operand corresponds to the [0] lane in the expected st4 above;
  ; the store itself uses the unmodified %A.
12343  call void @llvm.aarch64.neon.st4lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, ptr %A)
  ; 4 x i16 = 8 bytes, matching the #8 in the expected output.
12344  %tmp = getelementptr i16, ptr %A, i32 4
12345  ret ptr %tmp
12346}
12347
; Post-increment st4lane test for <4 x i16> with a run-time step.
; Stores one lane from each of %B, %C, %D and %E at %A, then returns %A
; advanced by %inc i16 elements. The default llc invocation is expected to
; scale %inc into a scratch register and use it as the register post-index of
; the store; the -global-isel=1 invocation is expected to emit a separate
; shifted add and a plain store.
; %ptr is not referenced in the body; the expected output reads %inc from x2.
12348define ptr @test_v4i16_post_reg_st4lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
12349; CHECK-SD-LABEL: test_v4i16_post_reg_st4lane:
12350; CHECK-SD:       ; %bb.0:
12351; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12352; CHECK-SD-NEXT:    lsl x8, x2, #1
12353; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12354; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12355; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12356; CHECK-SD-NEXT:    st4.h { v0, v1, v2, v3 }[0], [x0], x8
12357; CHECK-SD-NEXT:    ret
12358;
12359; CHECK-GI-LABEL: test_v4i16_post_reg_st4lane:
12360; CHECK-GI:       ; %bb.0:
12361; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12362; CHECK-GI-NEXT:    mov x8, x0
12363; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
12364; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12365; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12366; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12367; CHECK-GI-NEXT:    st4.h { v0, v1, v2, v3 }[0], [x8]
12368; CHECK-GI-NEXT:    ret
  ; The i64 0 operand corresponds to the [0] lane in the expected st4 above;
  ; the store itself uses the unmodified %A.
12369  call void @llvm.aarch64.neon.st4lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, ptr %A)
  ; Element-typed GEP: the byte step is %inc * 2, hence the shift by 1 above.
12370  %tmp = getelementptr i16, ptr %A, i64 %inc
12371  ret ptr %tmp
12372}
12373
12374declare void @llvm.aarch64.neon.st4lane.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i64, ptr)
12375
12376
; Post-increment st4lane test for <4 x i32> with a constant step.
; Stores one lane from each of %B, %C, %D and %E at %A, then returns %A
; advanced by four i32 elements. The default llc invocation is expected to fold
; the step into the store as an immediate post-index; the -global-isel=1
; invocation is expected to emit a separate add and a plain store.
; %ptr is not referenced in the body.
12377define ptr @test_v4i32_post_imm_st4lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
12378; CHECK-SD-LABEL: test_v4i32_post_imm_st4lane:
12379; CHECK-SD:       ; %bb.0:
12380; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12381; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12382; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12383; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12384; CHECK-SD-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], #16
12385; CHECK-SD-NEXT:    ret
12386;
12387; CHECK-GI-LABEL: test_v4i32_post_imm_st4lane:
12388; CHECK-GI:       ; %bb.0:
12389; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12390; CHECK-GI-NEXT:    mov x8, x0
12391; CHECK-GI-NEXT:    add x0, x0, #16
12392; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12393; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12394; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12395; CHECK-GI-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x8]
12396; CHECK-GI-NEXT:    ret
  ; The i64 0 operand corresponds to the [0] lane in the expected st4 above;
  ; the store itself uses the unmodified %A.
12397  call void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, ptr %A)
  ; 4 x i32 = 16 bytes, matching the #16 in the expected output.
12398  %tmp = getelementptr i32, ptr %A, i32 4
12399  ret ptr %tmp
12400}
12401
; Post-increment st4lane test for <4 x i32> with a run-time step.
; Stores one lane from each of %B, %C, %D and %E at %A, then returns %A
; advanced by %inc i32 elements. The default llc invocation is expected to
; scale %inc into a scratch register and use it as the register post-index of
; the store; the -global-isel=1 invocation is expected to emit a separate
; shifted add and a plain store.
; %ptr is not referenced in the body; the expected output reads %inc from x2.
12402define ptr @test_v4i32_post_reg_st4lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
12403; CHECK-SD-LABEL: test_v4i32_post_reg_st4lane:
12404; CHECK-SD:       ; %bb.0:
12405; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12406; CHECK-SD-NEXT:    lsl x8, x2, #2
12407; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12408; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12409; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12410; CHECK-SD-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], x8
12411; CHECK-SD-NEXT:    ret
12412;
12413; CHECK-GI-LABEL: test_v4i32_post_reg_st4lane:
12414; CHECK-GI:       ; %bb.0:
12415; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12416; CHECK-GI-NEXT:    mov x8, x0
12417; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
12418; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12419; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12420; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12421; CHECK-GI-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x8]
12422; CHECK-GI-NEXT:    ret
  ; The i64 0 operand corresponds to the [0] lane in the expected st4 above;
  ; the store itself uses the unmodified %A.
12423  call void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, ptr %A)
  ; Element-typed GEP: the byte step is %inc * 4, hence the shift by 2 above.
12424  %tmp = getelementptr i32, ptr %A, i64 %inc
12425  ret ptr %tmp
12426}
12427
12428declare void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, ptr)
12429
12430
; Post-increment st4lane test for <2 x i32> with a constant step.
; Stores one lane from each of %B, %C, %D and %E at %A, then returns %A
; advanced by four i32 elements. The default llc invocation is expected to fold
; the step into the store as an immediate post-index; the -global-isel=1
; invocation is expected to emit a separate add and a plain store.
; %ptr is not referenced in the body.
12431define ptr @test_v2i32_post_imm_st4lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
12432; CHECK-SD-LABEL: test_v2i32_post_imm_st4lane:
12433; CHECK-SD:       ; %bb.0:
12434; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12435; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12436; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12437; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12438; CHECK-SD-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], #16
12439; CHECK-SD-NEXT:    ret
12440;
12441; CHECK-GI-LABEL: test_v2i32_post_imm_st4lane:
12442; CHECK-GI:       ; %bb.0:
12443; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12444; CHECK-GI-NEXT:    mov x8, x0
12445; CHECK-GI-NEXT:    add x0, x0, #16
12446; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12447; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12448; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12449; CHECK-GI-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x8]
12450; CHECK-GI-NEXT:    ret
  ; The i64 0 operand corresponds to the [0] lane in the expected st4 above;
  ; the store itself uses the unmodified %A.
12451  call void @llvm.aarch64.neon.st4lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, ptr %A)
  ; 4 x i32 = 16 bytes, matching the #16 in the expected output.
12452  %tmp = getelementptr i32, ptr %A, i32 4
12453  ret ptr %tmp
12454}
12455
; Post-increment st4lane test for <2 x i32> with a run-time step.
; Stores one lane from each of %B, %C, %D and %E at %A, then returns %A
; advanced by %inc i32 elements. The default llc invocation is expected to
; scale %inc into a scratch register and use it as the register post-index of
; the store; the -global-isel=1 invocation is expected to emit a separate
; shifted add and a plain store.
; %ptr is not referenced in the body; the expected output reads %inc from x2.
12456define ptr @test_v2i32_post_reg_st4lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
12457; CHECK-SD-LABEL: test_v2i32_post_reg_st4lane:
12458; CHECK-SD:       ; %bb.0:
12459; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12460; CHECK-SD-NEXT:    lsl x8, x2, #2
12461; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12462; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12463; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12464; CHECK-SD-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], x8
12465; CHECK-SD-NEXT:    ret
12466;
12467; CHECK-GI-LABEL: test_v2i32_post_reg_st4lane:
12468; CHECK-GI:       ; %bb.0:
12469; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12470; CHECK-GI-NEXT:    mov x8, x0
12471; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
12472; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12473; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12474; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12475; CHECK-GI-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x8]
12476; CHECK-GI-NEXT:    ret
  ; The i64 0 operand corresponds to the [0] lane in the expected st4 above;
  ; the store itself uses the unmodified %A.
12477  call void @llvm.aarch64.neon.st4lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, ptr %A)
  ; Element-typed GEP: the byte step is %inc * 4, hence the shift by 2 above.
12478  %tmp = getelementptr i32, ptr %A, i64 %inc
12479  ret ptr %tmp
12480}
12481
12482declare void @llvm.aarch64.neon.st4lane.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i64, ptr)
12483
12484
; Post-increment st4lane test for <2 x i64> with a constant step.
; Stores one lane from each of %B, %C, %D and %E at %A, then returns %A
; advanced by four i64 elements. The default llc invocation is expected to fold
; the step into the store as an immediate post-index; the -global-isel=1
; invocation is expected to emit a separate add and a plain store.
; %ptr is not referenced in the body.
12485define ptr @test_v2i64_post_imm_st4lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
12486; CHECK-SD-LABEL: test_v2i64_post_imm_st4lane:
12487; CHECK-SD:       ; %bb.0:
12488; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12489; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12490; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12491; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12492; CHECK-SD-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], #32
12493; CHECK-SD-NEXT:    ret
12494;
12495; CHECK-GI-LABEL: test_v2i64_post_imm_st4lane:
12496; CHECK-GI:       ; %bb.0:
12497; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12498; CHECK-GI-NEXT:    mov x8, x0
12499; CHECK-GI-NEXT:    add x0, x0, #32
12500; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12501; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12502; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12503; CHECK-GI-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x8]
12504; CHECK-GI-NEXT:    ret
  ; The i64 0 operand corresponds to the [0] lane in the expected st4 above;
  ; the store itself uses the unmodified %A.
12505  call void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, ptr %A)
  ; 4 x i64 = 32 bytes, matching the #32 in the expected output.
12506  %tmp = getelementptr i64, ptr %A, i64 4
12507  ret ptr %tmp
12508}
12509
; Post-increment st4lane test for <2 x i64> with a run-time step.
; Stores one lane from each of %B, %C, %D and %E at %A, then returns %A
; advanced by %inc i64 elements. The default llc invocation is expected to
; scale %inc into a scratch register and use it as the register post-index of
; the store; the -global-isel=1 invocation is expected to emit a separate
; shifted add and a plain store.
; %ptr is not referenced in the body; the expected output reads %inc from x2.
12510define ptr @test_v2i64_post_reg_st4lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
12511; CHECK-SD-LABEL: test_v2i64_post_reg_st4lane:
12512; CHECK-SD:       ; %bb.0:
12513; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12514; CHECK-SD-NEXT:    lsl x8, x2, #3
12515; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12516; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12517; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12518; CHECK-SD-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], x8
12519; CHECK-SD-NEXT:    ret
12520;
12521; CHECK-GI-LABEL: test_v2i64_post_reg_st4lane:
12522; CHECK-GI:       ; %bb.0:
12523; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12524; CHECK-GI-NEXT:    mov x8, x0
12525; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
12526; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12527; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12528; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12529; CHECK-GI-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x8]
12530; CHECK-GI-NEXT:    ret
  ; The i64 0 operand corresponds to the [0] lane in the expected st4 above;
  ; the store itself uses the unmodified %A.
12531  call void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, ptr %A)
  ; Element-typed GEP: the byte step is %inc * 8, hence the shift by 3 above.
12532  %tmp = getelementptr i64, ptr %A, i64 %inc
12533  ret ptr %tmp
12534}
12535
12536declare void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, ptr)
12537
12538
; Post-increment st4lane test for <1 x i64> with a constant step.
; Stores one lane from each of %B, %C, %D and %E at %A, then returns %A
; advanced by four i64 elements. The default llc invocation is expected to fold
; the step into the store as an immediate post-index; the -global-isel=1
; invocation is expected to emit a separate add and a plain store.
; %ptr is not referenced in the body.
12539define ptr @test_v1i64_post_imm_st4lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
12540; CHECK-SD-LABEL: test_v1i64_post_imm_st4lane:
12541; CHECK-SD:       ; %bb.0:
12542; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12543; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12544; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12545; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12546; CHECK-SD-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], #32
12547; CHECK-SD-NEXT:    ret
12548;
12549; CHECK-GI-LABEL: test_v1i64_post_imm_st4lane:
12550; CHECK-GI:       ; %bb.0:
12551; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12552; CHECK-GI-NEXT:    mov x8, x0
12553; CHECK-GI-NEXT:    add x0, x0, #32
12554; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12555; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12556; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12557; CHECK-GI-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x8]
12558; CHECK-GI-NEXT:    ret
  ; The i64 0 operand corresponds to the [0] lane in the expected st4 above;
  ; the store itself uses the unmodified %A.
12559  call void @llvm.aarch64.neon.st4lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, ptr %A)
  ; 4 x i64 = 32 bytes, matching the #32 in the expected output.
12560  %tmp = getelementptr i64, ptr %A, i64 4
12561  ret ptr %tmp
12562}
12563
; Post-increment st4lane test for <1 x i64> with a run-time step.
; Stores one lane from each of %B, %C, %D and %E at %A, then returns %A
; advanced by %inc i64 elements. The default llc invocation is expected to
; scale %inc into a scratch register and use it as the register post-index of
; the store; the -global-isel=1 invocation is expected to emit a separate
; shifted add and a plain store.
; %ptr is not referenced in the body; the expected output reads %inc from x2.
12564define ptr @test_v1i64_post_reg_st4lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
12565; CHECK-SD-LABEL: test_v1i64_post_reg_st4lane:
12566; CHECK-SD:       ; %bb.0:
12567; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12568; CHECK-SD-NEXT:    lsl x8, x2, #3
12569; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12570; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12571; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12572; CHECK-SD-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], x8
12573; CHECK-SD-NEXT:    ret
12574;
12575; CHECK-GI-LABEL: test_v1i64_post_reg_st4lane:
12576; CHECK-GI:       ; %bb.0:
12577; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12578; CHECK-GI-NEXT:    mov x8, x0
12579; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
12580; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12581; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12582; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12583; CHECK-GI-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x8]
12584; CHECK-GI-NEXT:    ret
  ; The i64 0 operand corresponds to the [0] lane in the expected st4 above;
  ; the store itself uses the unmodified %A.
12585  call void @llvm.aarch64.neon.st4lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, ptr %A)
  ; Element-typed GEP: the byte step is %inc * 8, hence the shift by 3 above.
12586  %tmp = getelementptr i64, ptr %A, i64 %inc
12587  ret ptr %tmp
12588}
12589
12590declare void @llvm.aarch64.neon.st4lane.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64, ptr)
12591
12592
; Post-increment st4lane test for <4 x float> with a constant step.
; Stores one lane from each of %B, %C, %D and %E at %A, then returns %A
; advanced by four float elements. The default llc invocation is expected to
; fold the step into the store as an immediate post-index; the -global-isel=1
; invocation is expected to emit a separate add and a plain store.
; %ptr is not referenced in the body.
12593define ptr @test_v4f32_post_imm_st4lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
12594; CHECK-SD-LABEL: test_v4f32_post_imm_st4lane:
12595; CHECK-SD:       ; %bb.0:
12596; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12597; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12598; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12599; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12600; CHECK-SD-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], #16
12601; CHECK-SD-NEXT:    ret
12602;
12603; CHECK-GI-LABEL: test_v4f32_post_imm_st4lane:
12604; CHECK-GI:       ; %bb.0:
12605; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12606; CHECK-GI-NEXT:    mov x8, x0
12607; CHECK-GI-NEXT:    add x0, x0, #16
12608; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12609; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12610; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12611; CHECK-GI-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x8]
12612; CHECK-GI-NEXT:    ret
  ; The i64 0 operand corresponds to the [0] lane in the expected st4 above;
  ; the store itself uses the unmodified %A.
12613  call void @llvm.aarch64.neon.st4lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, ptr %A)
  ; 4 x float = 16 bytes, matching the #16 in the expected output.
12614  %tmp = getelementptr float, ptr %A, i32 4
12615  ret ptr %tmp
12616}
12617
; Post-increment st4lane test for <4 x float> with a run-time step.
; Stores one lane from each of %B, %C, %D and %E at %A, then returns %A
; advanced by %inc float elements. The default llc invocation is expected to
; scale %inc into a scratch register and use it as the register post-index of
; the store; the -global-isel=1 invocation is expected to emit a separate
; shifted add and a plain store.
; %ptr is not referenced in the body; the expected output reads %inc from x2.
12618define ptr @test_v4f32_post_reg_st4lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
12619; CHECK-SD-LABEL: test_v4f32_post_reg_st4lane:
12620; CHECK-SD:       ; %bb.0:
12621; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12622; CHECK-SD-NEXT:    lsl x8, x2, #2
12623; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12624; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12625; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12626; CHECK-SD-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], x8
12627; CHECK-SD-NEXT:    ret
12628;
12629; CHECK-GI-LABEL: test_v4f32_post_reg_st4lane:
12630; CHECK-GI:       ; %bb.0:
12631; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12632; CHECK-GI-NEXT:    mov x8, x0
12633; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
12634; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12635; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12636; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12637; CHECK-GI-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x8]
12638; CHECK-GI-NEXT:    ret
  ; The i64 0 operand corresponds to the [0] lane in the expected st4 above;
  ; the store itself uses the unmodified %A.
12639  call void @llvm.aarch64.neon.st4lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, ptr %A)
  ; Element-typed GEP: the byte step is %inc * 4, hence the shift by 2 above.
12640  %tmp = getelementptr float, ptr %A, i64 %inc
12641  ret ptr %tmp
12642}
12643
12644declare void @llvm.aarch64.neon.st4lane.v4f32.p0(<4 x float>, <4 x float>, <4 x float>, <4 x float>, i64, ptr)
12645
12646
; Post-increment st4lane test for <2 x float> with a constant step.
; Stores one lane from each of %B, %C, %D and %E at %A, then returns %A
; advanced by four float elements. The default llc invocation is expected to
; fold the step into the store as an immediate post-index; the -global-isel=1
; invocation is expected to emit a separate add and a plain store.
; %ptr is not referenced in the body.
12647define ptr @test_v2f32_post_imm_st4lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
12648; CHECK-SD-LABEL: test_v2f32_post_imm_st4lane:
12649; CHECK-SD:       ; %bb.0:
12650; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12651; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12652; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12653; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12654; CHECK-SD-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], #16
12655; CHECK-SD-NEXT:    ret
12656;
12657; CHECK-GI-LABEL: test_v2f32_post_imm_st4lane:
12658; CHECK-GI:       ; %bb.0:
12659; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12660; CHECK-GI-NEXT:    mov x8, x0
12661; CHECK-GI-NEXT:    add x0, x0, #16
12662; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12663; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12664; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12665; CHECK-GI-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x8]
12666; CHECK-GI-NEXT:    ret
  ; The i64 0 operand corresponds to the [0] lane in the expected st4 above;
  ; the store itself uses the unmodified %A.
12667  call void @llvm.aarch64.neon.st4lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, ptr %A)
  ; 4 x float = 16 bytes, matching the #16 in the expected output.
12668  %tmp = getelementptr float, ptr %A, i32 4
12669  ret ptr %tmp
12670}
12671
; Post-increment st4lane test for <2 x float> with a run-time step.
; Stores one lane from each of %B, %C, %D and %E at %A, then returns %A
; advanced by %inc float elements. The default llc invocation is expected to
; scale %inc into a scratch register and use it as the register post-index of
; the store; the -global-isel=1 invocation is expected to emit a separate
; shifted add and a plain store.
; %ptr is not referenced in the body; the expected output reads %inc from x2.
12672define ptr @test_v2f32_post_reg_st4lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
12673; CHECK-SD-LABEL: test_v2f32_post_reg_st4lane:
12674; CHECK-SD:       ; %bb.0:
12675; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12676; CHECK-SD-NEXT:    lsl x8, x2, #2
12677; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12678; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12679; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12680; CHECK-SD-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], x8
12681; CHECK-SD-NEXT:    ret
12682;
12683; CHECK-GI-LABEL: test_v2f32_post_reg_st4lane:
12684; CHECK-GI:       ; %bb.0:
12685; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12686; CHECK-GI-NEXT:    mov x8, x0
12687; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
12688; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12689; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12690; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12691; CHECK-GI-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x8]
12692; CHECK-GI-NEXT:    ret
  ; The i64 0 operand corresponds to the [0] lane in the expected st4 above;
  ; the store itself uses the unmodified %A.
12693  call void @llvm.aarch64.neon.st4lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, ptr %A)
  ; Element-typed GEP: the byte step is %inc * 4, hence the shift by 2 above.
12694  %tmp = getelementptr float, ptr %A, i64 %inc
12695  ret ptr %tmp
12696}
12697
12698declare void @llvm.aarch64.neon.st4lane.v2f32.p0(<2 x float>, <2 x float>, <2 x float>, <2 x float>, i64, ptr)
12699
12700
; Post-increment st4lane test for <2 x double> with a constant step.
; Stores one lane from each of %B, %C, %D and %E at %A, then returns %A
; advanced by four double elements. The default llc invocation is expected to
; fold the step into the store as an immediate post-index; the -global-isel=1
; invocation is expected to emit a separate add and a plain store.
; %ptr is not referenced in the body.
12701define ptr @test_v2f64_post_imm_st4lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
12702; CHECK-SD-LABEL: test_v2f64_post_imm_st4lane:
12703; CHECK-SD:       ; %bb.0:
12704; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12705; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12706; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12707; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12708; CHECK-SD-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], #32
12709; CHECK-SD-NEXT:    ret
12710;
12711; CHECK-GI-LABEL: test_v2f64_post_imm_st4lane:
12712; CHECK-GI:       ; %bb.0:
12713; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12714; CHECK-GI-NEXT:    mov x8, x0
12715; CHECK-GI-NEXT:    add x0, x0, #32
12716; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12717; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12718; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12719; CHECK-GI-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x8]
12720; CHECK-GI-NEXT:    ret
  ; The i64 0 operand corresponds to the [0] lane in the expected st4 above;
  ; the store itself uses the unmodified %A.
12721  call void @llvm.aarch64.neon.st4lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, ptr %A)
  ; 4 x double = 32 bytes, matching the #32 in the expected output.
12722  %tmp = getelementptr double, ptr %A, i64 4
12723  ret ptr %tmp
12724}
12725
; Post-increment st4lane test for <2 x double> with a run-time step.
; Stores one lane from each of %B, %C, %D and %E at %A, then returns %A
; advanced by %inc double elements. The default llc invocation is expected to
; scale %inc into a scratch register and use it as the register post-index of
; the store; the -global-isel=1 invocation is expected to emit a separate
; shifted add and a plain store.
; %ptr is not referenced in the body; the expected output reads %inc from x2.
12726define ptr @test_v2f64_post_reg_st4lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
12727; CHECK-SD-LABEL: test_v2f64_post_reg_st4lane:
12728; CHECK-SD:       ; %bb.0:
12729; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12730; CHECK-SD-NEXT:    lsl x8, x2, #3
12731; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12732; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12733; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12734; CHECK-SD-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], x8
12735; CHECK-SD-NEXT:    ret
12736;
12737; CHECK-GI-LABEL: test_v2f64_post_reg_st4lane:
12738; CHECK-GI:       ; %bb.0:
12739; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12740; CHECK-GI-NEXT:    mov x8, x0
12741; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
12742; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12743; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12744; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12745; CHECK-GI-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x8]
12746; CHECK-GI-NEXT:    ret
  ; The i64 0 operand corresponds to the [0] lane in the expected st4 above;
  ; the store itself uses the unmodified %A.
12747  call void @llvm.aarch64.neon.st4lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, ptr %A)
  ; Element-typed GEP: the byte step is %inc * 8, hence the shift by 3 above.
12748  %tmp = getelementptr double, ptr %A, i64 %inc
12749  ret ptr %tmp
12750}
12751
12752declare void @llvm.aarch64.neon.st4lane.v2f64.p0(<2 x double>, <2 x double>, <2 x double>, <2 x double>, i64, ptr)
12753
12754
; Post-increment st4lane test for <1 x double> with a constant step.
; Stores one lane from each of %B, %C, %D and %E at %A, then returns %A
; advanced by four double elements. The default llc invocation is expected to
; fold the step into the store as an immediate post-index; the -global-isel=1
; invocation is expected to emit a separate add and a plain store.
; %ptr is not referenced in the body.
12755define ptr @test_v1f64_post_imm_st4lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
12756; CHECK-SD-LABEL: test_v1f64_post_imm_st4lane:
12757; CHECK-SD:       ; %bb.0:
12758; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12759; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12760; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12761; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12762; CHECK-SD-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], #32
12763; CHECK-SD-NEXT:    ret
12764;
12765; CHECK-GI-LABEL: test_v1f64_post_imm_st4lane:
12766; CHECK-GI:       ; %bb.0:
12767; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12768; CHECK-GI-NEXT:    mov x8, x0
12769; CHECK-GI-NEXT:    add x0, x0, #32
12770; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12771; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12772; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12773; CHECK-GI-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x8]
12774; CHECK-GI-NEXT:    ret
  ; The i64 0 operand corresponds to the [0] lane in the expected st4 above;
  ; the store itself uses the unmodified %A.
12775  call void @llvm.aarch64.neon.st4lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, ptr %A)
  ; 4 x double = 32 bytes, matching the #32 in the expected output.
12776  %tmp = getelementptr double, ptr %A, i64 4
12777  ret ptr %tmp
12778}
12779
; Post-increment st4lane test for <1 x double> with a run-time step.
; Stores one lane from each of %B, %C, %D and %E at %A, then returns %A
; advanced by %inc double elements. The default llc invocation is expected to
; scale %inc into a scratch register and use it as the register post-index of
; the store; the -global-isel=1 invocation is expected to emit a separate
; shifted add and a plain store.
; %ptr is not referenced in the body; the expected output reads %inc from x2.
12780define ptr @test_v1f64_post_reg_st4lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
12781; CHECK-SD-LABEL: test_v1f64_post_reg_st4lane:
12782; CHECK-SD:       ; %bb.0:
12783; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12784; CHECK-SD-NEXT:    lsl x8, x2, #3
12785; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12786; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12787; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12788; CHECK-SD-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], x8
12789; CHECK-SD-NEXT:    ret
12790;
12791; CHECK-GI-LABEL: test_v1f64_post_reg_st4lane:
12792; CHECK-GI:       ; %bb.0:
12793; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12794; CHECK-GI-NEXT:    mov x8, x0
12795; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
12796; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12797; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12798; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
12799; CHECK-GI-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x8]
12800; CHECK-GI-NEXT:    ret
  ; The i64 0 operand corresponds to the [0] lane in the expected st4 above;
  ; the store itself uses the unmodified %A.
12801  call void @llvm.aarch64.neon.st4lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, ptr %A)
  ; Element-typed GEP: the byte step is %inc * 8, hence the shift by 3 above.
12802  %tmp = getelementptr double, ptr %A, i64 %inc
12803  ret ptr %tmp
12804}
12805
12806declare void @llvm.aarch64.neon.st4lane.v1f64.p0(<1 x double>, <1 x double>, <1 x double>, <1 x double>, i64, ptr)
12807
; Post-increment load-and-replicate test for <16 x i8> with a constant step.
; Loads one i8 from %bar, inserts it into every lane of a <16 x i8>, stores
; %bar + 1 to %ptr and returns the vector. The default llc invocation is
; expected to emit a single post-indexed ld1r; the -global-isel=1 invocation is
; expected to emit a post-indexed scalar byte load followed by a dup.
12808define <16 x i8> @test_v16i8_post_imm_ld1r(ptr %bar, ptr %ptr) {
12809; CHECK-SD-LABEL: test_v16i8_post_imm_ld1r:
12810; CHECK-SD:       ; %bb.0:
12811; CHECK-SD-NEXT:    ld1r.16b { v0 }, [x0], #1
12812; CHECK-SD-NEXT:    str x0, [x1]
12813; CHECK-SD-NEXT:    ret
12814;
12815; CHECK-GI-LABEL: test_v16i8_post_imm_ld1r:
12816; CHECK-GI:       ; %bb.0:
12817; CHECK-GI-NEXT:    ldrb w8, [x0], #1
12818; CHECK-GI-NEXT:    str x0, [x1]
12819; CHECK-GI-NEXT:    dup.16b v0, w8
12820; CHECK-GI-NEXT:    ret
  ; The scalar that is replicated below.
12821  %tmp1 = load i8, ptr %bar
  ; Build the splat one lane at a time, starting from an all-undef vector;
  ; every insertelement uses the same %tmp1, for lanes 0 through 15.
12822  %tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
12823  %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1
12824  %tmp4 = insertelement <16 x i8> %tmp3, i8 %tmp1, i32 2
12825  %tmp5 = insertelement <16 x i8> %tmp4, i8 %tmp1, i32 3
12826  %tmp6 = insertelement <16 x i8> %tmp5, i8 %tmp1, i32 4
12827  %tmp7 = insertelement <16 x i8> %tmp6, i8 %tmp1, i32 5
12828  %tmp8 = insertelement <16 x i8> %tmp7, i8 %tmp1, i32 6
12829  %tmp9 = insertelement <16 x i8> %tmp8, i8 %tmp1, i32 7
12830  %tmp10 = insertelement <16 x i8> %tmp9, i8 %tmp1, i32 8
12831  %tmp11 = insertelement <16 x i8> %tmp10, i8 %tmp1, i32 9
12832  %tmp12 = insertelement <16 x i8> %tmp11, i8 %tmp1, i32 10
12833  %tmp13 = insertelement <16 x i8> %tmp12, i8 %tmp1, i32 11
12834  %tmp14 = insertelement <16 x i8> %tmp13, i8 %tmp1, i32 12
12835  %tmp15 = insertelement <16 x i8> %tmp14, i8 %tmp1, i32 13
12836  %tmp16 = insertelement <16 x i8> %tmp15, i8 %tmp1, i32 14
12837  %tmp17 = insertelement <16 x i8> %tmp16, i8 %tmp1, i32 15
  ; One i8 past the loaded address, matching the #1 post-index in the expected
  ; output; the updated pointer is written through %ptr.
12838  %tmp18 = getelementptr i8, ptr %bar, i64 1
12839  store ptr %tmp18, ptr %ptr
12840  ret <16 x i8> %tmp17
12841}
12842
; Splat-load test with a variable increment: loads one i8 from %bar, inserts it
; into all 16 lanes, and stores %bar advanced by %inc bytes to %ptr.
; SelectionDAG is expected to use a register post-indexed ld1r.16b (x2 as the
; increment); GlobalISel is expected to emit a plain ld1r.16b and a separate add.
12843define <16 x i8> @test_v16i8_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) {
12844; CHECK-SD-LABEL: test_v16i8_post_reg_ld1r:
12845; CHECK-SD:       ; %bb.0:
12846; CHECK-SD-NEXT:    ld1r.16b { v0 }, [x0], x2
12847; CHECK-SD-NEXT:    str x0, [x1]
12848; CHECK-SD-NEXT:    ret
12849;
12850; CHECK-GI-LABEL: test_v16i8_post_reg_ld1r:
12851; CHECK-GI:       ; %bb.0:
12852; CHECK-GI-NEXT:    ld1r.16b { v0 }, [x0]
12853; CHECK-GI-NEXT:    add x8, x0, x2
12854; CHECK-GI-NEXT:    str x8, [x1]
12855; CHECK-GI-NEXT:    ret
12856  %tmp1 = load i8, ptr %bar
12857  %tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
12858  %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1
12859  %tmp4 = insertelement <16 x i8> %tmp3, i8 %tmp1, i32 2
12860  %tmp5 = insertelement <16 x i8> %tmp4, i8 %tmp1, i32 3
12861  %tmp6 = insertelement <16 x i8> %tmp5, i8 %tmp1, i32 4
12862  %tmp7 = insertelement <16 x i8> %tmp6, i8 %tmp1, i32 5
12863  %tmp8 = insertelement <16 x i8> %tmp7, i8 %tmp1, i32 6
12864  %tmp9 = insertelement <16 x i8> %tmp8, i8 %tmp1, i32 7
12865  %tmp10 = insertelement <16 x i8> %tmp9, i8 %tmp1, i32 8
12866  %tmp11 = insertelement <16 x i8> %tmp10, i8 %tmp1, i32 9
12867  %tmp12 = insertelement <16 x i8> %tmp11, i8 %tmp1, i32 10
12868  %tmp13 = insertelement <16 x i8> %tmp12, i8 %tmp1, i32 11
12869  %tmp14 = insertelement <16 x i8> %tmp13, i8 %tmp1, i32 12
12870  %tmp15 = insertelement <16 x i8> %tmp14, i8 %tmp1, i32 13
12871  %tmp16 = insertelement <16 x i8> %tmp15, i8 %tmp1, i32 14
12872  %tmp17 = insertelement <16 x i8> %tmp16, i8 %tmp1, i32 15
12873  %tmp18 = getelementptr i8, ptr %bar, i64 %inc
12874  store ptr %tmp18, ptr %ptr
12875  ret <16 x i8> %tmp17
12876}
12877
; Splat-load test: loads one i8 from %bar, inserts it into all 8 lanes of a
; <8 x i8>, and stores %bar advanced by 1 byte to %ptr.
; SelectionDAG is expected to fold this into a post-indexed ld1r.8b with
; immediate #1; GlobalISel is expected to emit a post-indexed ldrb plus dup.8b.
12878define <8 x i8> @test_v8i8_post_imm_ld1r(ptr %bar, ptr %ptr) {
12879; CHECK-SD-LABEL: test_v8i8_post_imm_ld1r:
12880; CHECK-SD:       ; %bb.0:
12881; CHECK-SD-NEXT:    ld1r.8b { v0 }, [x0], #1
12882; CHECK-SD-NEXT:    str x0, [x1]
12883; CHECK-SD-NEXT:    ret
12884;
12885; CHECK-GI-LABEL: test_v8i8_post_imm_ld1r:
12886; CHECK-GI:       ; %bb.0:
12887; CHECK-GI-NEXT:    ldrb w8, [x0], #1
12888; CHECK-GI-NEXT:    str x0, [x1]
12889; CHECK-GI-NEXT:    dup.8b v0, w8
12890; CHECK-GI-NEXT:    ret
12891  %tmp1 = load i8, ptr %bar
12892  %tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
12893  %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1
12894  %tmp4 = insertelement <8 x i8> %tmp3, i8 %tmp1, i32 2
12895  %tmp5 = insertelement <8 x i8> %tmp4, i8 %tmp1, i32 3
12896  %tmp6 = insertelement <8 x i8> %tmp5, i8 %tmp1, i32 4
12897  %tmp7 = insertelement <8 x i8> %tmp6, i8 %tmp1, i32 5
12898  %tmp8 = insertelement <8 x i8> %tmp7, i8 %tmp1, i32 6
12899  %tmp9 = insertelement <8 x i8> %tmp8, i8 %tmp1, i32 7
12900  %tmp10 = getelementptr i8, ptr %bar, i64 1
12901  store ptr %tmp10, ptr %ptr
12902  ret <8 x i8> %tmp9
12903}
12904
; Splat-load test with a variable increment: loads one i8 from %bar, inserts it
; into all 8 lanes, and stores %bar advanced by %inc bytes to %ptr.
; SelectionDAG is expected to use a register post-indexed ld1r.8b (x2 as the
; increment); GlobalISel is expected to emit a plain ld1r.8b and a separate add.
12905define <8 x i8> @test_v8i8_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) {
12906; CHECK-SD-LABEL: test_v8i8_post_reg_ld1r:
12907; CHECK-SD:       ; %bb.0:
12908; CHECK-SD-NEXT:    ld1r.8b { v0 }, [x0], x2
12909; CHECK-SD-NEXT:    str x0, [x1]
12910; CHECK-SD-NEXT:    ret
12911;
12912; CHECK-GI-LABEL: test_v8i8_post_reg_ld1r:
12913; CHECK-GI:       ; %bb.0:
12914; CHECK-GI-NEXT:    ld1r.8b { v0 }, [x0]
12915; CHECK-GI-NEXT:    add x8, x0, x2
12916; CHECK-GI-NEXT:    str x8, [x1]
12917; CHECK-GI-NEXT:    ret
12918  %tmp1 = load i8, ptr %bar
12919  %tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
12920  %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1
12921  %tmp4 = insertelement <8 x i8> %tmp3, i8 %tmp1, i32 2
12922  %tmp5 = insertelement <8 x i8> %tmp4, i8 %tmp1, i32 3
12923  %tmp6 = insertelement <8 x i8> %tmp5, i8 %tmp1, i32 4
12924  %tmp7 = insertelement <8 x i8> %tmp6, i8 %tmp1, i32 5
12925  %tmp8 = insertelement <8 x i8> %tmp7, i8 %tmp1, i32 6
12926  %tmp9 = insertelement <8 x i8> %tmp8, i8 %tmp1, i32 7
12927  %tmp10 = getelementptr i8, ptr %bar, i64 %inc
12928  store ptr %tmp10, ptr %ptr
12929  ret <8 x i8> %tmp9
12930}
12931
; Splat-load test: loads one i16 from %bar, inserts it into all 8 lanes of a
; <8 x i16>, and stores %bar advanced by one i16 element to %ptr.
; SelectionDAG is expected to fold this into a post-indexed ld1r.8h with
; immediate #2; GlobalISel is expected to emit a post-indexed ldrh plus dup.8h.
12932define <8 x i16> @test_v8i16_post_imm_ld1r(ptr %bar, ptr %ptr) {
12933; CHECK-SD-LABEL: test_v8i16_post_imm_ld1r:
12934; CHECK-SD:       ; %bb.0:
12935; CHECK-SD-NEXT:    ld1r.8h { v0 }, [x0], #2
12936; CHECK-SD-NEXT:    str x0, [x1]
12937; CHECK-SD-NEXT:    ret
12938;
12939; CHECK-GI-LABEL: test_v8i16_post_imm_ld1r:
12940; CHECK-GI:       ; %bb.0:
12941; CHECK-GI-NEXT:    ldrh w8, [x0], #2
12942; CHECK-GI-NEXT:    str x0, [x1]
12943; CHECK-GI-NEXT:    dup.8h v0, w8
12944; CHECK-GI-NEXT:    ret
12945  %tmp1 = load i16, ptr %bar
12946  %tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
12947  %tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1
12948  %tmp4 = insertelement <8 x i16> %tmp3, i16 %tmp1, i32 2
12949  %tmp5 = insertelement <8 x i16> %tmp4, i16 %tmp1, i32 3
12950  %tmp6 = insertelement <8 x i16> %tmp5, i16 %tmp1, i32 4
12951  %tmp7 = insertelement <8 x i16> %tmp6, i16 %tmp1, i32 5
12952  %tmp8 = insertelement <8 x i16> %tmp7, i16 %tmp1, i32 6
12953  %tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 7
12954  %tmp10 = getelementptr i16, ptr %bar, i64 1
12955  store ptr %tmp10, ptr %ptr
12956  ret <8 x i16> %tmp9
12957}
12958
; Splat-load test with a variable increment: loads one i16 from %bar, inserts it
; into all 8 lanes, and stores %bar advanced by %inc i16 elements to %ptr.
; SelectionDAG is expected to scale %inc with lsl #1 and use a register
; post-indexed ld1r.8h; GlobalISel is expected to emit a plain ld1r.8h and a
; separate add with an lsl #1 shifted operand.
12959define <8 x i16> @test_v8i16_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) {
12960; CHECK-SD-LABEL: test_v8i16_post_reg_ld1r:
12961; CHECK-SD:       ; %bb.0:
12962; CHECK-SD-NEXT:    lsl x8, x2, #1
12963; CHECK-SD-NEXT:    ld1r.8h { v0 }, [x0], x8
12964; CHECK-SD-NEXT:    str x0, [x1]
12965; CHECK-SD-NEXT:    ret
12966;
12967; CHECK-GI-LABEL: test_v8i16_post_reg_ld1r:
12968; CHECK-GI:       ; %bb.0:
12969; CHECK-GI-NEXT:    ld1r.8h { v0 }, [x0]
12970; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
12971; CHECK-GI-NEXT:    str x8, [x1]
12972; CHECK-GI-NEXT:    ret
12973  %tmp1 = load i16, ptr %bar
12974  %tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
12975  %tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1
12976  %tmp4 = insertelement <8 x i16> %tmp3, i16 %tmp1, i32 2
12977  %tmp5 = insertelement <8 x i16> %tmp4, i16 %tmp1, i32 3
12978  %tmp6 = insertelement <8 x i16> %tmp5, i16 %tmp1, i32 4
12979  %tmp7 = insertelement <8 x i16> %tmp6, i16 %tmp1, i32 5
12980  %tmp8 = insertelement <8 x i16> %tmp7, i16 %tmp1, i32 6
12981  %tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 7
12982  %tmp10 = getelementptr i16, ptr %bar, i64 %inc
12983  store ptr %tmp10, ptr %ptr
12984  ret <8 x i16> %tmp9
12985}
12986
; Splat-load test: loads one i16 from %bar, inserts it into all 4 lanes of a
; <4 x i16>, and stores %bar advanced by one i16 element to %ptr.
; SelectionDAG is expected to fold this into a post-indexed ld1r.4h with
; immediate #2; GlobalISel is expected to emit a post-indexed ldrh plus dup.4h.
12987define <4 x i16> @test_v4i16_post_imm_ld1r(ptr %bar, ptr %ptr) {
12988; CHECK-SD-LABEL: test_v4i16_post_imm_ld1r:
12989; CHECK-SD:       ; %bb.0:
12990; CHECK-SD-NEXT:    ld1r.4h { v0 }, [x0], #2
12991; CHECK-SD-NEXT:    str x0, [x1]
12992; CHECK-SD-NEXT:    ret
12993;
12994; CHECK-GI-LABEL: test_v4i16_post_imm_ld1r:
12995; CHECK-GI:       ; %bb.0:
12996; CHECK-GI-NEXT:    ldrh w8, [x0], #2
12997; CHECK-GI-NEXT:    str x0, [x1]
12998; CHECK-GI-NEXT:    dup.4h v0, w8
12999; CHECK-GI-NEXT:    ret
13000  %tmp1 = load i16, ptr %bar
13001  %tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
13002  %tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1
13003  %tmp4 = insertelement <4 x i16> %tmp3, i16 %tmp1, i32 2
13004  %tmp5 = insertelement <4 x i16> %tmp4, i16 %tmp1, i32 3
13005  %tmp6 = getelementptr i16, ptr %bar, i64 1
13006  store ptr %tmp6, ptr %ptr
13007  ret <4 x i16> %tmp5
13008}
13009
; Splat-load test with a variable increment: loads one i16 from %bar, inserts it
; into all 4 lanes, and stores %bar advanced by %inc i16 elements to %ptr.
; SelectionDAG is expected to scale %inc with lsl #1 and use a register
; post-indexed ld1r.4h; GlobalISel is expected to emit a plain ld1r.4h and a
; separate add with an lsl #1 shifted operand.
13010define <4 x i16> @test_v4i16_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) {
13011; CHECK-SD-LABEL: test_v4i16_post_reg_ld1r:
13012; CHECK-SD:       ; %bb.0:
13013; CHECK-SD-NEXT:    lsl x8, x2, #1
13014; CHECK-SD-NEXT:    ld1r.4h { v0 }, [x0], x8
13015; CHECK-SD-NEXT:    str x0, [x1]
13016; CHECK-SD-NEXT:    ret
13017;
13018; CHECK-GI-LABEL: test_v4i16_post_reg_ld1r:
13019; CHECK-GI:       ; %bb.0:
13020; CHECK-GI-NEXT:    ld1r.4h { v0 }, [x0]
13021; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
13022; CHECK-GI-NEXT:    str x8, [x1]
13023; CHECK-GI-NEXT:    ret
13024  %tmp1 = load i16, ptr %bar
13025  %tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
13026  %tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1
13027  %tmp4 = insertelement <4 x i16> %tmp3, i16 %tmp1, i32 2
13028  %tmp5 = insertelement <4 x i16> %tmp4, i16 %tmp1, i32 3
13029  %tmp6 = getelementptr i16, ptr %bar, i64 %inc
13030  store ptr %tmp6, ptr %ptr
13031  ret <4 x i16> %tmp5
13032}
13033
; Splat-load test: loads one i32 from %bar, inserts it into all 4 lanes of a
; <4 x i32>, and stores %bar advanced by one i32 element to %ptr.
; SelectionDAG is expected to fold this into a post-indexed ld1r.4s with
; immediate #4; GlobalISel is expected to emit a post-indexed ldr (w8) plus dup.4s.
13034define <4 x i32> @test_v4i32_post_imm_ld1r(ptr %bar, ptr %ptr) {
13035; CHECK-SD-LABEL: test_v4i32_post_imm_ld1r:
13036; CHECK-SD:       ; %bb.0:
13037; CHECK-SD-NEXT:    ld1r.4s { v0 }, [x0], #4
13038; CHECK-SD-NEXT:    str x0, [x1]
13039; CHECK-SD-NEXT:    ret
13040;
13041; CHECK-GI-LABEL: test_v4i32_post_imm_ld1r:
13042; CHECK-GI:       ; %bb.0:
13043; CHECK-GI-NEXT:    ldr w8, [x0], #4
13044; CHECK-GI-NEXT:    str x0, [x1]
13045; CHECK-GI-NEXT:    dup.4s v0, w8
13046; CHECK-GI-NEXT:    ret
13047  %tmp1 = load i32, ptr %bar
13048  %tmp2 = insertelement <4 x i32> <i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp1, i32 0
13049  %tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
13050  %tmp4 = insertelement <4 x i32> %tmp3, i32 %tmp1, i32 2
13051  %tmp5 = insertelement <4 x i32> %tmp4, i32 %tmp1, i32 3
13052  %tmp6 = getelementptr i32, ptr %bar, i64 1
13053  store ptr %tmp6, ptr %ptr
13054  ret <4 x i32> %tmp5
13055}
13056
; Splat-load test with a variable increment: loads one i32 from %bar, inserts it
; into all 4 lanes, and stores %bar advanced by %inc i32 elements to %ptr.
; SelectionDAG is expected to scale %inc with lsl #2 and use a register
; post-indexed ld1r.4s; GlobalISel is expected to emit a plain ld1r.4s and a
; separate add with an lsl #2 shifted operand.
13057define <4 x i32> @test_v4i32_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) {
13058; CHECK-SD-LABEL: test_v4i32_post_reg_ld1r:
13059; CHECK-SD:       ; %bb.0:
13060; CHECK-SD-NEXT:    lsl x8, x2, #2
13061; CHECK-SD-NEXT:    ld1r.4s { v0 }, [x0], x8
13062; CHECK-SD-NEXT:    str x0, [x1]
13063; CHECK-SD-NEXT:    ret
13064;
13065; CHECK-GI-LABEL: test_v4i32_post_reg_ld1r:
13066; CHECK-GI:       ; %bb.0:
13067; CHECK-GI-NEXT:    ld1r.4s { v0 }, [x0]
13068; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
13069; CHECK-GI-NEXT:    str x8, [x1]
13070; CHECK-GI-NEXT:    ret
13071  %tmp1 = load i32, ptr %bar
13072  %tmp2 = insertelement <4 x i32> <i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp1, i32 0
13073  %tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
13074  %tmp4 = insertelement <4 x i32> %tmp3, i32 %tmp1, i32 2
13075  %tmp5 = insertelement <4 x i32> %tmp4, i32 %tmp1, i32 3
13076  %tmp6 = getelementptr i32, ptr %bar, i64 %inc
13077  store ptr %tmp6, ptr %ptr
13078  ret <4 x i32> %tmp5
13079}
13080
; Splat-load test: loads one i32 from %bar, inserts it into both lanes of a
; <2 x i32>, and stores %bar advanced by one i32 element to %ptr.
; SelectionDAG is expected to fold this into a post-indexed ld1r.2s with
; immediate #4; GlobalISel is expected to emit a post-indexed ldr (w8) plus dup.2s.
13081define <2 x i32> @test_v2i32_post_imm_ld1r(ptr %bar, ptr %ptr) {
13082; CHECK-SD-LABEL: test_v2i32_post_imm_ld1r:
13083; CHECK-SD:       ; %bb.0:
13084; CHECK-SD-NEXT:    ld1r.2s { v0 }, [x0], #4
13085; CHECK-SD-NEXT:    str x0, [x1]
13086; CHECK-SD-NEXT:    ret
13087;
13088; CHECK-GI-LABEL: test_v2i32_post_imm_ld1r:
13089; CHECK-GI:       ; %bb.0:
13090; CHECK-GI-NEXT:    ldr w8, [x0], #4
13091; CHECK-GI-NEXT:    str x0, [x1]
13092; CHECK-GI-NEXT:    dup.2s v0, w8
13093; CHECK-GI-NEXT:    ret
13094  %tmp1 = load i32, ptr %bar
13095  %tmp2 = insertelement <2 x i32> <i32 undef, i32 undef>, i32 %tmp1, i32 0
13096  %tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1
13097  %tmp4 = getelementptr i32, ptr %bar, i64 1
13098  store ptr %tmp4, ptr %ptr
13099  ret <2 x i32> %tmp3
13100}
13101
; Splat-load test with a variable increment: loads one i32 from %bar, inserts it
; into both lanes, and stores %bar advanced by %inc i32 elements to %ptr.
; SelectionDAG is expected to scale %inc with lsl #2 and use a register
; post-indexed ld1r.2s; GlobalISel is expected to emit a plain ld1r.2s and a
; separate add with an lsl #2 shifted operand.
13102define <2 x i32> @test_v2i32_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) {
13103; CHECK-SD-LABEL: test_v2i32_post_reg_ld1r:
13104; CHECK-SD:       ; %bb.0:
13105; CHECK-SD-NEXT:    lsl x8, x2, #2
13106; CHECK-SD-NEXT:    ld1r.2s { v0 }, [x0], x8
13107; CHECK-SD-NEXT:    str x0, [x1]
13108; CHECK-SD-NEXT:    ret
13109;
13110; CHECK-GI-LABEL: test_v2i32_post_reg_ld1r:
13111; CHECK-GI:       ; %bb.0:
13112; CHECK-GI-NEXT:    ld1r.2s { v0 }, [x0]
13113; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
13114; CHECK-GI-NEXT:    str x8, [x1]
13115; CHECK-GI-NEXT:    ret
13116  %tmp1 = load i32, ptr %bar
13117  %tmp2 = insertelement <2 x i32> <i32 undef, i32 undef>, i32 %tmp1, i32 0
13118  %tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1
13119  %tmp4 = getelementptr i32, ptr %bar, i64 %inc
13120  store ptr %tmp4, ptr %ptr
13121  ret <2 x i32> %tmp3
13122}
13123
; Splat-load test: loads one i64 from %bar, inserts it into both lanes of a
; <2 x i64>, and stores %bar advanced by one i64 element to %ptr.
; SelectionDAG is expected to fold this into a post-indexed ld1r.2d with
; immediate #8; GlobalISel is expected to emit a post-indexed ldr (x8) plus dup.2d.
13124define <2 x i64> @test_v2i64_post_imm_ld1r(ptr %bar, ptr %ptr) {
13125; CHECK-SD-LABEL: test_v2i64_post_imm_ld1r:
13126; CHECK-SD:       ; %bb.0:
13127; CHECK-SD-NEXT:    ld1r.2d { v0 }, [x0], #8
13128; CHECK-SD-NEXT:    str x0, [x1]
13129; CHECK-SD-NEXT:    ret
13130;
13131; CHECK-GI-LABEL: test_v2i64_post_imm_ld1r:
13132; CHECK-GI:       ; %bb.0:
13133; CHECK-GI-NEXT:    ldr x8, [x0], #8
13134; CHECK-GI-NEXT:    str x0, [x1]
13135; CHECK-GI-NEXT:    dup.2d v0, x8
13136; CHECK-GI-NEXT:    ret
13137  %tmp1 = load i64, ptr %bar
13138  %tmp2 = insertelement <2 x i64> <i64 undef, i64 undef>, i64 %tmp1, i32 0
13139  %tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1
13140  %tmp4 = getelementptr i64, ptr %bar, i64 1
13141  store ptr %tmp4, ptr %ptr
13142  ret <2 x i64> %tmp3
13143}
13144
; Splat-load test with a variable increment: loads one i64 from %bar, inserts it
; into both lanes, and stores %bar advanced by %inc i64 elements to %ptr.
; SelectionDAG is expected to scale %inc with lsl #3 and use a register
; post-indexed ld1r.2d; GlobalISel is expected to emit a plain ld1r.2d and a
; separate add with an lsl #3 shifted operand.
13145define <2 x i64> @test_v2i64_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) {
13146; CHECK-SD-LABEL: test_v2i64_post_reg_ld1r:
13147; CHECK-SD:       ; %bb.0:
13148; CHECK-SD-NEXT:    lsl x8, x2, #3
13149; CHECK-SD-NEXT:    ld1r.2d { v0 }, [x0], x8
13150; CHECK-SD-NEXT:    str x0, [x1]
13151; CHECK-SD-NEXT:    ret
13152;
13153; CHECK-GI-LABEL: test_v2i64_post_reg_ld1r:
13154; CHECK-GI:       ; %bb.0:
13155; CHECK-GI-NEXT:    ld1r.2d { v0 }, [x0]
13156; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
13157; CHECK-GI-NEXT:    str x8, [x1]
13158; CHECK-GI-NEXT:    ret
13159  %tmp1 = load i64, ptr %bar
13160  %tmp2 = insertelement <2 x i64> <i64 undef, i64 undef>, i64 %tmp1, i32 0
13161  %tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1
13162  %tmp4 = getelementptr i64, ptr %bar, i64 %inc
13163  store ptr %tmp4, ptr %ptr
13164  ret <2 x i64> %tmp3
13165}
13166
; Splat-load test: loads one float from %bar, inserts it into all 4 lanes of a
; <4 x float>, and stores %bar advanced by one float element to %ptr.
; SelectionDAG is expected to fold this into a post-indexed ld1r.4s with
; immediate #4; GlobalISel is expected to emit a post-indexed ldr into s0
; followed by a lane dup.4s from v0[0].
13167define <4 x float> @test_v4f32_post_imm_ld1r(ptr %bar, ptr %ptr) {
13168; CHECK-SD-LABEL: test_v4f32_post_imm_ld1r:
13169; CHECK-SD:       ; %bb.0:
13170; CHECK-SD-NEXT:    ld1r.4s { v0 }, [x0], #4
13171; CHECK-SD-NEXT:    str x0, [x1]
13172; CHECK-SD-NEXT:    ret
13173;
13174; CHECK-GI-LABEL: test_v4f32_post_imm_ld1r:
13175; CHECK-GI:       ; %bb.0:
13176; CHECK-GI-NEXT:    ldr s0, [x0], #4
13177; CHECK-GI-NEXT:    str x0, [x1]
13178; CHECK-GI-NEXT:    dup.4s v0, v0[0]
13179; CHECK-GI-NEXT:    ret
13180  %tmp1 = load float, ptr %bar
13181  %tmp2 = insertelement <4 x float> <float undef, float undef, float undef, float undef>, float %tmp1, i32 0
13182  %tmp3 = insertelement <4 x float> %tmp2, float %tmp1, i32 1
13183  %tmp4 = insertelement <4 x float> %tmp3, float %tmp1, i32 2
13184  %tmp5 = insertelement <4 x float> %tmp4, float %tmp1, i32 3
13185  %tmp6 = getelementptr float, ptr %bar, i64 1
13186  store ptr %tmp6, ptr %ptr
13187  ret <4 x float> %tmp5
13188}
13189
; Splat-load test with a variable increment: loads one float from %bar, inserts
; it into all 4 lanes, and stores %bar advanced by %inc float elements to %ptr.
; SelectionDAG is expected to scale %inc with lsl #2 and use a register
; post-indexed ld1r.4s; GlobalISel is expected to emit a plain ld1r.4s and a
; separate add with an lsl #2 shifted operand.
13190define <4 x float> @test_v4f32_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) {
13191; CHECK-SD-LABEL: test_v4f32_post_reg_ld1r:
13192; CHECK-SD:       ; %bb.0:
13193; CHECK-SD-NEXT:    lsl x8, x2, #2
13194; CHECK-SD-NEXT:    ld1r.4s { v0 }, [x0], x8
13195; CHECK-SD-NEXT:    str x0, [x1]
13196; CHECK-SD-NEXT:    ret
13197;
13198; CHECK-GI-LABEL: test_v4f32_post_reg_ld1r:
13199; CHECK-GI:       ; %bb.0:
13200; CHECK-GI-NEXT:    ld1r.4s { v0 }, [x0]
13201; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
13202; CHECK-GI-NEXT:    str x8, [x1]
13203; CHECK-GI-NEXT:    ret
13204  %tmp1 = load float, ptr %bar
13205  %tmp2 = insertelement <4 x float> <float undef, float undef, float undef, float undef>, float %tmp1, i32 0
13206  %tmp3 = insertelement <4 x float> %tmp2, float %tmp1, i32 1
13207  %tmp4 = insertelement <4 x float> %tmp3, float %tmp1, i32 2
13208  %tmp5 = insertelement <4 x float> %tmp4, float %tmp1, i32 3
13209  %tmp6 = getelementptr float, ptr %bar, i64 %inc
13210  store ptr %tmp6, ptr %ptr
13211  ret <4 x float> %tmp5
13212}
13213
; Splat-load test: loads one float from %bar, inserts it into both lanes of a
; <2 x float>, and stores %bar advanced by one float element to %ptr.
; SelectionDAG is expected to fold this into a post-indexed ld1r.2s with
; immediate #4; GlobalISel is expected to emit a post-indexed ldr into s0
; followed by a lane dup.2s from v0[0].
13214define <2 x float> @test_v2f32_post_imm_ld1r(ptr %bar, ptr %ptr) {
13215; CHECK-SD-LABEL: test_v2f32_post_imm_ld1r:
13216; CHECK-SD:       ; %bb.0:
13217; CHECK-SD-NEXT:    ld1r.2s { v0 }, [x0], #4
13218; CHECK-SD-NEXT:    str x0, [x1]
13219; CHECK-SD-NEXT:    ret
13220;
13221; CHECK-GI-LABEL: test_v2f32_post_imm_ld1r:
13222; CHECK-GI:       ; %bb.0:
13223; CHECK-GI-NEXT:    ldr s0, [x0], #4
13224; CHECK-GI-NEXT:    str x0, [x1]
13225; CHECK-GI-NEXT:    dup.2s v0, v0[0]
13226; CHECK-GI-NEXT:    ret
13227  %tmp1 = load float, ptr %bar
13228  %tmp2 = insertelement <2 x float> <float undef, float undef>, float %tmp1, i32 0
13229  %tmp3 = insertelement <2 x float> %tmp2, float %tmp1, i32 1
13230  %tmp4 = getelementptr float, ptr %bar, i64 1
13231  store ptr %tmp4, ptr %ptr
13232  ret <2 x float> %tmp3
13233}
13234
; Splat-load test with a variable increment: loads one float from %bar, inserts
; it into both lanes, and stores %bar advanced by %inc float elements to %ptr.
; SelectionDAG is expected to scale %inc with lsl #2 and use a register
; post-indexed ld1r.2s; GlobalISel is expected to emit a plain ld1r.2s and a
; separate add with an lsl #2 shifted operand.
13235define <2 x float> @test_v2f32_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) {
13236; CHECK-SD-LABEL: test_v2f32_post_reg_ld1r:
13237; CHECK-SD:       ; %bb.0:
13238; CHECK-SD-NEXT:    lsl x8, x2, #2
13239; CHECK-SD-NEXT:    ld1r.2s { v0 }, [x0], x8
13240; CHECK-SD-NEXT:    str x0, [x1]
13241; CHECK-SD-NEXT:    ret
13242;
13243; CHECK-GI-LABEL: test_v2f32_post_reg_ld1r:
13244; CHECK-GI:       ; %bb.0:
13245; CHECK-GI-NEXT:    ld1r.2s { v0 }, [x0]
13246; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
13247; CHECK-GI-NEXT:    str x8, [x1]
13248; CHECK-GI-NEXT:    ret
13249  %tmp1 = load float, ptr %bar
13250  %tmp2 = insertelement <2 x float> <float undef, float undef>, float %tmp1, i32 0
13251  %tmp3 = insertelement <2 x float> %tmp2, float %tmp1, i32 1
13252  %tmp4 = getelementptr float, ptr %bar, i64 %inc
13253  store ptr %tmp4, ptr %ptr
13254  ret <2 x float> %tmp3
13255}
13256
; Splat-load test: loads one double from %bar, inserts it into both lanes of a
; <2 x double>, and stores %bar advanced by one double element to %ptr.
; SelectionDAG is expected to fold this into a post-indexed ld1r.2d with
; immediate #8; GlobalISel is expected to emit a post-indexed ldr into d0
; followed by a lane dup.2d from v0[0].
13257define <2 x double> @test_v2f64_post_imm_ld1r(ptr %bar, ptr %ptr) {
13258; CHECK-SD-LABEL: test_v2f64_post_imm_ld1r:
13259; CHECK-SD:       ; %bb.0:
13260; CHECK-SD-NEXT:    ld1r.2d { v0 }, [x0], #8
13261; CHECK-SD-NEXT:    str x0, [x1]
13262; CHECK-SD-NEXT:    ret
13263;
13264; CHECK-GI-LABEL: test_v2f64_post_imm_ld1r:
13265; CHECK-GI:       ; %bb.0:
13266; CHECK-GI-NEXT:    ldr d0, [x0], #8
13267; CHECK-GI-NEXT:    str x0, [x1]
13268; CHECK-GI-NEXT:    dup.2d v0, v0[0]
13269; CHECK-GI-NEXT:    ret
13270  %tmp1 = load double, ptr %bar
13271  %tmp2 = insertelement <2 x double> <double undef, double undef>, double %tmp1, i32 0
13272  %tmp3 = insertelement <2 x double> %tmp2, double %tmp1, i32 1
13273  %tmp4 = getelementptr double, ptr %bar, i64 1
13274  store ptr %tmp4, ptr %ptr
13275  ret <2 x double> %tmp3
13276}
13277
; Splat-load test with a variable increment: loads one double from %bar, inserts
; it into both lanes, and stores %bar advanced by %inc double elements to %ptr.
; SelectionDAG is expected to scale %inc with lsl #3 and use a register
; post-indexed ld1r.2d; GlobalISel is expected to emit a plain ld1r.2d and a
; separate add with an lsl #3 shifted operand.
13278define <2 x double> @test_v2f64_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) {
13279; CHECK-SD-LABEL: test_v2f64_post_reg_ld1r:
13280; CHECK-SD:       ; %bb.0:
13281; CHECK-SD-NEXT:    lsl x8, x2, #3
13282; CHECK-SD-NEXT:    ld1r.2d { v0 }, [x0], x8
13283; CHECK-SD-NEXT:    str x0, [x1]
13284; CHECK-SD-NEXT:    ret
13285;
13286; CHECK-GI-LABEL: test_v2f64_post_reg_ld1r:
13287; CHECK-GI:       ; %bb.0:
13288; CHECK-GI-NEXT:    ld1r.2d { v0 }, [x0]
13289; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
13290; CHECK-GI-NEXT:    str x8, [x1]
13291; CHECK-GI-NEXT:    ret
13292  %tmp1 = load double, ptr %bar
13293  %tmp2 = insertelement <2 x double> <double undef, double undef>, double %tmp1, i32 0
13294  %tmp3 = insertelement <2 x double> %tmp2, double %tmp1, i32 1
13295  %tmp4 = getelementptr double, ptr %bar, i64 %inc
13296  store ptr %tmp4, ptr %ptr
13297  ret <2 x double> %tmp3
13298}
13299
; Lane-load test: loads one i8 from %bar, inserts it into lane 1 of %A, and
; stores %bar advanced by 1 byte to %ptr.
; SelectionDAG is expected to fold this into a post-indexed ld1.b lane load with
; immediate #1; GlobalISel is expected to emit a post-indexed ldrb plus a
; mov.b lane insert from w8.
13300define <16 x i8> @test_v16i8_post_imm_ld1lane(ptr %bar, ptr %ptr, <16 x i8> %A) {
13301; CHECK-SD-LABEL: test_v16i8_post_imm_ld1lane:
13302; CHECK-SD:       ; %bb.0:
13303; CHECK-SD-NEXT:    ld1.b { v0 }[1], [x0], #1
13304; CHECK-SD-NEXT:    str x0, [x1]
13305; CHECK-SD-NEXT:    ret
13306;
13307; CHECK-GI-LABEL: test_v16i8_post_imm_ld1lane:
13308; CHECK-GI:       ; %bb.0:
13309; CHECK-GI-NEXT:    ldrb w8, [x0], #1
13310; CHECK-GI-NEXT:    str x0, [x1]
13311; CHECK-GI-NEXT:    mov.b v0[1], w8
13312; CHECK-GI-NEXT:    ret
13313  %tmp1 = load i8, ptr %bar
13314  %tmp2 = insertelement <16 x i8> %A, i8 %tmp1, i32 1
13315  %tmp3 = getelementptr i8, ptr %bar, i64 1
13316  store ptr %tmp3, ptr %ptr
13317  ret <16 x i8> %tmp2
13318}
13319
; Lane-load test with a variable increment: loads one i8 from %bar, inserts it
; into lane 1 of %A, and stores %bar advanced by %inc bytes to %ptr.
; SelectionDAG is expected to use a register post-indexed ld1.b lane load;
; GlobalISel is expected to load into b1, compute the new address with a
; separate add, and insert the lane with mov.b from v1[0].
13320define <16 x i8> @test_v16i8_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <16 x i8> %A) {
13321; CHECK-SD-LABEL: test_v16i8_post_reg_ld1lane:
13322; CHECK-SD:       ; %bb.0:
13323; CHECK-SD-NEXT:    ld1.b { v0 }[1], [x0], x2
13324; CHECK-SD-NEXT:    str x0, [x1]
13325; CHECK-SD-NEXT:    ret
13326;
13327; CHECK-GI-LABEL: test_v16i8_post_reg_ld1lane:
13328; CHECK-GI:       ; %bb.0:
13329; CHECK-GI-NEXT:    ldr b1, [x0]
13330; CHECK-GI-NEXT:    add x8, x0, x2
13331; CHECK-GI-NEXT:    str x8, [x1]
13332; CHECK-GI-NEXT:    mov.b v0[1], v1[0]
13333; CHECK-GI-NEXT:    ret
13334  %tmp1 = load i8, ptr %bar
13335  %tmp2 = insertelement <16 x i8> %A, i8 %tmp1, i32 1
13336  %tmp3 = getelementptr i8, ptr %bar, i64 %inc
13337  store ptr %tmp3, ptr %ptr
13338  ret <16 x i8> %tmp2
13339}
13340
; Lane-load test: loads one i8 from %bar, inserts it into lane 1 of the 64-bit
; vector %A, and stores %bar advanced by 1 byte to %ptr.
; SelectionDAG is expected to fold this into a post-indexed ld1.b lane load with
; immediate #1; GlobalISel is expected to emit a post-indexed ldrb plus a
; mov.b lane insert from w8. The `kill` lines are register annotations showing
; d0 being treated as q0 around the lane write.
13341define <8 x i8> @test_v8i8_post_imm_ld1lane(ptr %bar, ptr %ptr, <8 x i8> %A) {
13342; CHECK-SD-LABEL: test_v8i8_post_imm_ld1lane:
13343; CHECK-SD:       ; %bb.0:
13344; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
13345; CHECK-SD-NEXT:    ld1.b { v0 }[1], [x0], #1
13346; CHECK-SD-NEXT:    str x0, [x1]
13347; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
13348; CHECK-SD-NEXT:    ret
13349;
13350; CHECK-GI-LABEL: test_v8i8_post_imm_ld1lane:
13351; CHECK-GI:       ; %bb.0:
13352; CHECK-GI-NEXT:    ldrb w8, [x0], #1
13353; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
13354; CHECK-GI-NEXT:    str x0, [x1]
13355; CHECK-GI-NEXT:    mov.b v0[1], w8
13356; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
13357; CHECK-GI-NEXT:    ret
13358  %tmp1 = load i8, ptr %bar
13359  %tmp2 = insertelement <8 x i8> %A, i8 %tmp1, i32 1
13360  %tmp3 = getelementptr i8, ptr %bar, i64 1
13361  store ptr %tmp3, ptr %ptr
13362  ret <8 x i8> %tmp2
13363}
13364
; Lane-load test with a variable increment: loads one i8 from %bar, inserts it
; into lane 1 of the 64-bit vector %A, and stores %bar advanced by %inc bytes
; to %ptr.
; SelectionDAG is expected to use a register post-indexed ld1.b lane load;
; GlobalISel is expected to load into b1, compute the new address with a
; separate add, and insert the lane with mov.b from v1[0]. The `kill` lines are
; register annotations showing d0 being treated as q0 around the lane write.
13365define <8 x i8> @test_v8i8_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <8 x i8> %A) {
13366; CHECK-SD-LABEL: test_v8i8_post_reg_ld1lane:
13367; CHECK-SD:       ; %bb.0:
13368; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
13369; CHECK-SD-NEXT:    ld1.b { v0 }[1], [x0], x2
13370; CHECK-SD-NEXT:    str x0, [x1]
13371; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
13372; CHECK-SD-NEXT:    ret
13373;
13374; CHECK-GI-LABEL: test_v8i8_post_reg_ld1lane:
13375; CHECK-GI:       ; %bb.0:
13376; CHECK-GI-NEXT:    ldr b1, [x0]
13377; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
13378; CHECK-GI-NEXT:    add x8, x0, x2
13379; CHECK-GI-NEXT:    str x8, [x1]
13380; CHECK-GI-NEXT:    mov.b v0[1], v1[0]
13381; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
13382; CHECK-GI-NEXT:    ret
13383  %tmp1 = load i8, ptr %bar
13384  %tmp2 = insertelement <8 x i8> %A, i8 %tmp1, i32 1
13385  %tmp3 = getelementptr i8, ptr %bar, i64 %inc
13386  store ptr %tmp3, ptr %ptr
13387  ret <8 x i8> %tmp2
13388}
13389
; Lane-load test: loads one i16 from %bar, inserts it into lane 1 of %A, and
; stores %bar advanced by one i16 element to %ptr.
; SelectionDAG is expected to fold this into a post-indexed ld1.h lane load with
; immediate #2; GlobalISel is expected to emit a post-indexed ldrh plus a
; mov.h lane insert from w8.
13390define <8 x i16> @test_v8i16_post_imm_ld1lane(ptr %bar, ptr %ptr, <8 x i16> %A) {
13391; CHECK-SD-LABEL: test_v8i16_post_imm_ld1lane:
13392; CHECK-SD:       ; %bb.0:
13393; CHECK-SD-NEXT:    ld1.h { v0 }[1], [x0], #2
13394; CHECK-SD-NEXT:    str x0, [x1]
13395; CHECK-SD-NEXT:    ret
13396;
13397; CHECK-GI-LABEL: test_v8i16_post_imm_ld1lane:
13398; CHECK-GI:       ; %bb.0:
13399; CHECK-GI-NEXT:    ldrh w8, [x0], #2
13400; CHECK-GI-NEXT:    str x0, [x1]
13401; CHECK-GI-NEXT:    mov.h v0[1], w8
13402; CHECK-GI-NEXT:    ret
13403  %tmp1 = load i16, ptr %bar
13404  %tmp2 = insertelement <8 x i16> %A, i16 %tmp1, i32 1
13405  %tmp3 = getelementptr i16, ptr %bar, i64 1
13406  store ptr %tmp3, ptr %ptr
13407  ret <8 x i16> %tmp2
13408}
13409
; Lane-load test with a variable increment: loads one i16 from %bar, inserts it
; into lane 1 of %A, and stores %bar advanced by %inc i16 elements to %ptr.
; SelectionDAG is expected to scale %inc with lsl #1 and use a register
; post-indexed ld1.h lane load; GlobalISel is expected to emit a plain ld1.h
; lane load and a separate add with an lsl #1 shifted operand.
13410define <8 x i16> @test_v8i16_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <8 x i16> %A) {
13411; CHECK-SD-LABEL: test_v8i16_post_reg_ld1lane:
13412; CHECK-SD:       ; %bb.0:
13413; CHECK-SD-NEXT:    lsl x8, x2, #1
13414; CHECK-SD-NEXT:    ld1.h { v0 }[1], [x0], x8
13415; CHECK-SD-NEXT:    str x0, [x1]
13416; CHECK-SD-NEXT:    ret
13417;
13418; CHECK-GI-LABEL: test_v8i16_post_reg_ld1lane:
13419; CHECK-GI:       ; %bb.0:
13420; CHECK-GI-NEXT:    ld1.h { v0 }[1], [x0]
13421; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
13422; CHECK-GI-NEXT:    str x8, [x1]
13423; CHECK-GI-NEXT:    ret
13424  %tmp1 = load i16, ptr %bar
13425  %tmp2 = insertelement <8 x i16> %A, i16 %tmp1, i32 1
13426  %tmp3 = getelementptr i16, ptr %bar, i64 %inc
13427  store ptr %tmp3, ptr %ptr
13428  ret <8 x i16> %tmp2
13429}
13430
; Lane-load test: loads one i16 from %bar, inserts it into lane 1 of the 64-bit
; vector %A, and stores %bar advanced by one i16 element to %ptr.
; SelectionDAG is expected to fold this into a post-indexed ld1.h lane load with
; immediate #2; GlobalISel is expected to emit a post-indexed ldrh plus a
; mov.h lane insert from w8. The `kill` lines are register annotations showing
; d0 being treated as q0 around the lane write.
13431define <4 x i16> @test_v4i16_post_imm_ld1lane(ptr %bar, ptr %ptr, <4 x i16> %A) {
13432; CHECK-SD-LABEL: test_v4i16_post_imm_ld1lane:
13433; CHECK-SD:       ; %bb.0:
13434; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
13435; CHECK-SD-NEXT:    ld1.h { v0 }[1], [x0], #2
13436; CHECK-SD-NEXT:    str x0, [x1]
13437; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
13438; CHECK-SD-NEXT:    ret
13439;
13440; CHECK-GI-LABEL: test_v4i16_post_imm_ld1lane:
13441; CHECK-GI:       ; %bb.0:
13442; CHECK-GI-NEXT:    ldrh w8, [x0], #2
13443; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
13444; CHECK-GI-NEXT:    str x0, [x1]
13445; CHECK-GI-NEXT:    mov.h v0[1], w8
13446; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
13447; CHECK-GI-NEXT:    ret
13448  %tmp1 = load i16, ptr %bar
13449  %tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1
13450  %tmp3 = getelementptr i16, ptr %bar, i64 1
13451  store ptr %tmp3, ptr %ptr
13452  ret <4 x i16> %tmp2
13453}
13454
; Lane-load test with a variable increment: loads one i16 from %bar, inserts it
; into lane 1 of the 64-bit vector %A, and stores %bar advanced by %inc i16
; elements to %ptr.
; SelectionDAG is expected to scale %inc with lsl #1 and use a register
; post-indexed ld1.h lane load; GlobalISel is expected to compute the new
; address with an add (lsl #1) and use a plain ld1.h lane load. The `kill` lines
; are register annotations showing d0 being treated as q0 around the lane write.
13455define <4 x i16> @test_v4i16_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4 x i16> %A) {
13456; CHECK-SD-LABEL: test_v4i16_post_reg_ld1lane:
13457; CHECK-SD:       ; %bb.0:
13458; CHECK-SD-NEXT:    lsl x8, x2, #1
13459; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
13460; CHECK-SD-NEXT:    ld1.h { v0 }[1], [x0], x8
13461; CHECK-SD-NEXT:    str x0, [x1]
13462; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
13463; CHECK-SD-NEXT:    ret
13464;
13465; CHECK-GI-LABEL: test_v4i16_post_reg_ld1lane:
13466; CHECK-GI:       ; %bb.0:
13467; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
13468; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
13469; CHECK-GI-NEXT:    ld1.h { v0 }[1], [x0]
13470; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
13471; CHECK-GI-NEXT:    str x8, [x1]
13472; CHECK-GI-NEXT:    ret
13473  %tmp1 = load i16, ptr %bar
13474  %tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1
13475  %tmp3 = getelementptr i16, ptr %bar, i64 %inc
13476  store ptr %tmp3, ptr %ptr
13477  ret <4 x i16> %tmp2
13478}
13479
; Lane-load test: loads one i32 from %bar, inserts it into lane 1 of %A, and
; stores %bar advanced by one i32 element to %ptr.
; SelectionDAG is expected to fold this into a post-indexed ld1.s lane load with
; immediate #4; GlobalISel is expected to emit a post-indexed ldr (w8) plus a
; mov.s lane insert from w8.
13480define <4 x i32> @test_v4i32_post_imm_ld1lane(ptr %bar, ptr %ptr, <4 x i32> %A) {
13481; CHECK-SD-LABEL: test_v4i32_post_imm_ld1lane:
13482; CHECK-SD:       ; %bb.0:
13483; CHECK-SD-NEXT:    ld1.s { v0 }[1], [x0], #4
13484; CHECK-SD-NEXT:    str x0, [x1]
13485; CHECK-SD-NEXT:    ret
13486;
13487; CHECK-GI-LABEL: test_v4i32_post_imm_ld1lane:
13488; CHECK-GI:       ; %bb.0:
13489; CHECK-GI-NEXT:    ldr w8, [x0], #4
13490; CHECK-GI-NEXT:    str x0, [x1]
13491; CHECK-GI-NEXT:    mov.s v0[1], w8
13492; CHECK-GI-NEXT:    ret
13493  %tmp1 = load i32, ptr %bar
13494  %tmp2 = insertelement <4 x i32> %A, i32 %tmp1, i32 1
13495  %tmp3 = getelementptr i32, ptr %bar, i64 1
13496  store ptr %tmp3, ptr %ptr
13497  ret <4 x i32> %tmp2
13498}
13499
; Lane-load test with a variable increment: loads one i32 from %bar, inserts it
; into lane 1 of %A, and stores %bar advanced by %inc i32 elements to %ptr.
; SelectionDAG is expected to scale %inc with lsl #2 and use a register
; post-indexed ld1.s lane load; GlobalISel is expected to emit a plain ld1.s
; lane load and a separate add with an lsl #2 shifted operand.
13500define <4 x i32> @test_v4i32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4 x i32> %A) {
13501; CHECK-SD-LABEL: test_v4i32_post_reg_ld1lane:
13502; CHECK-SD:       ; %bb.0:
13503; CHECK-SD-NEXT:    lsl x8, x2, #2
13504; CHECK-SD-NEXT:    ld1.s { v0 }[1], [x0], x8
13505; CHECK-SD-NEXT:    str x0, [x1]
13506; CHECK-SD-NEXT:    ret
13507;
13508; CHECK-GI-LABEL: test_v4i32_post_reg_ld1lane:
13509; CHECK-GI:       ; %bb.0:
13510; CHECK-GI-NEXT:    ld1.s { v0 }[1], [x0]
13511; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
13512; CHECK-GI-NEXT:    str x8, [x1]
13513; CHECK-GI-NEXT:    ret
13514  %tmp1 = load i32, ptr %bar
13515  %tmp2 = insertelement <4 x i32> %A, i32 %tmp1, i32 1
13516  %tmp3 = getelementptr i32, ptr %bar, i64 %inc
13517  store ptr %tmp3, ptr %ptr
13518  ret <4 x i32> %tmp2
13519}
13520
; Lane-load test: loads one i32 from %bar, inserts it into lane 1 of the 64-bit
; vector %A, and stores %bar advanced by one i32 element to %ptr.
; SelectionDAG is expected to fold this into a post-indexed ld1.s lane load with
; immediate #4; GlobalISel is expected to emit a post-indexed ldr (w8) plus a
; mov.s lane insert from w8. The `kill` lines are register annotations showing
; d0 being treated as q0 around the lane write.
13521define <2 x i32> @test_v2i32_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x i32> %A) {
13522; CHECK-SD-LABEL: test_v2i32_post_imm_ld1lane:
13523; CHECK-SD:       ; %bb.0:
13524; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
13525; CHECK-SD-NEXT:    ld1.s { v0 }[1], [x0], #4
13526; CHECK-SD-NEXT:    str x0, [x1]
13527; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
13528; CHECK-SD-NEXT:    ret
13529;
13530; CHECK-GI-LABEL: test_v2i32_post_imm_ld1lane:
13531; CHECK-GI:       ; %bb.0:
13532; CHECK-GI-NEXT:    ldr w8, [x0], #4
13533; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
13534; CHECK-GI-NEXT:    str x0, [x1]
13535; CHECK-GI-NEXT:    mov.s v0[1], w8
13536; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
13537; CHECK-GI-NEXT:    ret
13538  %tmp1 = load i32, ptr %bar
13539  %tmp2 = insertelement <2 x i32> %A, i32 %tmp1, i32 1
13540  %tmp3 = getelementptr i32, ptr %bar, i64 1
13541  store ptr %tmp3, ptr %ptr
13542  ret <2 x i32> %tmp2
13543}
13544
; Lane-load test with a variable increment: loads one i32 from %bar, inserts it
; into lane 1 of the 64-bit vector %A, and stores %bar advanced by %inc i32
; elements to %ptr.
; SelectionDAG is expected to scale %inc with lsl #2 and use a register
; post-indexed ld1.s lane load; GlobalISel is expected to compute the new
; address with an add (lsl #2) and use a plain ld1.s lane load. The `kill` lines
; are register annotations showing d0 being treated as q0 around the lane write.
13545define <2 x i32> @test_v2i32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2 x i32> %A) {
13546; CHECK-SD-LABEL: test_v2i32_post_reg_ld1lane:
13547; CHECK-SD:       ; %bb.0:
13548; CHECK-SD-NEXT:    lsl x8, x2, #2
13549; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
13550; CHECK-SD-NEXT:    ld1.s { v0 }[1], [x0], x8
13551; CHECK-SD-NEXT:    str x0, [x1]
13552; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
13553; CHECK-SD-NEXT:    ret
13554;
13555; CHECK-GI-LABEL: test_v2i32_post_reg_ld1lane:
13556; CHECK-GI:       ; %bb.0:
13557; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
13558; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
13559; CHECK-GI-NEXT:    ld1.s { v0 }[1], [x0]
13560; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
13561; CHECK-GI-NEXT:    str x8, [x1]
13562; CHECK-GI-NEXT:    ret
13563  %tmp1 = load i32, ptr %bar
13564  %tmp2 = insertelement <2 x i32> %A, i32 %tmp1, i32 1
13565  %tmp3 = getelementptr i32, ptr %bar, i64 %inc
13566  store ptr %tmp3, ptr %ptr
13567  ret <2 x i32> %tmp2
13568}
13569
; Lane-load test: loads one i64 from %bar, inserts it into lane 1 of %A, and
; stores %bar advanced by one i64 element to %ptr.
; SelectionDAG is expected to fold this into a post-indexed ld1.d lane load with
; immediate #8; GlobalISel is expected to emit a post-indexed ldr (x8) plus a
; mov.d lane insert from x8.
13570define <2 x i64> @test_v2i64_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x i64> %A) {
13571; CHECK-SD-LABEL: test_v2i64_post_imm_ld1lane:
13572; CHECK-SD:       ; %bb.0:
13573; CHECK-SD-NEXT:    ld1.d { v0 }[1], [x0], #8
13574; CHECK-SD-NEXT:    str x0, [x1]
13575; CHECK-SD-NEXT:    ret
13576;
13577; CHECK-GI-LABEL: test_v2i64_post_imm_ld1lane:
13578; CHECK-GI:       ; %bb.0:
13579; CHECK-GI-NEXT:    ldr x8, [x0], #8
13580; CHECK-GI-NEXT:    str x0, [x1]
13581; CHECK-GI-NEXT:    mov.d v0[1], x8
13582; CHECK-GI-NEXT:    ret
13583  %tmp1 = load i64, ptr %bar
13584  %tmp2 = insertelement <2 x i64> %A, i64 %tmp1, i32 1
13585  %tmp3 = getelementptr i64, ptr %bar, i64 1
13586  store ptr %tmp3, ptr %ptr
13587  ret <2 x i64> %tmp2
13588}
13589
; Lane-load test with a variable increment: loads one i64 from %bar, inserts it
; into lane 1 of %A, and stores %bar advanced by %inc i64 elements to %ptr.
; SelectionDAG is expected to scale %inc with lsl #3 and use a register
; post-indexed ld1.d lane load; GlobalISel is expected to emit a plain ld1.d
; lane load and a separate add with an lsl #3 shifted operand.
13590define <2 x i64> @test_v2i64_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2 x i64> %A) {
13591; CHECK-SD-LABEL: test_v2i64_post_reg_ld1lane:
13592; CHECK-SD:       ; %bb.0:
13593; CHECK-SD-NEXT:    lsl x8, x2, #3
13594; CHECK-SD-NEXT:    ld1.d { v0 }[1], [x0], x8
13595; CHECK-SD-NEXT:    str x0, [x1]
13596; CHECK-SD-NEXT:    ret
13597;
13598; CHECK-GI-LABEL: test_v2i64_post_reg_ld1lane:
13599; CHECK-GI:       ; %bb.0:
13600; CHECK-GI-NEXT:    ld1.d { v0 }[1], [x0]
13601; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
13602; CHECK-GI-NEXT:    str x8, [x1]
13603; CHECK-GI-NEXT:    ret
13604  %tmp1 = load i64, ptr %bar
13605  %tmp2 = insertelement <2 x i64> %A, i64 %tmp1, i32 1
13606  %tmp3 = getelementptr i64, ptr %bar, i64 %inc
13607  store ptr %tmp3, ptr %ptr
13608  ret <2 x i64> %tmp2
13609}
13610
; Loads one float from %bar into lane 1 of %A and stores %bar advanced by one
; float element to %ptr. The SelectionDAG run expects a lane load with an
; immediate #4 post-index writeback; the GlobalISel run expects a post-indexed
; scalar ldr into s1 followed by a lane-to-lane move.
13611define <4 x float> @test_v4f32_post_imm_ld1lane(ptr %bar, ptr %ptr, <4 x float> %A) {
13612; CHECK-SD-LABEL: test_v4f32_post_imm_ld1lane:
13613; CHECK-SD:       ; %bb.0:
13614; CHECK-SD-NEXT:    ld1.s { v0 }[1], [x0], #4
13615; CHECK-SD-NEXT:    str x0, [x1]
13616; CHECK-SD-NEXT:    ret
13617;
13618; CHECK-GI-LABEL: test_v4f32_post_imm_ld1lane:
13619; CHECK-GI:       ; %bb.0:
13620; CHECK-GI-NEXT:    ldr s1, [x0], #4
13621; CHECK-GI-NEXT:    str x0, [x1]
13622; CHECK-GI-NEXT:    mov.s v0[1], v1[0]
13623; CHECK-GI-NEXT:    ret
13624  %tmp1 = load float, ptr %bar
13625  %tmp2 = insertelement <4 x float> %A, float %tmp1, i32 1
  ; The incremented address is only stored, never used by the load itself.
13626  %tmp3 = getelementptr float, ptr %bar, i64 1
13627  store ptr %tmp3, ptr %ptr
13628  ret <4 x float> %tmp2
13629}
13630
; Register-increment variant for <4 x float>. The address is advanced by %inc
; float elements. The SelectionDAG run expects the increment scaled with
; lsl #2 and used as a register post-index on the lane load; the GlobalISel
; run expects a lane load without writeback plus a separate add.
13631define <4 x float> @test_v4f32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4 x float> %A) {
13632; CHECK-SD-LABEL: test_v4f32_post_reg_ld1lane:
13633; CHECK-SD:       ; %bb.0:
13634; CHECK-SD-NEXT:    lsl x8, x2, #2
13635; CHECK-SD-NEXT:    ld1.s { v0 }[1], [x0], x8
13636; CHECK-SD-NEXT:    str x0, [x1]
13637; CHECK-SD-NEXT:    ret
13638;
13639; CHECK-GI-LABEL: test_v4f32_post_reg_ld1lane:
13640; CHECK-GI:       ; %bb.0:
13641; CHECK-GI-NEXT:    ld1.s { v0 }[1], [x0]
13642; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
13643; CHECK-GI-NEXT:    str x8, [x1]
13644; CHECK-GI-NEXT:    ret
13645  %tmp1 = load float, ptr %bar
13646  %tmp2 = insertelement <4 x float> %A, float %tmp1, i32 1
  ; Element-scaled increment by a runtime value.
13647  %tmp3 = getelementptr float, ptr %bar, i64 %inc
13648  store ptr %tmp3, ptr %ptr
13649  ret <4 x float> %tmp2
13650}
13651
; 64-bit vector variant. Loads one float from %bar into lane 1 of a
; <2 x float> and stores %bar advanced by one float element to %ptr. The
; expected output for both runs carries kill annotations that redefine d0 as
; part of q0 around the lane operation. The SelectionDAG run expects a lane
; load with an immediate #4 post-index; the GlobalISel run expects a
; post-indexed scalar ldr followed by a lane-to-lane move.
13652define <2 x float> @test_v2f32_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x float> %A) {
13653; CHECK-SD-LABEL: test_v2f32_post_imm_ld1lane:
13654; CHECK-SD:       ; %bb.0:
13655; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
13656; CHECK-SD-NEXT:    ld1.s { v0 }[1], [x0], #4
13657; CHECK-SD-NEXT:    str x0, [x1]
13658; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
13659; CHECK-SD-NEXT:    ret
13660;
13661; CHECK-GI-LABEL: test_v2f32_post_imm_ld1lane:
13662; CHECK-GI:       ; %bb.0:
13663; CHECK-GI-NEXT:    ldr s1, [x0], #4
13664; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
13665; CHECK-GI-NEXT:    str x0, [x1]
13666; CHECK-GI-NEXT:    mov.s v0[1], v1[0]
13667; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
13668; CHECK-GI-NEXT:    ret
13669  %tmp1 = load float, ptr %bar
13670  %tmp2 = insertelement <2 x float> %A, float %tmp1, i32 1
  ; The incremented address is only stored, never used by the load itself.
13671  %tmp3 = getelementptr float, ptr %bar, i64 1
13672  store ptr %tmp3, ptr %ptr
13673  ret <2 x float> %tmp2
13674}
13675
; 64-bit vector, register-increment variant. The address is advanced by %inc
; float elements. The SelectionDAG run expects the increment scaled with
; lsl #2 and used as a register post-index on the lane load; the GlobalISel
; run expects a lane load without writeback plus a separate add. Both
; expectations carry kill annotations that redefine d0 as part of q0.
13676define <2 x float> @test_v2f32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2 x float> %A) {
13677; CHECK-SD-LABEL: test_v2f32_post_reg_ld1lane:
13678; CHECK-SD:       ; %bb.0:
13679; CHECK-SD-NEXT:    lsl x8, x2, #2
13680; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
13681; CHECK-SD-NEXT:    ld1.s { v0 }[1], [x0], x8
13682; CHECK-SD-NEXT:    str x0, [x1]
13683; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
13684; CHECK-SD-NEXT:    ret
13685;
13686; CHECK-GI-LABEL: test_v2f32_post_reg_ld1lane:
13687; CHECK-GI:       ; %bb.0:
13688; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
13689; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
13690; CHECK-GI-NEXT:    ld1.s { v0 }[1], [x0]
13691; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
13692; CHECK-GI-NEXT:    str x8, [x1]
13693; CHECK-GI-NEXT:    ret
13694  %tmp1 = load float, ptr %bar
13695  %tmp2 = insertelement <2 x float> %A, float %tmp1, i32 1
  ; Element-scaled increment by a runtime value.
13696  %tmp3 = getelementptr float, ptr %bar, i64 %inc
13697  store ptr %tmp3, ptr %ptr
13698  ret <2 x float> %tmp2
13699}
13700
; Loads one double from %bar into lane 1 of %A and stores %bar advanced by one
; double element to %ptr. The SelectionDAG run expects a lane load with an
; immediate #8 post-index writeback; the GlobalISel run expects a post-indexed
; scalar ldr into d1 followed by a lane-to-lane move.
13701define <2 x double> @test_v2f64_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x double> %A) {
13702; CHECK-SD-LABEL: test_v2f64_post_imm_ld1lane:
13703; CHECK-SD:       ; %bb.0:
13704; CHECK-SD-NEXT:    ld1.d { v0 }[1], [x0], #8
13705; CHECK-SD-NEXT:    str x0, [x1]
13706; CHECK-SD-NEXT:    ret
13707;
13708; CHECK-GI-LABEL: test_v2f64_post_imm_ld1lane:
13709; CHECK-GI:       ; %bb.0:
13710; CHECK-GI-NEXT:    ldr d1, [x0], #8
13711; CHECK-GI-NEXT:    str x0, [x1]
13712; CHECK-GI-NEXT:    mov.d v0[1], v1[0]
13713; CHECK-GI-NEXT:    ret
13714  %tmp1 = load double, ptr %bar
13715  %tmp2 = insertelement <2 x double> %A, double %tmp1, i32 1
  ; The incremented address is only stored, never used by the load itself.
13716  %tmp3 = getelementptr double, ptr %bar, i64 1
13717  store ptr %tmp3, ptr %ptr
13718  ret <2 x double> %tmp2
13719}
13720
; Register-increment variant for <2 x double>. The address is advanced by %inc
; double elements. The SelectionDAG run expects the increment scaled with
; lsl #3 and used as a register post-index on the lane load; the GlobalISel
; run expects a lane load without writeback plus a separate add.
13721define <2 x double> @test_v2f64_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2 x double> %A) {
13722; CHECK-SD-LABEL: test_v2f64_post_reg_ld1lane:
13723; CHECK-SD:       ; %bb.0:
13724; CHECK-SD-NEXT:    lsl x8, x2, #3
13725; CHECK-SD-NEXT:    ld1.d { v0 }[1], [x0], x8
13726; CHECK-SD-NEXT:    str x0, [x1]
13727; CHECK-SD-NEXT:    ret
13728;
13729; CHECK-GI-LABEL: test_v2f64_post_reg_ld1lane:
13730; CHECK-GI:       ; %bb.0:
13731; CHECK-GI-NEXT:    ld1.d { v0 }[1], [x0]
13732; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
13733; CHECK-GI-NEXT:    str x8, [x1]
13734; CHECK-GI-NEXT:    ret
13735  %tmp1 = load double, ptr %bar
13736  %tmp2 = insertelement <2 x double> %A, double %tmp1, i32 1
  ; Element-scaled increment by a runtime value.
13737  %tmp3 = getelementptr double, ptr %bar, i64 %inc
13738  store ptr %tmp3, ptr %ptr
13739  ret <2 x double> %tmp2
13740}
13741
13742; Check for dependencies between the vector and the scalar load.
; Here the vector operand %A is itself loaded from memory, after a store that
; follows the scalar load. The shared expectation for both runs keeps a plain
; scalar ldr, a separate add for the new address and a lane-to-lane move, so
; no post-indexed lane load is formed.
; NOTE(review): presumably the intervening store to %dep_ptr_1 is what orders
; the two loads and blocks the combine; confirm against the combine's code.
13743define <4 x float> @test_v4f32_post_reg_ld1lane_dep_vec_on_load(ptr %bar, ptr %ptr, i64 %inc, ptr %dep_ptr_1, ptr %dep_ptr_2, <4 x float> %vec) {
13744; CHECK-LABEL: test_v4f32_post_reg_ld1lane_dep_vec_on_load:
13745; CHECK:       ; %bb.0:
13746; CHECK-NEXT:    ldr s1, [x0]
13747; CHECK-NEXT:    str q0, [x3]
13748; CHECK-NEXT:    add x8, x0, x2, lsl #2
13749; CHECK-NEXT:    ldr q0, [x4]
13750; CHECK-NEXT:    str x8, [x1]
13751; CHECK-NEXT:    mov.s v0[1], v1[0]
13752; CHECK-NEXT:    ret
13753  %tmp1 = load float, ptr %bar
  ; Store placed between the scalar load and the vector load.
13754  store <4 x float> %vec, ptr %dep_ptr_1, align 16
13755  %A = load <4 x float>, ptr %dep_ptr_2, align 16
13756  %tmp2 = insertelement <4 x float> %A, float %tmp1, i32 1
13757  %tmp3 = getelementptr float, ptr %bar, i64 %inc
13758  store ptr %tmp3, ptr %ptr
13759  ret <4 x float> %tmp2
13760}
13761
13762; Make sure that we test the narrow V64 code path.
13763; The tests above don't, because there, 64-bit insert_vector_elt nodes will be
13764; widened to 128-bit before the LD1LANEpost combine has the chance to run,
13765; making it avoid narrow vector types.
13766; One way to trick that combine into running early is to force the vector ops
13767; legalizer to run.  We achieve that using the ctpop.
13768; PR23265
; The SelectionDAG expectation uses a register post-indexed ld1.h lane load;
; the GlobalISel expectation uses a lane load without writeback plus a
; separate add.
13769define <4 x i16> @test_v4i16_post_reg_ld1lane_forced_narrow(ptr %bar, ptr %ptr, i64 %inc, <4 x i16> %A, ptr %d) {
13770; CHECK-SD-LABEL: test_v4i16_post_reg_ld1lane_forced_narrow:
13771; CHECK-SD:       ; %bb.0:
13772; CHECK-SD-NEXT:    lsl x8, x2, #1
13773; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
13774; CHECK-SD-NEXT:    ld1.h { v0 }[1], [x0], x8
13775; CHECK-SD-NEXT:    str x0, [x1]
13776; CHECK-SD-NEXT:    ldr d1, [x3]
13777; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
13778; CHECK-SD-NEXT:    cnt.8b v1, v1
13779; CHECK-SD-NEXT:    uaddlp.4h v1, v1
13780; CHECK-SD-NEXT:    uaddlp.2s v1, v1
13781; CHECK-SD-NEXT:    str d1, [x3]
13782; CHECK-SD-NEXT:    ret
13783;
13784; CHECK-GI-LABEL: test_v4i16_post_reg_ld1lane_forced_narrow:
13785; CHECK-GI:       ; %bb.0:
13786; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
13787; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
13788; CHECK-GI-NEXT:    ld1.h { v0 }[1], [x0]
13789; CHECK-GI-NEXT:    str x8, [x1]
13790; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
13791; CHECK-GI-NEXT:    ldr d1, [x3]
13792; CHECK-GI-NEXT:    cnt.8b v1, v1
13793; CHECK-GI-NEXT:    uaddlp.4h v1, v1
13794; CHECK-GI-NEXT:    uaddlp.2s v1, v1
13795; CHECK-GI-NEXT:    str d1, [x3]
13796; CHECK-GI-NEXT:    ret
13797  %tmp1 = load i16, ptr %bar
13798  %tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1
13799  %tmp3 = getelementptr i16, ptr %bar, i64 %inc
13800  store ptr %tmp3, ptr %ptr
  ; Unrelated ctpop round trip through %d; it is only here to force the
  ; vector ops legalizer to run, as described in the comment above.
13801  %dl =  load <2 x i32>,  ptr %d
13802  %dr = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %dl)
13803  store <2 x i32> %dr, ptr %d
13804  ret <4 x i16> %tmp2
13805}
13806
; Population-count intrinsic called by
; test_v4i16_post_reg_ld1lane_forced_narrow.
13807declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
13808
; Builds two <2 x i32> vectors from four independent scalar loads, subtracts
; them and stores the result to %out. Both runs expect lane 0 of each vector
; to come from a scalar ldr and lane 1 from a ld1.s lane load, with no
; address writeback. The two expectations differ only in register assignment
; and therefore in the operand order of the sub.
13809define void @test_ld1lane_build(ptr %ptr0, ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %out) {
13810; CHECK-SD-LABEL: test_ld1lane_build:
13811; CHECK-SD:       ; %bb.0:
13812; CHECK-SD-NEXT:    ldr s0, [x2]
13813; CHECK-SD-NEXT:    ldr s1, [x0]
13814; CHECK-SD-NEXT:    ld1.s { v0 }[1], [x3]
13815; CHECK-SD-NEXT:    ld1.s { v1 }[1], [x1]
13816; CHECK-SD-NEXT:    sub.2s v0, v1, v0
13817; CHECK-SD-NEXT:    str d0, [x4]
13818; CHECK-SD-NEXT:    ret
13819;
13820; CHECK-GI-LABEL: test_ld1lane_build:
13821; CHECK-GI:       ; %bb.0:
13822; CHECK-GI-NEXT:    ldr s0, [x0]
13823; CHECK-GI-NEXT:    ldr s1, [x2]
13824; CHECK-GI-NEXT:    ld1.s { v0 }[1], [x1]
13825; CHECK-GI-NEXT:    ld1.s { v1 }[1], [x3]
13826; CHECK-GI-NEXT:    sub.2s v0, v0, v1
13827; CHECK-GI-NEXT:    str d0, [x4]
13828; CHECK-GI-NEXT:    ret
  ; First operand, lanes 0 and 1 from %ptr0 and %ptr1.
13829  %load0 = load i32, ptr %ptr0, align 4
13830  %load1 = load i32, ptr %ptr1, align 4
13831  %vec0_0 = insertelement <2 x i32> undef, i32 %load0, i32 0
13832  %vec0_1 = insertelement <2 x i32> %vec0_0, i32 %load1, i32 1
13833
  ; Second operand, lanes 0 and 1 from %ptr2 and %ptr3.
13834  %load2 = load i32, ptr %ptr2, align 4
13835  %load3 = load i32, ptr %ptr3, align 4
13836  %vec1_0 = insertelement <2 x i32> undef, i32 %load2, i32 0
13837  %vec1_1 = insertelement <2 x i32> %vec1_0, i32 %load3, i32 1
13838
13839  %sub = sub nsw <2 x i32> %vec0_1, %vec1_1
13840  store <2 x i32> %sub, ptr %out, align 16
13841  ret void
13842}
13843
; Builds a <4 x i16> from four scalar loads, subtracts the argument vector %e
; and stores the result to %p. Both runs share one expectation, a scalar ldr
; for lane 0 followed by three ld1.h lane loads for lanes 1 to 3.
13844define void  @test_ld1lane_build_i16(ptr %a, ptr %b, ptr %c, ptr %d, <4 x i16> %e, ptr %p) {
13845; CHECK-LABEL: test_ld1lane_build_i16:
13846; CHECK:       ; %bb.0:
13847; CHECK-NEXT:    ldr h1, [x0]
13848; CHECK-NEXT:    ld1.h { v1 }[1], [x1]
13849; CHECK-NEXT:    ld1.h { v1 }[2], [x2]
13850; CHECK-NEXT:    ld1.h { v1 }[3], [x3]
13851; CHECK-NEXT:    sub.4h v0, v1, v0
13852; CHECK-NEXT:    str d0, [x4]
13853; CHECK-NEXT:    ret
13854  %ld.a = load i16, ptr %a
13855  %ld.b = load i16, ptr %b
13856  %ld.c = load i16, ptr %c
13857  %ld.d = load i16, ptr %d
  ; Insert the four loaded scalars into consecutive lanes.
13858  %v.a = insertelement <4 x i16> undef, i16 %ld.a, i64 0
13859  %v.b = insertelement <4 x i16> %v.a, i16 %ld.b, i64 1
13860  %v.c = insertelement <4 x i16> %v.b, i16 %ld.c, i64 2
13861  %v = insertelement <4 x i16> %v.c, i16 %ld.d, i64 3
13862  %sub = sub nsw <4 x i16> %v, %e
13863  store <4 x i16> %sub, ptr %p
13864  ret void
13865}
13866
; Half-precision counterpart of test_ld1lane_build_i16. The vector is built
; with a scalar ldr plus three ld1.h lane loads. The expected output widens
; both operands with fcvtl, subtracts in single precision and narrows the
; result with fcvtn before storing it to %p.
13867define void  @test_ld1lane_build_half(ptr %a, ptr %b, ptr %c, ptr %d, <4 x half> %e, ptr %p) {
13868; CHECK-LABEL: test_ld1lane_build_half:
13869; CHECK:       ; %bb.0:
13870; CHECK-NEXT:    ldr h1, [x0]
13871; CHECK-NEXT:    fcvtl v0.4s, v0.4h
13872; CHECK-NEXT:    ld1.h { v1 }[1], [x1]
13873; CHECK-NEXT:    ld1.h { v1 }[2], [x2]
13874; CHECK-NEXT:    ld1.h { v1 }[3], [x3]
13875; CHECK-NEXT:    fcvtl v1.4s, v1.4h
13876; CHECK-NEXT:    fsub.4s v0, v1, v0
13877; CHECK-NEXT:    fcvtn v0.4h, v0.4s
13878; CHECK-NEXT:    str d0, [x4]
13879; CHECK-NEXT:    ret
13880  %ld.a = load half, ptr %a
13881  %ld.b = load half, ptr %b
13882  %ld.c = load half, ptr %c
13883  %ld.d = load half, ptr %d
  ; Insert the four loaded scalars into consecutive lanes.
13884  %v.a = insertelement <4 x half> undef, half %ld.a, i64 0
13885  %v.b = insertelement <4 x half> %v.a, half %ld.b, i64 1
13886  %v.c = insertelement <4 x half> %v.b, half %ld.c, i64 2
13887  %v = insertelement <4 x half> %v.c, half %ld.d, i64 3
13888  %sub = fsub <4 x half> %v, %e
13889  store <4 x half> %sub, ptr %p
13890  ret void
13891}
13892
; Builds an <8 x i8> from eight scalar loads, subtracts the argument vector %v
; and stores the result to %p. In the expected output the eight source
; pointers occupy x0 to x7 and %p is reloaded from the stack. The
; SelectionDAG run expects a scalar ldr plus seven ld1.b lane loads; the
; GlobalISel run expects scalar ldr instructions each followed by a mov.b
; lane insert.
; NOTE(review): the GlobalISel expectation includes a self-insert
; "mov.b v1[0], v1[0]", which looks redundant; confirm whether it is a known
; missed optimization.
13893define void  @test_ld1lane_build_i8(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e, ptr %f, ptr %g, ptr %h, <8 x i8> %v, ptr %p) {
13894; CHECK-SD-LABEL: test_ld1lane_build_i8:
13895; CHECK-SD:       ; %bb.0:
13896; CHECK-SD-NEXT:    ldr b1, [x0]
13897; CHECK-SD-NEXT:    ldr x8, [sp]
13898; CHECK-SD-NEXT:    ld1.b { v1 }[1], [x1]
13899; CHECK-SD-NEXT:    ld1.b { v1 }[2], [x2]
13900; CHECK-SD-NEXT:    ld1.b { v1 }[3], [x3]
13901; CHECK-SD-NEXT:    ld1.b { v1 }[4], [x4]
13902; CHECK-SD-NEXT:    ld1.b { v1 }[5], [x5]
13903; CHECK-SD-NEXT:    ld1.b { v1 }[6], [x6]
13904; CHECK-SD-NEXT:    ld1.b { v1 }[7], [x7]
13905; CHECK-SD-NEXT:    sub.8b v0, v1, v0
13906; CHECK-SD-NEXT:    str d0, [x8]
13907; CHECK-SD-NEXT:    ret
13908;
13909; CHECK-GI-LABEL: test_ld1lane_build_i8:
13910; CHECK-GI:       ; %bb.0:
13911; CHECK-GI-NEXT:    ldr b1, [x0]
13912; CHECK-GI-NEXT:    ldr b2, [x1]
13913; CHECK-GI-NEXT:    ldr x8, [sp]
13914; CHECK-GI-NEXT:    mov.b v1[0], v1[0]
13915; CHECK-GI-NEXT:    mov.b v1[1], v2[0]
13916; CHECK-GI-NEXT:    ldr b2, [x2]
13917; CHECK-GI-NEXT:    mov.b v1[2], v2[0]
13918; CHECK-GI-NEXT:    ldr b2, [x3]
13919; CHECK-GI-NEXT:    mov.b v1[3], v2[0]
13920; CHECK-GI-NEXT:    ldr b2, [x4]
13921; CHECK-GI-NEXT:    mov.b v1[4], v2[0]
13922; CHECK-GI-NEXT:    ldr b2, [x5]
13923; CHECK-GI-NEXT:    mov.b v1[5], v2[0]
13924; CHECK-GI-NEXT:    ldr b2, [x6]
13925; CHECK-GI-NEXT:    mov.b v1[6], v2[0]
13926; CHECK-GI-NEXT:    ldr b2, [x7]
13927; CHECK-GI-NEXT:    mov.b v1[7], v2[0]
13928; CHECK-GI-NEXT:    sub.8b v0, v1, v0
13929; CHECK-GI-NEXT:    str d0, [x8]
13930; CHECK-GI-NEXT:    ret
13931  %ld.a = load i8, ptr %a
13932  %ld.b = load i8, ptr %b
13933  %ld.c = load i8, ptr %c
13934  %ld.d = load i8, ptr %d
13935  %ld.e = load i8, ptr %e
13936  %ld.f = load i8, ptr %f
13937  %ld.g = load i8, ptr %g
13938  %ld.h = load i8, ptr %h
  ; Insert the eight loaded scalars into consecutive lanes.
13939  %v.a = insertelement <8 x i8> undef, i8 %ld.a, i64 0
13940  %v.b = insertelement <8 x i8> %v.a,  i8 %ld.b, i64 1
13941  %v.c = insertelement <8 x i8> %v.b,  i8 %ld.c, i64 2
13942  %v.d = insertelement <8 x i8> %v.c,  i8 %ld.d, i64 3
13943  %v.e = insertelement <8 x i8> %v.d,  i8 %ld.e, i64 4
13944  %v.f = insertelement <8 x i8> %v.e,  i8 %ld.f, i64 5
13945  %v.g = insertelement <8 x i8> %v.f,  i8 %ld.g, i64 6
13946  %v1 = insertelement <8 x i8> %v.g,  i8 %ld.h, i64 7
13947  %sub = sub nsw <8 x i8> %v1, %v
13948  store <8 x i8> %sub, ptr %p
13949  ret void
13950}
13951
; The address increment is extracted from the vector produced by the lane
; insert, so it depends on the loaded element. The shared expectation for both
; runs is a plain ld1.s lane load without writeback, with the new address
; computed afterwards from d0 and stored to @var.
; NOTE(review): going by the name, folding the increment into the load would
; presumably create a dependency cycle; confirm against the combine's code.
13952define <4 x i32> @test_inc_cycle(<4 x i32> %vec, ptr %in) {
13953; CHECK-LABEL: test_inc_cycle:
13954; CHECK:       ; %bb.0:
13955; CHECK-NEXT:    ld1.s { v0 }[0], [x0]
13956; CHECK-NEXT:    adrp x9, _var@PAGE
13957; CHECK-NEXT:    fmov x8, d0
13958; CHECK-NEXT:    add x8, x0, x8, lsl #2
13959; CHECK-NEXT:    str x8, [x9, _var@PAGEOFF]
13960; CHECK-NEXT:    ret
13961  %elt = load i32, ptr %in
13962  %newvec = insertelement <4 x i32> %vec, i32 %elt, i32 0
13963
13964  ; %inc cannot be %elt directly because we check that the load is only
13965  ; used by the insert before trying to form post-inc.
13966  %inc.vec = bitcast <4 x i32> %newvec to <2 x i64>
13967  %inc = extractelement <2 x i64> %inc.vec, i32 0
13968  %newaddr = getelementptr i32, ptr %in, i64 %inc
13969  store ptr %newaddr, ptr @var
13970
13971  ret <4 x i32> %newvec
13972}
13973
; Global pointer slot that test_inc_cycle stores its computed address into.
13974@var = global ptr null
13975
; Extracts one i8 at a runtime index from a loaded <16 x i8>. The SelectionDAG
; expectation loads the whole vector, spills it to a 16-byte stack slot and
; reads the byte back from an address formed by merging the low four index
; bits into the stack pointer copy. The GlobalISel expectation masks the index
; with 0xf and loads the byte directly from %A.
13976define i8 @load_single_extract_variable_index_i8(ptr %A, i32 %idx) {
13977; CHECK-SD-LABEL: load_single_extract_variable_index_i8:
13978; CHECK-SD:       ; %bb.0:
13979; CHECK-SD-NEXT:    sub sp, sp, #16
13980; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
13981; CHECK-SD-NEXT:    mov x8, sp
13982; CHECK-SD-NEXT:    ldr q0, [x0]
13983; CHECK-SD-NEXT:    ; kill: def $w1 killed $w1 def $x1
13984; CHECK-SD-NEXT:    bfxil x8, x1, #0, #4
13985; CHECK-SD-NEXT:    str q0, [sp]
13986; CHECK-SD-NEXT:    ldrb w0, [x8]
13987; CHECK-SD-NEXT:    add sp, sp, #16
13988; CHECK-SD-NEXT:    ret
13989;
13990; CHECK-GI-LABEL: load_single_extract_variable_index_i8:
13991; CHECK-GI:       ; %bb.0:
13992; CHECK-GI-NEXT:    mov w8, w1
13993; CHECK-GI-NEXT:    and x8, x8, #0xf
13994; CHECK-GI-NEXT:    ldrb w0, [x0, x8]
13995; CHECK-GI-NEXT:    ret
13996  %lv = load <16 x i8>, ptr %A
13997  %e = extractelement <16 x i8> %lv, i32 %idx
13998  ret i8 %e
13999}
14000
; Extracts one i16 at a runtime index from a loaded <8 x i16>. The
; SelectionDAG expectation spills the loaded vector to a 16-byte stack slot
; and reads the halfword back from an address formed by inserting three index
; bits at bit position 1 of the stack pointer copy. The GlobalISel expectation
; masks the index with 0x7 and loads the halfword directly from %A.
14001define i16 @load_single_extract_variable_index_i16(ptr %A, i32 %idx) {
14002; CHECK-SD-LABEL: load_single_extract_variable_index_i16:
14003; CHECK-SD:       ; %bb.0:
14004; CHECK-SD-NEXT:    sub sp, sp, #16
14005; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
14006; CHECK-SD-NEXT:    mov x8, sp
14007; CHECK-SD-NEXT:    ldr q0, [x0]
14008; CHECK-SD-NEXT:    ; kill: def $w1 killed $w1 def $x1
14009; CHECK-SD-NEXT:    bfi x8, x1, #1, #3
14010; CHECK-SD-NEXT:    str q0, [sp]
14011; CHECK-SD-NEXT:    ldrh w0, [x8]
14012; CHECK-SD-NEXT:    add sp, sp, #16
14013; CHECK-SD-NEXT:    ret
14014;
14015; CHECK-GI-LABEL: load_single_extract_variable_index_i16:
14016; CHECK-GI:       ; %bb.0:
14017; CHECK-GI-NEXT:    mov w8, w1
14018; CHECK-GI-NEXT:    and x8, x8, #0x7
14019; CHECK-GI-NEXT:    ldrh w0, [x0, x8, lsl #1]
14020; CHECK-GI-NEXT:    ret
14021  %lv = load <8 x i16>, ptr %A
14022  %e = extractelement <8 x i16> %lv, i32 %idx
14023  ret i16 %e
14024}
14025
; Extracts one i32 at a runtime index from a loaded <4 x i32>. Both runs
; expect the index to be masked with 0x3 and the word to be loaded directly
; from %A with a scaled register offset, with no full vector load. They differ
; only in how the 32-bit index is extended to 64 bits.
14026define i32 @load_single_extract_variable_index_i32(ptr %A, i32 %idx) {
14027; CHECK-SD-LABEL: load_single_extract_variable_index_i32:
14028; CHECK-SD:       ; %bb.0:
14029; CHECK-SD-NEXT:    ; kill: def $w1 killed $w1 def $x1
14030; CHECK-SD-NEXT:    and x8, x1, #0x3
14031; CHECK-SD-NEXT:    ldr w0, [x0, x8, lsl #2]
14032; CHECK-SD-NEXT:    ret
14033;
14034; CHECK-GI-LABEL: load_single_extract_variable_index_i32:
14035; CHECK-GI:       ; %bb.0:
14036; CHECK-GI-NEXT:    mov w8, w1
14037; CHECK-GI-NEXT:    and x8, x8, #0x3
14038; CHECK-GI-NEXT:    ldr w0, [x0, x8, lsl #2]
14039; CHECK-GI-NEXT:    ret
14040  %lv = load <4 x i32>, ptr %A
14041  %e = extractelement <4 x i32> %lv, i32 %idx
14042  ret i32 %e
14043}
14044
; Extracts one i32 at a runtime index from a <3 x i32> loaded with align 2.
; Three elements is not a power of two, and the shared expectation clamps the
; index to at most 2 with cmp and csel instead of masking it, then loads the
; word directly from %A.
14045define i32 @load_single_extract_variable_index_v3i32_small_align(ptr %A, i32 %idx) {
14046; CHECK-LABEL: load_single_extract_variable_index_v3i32_small_align:
14047; CHECK:       ; %bb.0:
14048; CHECK-NEXT:    mov w9, w1
14049; CHECK-NEXT:    mov w8, #2 ; =0x2
14050; CHECK-NEXT:    cmp x9, #2
14051; CHECK-NEXT:    csel x8, x9, x8, lo
14052; CHECK-NEXT:    ldr w0, [x0, x8, lsl #2]
14053; CHECK-NEXT:    ret
14054  %lv = load <3 x i32>, ptr %A, align 2
14055  %e = extractelement <3 x i32> %lv, i32 %idx
14056  ret i32 %e
14057}
14058
; Same as the small_align variant, but the <3 x i32> load carries no explicit
; alignment. The expected output is identical, with the index clamped to at
; most 2 via cmp and csel followed by a direct scaled word load from %A.
14059define i32 @load_single_extract_variable_index_v3i32_default_align(ptr %A, i32 %idx) {
14060; CHECK-LABEL: load_single_extract_variable_index_v3i32_default_align:
14061; CHECK:       ; %bb.0:
14062; CHECK-NEXT:    mov w9, w1
14063; CHECK-NEXT:    mov w8, #2 ; =0x2
14064; CHECK-NEXT:    cmp x9, #2
14065; CHECK-NEXT:    csel x8, x9, x8, lo
14066; CHECK-NEXT:    ldr w0, [x0, x8, lsl #2]
14067; CHECK-NEXT:    ret
14068  %lv = load <3 x i32>, ptr %A
14069  %e = extractelement <3 x i32> %lv, i32 %idx
14070  ret i32 %e
14071}
14072
; Extracts the last element (constant index 2) of a loaded <3 x i32>. The
; shared expectation is a single word load at byte offset 8 from %A. The %idx
; argument is not used by the body.
14073define i32 @load_single_extract_valid_const_index_v3i32(ptr %A, i32 %idx) {
14074; CHECK-LABEL: load_single_extract_valid_const_index_v3i32:
14075; CHECK:       ; %bb.0:
14076; CHECK-NEXT:    ldr w0, [x0, #8]
14077; CHECK-NEXT:    ret
14078  %lv = load <3 x i32>, ptr %A
14079  %e = extractelement <3 x i32> %lv, i32 2
14080  ret i32 %e
14081}
14082
; The index is already masked to the range 0 to 3 in the IR before the
; extract. The shared expectation contains a single 32-bit and followed by a
; word load that uses the masked index as a zero-extended, scaled offset.
14083define i32 @load_single_extract_variable_index_masked_i32(ptr %A, i32 %idx) {
14084; CHECK-LABEL: load_single_extract_variable_index_masked_i32:
14085; CHECK:       ; %bb.0:
14086; CHECK-NEXT:    and w8, w1, #0x3
14087; CHECK-NEXT:    ldr w0, [x0, w8, uxtw #2]
14088; CHECK-NEXT:    ret
14089  %idx.x = and i32 %idx, 3
14090  %lv = load <4 x i32>, ptr %A
14091  %e = extractelement <4 x i32> %lv, i32 %idx.x
14092  ret i32 %e
14093}
14094
; Like the masked_i32 variant, but the IR masks the index with 1, which is
; narrower than the four-element vector requires. The shared expectation keeps
; that single and with 0x1 and loads the word directly from %A.
14095define i32 @load_single_extract_variable_index_masked2_i32(ptr %A, i32 %idx) {
14096; CHECK-LABEL: load_single_extract_variable_index_masked2_i32:
14097; CHECK:       ; %bb.0:
14098; CHECK-NEXT:    and w8, w1, #0x1
14099; CHECK-NEXT:    ldr w0, [x0, w8, uxtw #2]
14100; CHECK-NEXT:    ret
14101  %idx.x = and i32 %idx, 1
14102  %lv = load <4 x i32>, ptr %A
14103  %e = extractelement <4 x i32> %lv, i32 %idx.x
14104  ret i32 %e
14105}
14106