xref: /llvm-project/llvm/test/CodeGen/ARM/arm-vlddup-update.ll (revision de75e5079ae1d4894c918fd452e468fb6a888be1)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2; RUN: llc < %s -mtriple=armv8-linux-gnueabi --float-abi=hard -verify-machineinstrs \
3; RUN:     -asm-verbose=false | FileCheck %s
4
; Aggregate types returned by the vldNdup intrinsics used in this file.
; Names follow the pattern uint<bits>x<lanes>x<N>_t, where N is the number
; of member vectors.

; 64-bit member vectors.
%struct.uint8x8x2_t = type { <8 x i8>, <8 x i8> }
%struct.uint8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
%struct.uint8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }

%struct.uint16x4x2_t = type { <4 x i16>, <4 x i16> }
%struct.uint16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
%struct.uint16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }

%struct.uint32x2x2_t = type { <2 x i32>, <2 x i32> }
%struct.uint32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
%struct.uint32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }

%struct.uint64x1x2_t = type { <1 x i64>, <1 x i64> }
%struct.uint64x1x3_t = type { <1 x i64>, <1 x i64>, <1 x i64> }
%struct.uint64x1x4_t = type { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }

; 128-bit member vectors.
%struct.uint8x16x2_t = type { <16 x i8>, <16 x i8> }
%struct.uint8x16x3_t = type { <16 x i8>, <16 x i8>, <16 x i8> }
%struct.uint8x16x4_t = type { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }

%struct.uint16x8x2_t = type { <8 x i16>, <8 x i16> }
%struct.uint16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
%struct.uint16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }

%struct.uint32x4x2_t = type { <4 x i32>, <4 x i32> }
%struct.uint32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
%struct.uint32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }
32
; NEON load-and-duplicate intrinsics exercised below. Each takes the source
; pointer plus an i32 operand (the tests pass the element size in bytes;
; presumably the alignment - confirm against the intrinsic definition).

; Variants returning 64-bit vectors, grouped by element type.
declare %struct.uint8x8x2_t @llvm.arm.neon.vld2dup.v8i8.p0(ptr, i32)
declare %struct.uint8x8x3_t @llvm.arm.neon.vld3dup.v8i8.p0(ptr, i32)
declare %struct.uint8x8x4_t @llvm.arm.neon.vld4dup.v8i8.p0(ptr, i32)

declare %struct.uint16x4x2_t @llvm.arm.neon.vld2dup.v4i16.p0(ptr, i32)
declare %struct.uint16x4x3_t @llvm.arm.neon.vld3dup.v4i16.p0(ptr, i32)
declare %struct.uint16x4x4_t @llvm.arm.neon.vld4dup.v4i16.p0(ptr, i32)

declare %struct.uint32x2x2_t @llvm.arm.neon.vld2dup.v2i32.p0(ptr, i32)
declare %struct.uint32x2x3_t @llvm.arm.neon.vld3dup.v2i32.p0(ptr, i32)
declare %struct.uint32x2x4_t @llvm.arm.neon.vld4dup.v2i32.p0(ptr, i32)

declare %struct.uint64x1x2_t @llvm.arm.neon.vld2dup.v1i64.p0(ptr, i32)
declare %struct.uint64x1x3_t @llvm.arm.neon.vld3dup.v1i64.p0(ptr, i32)
declare %struct.uint64x1x4_t @llvm.arm.neon.vld4dup.v1i64.p0(ptr, i32)

; Variants returning 128-bit vectors, grouped by element type.
declare %struct.uint8x16x2_t @llvm.arm.neon.vld2dup.v16i8.p0(ptr, i32)
declare %struct.uint8x16x3_t @llvm.arm.neon.vld3dup.v16i8.p0(ptr, i32)
declare %struct.uint8x16x4_t @llvm.arm.neon.vld4dup.v16i8.p0(ptr, i32)

declare %struct.uint16x8x2_t @llvm.arm.neon.vld2dup.v8i16.p0(ptr, i32)
declare %struct.uint16x8x3_t @llvm.arm.neon.vld3dup.v8i16.p0(ptr, i32)
declare %struct.uint16x8x4_t @llvm.arm.neon.vld4dup.v8i16.p0(ptr, i32)

declare %struct.uint32x4x2_t @llvm.arm.neon.vld2dup.v4i32.p0(ptr, i32)
declare %struct.uint32x4x3_t @llvm.arm.neon.vld3dup.v4i32.p0(ptr, i32)
declare %struct.uint32x4x4_t @llvm.arm.neon.vld4dup.v4i32.p0(ptr, i32)
59
; vld2dup of i16 elements into two <4 x i16> vectors, stored to %dest.
; The returned pointer is %src + 4 bytes (two i16 elements); the expected
; assembly folds that bump into the load as post-increment writeback.
define ptr @test_vld2_dup_u16_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld2_dup_u16_update:
; CHECK:         vld2.16 {d16[], d17[]}, [r1]!
; CHECK-NEXT:    vst1.16 {d16}, [r0:64]!
; CHECK-NEXT:    vstr d17, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %pair = tail call %struct.uint16x4x2_t @llvm.arm.neon.vld2dup.v4i16.p0(ptr %src, i32 2)
  store %struct.uint16x4x2_t %pair, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 4
  ret ptr %src.next
}
73
; Same load as the constant-offset i16 vld2dup test, but %src is advanced by
; the runtime value %inc; the expected assembly uses the register
; post-index form with r2.
define ptr @test_vld2_dup_u16_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld2_dup_u16_update_reg:
; CHECK:         vld2.16 {d16[], d17[]}, [r1], r2
; CHECK-NEXT:    vst1.16 {d16}, [r0:64]!
; CHECK-NEXT:    vstr d17, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %pair = tail call %struct.uint16x4x2_t @llvm.arm.neon.vld2dup.v4i16.p0(ptr %src, i32 2)
  store %struct.uint16x4x2_t %pair, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %src.next
}
87
; vld2dup of i32 elements into two <2 x i32> vectors, stored to %dest.
; The returned pointer is %src + 8 bytes (two i32 elements); the expected
; assembly folds that bump into the load as post-increment writeback.
; Note: unlike its siblings, this test's name carries no "u32" tag.
define ptr @test_vld2_dup_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld2_dup_update:
; CHECK:         vld2.32 {d16[], d17[]}, [r1]!
; CHECK-NEXT:    vst1.32 {d16}, [r0:64]!
; CHECK-NEXT:    vstr d17, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %pair = tail call %struct.uint32x2x2_t @llvm.arm.neon.vld2dup.v2i32.p0(ptr %src, i32 4)
  store %struct.uint32x2x2_t %pair, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 8
  ret ptr %src.next
}
101
; i32 vld2dup with %src advanced by the runtime value %inc; the expected
; assembly uses the register post-index form with r2.
; Note: unlike its siblings, this test's name carries no "u32" tag.
define ptr @test_vld2_dup_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld2_dup_update_reg:
; CHECK:         vld2.32 {d16[], d17[]}, [r1], r2
; CHECK-NEXT:    vst1.32 {d16}, [r0:64]!
; CHECK-NEXT:    vstr d17, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %pair = tail call %struct.uint32x2x2_t @llvm.arm.neon.vld2dup.v2i32.p0(ptr %src, i32 4)
  store %struct.uint32x2x2_t %pair, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %src.next
}
115
; vld2dup of i64 elements into two <1 x i64> vectors, stored to %dest.
; The expected assembly is a plain two-register vld1.64 rather than an
; all-lanes vld2, with the 16-byte pointer bump folded in as writeback.
define ptr @test_vld2_dup_u64_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld2_dup_u64_update:
; CHECK:         vld1.64 {d16, d17}, [r1:64]!
; CHECK-NEXT:    vst1.64 {d16}, [r0:64]!
; CHECK-NEXT:    vstr d17, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %pair = tail call %struct.uint64x1x2_t @llvm.arm.neon.vld2dup.v1i64.p0(ptr %src, i32 8)
  store %struct.uint64x1x2_t %pair, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 16
  ret ptr %src.next
}
129
; i64 vld2dup with %src advanced by the runtime value %inc. The expected
; assembly is a two-register vld1.64 using register post-index with r2.
define ptr @test_vld2_dup_u64_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld2_dup_u64_update_reg:
; CHECK:         vld1.64 {d16, d17}, [r1:64], r2
; CHECK-NEXT:    vst1.64 {d16}, [r0:64]!
; CHECK-NEXT:    vstr d17, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %pair = tail call %struct.uint64x1x2_t @llvm.arm.neon.vld2dup.v1i64.p0(ptr %src, i32 8)
  store %struct.uint64x1x2_t %pair, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %src.next
}
143
; vld2dup of i8 elements into two <8 x i8> vectors, stored to %dest.
; The returned pointer is %src + 2 bytes (two i8 elements); the expected
; assembly folds that bump into the load as post-increment writeback.
define ptr @test_vld2_dup_u8_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld2_dup_u8_update:
; CHECK:         vld2.8 {d16[], d17[]}, [r1]!
; CHECK-NEXT:    vst1.8 {d16}, [r0:64]!
; CHECK-NEXT:    vstr d17, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %pair = tail call %struct.uint8x8x2_t @llvm.arm.neon.vld2dup.v8i8.p0(ptr %src, i32 1)
  store %struct.uint8x8x2_t %pair, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 2
  ret ptr %src.next
}
157
; i8 vld2dup with %src advanced by the runtime value %inc; the expected
; assembly uses the register post-index form with r2.
define ptr @test_vld2_dup_u8_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld2_dup_u8_update_reg:
; CHECK:         vld2.8 {d16[], d17[]}, [r1], r2
; CHECK-NEXT:    vst1.8 {d16}, [r0:64]!
; CHECK-NEXT:    vstr d17, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %pair = tail call %struct.uint8x8x2_t @llvm.arm.neon.vld2dup.v8i8.p0(ptr %src, i32 1)
  store %struct.uint8x8x2_t %pair, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %src.next
}
171
; vld3dup of i16 elements into three <4 x i16> vectors, stored to %dest.
; The returned pointer is %src + 6 bytes (three i16 elements); the expected
; assembly folds that bump into the load as post-increment writeback.
define ptr @test_vld3_dup_u16_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld3_dup_u16_update:
; CHECK:         vld3.16 {d16[], d17[], d18[]}, [r1]!
; CHECK-NEXT:    vst1.16 {d16}, [r0:64]!
; CHECK-NEXT:    vst1.16 {d17}, [r0:64]!
; CHECK-NEXT:    vstr d18, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %triple = tail call %struct.uint16x4x3_t @llvm.arm.neon.vld3dup.v4i16.p0(ptr %src, i32 2)
  store %struct.uint16x4x3_t %triple, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 6
  ret ptr %src.next
}
186
; i16 vld3dup with %src advanced by the runtime value %inc; the expected
; assembly uses the register post-index form with r2.
define ptr @test_vld3_dup_u16_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld3_dup_u16_update_reg:
; CHECK:         vld3.16 {d16[], d17[], d18[]}, [r1], r2
; CHECK-NEXT:    vst1.16 {d16}, [r0:64]!
; CHECK-NEXT:    vst1.16 {d17}, [r0:64]!
; CHECK-NEXT:    vstr d18, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %triple = tail call %struct.uint16x4x3_t @llvm.arm.neon.vld3dup.v4i16.p0(ptr %src, i32 2)
  store %struct.uint16x4x3_t %triple, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %src.next
}
201
; vld3dup of i32 elements into three <2 x i32> vectors, stored to %dest.
; The returned pointer is %src + 12 bytes (three i32 elements); the expected
; assembly folds that bump into the load as post-increment writeback.
define ptr @test_vld3_dup_u32_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld3_dup_u32_update:
; CHECK:         vld3.32 {d16[], d17[], d18[]}, [r1]!
; CHECK-NEXT:    vst1.32 {d16}, [r0:64]!
; CHECK-NEXT:    vst1.32 {d17}, [r0:64]!
; CHECK-NEXT:    vstr d18, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %triple = tail call %struct.uint32x2x3_t @llvm.arm.neon.vld3dup.v2i32.p0(ptr %src, i32 4)
  store %struct.uint32x2x3_t %triple, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 12
  ret ptr %src.next
}
216
; i32 vld3dup with %src advanced by the runtime value %inc; the expected
; assembly uses the register post-index form with r2.
define ptr @test_vld3_dup_u32_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld3_dup_u32_update_reg:
; CHECK:         vld3.32 {d16[], d17[], d18[]}, [r1], r2
; CHECK-NEXT:    vst1.32 {d16}, [r0:64]!
; CHECK-NEXT:    vst1.32 {d17}, [r0:64]!
; CHECK-NEXT:    vstr d18, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %triple = tail call %struct.uint32x2x3_t @llvm.arm.neon.vld3dup.v2i32.p0(ptr %src, i32 4)
  store %struct.uint32x2x3_t %triple, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %src.next
}
231
; vld3dup of i64 elements into three <1 x i64> vectors, stored to %dest.
; The expected assembly is a plain three-register vld1.64 (no alignment
; qualifier on the address) with the 24-byte bump folded in as writeback.
define ptr @test_vld3_dup_u64_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld3_dup_u64_update:
; CHECK:         vld1.64 {d16, d17, d18}, [r1]!
; CHECK-NEXT:    vst1.64 {d16}, [r0:64]!
; CHECK-NEXT:    vst1.64 {d17}, [r0:64]!
; CHECK-NEXT:    vstr d18, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %triple = tail call %struct.uint64x1x3_t @llvm.arm.neon.vld3dup.v1i64.p0(ptr %src, i32 8)
  store %struct.uint64x1x3_t %triple, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 24
  ret ptr %src.next
}
246
; i64 vld3dup with %src advanced by the runtime value %inc. The expected
; assembly is a three-register vld1.64 using register post-index with r2.
define ptr @test_vld3_dup_u64_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld3_dup_u64_update_reg:
; CHECK:         vld1.64 {d16, d17, d18}, [r1], r2
; CHECK-NEXT:    vst1.64 {d16}, [r0:64]!
; CHECK-NEXT:    vst1.64 {d17}, [r0:64]!
; CHECK-NEXT:    vstr d18, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %triple = tail call %struct.uint64x1x3_t @llvm.arm.neon.vld3dup.v1i64.p0(ptr %src, i32 8)
  store %struct.uint64x1x3_t %triple, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %src.next
}
261
; vld3dup of i8 elements into three <8 x i8> vectors, stored to %dest.
; The returned pointer is %src + 3 bytes (three i8 elements); the expected
; assembly folds that bump into the load as post-increment writeback.
define ptr @test_vld3_dup_u8_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld3_dup_u8_update:
; CHECK:         vld3.8 {d16[], d17[], d18[]}, [r1]!
; CHECK-NEXT:    vst1.8 {d16}, [r0:64]!
; CHECK-NEXT:    vst1.8 {d17}, [r0:64]!
; CHECK-NEXT:    vstr d18, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %triple = tail call %struct.uint8x8x3_t @llvm.arm.neon.vld3dup.v8i8.p0(ptr %src, i32 1)
  store %struct.uint8x8x3_t %triple, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 3
  ret ptr %src.next
}
276
; i8 vld3dup with %src advanced by the runtime value %inc; the expected
; assembly uses the register post-index form with r2.
define ptr @test_vld3_dup_u8_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld3_dup_u8_update_reg:
; CHECK:         vld3.8 {d16[], d17[], d18[]}, [r1], r2
; CHECK-NEXT:    vst1.8 {d16}, [r0:64]!
; CHECK-NEXT:    vst1.8 {d17}, [r0:64]!
; CHECK-NEXT:    vstr d18, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %triple = tail call %struct.uint8x8x3_t @llvm.arm.neon.vld3dup.v8i8.p0(ptr %src, i32 1)
  store %struct.uint8x8x3_t %triple, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %src.next
}
291
; vld4dup of i16 elements into four <4 x i16> vectors, stored to %dest.
; The returned pointer is %src + 8 bytes (four i16 elements); the expected
; assembly folds that bump into the load as post-increment writeback.
define ptr @test_vld4_dup_u16_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld4_dup_u16_update:
; CHECK:         vld4.16 {d16[], d17[], d18[], d19[]}, [r1]!
; CHECK-NEXT:    vst1.16 {d16}, [r0:64]!
; CHECK-NEXT:    vst1.16 {d17}, [r0:64]!
; CHECK-NEXT:    vst1.16 {d18}, [r0:64]!
; CHECK-NEXT:    vstr d19, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %quad = tail call %struct.uint16x4x4_t @llvm.arm.neon.vld4dup.v4i16.p0(ptr %src, i32 2)
  store %struct.uint16x4x4_t %quad, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 8
  ret ptr %src.next
}
307
; i16 vld4dup with %src advanced by the runtime value %inc; the expected
; assembly uses the register post-index form with r2.
define ptr @test_vld4_dup_u16_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld4_dup_u16_update_reg:
; CHECK:         vld4.16 {d16[], d17[], d18[], d19[]}, [r1], r2
; CHECK-NEXT:    vst1.16 {d16}, [r0:64]!
; CHECK-NEXT:    vst1.16 {d17}, [r0:64]!
; CHECK-NEXT:    vst1.16 {d18}, [r0:64]!
; CHECK-NEXT:    vstr d19, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %quad = tail call %struct.uint16x4x4_t @llvm.arm.neon.vld4dup.v4i16.p0(ptr %src, i32 2)
  store %struct.uint16x4x4_t %quad, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %src.next
}
323
; vld4dup of i32 elements into four <2 x i32> vectors, stored to %dest.
; The returned pointer is %src + 16 bytes (four i32 elements); the expected
; assembly folds that bump into the load as post-increment writeback.
define ptr @test_vld4_dup_u32_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld4_dup_u32_update:
; CHECK:         vld4.32 {d16[], d17[], d18[], d19[]}, [r1]!
; CHECK-NEXT:    vst1.32 {d16}, [r0:64]!
; CHECK-NEXT:    vst1.32 {d17}, [r0:64]!
; CHECK-NEXT:    vst1.32 {d18}, [r0:64]!
; CHECK-NEXT:    vstr d19, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %quad = tail call %struct.uint32x2x4_t @llvm.arm.neon.vld4dup.v2i32.p0(ptr %src, i32 4)
  store %struct.uint32x2x4_t %quad, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 16
  ret ptr %src.next
}
339
; i32 vld4dup with %src advanced by the runtime value %inc; the expected
; assembly uses the register post-index form with r2.
define ptr @test_vld4_dup_u32_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld4_dup_u32_update_reg:
; CHECK:         vld4.32 {d16[], d17[], d18[], d19[]}, [r1], r2
; CHECK-NEXT:    vst1.32 {d16}, [r0:64]!
; CHECK-NEXT:    vst1.32 {d17}, [r0:64]!
; CHECK-NEXT:    vst1.32 {d18}, [r0:64]!
; CHECK-NEXT:    vstr d19, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %quad = tail call %struct.uint32x2x4_t @llvm.arm.neon.vld4dup.v2i32.p0(ptr %src, i32 4)
  store %struct.uint32x2x4_t %quad, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %src.next
}
355
; vld4dup of i64 elements into four <1 x i64> vectors, stored to %dest.
; The expected assembly is a plain four-register vld1.64 rather than an
; all-lanes vld4, with the 32-byte pointer bump folded in as writeback.
define ptr @test_vld4_dup_u64_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld4_dup_u64_update:
; CHECK:         vld1.64 {d16, d17, d18, d19}, [r1:64]!
; CHECK-NEXT:    vst1.64 {d16}, [r0:64]!
; CHECK-NEXT:    vst1.64 {d17}, [r0:64]!
; CHECK-NEXT:    vst1.64 {d18}, [r0:64]!
; CHECK-NEXT:    vstr d19, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %quad = tail call %struct.uint64x1x4_t @llvm.arm.neon.vld4dup.v1i64.p0(ptr %src, i32 8)
  store %struct.uint64x1x4_t %quad, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 32
  ret ptr %src.next
}
371
; i64 vld4dup with %src advanced by the runtime value %inc. The expected
; assembly is a four-register vld1.64 using register post-index with r2.
define ptr @test_vld4_dup_u64_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld4_dup_u64_update_reg:
; CHECK:         vld1.64 {d16, d17, d18, d19}, [r1:64], r2
; CHECK-NEXT:    vst1.64 {d16}, [r0:64]!
; CHECK-NEXT:    vst1.64 {d17}, [r0:64]!
; CHECK-NEXT:    vst1.64 {d18}, [r0:64]!
; CHECK-NEXT:    vstr d19, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %quad = tail call %struct.uint64x1x4_t @llvm.arm.neon.vld4dup.v1i64.p0(ptr %src, i32 8)
  store %struct.uint64x1x4_t %quad, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %src.next
}
387
; vld4dup of i8 elements into four <8 x i8> vectors, stored to %dest.
; The returned pointer is %src + 4 bytes (four i8 elements); the expected
; assembly folds that bump into the load as post-increment writeback.
define ptr @test_vld4_dup_u8_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld4_dup_u8_update:
; CHECK:         vld4.8 {d16[], d17[], d18[], d19[]}, [r1]!
; CHECK-NEXT:    vst1.8 {d16}, [r0:64]!
; CHECK-NEXT:    vst1.8 {d17}, [r0:64]!
; CHECK-NEXT:    vst1.8 {d18}, [r0:64]!
; CHECK-NEXT:    vstr d19, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %quad = tail call %struct.uint8x8x4_t @llvm.arm.neon.vld4dup.v8i8.p0(ptr %src, i32 1)
  store %struct.uint8x8x4_t %quad, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 4
  ret ptr %src.next
}
403
; i8 vld4dup with %src advanced by the runtime value %inc; the expected
; assembly uses the register post-index form with r2.
define ptr @test_vld4_dup_u8_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld4_dup_u8_update_reg:
; CHECK:         vld4.8 {d16[], d17[], d18[], d19[]}, [r1], r2
; CHECK-NEXT:    vst1.8 {d16}, [r0:64]!
; CHECK-NEXT:    vst1.8 {d17}, [r0:64]!
; CHECK-NEXT:    vst1.8 {d18}, [r0:64]!
; CHECK-NEXT:    vstr d19, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %quad = tail call %struct.uint8x8x4_t @llvm.arm.neon.vld4dup.v8i8.p0(ptr %src, i32 1)
  store %struct.uint8x8x4_t %quad, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %src.next
}
419
; 128-bit vld2dup of i16 elements into two <8 x i16> vectors, stored to
; %dest; returns %src + 4 bytes. The expected assembly issues two vld2.16
; loads from the same address (even D registers, then odd) and only the
; second one carries the writeback. The %dest0 parameter is unused.
define ptr @test_vld2q_dup_u16_update(ptr %dest, ptr %src, ptr %dest0) {
; CHECK-LABEL: test_vld2q_dup_u16_update:
; CHECK:         vld2.16 {d16[], d18[]}, [r1]
; CHECK-NEXT:    vld2.16 {d17[], d19[]}, [r1]!
; CHECK-NEXT:    vst1.16 {d16, d17}, [r0]!
; CHECK-NEXT:    vst1.64 {d18, d19}, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %pair = tail call %struct.uint16x8x2_t @llvm.arm.neon.vld2dup.v8i16.p0(ptr %src, i32 2)
  store %struct.uint16x8x2_t %pair, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 4
  ret ptr %src.next
}
434
; 128-bit i16 vld2dup with %src advanced by the runtime value %inc. Of the
; two expected vld2.16 loads, only the second applies the r2 post-index.
define ptr @test_vld2q_dup_u16_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld2q_dup_u16_update_reg:
; CHECK:         vld2.16 {d16[], d18[]}, [r1]
; CHECK-NEXT:    vld2.16 {d17[], d19[]}, [r1], r2
; CHECK-NEXT:    vst1.16 {d16, d17}, [r0]!
; CHECK-NEXT:    vst1.64 {d18, d19}, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %pair = tail call %struct.uint16x8x2_t @llvm.arm.neon.vld2dup.v8i16.p0(ptr %src, i32 2)
  store %struct.uint16x8x2_t %pair, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %src.next
}
449
; 128-bit vld2dup of i32 elements into two <4 x i32> vectors, stored to
; %dest; returns %src + 8 bytes. The expected assembly issues two vld2.32
; loads from the same address and only the second carries the writeback.
define ptr @test_vld2q_dup_u32_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld2q_dup_u32_update:
; CHECK:         vld2.32 {d16[], d18[]}, [r1]
; CHECK-NEXT:    vld2.32 {d17[], d19[]}, [r1]!
; CHECK-NEXT:    vst1.32 {d16, d17}, [r0]!
; CHECK-NEXT:    vst1.64 {d18, d19}, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %pair = tail call %struct.uint32x4x2_t @llvm.arm.neon.vld2dup.v4i32.p0(ptr %src, i32 4)
  store %struct.uint32x4x2_t %pair, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 8
  ret ptr %src.next
}
464
; 128-bit i32 vld2dup with %src advanced by the runtime value %inc. Of the
; two expected vld2.32 loads, only the second applies the r2 post-index.
define ptr @test_vld2q_dup_u32_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld2q_dup_u32_update_reg:
; CHECK:         vld2.32 {d16[], d18[]}, [r1]
; CHECK-NEXT:    vld2.32 {d17[], d19[]}, [r1], r2
; CHECK-NEXT:    vst1.32 {d16, d17}, [r0]!
; CHECK-NEXT:    vst1.64 {d18, d19}, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %pair = tail call %struct.uint32x4x2_t @llvm.arm.neon.vld2dup.v4i32.p0(ptr %src, i32 4)
  store %struct.uint32x4x2_t %pair, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %src.next
}
479
; 128-bit vld2dup of i8 elements into two <16 x i8> vectors, stored to
; %dest; returns %src + 2 bytes. The expected assembly issues two vld2.8
; loads from the same address and only the second carries the writeback.
define ptr @test_vld2q_dup_u8_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld2q_dup_u8_update:
; CHECK:         vld2.8 {d16[], d18[]}, [r1]
; CHECK-NEXT:    vld2.8 {d17[], d19[]}, [r1]!
; CHECK-NEXT:    vst1.8 {d16, d17}, [r0]!
; CHECK-NEXT:    vst1.64 {d18, d19}, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %pair = tail call %struct.uint8x16x2_t @llvm.arm.neon.vld2dup.v16i8.p0(ptr %src, i32 1)
  store %struct.uint8x16x2_t %pair, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 2
  ret ptr %src.next
}
494
; 128-bit i8 vld2dup with %src advanced by the runtime value %inc. Of the
; two expected vld2.8 loads, only the second applies the r2 post-index.
define ptr @test_vld2q_dup_u8_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld2q_dup_u8_update_reg:
; CHECK:         vld2.8 {d16[], d18[]}, [r1]
; CHECK-NEXT:    vld2.8 {d17[], d19[]}, [r1], r2
; CHECK-NEXT:    vst1.8 {d16, d17}, [r0]!
; CHECK-NEXT:    vst1.64 {d18, d19}, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %pair = tail call %struct.uint8x16x2_t @llvm.arm.neon.vld2dup.v16i8.p0(ptr %src, i32 1)
  store %struct.uint8x16x2_t %pair, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %src.next
}
509
; 128-bit vld3dup of i16 elements into three <8 x i16> vectors, stored to
; %dest; returns %src + 6 bytes. The expected assembly issues two vld3.16
; loads from the same address and only the second carries the writeback.
define ptr @test_vld3q_dup_u16_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld3q_dup_u16_update:
; CHECK:         vld3.16 {d16[], d18[], d20[]}, [r1]
; CHECK-NEXT:    vld3.16 {d17[], d19[], d21[]}, [r1]!
; CHECK-NEXT:    vst1.16 {d16, d17}, [r0]!
; CHECK-NEXT:    vst1.16 {d18, d19}, [r0]!
; CHECK-NEXT:    vst1.64 {d20, d21}, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %triple = tail call %struct.uint16x8x3_t @llvm.arm.neon.vld3dup.v8i16.p0(ptr %src, i32 2)
  store %struct.uint16x8x3_t %triple, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 6
  ret ptr %src.next
}
525
; 128-bit i16 vld3dup with %src advanced by the runtime value %inc. Of the
; two expected vld3.16 loads, only the second applies the r2 post-index.
define ptr @test_vld3q_dup_u16_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld3q_dup_u16_update_reg:
; CHECK:         vld3.16 {d16[], d18[], d20[]}, [r1]
; CHECK-NEXT:    vld3.16 {d17[], d19[], d21[]}, [r1], r2
; CHECK-NEXT:    vst1.16 {d16, d17}, [r0]!
; CHECK-NEXT:    vst1.16 {d18, d19}, [r0]!
; CHECK-NEXT:    vst1.64 {d20, d21}, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %triple = tail call %struct.uint16x8x3_t @llvm.arm.neon.vld3dup.v8i16.p0(ptr %src, i32 2)
  store %struct.uint16x8x3_t %triple, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %src.next
}
541
; 128-bit vld3dup of i32 elements into three <4 x i32> vectors, stored to
; %dest; returns %src + 12 bytes. The expected assembly issues two vld3.32
; loads from the same address and only the second carries the writeback.
define ptr @test_vld3q_dup_u32_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld3q_dup_u32_update:
; CHECK:         vld3.32 {d16[], d18[], d20[]}, [r1]
; CHECK-NEXT:    vld3.32 {d17[], d19[], d21[]}, [r1]!
; CHECK-NEXT:    vst1.32 {d16, d17}, [r0]!
; CHECK-NEXT:    vst1.32 {d18, d19}, [r0]!
; CHECK-NEXT:    vst1.64 {d20, d21}, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %triple = tail call %struct.uint32x4x3_t @llvm.arm.neon.vld3dup.v4i32.p0(ptr %src, i32 4)
  store %struct.uint32x4x3_t %triple, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 12
  ret ptr %src.next
}
557
; 128-bit i32 vld3dup with %src advanced by the runtime value %inc. Of the
; two expected vld3.32 loads, only the second applies the r2 post-index.
define ptr @test_vld3q_dup_u32_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld3q_dup_u32_update_reg:
; CHECK:         vld3.32 {d16[], d18[], d20[]}, [r1]
; CHECK-NEXT:    vld3.32 {d17[], d19[], d21[]}, [r1], r2
; CHECK-NEXT:    vst1.32 {d16, d17}, [r0]!
; CHECK-NEXT:    vst1.32 {d18, d19}, [r0]!
; CHECK-NEXT:    vst1.64 {d20, d21}, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %triple = tail call %struct.uint32x4x3_t @llvm.arm.neon.vld3dup.v4i32.p0(ptr %src, i32 4)
  store %struct.uint32x4x3_t %triple, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %src.next
}
573
; 128-bit vld3dup of i8 elements into three <16 x i8> vectors, stored to
; %dest; returns %src + 3 bytes. The expected assembly issues two vld3.8
; loads from the same address and only the second carries the writeback.
define ptr @test_vld3q_dup_u8_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld3q_dup_u8_update:
; CHECK:         vld3.8 {d16[], d18[], d20[]}, [r1]
; CHECK-NEXT:    vld3.8 {d17[], d19[], d21[]}, [r1]!
; CHECK-NEXT:    vst1.8 {d16, d17}, [r0]!
; CHECK-NEXT:    vst1.8 {d18, d19}, [r0]!
; CHECK-NEXT:    vst1.64 {d20, d21}, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %triple = tail call %struct.uint8x16x3_t @llvm.arm.neon.vld3dup.v16i8.p0(ptr %src, i32 1)
  store %struct.uint8x16x3_t %triple, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 3
  ret ptr %src.next
}
589
; 128-bit i8 vld3dup with %src advanced by the runtime value %inc. Of the
; two expected vld3.8 loads, only the second applies the r2 post-index.
define ptr @test_vld3q_dup_u8_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld3q_dup_u8_update_reg:
; CHECK:         vld3.8 {d16[], d18[], d20[]}, [r1]
; CHECK-NEXT:    vld3.8 {d17[], d19[], d21[]}, [r1], r2
; CHECK-NEXT:    vst1.8 {d16, d17}, [r0]!
; CHECK-NEXT:    vst1.8 {d18, d19}, [r0]!
; CHECK-NEXT:    vst1.64 {d20, d21}, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %triple = tail call %struct.uint8x16x3_t @llvm.arm.neon.vld3dup.v16i8.p0(ptr %src, i32 1)
  store %struct.uint8x16x3_t %triple, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %src.next
}
605
; 128-bit vld4dup of i16 elements into four <8 x i16> vectors, stored to
; %dest; returns %src + 8 bytes. The expected assembly issues two vld4.16
; loads from the same address and only the second carries the writeback.
define ptr @test_vld4q_dup_u16_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld4q_dup_u16_update:
; CHECK:         vld4.16 {d16[], d18[], d20[], d22[]}, [r1]
; CHECK-NEXT:    vld4.16 {d17[], d19[], d21[], d23[]}, [r1]!
; CHECK-NEXT:    vst1.16 {d16, d17}, [r0]!
; CHECK-NEXT:    vst1.16 {d18, d19}, [r0]!
; CHECK-NEXT:    vst1.16 {d20, d21}, [r0]!
; CHECK-NEXT:    vst1.64 {d22, d23}, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %quad = tail call %struct.uint16x8x4_t @llvm.arm.neon.vld4dup.v8i16.p0(ptr %src, i32 2)
  store %struct.uint16x8x4_t %quad, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 8
  ret ptr %src.next
}
622
; 128-bit i16 vld4dup with %src advanced by the runtime value %inc. Of the
; two expected vld4.16 loads, only the second applies the r2 post-index.
define ptr @test_vld4q_dup_u16_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld4q_dup_u16_update_reg:
; CHECK:         vld4.16 {d16[], d18[], d20[], d22[]}, [r1]
; CHECK-NEXT:    vld4.16 {d17[], d19[], d21[], d23[]}, [r1], r2
; CHECK-NEXT:    vst1.16 {d16, d17}, [r0]!
; CHECK-NEXT:    vst1.16 {d18, d19}, [r0]!
; CHECK-NEXT:    vst1.16 {d20, d21}, [r0]!
; CHECK-NEXT:    vst1.64 {d22, d23}, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %quad = tail call %struct.uint16x8x4_t @llvm.arm.neon.vld4dup.v8i16.p0(ptr %src, i32 2)
  store %struct.uint16x8x4_t %quad, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %src.next
}
639
; 128-bit vld4dup of i32 elements into four <4 x i32> vectors, stored to
; %dest; returns %src + 16 bytes. The expected assembly issues two vld4.32
; loads from the same address and only the second carries the writeback.
define ptr @test_vld4q_dup_u32_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld4q_dup_u32_update:
; CHECK:         vld4.32 {d16[], d18[], d20[], d22[]}, [r1]
; CHECK-NEXT:    vld4.32 {d17[], d19[], d21[], d23[]}, [r1]!
; CHECK-NEXT:    vst1.32 {d16, d17}, [r0]!
; CHECK-NEXT:    vst1.32 {d18, d19}, [r0]!
; CHECK-NEXT:    vst1.32 {d20, d21}, [r0]!
; CHECK-NEXT:    vst1.64 {d22, d23}, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %quad = tail call %struct.uint32x4x4_t @llvm.arm.neon.vld4dup.v4i32.p0(ptr %src, i32 4)
  store %struct.uint32x4x4_t %quad, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 16
  ret ptr %src.next
}
656
; 128-bit i32 vld4dup with %src advanced by the runtime value %inc. Of the
; two expected vld4.32 loads, only the second applies the r2 post-index.
define ptr @test_vld4q_dup_u32_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld4q_dup_u32_update_reg:
; CHECK:         vld4.32 {d16[], d18[], d20[], d22[]}, [r1]
; CHECK-NEXT:    vld4.32 {d17[], d19[], d21[], d23[]}, [r1], r2
; CHECK-NEXT:    vst1.32 {d16, d17}, [r0]!
; CHECK-NEXT:    vst1.32 {d18, d19}, [r0]!
; CHECK-NEXT:    vst1.32 {d20, d21}, [r0]!
; CHECK-NEXT:    vst1.64 {d22, d23}, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %quad = tail call %struct.uint32x4x4_t @llvm.arm.neon.vld4dup.v4i32.p0(ptr %src, i32 4)
  store %struct.uint32x4x4_t %quad, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %src.next
}
673
; 128-bit vld4dup of i8 elements into four <16 x i8> vectors, stored to
; %dest; returns %src + 4 bytes. The expected assembly issues two vld4.8
; loads from the same address and only the second carries the writeback.
define ptr @test_vld4q_dup_u8_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld4q_dup_u8_update:
; CHECK:         vld4.8 {d16[], d18[], d20[], d22[]}, [r1]
; CHECK-NEXT:    vld4.8 {d17[], d19[], d21[], d23[]}, [r1]!
; CHECK-NEXT:    vst1.8 {d16, d17}, [r0]!
; CHECK-NEXT:    vst1.8 {d18, d19}, [r0]!
; CHECK-NEXT:    vst1.8 {d20, d21}, [r0]!
; CHECK-NEXT:    vst1.64 {d22, d23}, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %quad = tail call %struct.uint8x16x4_t @llvm.arm.neon.vld4dup.v16i8.p0(ptr %src, i32 1)
  store %struct.uint8x16x4_t %quad, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 4
  ret ptr %src.next
}
690
; 128-bit i8 vld4dup with %src advanced by the runtime value %inc. Of the
; two expected vld4.8 loads, only the second applies the r2 post-index.
define ptr @test_vld4q_dup_u8_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld4q_dup_u8_update_reg:
; CHECK:         vld4.8 {d16[], d18[], d20[], d22[]}, [r1]
; CHECK-NEXT:    vld4.8 {d17[], d19[], d21[], d23[]}, [r1], r2
; CHECK-NEXT:    vst1.8 {d16, d17}, [r0]!
; CHECK-NEXT:    vst1.8 {d18, d19}, [r0]!
; CHECK-NEXT:    vst1.8 {d20, d21}, [r0]!
; CHECK-NEXT:    vst1.64 {d22, d23}, [r0]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
start:
  %quad = tail call %struct.uint8x16x4_t @llvm.arm.neon.vld4dup.v16i8.p0(ptr %src, i32 1)
  store %struct.uint8x16x4_t %quad, ptr %dest, align 8
  %src.next = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %src.next
}
707