xref: /llvm-project/llvm/test/CodeGen/ARM/arm-vlddup-update.ll (revision bed1c7f061aa12417aa081e334afdba45767b938)
; RUN: llc < %s -mtriple=armv8-linux-gnueabi -verify-machineinstrs \
; RUN:     -asm-verbose=false | FileCheck %s
3
; Aggregate result types used by the intrinsics and tests in this file.
; Each struct holds 2, 3 or 4 vectors of the same type.

; 64-bit vectors of i16.
%struct.uint16x4x2_t = type { <4 x i16>, <4 x i16> }
%struct.uint16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
%struct.uint16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }

; 64-bit vectors of i32.
%struct.uint32x2x2_t = type { <2 x i32>, <2 x i32> }
%struct.uint32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
%struct.uint32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }

; 64-bit vectors of i64 (single element).
%struct.uint64x1x2_t = type { <1 x i64>, <1 x i64> }
%struct.uint64x1x3_t = type { <1 x i64>, <1 x i64>, <1 x i64> }
%struct.uint64x1x4_t = type { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }

; 64-bit vectors of i8.
%struct.uint8x8x2_t = type { <8 x i8>, <8 x i8> }
%struct.uint8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
%struct.uint8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }

; 128-bit vectors of i16.
%struct.uint16x8x2_t = type { <8 x i16>, <8 x i16> }
%struct.uint16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
%struct.uint16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }

; 128-bit vectors of i32.
%struct.uint32x4x2_t = type { <4 x i32>, <4 x i32> }
%struct.uint32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
%struct.uint32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }

; 128-bit vectors of i8.
%struct.uint8x16x2_t = type { <16 x i8>, <16 x i8> }
%struct.uint8x16x3_t = type { <16 x i8>, <16 x i8>, <16 x i8> }
%struct.uint8x16x4_t = type { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }
31
; Declarations of the NEON vld2dup/vld3dup/vld4dup intrinsics exercised below.
; Each takes a pointer and an i32 and returns one of the structs defined above.

; vld2dup, 64-bit vector results.
declare %struct.uint8x8x2_t @llvm.arm.neon.vld2dup.v8i8.p0(ptr, i32)
declare %struct.uint16x4x2_t @llvm.arm.neon.vld2dup.v4i16.p0(ptr, i32)
declare %struct.uint32x2x2_t @llvm.arm.neon.vld2dup.v2i32.p0(ptr, i32)
declare %struct.uint64x1x2_t @llvm.arm.neon.vld2dup.v1i64.p0(ptr, i32)

; vld3dup, 64-bit vector results.
declare %struct.uint8x8x3_t @llvm.arm.neon.vld3dup.v8i8.p0(ptr, i32)
declare %struct.uint16x4x3_t @llvm.arm.neon.vld3dup.v4i16.p0(ptr, i32)
declare %struct.uint32x2x3_t @llvm.arm.neon.vld3dup.v2i32.p0(ptr, i32)
declare %struct.uint64x1x3_t @llvm.arm.neon.vld3dup.v1i64.p0(ptr, i32)

; vld4dup, 64-bit vector results.
declare %struct.uint8x8x4_t @llvm.arm.neon.vld4dup.v8i8.p0(ptr, i32)
declare %struct.uint16x4x4_t @llvm.arm.neon.vld4dup.v4i16.p0(ptr, i32)
declare %struct.uint32x2x4_t @llvm.arm.neon.vld4dup.v2i32.p0(ptr, i32)
declare %struct.uint64x1x4_t @llvm.arm.neon.vld4dup.v1i64.p0(ptr, i32)

; vld2dup, 128-bit vector results.
declare %struct.uint8x16x2_t @llvm.arm.neon.vld2dup.v16i8.p0(ptr, i32)
declare %struct.uint16x8x2_t @llvm.arm.neon.vld2dup.v8i16.p0(ptr, i32)
declare %struct.uint32x4x2_t @llvm.arm.neon.vld2dup.v4i32.p0(ptr, i32)

; vld3dup, 128-bit vector results.
declare %struct.uint8x16x3_t @llvm.arm.neon.vld3dup.v16i8.p0(ptr, i32)
declare %struct.uint16x8x3_t @llvm.arm.neon.vld3dup.v8i16.p0(ptr, i32)
declare %struct.uint32x4x3_t @llvm.arm.neon.vld3dup.v4i32.p0(ptr, i32)

; vld4dup, 128-bit vector results.
declare %struct.uint8x16x4_t @llvm.arm.neon.vld4dup.v16i8.p0(ptr, i32)
declare %struct.uint16x8x4_t @llvm.arm.neon.vld4dup.v8i16.p0(ptr, i32)
declare %struct.uint32x4x4_t @llvm.arm.neon.vld4dup.v4i32.p0(ptr, i32)
58
; vld2dup of <4 x i16> from %src; the result is stored to %dest and %src
; advanced by 4 bytes is returned. Expected codegen: one all-lanes vld2.16 on
; r1 with post-increment writeback ("!").
define ptr @test_vld2_dup_u16_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld2_dup_u16_update:
; CHECK:         vld2.16 {d16[], d17[]}, [r1]!
entry:
  %tmp = tail call %struct.uint16x4x2_t @llvm.arm.neon.vld2dup.v4i16.p0(ptr %src, i32 2)
  store %struct.uint16x4x2_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 4
  ret ptr %updated_src
}
68
; vld2dup of <4 x i16> from %src; the result is stored to %dest and %src
; advanced by %inc bytes is returned. Expected codegen: one all-lanes vld2.16
; on r1 with register post-increment by r2.
define ptr @test_vld2_dup_u16_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld2_dup_u16_update_reg:
; CHECK:         vld2.16 {d16[], d17[]}, [r1], r2
entry:
  %tmp = tail call %struct.uint16x4x2_t @llvm.arm.neon.vld2dup.v4i16.p0(ptr %src, i32 2)
  store %struct.uint16x4x2_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %updated_src
}
78
; vld2dup of <2 x i32> from %src; the result is stored to %dest and %src
; advanced by 8 bytes is returned. Expected codegen: one all-lanes vld2.32 on
; r1 with post-increment writeback ("!").
define ptr @test_vld2_dup_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld2_dup_update:
; CHECK:         vld2.32 {d16[], d17[]}, [r1]!
entry:
  %tmp = tail call %struct.uint32x2x2_t @llvm.arm.neon.vld2dup.v2i32.p0(ptr %src, i32 4)
  store %struct.uint32x2x2_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 8
  ret ptr %updated_src
}
88
; vld2dup of <2 x i32> from %src; the result is stored to %dest and %src
; advanced by %inc bytes is returned. Expected codegen: one all-lanes vld2.32
; on r1 with register post-increment by r2.
define ptr @test_vld2_dup_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld2_dup_update_reg:
; CHECK:         vld2.32 {d16[], d17[]}, [r1], r2
entry:
  %tmp = tail call %struct.uint32x2x2_t @llvm.arm.neon.vld2dup.v2i32.p0(ptr %src, i32 4)
  store %struct.uint32x2x2_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %updated_src
}
98
; vld2dup of <1 x i64> from %src; the result is stored to %dest and %src
; advanced by 16 bytes is returned. Expected codegen: a two-register vld1.64
; (not an all-lanes form) on r1 with a :64 alignment qualifier and
; post-increment writeback ("!").
define ptr @test_vld2_dup_u64_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld2_dup_u64_update:
; CHECK:         vld1.64 {d16, d17}, [r1:64]!
entry:
  %tmp = tail call %struct.uint64x1x2_t @llvm.arm.neon.vld2dup.v1i64.p0(ptr %src, i32 8)
  store %struct.uint64x1x2_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 16
  ret ptr %updated_src
}
108
; vld2dup of <1 x i64> from %src; the result is stored to %dest and %src
; advanced by %inc bytes is returned. Expected codegen: a two-register vld1.64
; (not an all-lanes form) on r1 with a :64 alignment qualifier and register
; post-increment by r2.
define ptr @test_vld2_dup_u64_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld2_dup_u64_update_reg:
; CHECK:         vld1.64 {d16, d17}, [r1:64], r2
entry:
  %tmp = tail call %struct.uint64x1x2_t @llvm.arm.neon.vld2dup.v1i64.p0(ptr %src, i32 8)
  store %struct.uint64x1x2_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %updated_src
}
118
; vld2dup of <8 x i8> from %src; the result is stored to %dest and %src
; advanced by 2 bytes is returned. Expected codegen: one all-lanes vld2.8 on
; r1 with post-increment writeback ("!").
define ptr @test_vld2_dup_u8_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld2_dup_u8_update:
; CHECK:         vld2.8 {d16[], d17[]}, [r1]!
entry:
  %tmp = tail call %struct.uint8x8x2_t @llvm.arm.neon.vld2dup.v8i8.p0(ptr %src, i32 1)
  store %struct.uint8x8x2_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 2
  ret ptr %updated_src
}
128
; vld2dup of <8 x i8> from %src; the result is stored to %dest and %src
; advanced by %inc bytes is returned. Expected codegen: one all-lanes vld2.8
; on r1 with register post-increment by r2.
define ptr @test_vld2_dup_u8_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld2_dup_u8_update_reg:
; CHECK:         vld2.8 {d16[], d17[]}, [r1], r2
entry:
  %tmp = tail call %struct.uint8x8x2_t @llvm.arm.neon.vld2dup.v8i8.p0(ptr %src, i32 1)
  store %struct.uint8x8x2_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %updated_src
}
138
; vld3dup of <4 x i16> from %src; the result is stored to %dest and %src
; advanced by 6 bytes is returned. Expected codegen: one all-lanes vld3.16 on
; r1 with post-increment writeback ("!").
define ptr @test_vld3_dup_u16_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld3_dup_u16_update:
; CHECK:         vld3.16 {d16[], d17[], d18[]}, [r1]!
entry:
  %tmp = tail call %struct.uint16x4x3_t @llvm.arm.neon.vld3dup.v4i16.p0(ptr %src, i32 2)
  store %struct.uint16x4x3_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 6
  ret ptr %updated_src
}
148
; vld3dup of <4 x i16> from %src; the result is stored to %dest and %src
; advanced by %inc bytes is returned. Expected codegen: one all-lanes vld3.16
; on r1 with register post-increment by r2.
define ptr @test_vld3_dup_u16_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld3_dup_u16_update_reg:
; CHECK:         vld3.16 {d16[], d17[], d18[]}, [r1], r2
entry:
  %tmp = tail call %struct.uint16x4x3_t @llvm.arm.neon.vld3dup.v4i16.p0(ptr %src, i32 2)
  store %struct.uint16x4x3_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %updated_src
}
158
; vld3dup of <2 x i32> from %src; the result is stored to %dest and %src
; advanced by 12 bytes is returned. Expected codegen: one all-lanes vld3.32 on
; r1 with post-increment writeback ("!").
define ptr @test_vld3_dup_u32_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld3_dup_u32_update:
; CHECK:         vld3.32 {d16[], d17[], d18[]}, [r1]!
entry:
  %tmp = tail call %struct.uint32x2x3_t @llvm.arm.neon.vld3dup.v2i32.p0(ptr %src, i32 4)
  store %struct.uint32x2x3_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 12
  ret ptr %updated_src
}
168
; vld3dup of <2 x i32> from %src; the result is stored to %dest and %src
; advanced by %inc bytes is returned. Expected codegen: one all-lanes vld3.32
; on r1 with register post-increment by r2.
define ptr @test_vld3_dup_u32_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld3_dup_u32_update_reg:
; CHECK:         vld3.32 {d16[], d17[], d18[]}, [r1], r2
entry:
  %tmp = tail call %struct.uint32x2x3_t @llvm.arm.neon.vld3dup.v2i32.p0(ptr %src, i32 4)
  store %struct.uint32x2x3_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %updated_src
}
178
; vld3dup of <1 x i64> from %src; the result is stored to %dest and %src
; advanced by 24 bytes is returned. Expected codegen: a three-register vld1.64
; (not an all-lanes form) on r1 with post-increment writeback ("!"). Unlike
; the vld2/vld4 u64 tests in this file, no :64 alignment qualifier is expected.
define ptr @test_vld3_dup_u64_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld3_dup_u64_update:
; CHECK:         vld1.64 {d16, d17, d18}, [r1]!
entry:
  %tmp = tail call %struct.uint64x1x3_t @llvm.arm.neon.vld3dup.v1i64.p0(ptr %src, i32 8)
  store %struct.uint64x1x3_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 24
  ret ptr %updated_src
}
188
; vld3dup of <1 x i64> from %src; the result is stored to %dest and %src
; advanced by %inc bytes is returned. Expected codegen: a three-register
; vld1.64 (not an all-lanes form) on r1 with register post-increment by r2.
; Unlike the vld2/vld4 u64 tests in this file, no :64 alignment qualifier is
; expected.
define ptr @test_vld3_dup_u64_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld3_dup_u64_update_reg:
; CHECK:         vld1.64 {d16, d17, d18}, [r1], r2
entry:
  %tmp = tail call %struct.uint64x1x3_t @llvm.arm.neon.vld3dup.v1i64.p0(ptr %src, i32 8)
  store %struct.uint64x1x3_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %updated_src
}
198
; vld3dup of <8 x i8> from %src; the result is stored to %dest and %src
; advanced by 3 bytes is returned. Expected codegen: one all-lanes vld3.8 on
; r1 with post-increment writeback ("!").
define ptr @test_vld3_dup_u8_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld3_dup_u8_update:
; CHECK:         vld3.8 {d16[], d17[], d18[]}, [r1]!
entry:
  %tmp = tail call %struct.uint8x8x3_t @llvm.arm.neon.vld3dup.v8i8.p0(ptr %src, i32 1)
  store %struct.uint8x8x3_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 3
  ret ptr %updated_src
}
208
; vld3dup of <8 x i8> from %src; the result is stored to %dest and %src
; advanced by %inc bytes is returned. Expected codegen: one all-lanes vld3.8
; on r1 with register post-increment by r2.
define ptr @test_vld3_dup_u8_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld3_dup_u8_update_reg:
; CHECK:         vld3.8 {d16[], d17[], d18[]}, [r1], r2
entry:
  %tmp = tail call %struct.uint8x8x3_t @llvm.arm.neon.vld3dup.v8i8.p0(ptr %src, i32 1)
  store %struct.uint8x8x3_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %updated_src
}
218
; vld4dup of <4 x i16> from %src; the result is stored to %dest and %src
; advanced by 8 bytes is returned. Expected codegen: one all-lanes vld4.16 on
; r1 with post-increment writeback ("!").
define ptr @test_vld4_dup_u16_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld4_dup_u16_update:
; CHECK:         vld4.16 {d16[], d17[], d18[], d19[]}, [r1]!
entry:
  %tmp = tail call %struct.uint16x4x4_t @llvm.arm.neon.vld4dup.v4i16.p0(ptr %src, i32 2)
  store %struct.uint16x4x4_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 8
  ret ptr %updated_src
}
228
; vld4dup of <4 x i16> from %src; the result is stored to %dest and %src
; advanced by %inc bytes is returned. Expected codegen: one all-lanes vld4.16
; on r1 with register post-increment by r2.
define ptr @test_vld4_dup_u16_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld4_dup_u16_update_reg:
; CHECK:         vld4.16 {d16[], d17[], d18[], d19[]}, [r1], r2
entry:
  %tmp = tail call %struct.uint16x4x4_t @llvm.arm.neon.vld4dup.v4i16.p0(ptr %src, i32 2)
  store %struct.uint16x4x4_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %updated_src
}
238
; vld4dup of <2 x i32> from %src; the result is stored to %dest and %src
; advanced by 16 bytes is returned. Expected codegen: one all-lanes vld4.32 on
; r1 with post-increment writeback ("!").
define ptr @test_vld4_dup_u32_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld4_dup_u32_update:
; CHECK:         vld4.32 {d16[], d17[], d18[], d19[]}, [r1]!
entry:
  %tmp = tail call %struct.uint32x2x4_t @llvm.arm.neon.vld4dup.v2i32.p0(ptr %src, i32 4)
  store %struct.uint32x2x4_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 16
  ret ptr %updated_src
}
248
; vld4dup of <2 x i32> from %src; the result is stored to %dest and %src
; advanced by %inc bytes is returned. Expected codegen: one all-lanes vld4.32
; on r1 with register post-increment by r2.
define ptr @test_vld4_dup_u32_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld4_dup_u32_update_reg:
; CHECK:         vld4.32 {d16[], d17[], d18[], d19[]}, [r1], r2
entry:
  %tmp = tail call %struct.uint32x2x4_t @llvm.arm.neon.vld4dup.v2i32.p0(ptr %src, i32 4)
  store %struct.uint32x2x4_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %updated_src
}
258
; vld4dup of <1 x i64> from %src; the result is stored to %dest and %src
; advanced by 32 bytes is returned. Expected codegen: a four-register vld1.64
; (not an all-lanes form) on r1 with a :64 alignment qualifier and
; post-increment writeback ("!").
define ptr @test_vld4_dup_u64_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld4_dup_u64_update:
; CHECK:         vld1.64 {d16, d17, d18, d19}, [r1:64]!
entry:
  %tmp = tail call %struct.uint64x1x4_t @llvm.arm.neon.vld4dup.v1i64.p0(ptr %src, i32 8)
  store %struct.uint64x1x4_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 32
  ret ptr %updated_src
}
268
; vld4dup of <1 x i64> from %src; the result is stored to %dest and %src
; advanced by %inc bytes is returned. Expected codegen: a four-register
; vld1.64 (not an all-lanes form) on r1 with a :64 alignment qualifier and
; register post-increment by r2.
define ptr @test_vld4_dup_u64_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld4_dup_u64_update_reg:
; CHECK:         vld1.64 {d16, d17, d18, d19}, [r1:64], r2
entry:
  %tmp = tail call %struct.uint64x1x4_t @llvm.arm.neon.vld4dup.v1i64.p0(ptr %src, i32 8)
  store %struct.uint64x1x4_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %updated_src
}
278
; vld4dup of <8 x i8> from %src; the result is stored to %dest and %src
; advanced by 4 bytes is returned. Expected codegen: one all-lanes vld4.8 on
; r1 with post-increment writeback ("!").
define ptr @test_vld4_dup_u8_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld4_dup_u8_update:
; CHECK:         vld4.8 {d16[], d17[], d18[], d19[]}, [r1]!
entry:
  %tmp = tail call %struct.uint8x8x4_t @llvm.arm.neon.vld4dup.v8i8.p0(ptr %src, i32 1)
  store %struct.uint8x8x4_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 4
  ret ptr %updated_src
}
288
; vld4dup of <8 x i8> from %src; the result is stored to %dest and %src
; advanced by %inc bytes is returned. Expected codegen: one all-lanes vld4.8
; on r1 with register post-increment by r2.
define ptr @test_vld4_dup_u8_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld4_dup_u8_update_reg:
; CHECK:         vld4.8 {d16[], d17[], d18[], d19[]}, [r1], r2
entry:
  %tmp = tail call %struct.uint8x8x4_t @llvm.arm.neon.vld4dup.v8i8.p0(ptr %src, i32 1)
  store %struct.uint8x8x4_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %updated_src
}
298
; vld2dup of <8 x i16> from %src; the result is stored to %dest and %src
; advanced by 4 bytes is returned. Expected codegen: two all-lanes vld2.16
; loads from r1 on consecutive lines. The first fills d16/d18 without
; writeback; the second fills d17/d19 and post-increments r1 ("!").
; NOTE(review): %dest0 is never used in the body and the sibling tests take
; only (%dest, %src). Confirm whether the extra parameter is intentional.
define ptr @test_vld2q_dup_u16_update(ptr %dest, ptr %src, ptr %dest0) {
; CHECK-LABEL: test_vld2q_dup_u16_update:
; CHECK:         vld2.16 {d16[], d18[]}, [r1]
; CHECK-NEXT:    vld2.16 {d17[], d19[]}, [r1]!
entry:
  %tmp = tail call %struct.uint16x8x2_t @llvm.arm.neon.vld2dup.v8i16.p0(ptr %src, i32 2)
  store %struct.uint16x8x2_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 4
  ret ptr %updated_src
}
309
; vld2dup of <8 x i16> from %src; the result is stored to %dest and %src
; advanced by %inc bytes is returned. Expected codegen: two all-lanes vld2.16
; loads from r1 on consecutive lines. The first fills d16/d18 without
; writeback; the second fills d17/d19 and post-increments r1 by r2.
define ptr @test_vld2q_dup_u16_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld2q_dup_u16_update_reg:
; CHECK:         vld2.16 {d16[], d18[]}, [r1]
; CHECK-NEXT:    vld2.16 {d17[], d19[]}, [r1], r2
entry:
  %tmp = tail call %struct.uint16x8x2_t @llvm.arm.neon.vld2dup.v8i16.p0(ptr %src, i32 2)
  store %struct.uint16x8x2_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %updated_src
}
320
; vld2dup of <4 x i32> from %src; the result is stored to %dest and %src
; advanced by 8 bytes is returned. Expected codegen: two all-lanes vld2.32
; loads from r1 on consecutive lines. The first fills d16/d18 without
; writeback; the second fills d17/d19 and post-increments r1 ("!").
define ptr @test_vld2q_dup_u32_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld2q_dup_u32_update:
; CHECK:         vld2.32 {d16[], d18[]}, [r1]
; CHECK-NEXT:    vld2.32 {d17[], d19[]}, [r1]!
entry:
  %tmp = tail call %struct.uint32x4x2_t @llvm.arm.neon.vld2dup.v4i32.p0(ptr %src, i32 4)
  store %struct.uint32x4x2_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 8
  ret ptr %updated_src
}
331
; vld2dup of <4 x i32> from %src; the result is stored to %dest and %src
; advanced by %inc bytes is returned. Expected codegen: two all-lanes vld2.32
; loads from r1 on consecutive lines. The first fills d16/d18 without
; writeback; the second fills d17/d19 and post-increments r1 by r2.
define ptr @test_vld2q_dup_u32_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld2q_dup_u32_update_reg:
; CHECK:         vld2.32 {d16[], d18[]}, [r1]
; CHECK-NEXT:    vld2.32 {d17[], d19[]}, [r1], r2
entry:
  %tmp = tail call %struct.uint32x4x2_t @llvm.arm.neon.vld2dup.v4i32.p0(ptr %src, i32 4)
  store %struct.uint32x4x2_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %updated_src
}
342
; vld2dup of <16 x i8> from %src; the result is stored to %dest and %src
; advanced by 2 bytes is returned. Expected codegen: two all-lanes vld2.8
; loads from r1 on consecutive lines. The first fills d16/d18 without
; writeback; the second fills d17/d19 and post-increments r1 ("!").
define ptr @test_vld2q_dup_u8_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld2q_dup_u8_update:
; CHECK:         vld2.8 {d16[], d18[]}, [r1]
; CHECK-NEXT:    vld2.8 {d17[], d19[]}, [r1]!
entry:
  %tmp = tail call %struct.uint8x16x2_t @llvm.arm.neon.vld2dup.v16i8.p0(ptr %src, i32 1)
  store %struct.uint8x16x2_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 2
  ret ptr %updated_src
}
353
; vld2dup of <16 x i8> from %src; the result is stored to %dest and %src
; advanced by %inc bytes is returned. Expected codegen: two all-lanes vld2.8
; loads from r1 on consecutive lines. The first fills d16/d18 without
; writeback; the second fills d17/d19 and post-increments r1 by r2.
define ptr @test_vld2q_dup_u8_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld2q_dup_u8_update_reg:
; CHECK:         vld2.8 {d16[], d18[]}, [r1]
; CHECK-NEXT:    vld2.8 {d17[], d19[]}, [r1], r2
entry:
  %tmp = tail call %struct.uint8x16x2_t @llvm.arm.neon.vld2dup.v16i8.p0(ptr %src, i32 1)
  store %struct.uint8x16x2_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %updated_src
}
364
; vld3dup of <8 x i16> from %src; the result is stored to %dest and %src
; advanced by 6 bytes is returned. Expected codegen: two all-lanes vld3.16
; loads from r1. The first fills d16/d18/d20 without writeback; the second
; fills d17/d19/d21 and post-increments r1 ("!"). The second load may appear
; on any later line; it is not required to follow immediately.
define ptr @test_vld3q_dup_u16_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld3q_dup_u16_update:
; CHECK:         vld3.16 {d16[], d18[], d20[]}, [r1]
; CHECK:         vld3.16 {d17[], d19[], d21[]}, [r1]!
entry:
  %tmp = tail call %struct.uint16x8x3_t @llvm.arm.neon.vld3dup.v8i16.p0(ptr %src, i32 2)
  store %struct.uint16x8x3_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 6
  ret ptr %updated_src
}
375
; vld3dup of <8 x i16> from %src; the result is stored to %dest and %src
; advanced by %inc bytes is returned. Expected codegen: two all-lanes vld3.16
; loads from r1 on consecutive lines. The first fills d16/d18/d20 without
; writeback; the second fills d17/d19/d21 and post-increments r1 by r2.
define ptr @test_vld3q_dup_u16_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld3q_dup_u16_update_reg:
; CHECK:         vld3.16 {d16[], d18[], d20[]}, [r1]
; CHECK-NEXT:    vld3.16 {d17[], d19[], d21[]}, [r1], r2
entry:
  %tmp = tail call %struct.uint16x8x3_t @llvm.arm.neon.vld3dup.v8i16.p0(ptr %src, i32 2)
  store %struct.uint16x8x3_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %updated_src
}
386
; vld3dup of <4 x i32> from %src; the result is stored to %dest and %src
; advanced by 12 bytes is returned. Expected codegen: two all-lanes vld3.32
; loads from r1. The first fills d16/d18/d20 without writeback; the second
; fills d17/d19/d21 and post-increments r1 ("!"). The second load may appear
; on any later line; it is not required to follow immediately.
define ptr @test_vld3q_dup_u32_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld3q_dup_u32_update:
; CHECK:         vld3.32 {d16[], d18[], d20[]}, [r1]
; CHECK:         vld3.32 {d17[], d19[], d21[]}, [r1]!
entry:
  %tmp = tail call %struct.uint32x4x3_t @llvm.arm.neon.vld3dup.v4i32.p0(ptr %src, i32 4)
  store %struct.uint32x4x3_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 12
  ret ptr %updated_src
}
397
; vld3dup of <4 x i32> from %src; the result is stored to %dest and %src
; advanced by %inc bytes is returned. Expected codegen: two all-lanes vld3.32
; loads from r1 on consecutive lines. The first fills d16/d18/d20 without
; writeback; the second fills d17/d19/d21 and post-increments r1 by r2.
define ptr @test_vld3q_dup_u32_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld3q_dup_u32_update_reg:
; CHECK:         vld3.32 {d16[], d18[], d20[]}, [r1]
; CHECK-NEXT:    vld3.32 {d17[], d19[], d21[]}, [r1], r2
entry:
  %tmp = tail call %struct.uint32x4x3_t @llvm.arm.neon.vld3dup.v4i32.p0(ptr %src, i32 4)
  store %struct.uint32x4x3_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %updated_src
}
408
; vld3dup of <16 x i8> from %src; the result is stored to %dest and %src
; advanced by 3 bytes is returned. Expected codegen: two all-lanes vld3.8
; loads from r1. The first fills d16/d18/d20 without writeback; the second
; fills d17/d19/d21 and post-increments r1 ("!"). The second load may appear
; on any later line; it is not required to follow immediately.
define ptr @test_vld3q_dup_u8_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld3q_dup_u8_update:
; CHECK:         vld3.8 {d16[], d18[], d20[]}, [r1]
; CHECK:         vld3.8 {d17[], d19[], d21[]}, [r1]!
entry:
  %tmp = tail call %struct.uint8x16x3_t @llvm.arm.neon.vld3dup.v16i8.p0(ptr %src, i32 1)
  store %struct.uint8x16x3_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 3
  ret ptr %updated_src
}
419
; vld3dup of <16 x i8> from %src; the result is stored to %dest and %src
; advanced by %inc bytes is returned. Expected codegen: two all-lanes vld3.8
; loads from r1 on consecutive lines. The first fills d16/d18/d20 without
; writeback; the second fills d17/d19/d21 and post-increments r1 by r2.
define ptr @test_vld3q_dup_u8_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld3q_dup_u8_update_reg:
; CHECK:         vld3.8 {d16[], d18[], d20[]}, [r1]
; CHECK-NEXT:    vld3.8 {d17[], d19[], d21[]}, [r1], r2
entry:
  %tmp = tail call %struct.uint8x16x3_t @llvm.arm.neon.vld3dup.v16i8.p0(ptr %src, i32 1)
  store %struct.uint8x16x3_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %updated_src
}
430
; vld4dup of <8 x i16> from %src; the result is stored to %dest and %src
; advanced by 8 bytes is returned. Expected codegen: two all-lanes vld4.16
; loads from r1. The first fills d16/d18/d20/d22 without writeback; the second
; fills d17/d19/d21/d23 and post-increments r1 ("!"). The second load may
; appear on any later line; it is not required to follow immediately.
define ptr @test_vld4q_dup_u16_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld4q_dup_u16_update:
; CHECK:         vld4.16 {d16[], d18[], d20[], d22[]}, [r1]
; CHECK:         vld4.16 {d17[], d19[], d21[], d23[]}, [r1]!
entry:
  %tmp = tail call %struct.uint16x8x4_t @llvm.arm.neon.vld4dup.v8i16.p0(ptr %src, i32 2)
  store %struct.uint16x8x4_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 8
  ret ptr %updated_src
}
441
; vld4dup of <8 x i16> from %src; the result is stored to %dest and %src
; advanced by %inc bytes is returned. Expected codegen: two all-lanes vld4.16
; loads from r1 on consecutive lines. The first fills d16/d18/d20/d22 without
; writeback; the second fills d17/d19/d21/d23 and post-increments r1 by r2.
define ptr @test_vld4q_dup_u16_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld4q_dup_u16_update_reg:
; CHECK:         vld4.16 {d16[], d18[], d20[], d22[]}, [r1]
; CHECK-NEXT:    vld4.16 {d17[], d19[], d21[], d23[]}, [r1], r2
entry:
  %tmp = tail call %struct.uint16x8x4_t @llvm.arm.neon.vld4dup.v8i16.p0(ptr %src, i32 2)
  store %struct.uint16x8x4_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %updated_src
}
452
; vld4dup of <4 x i32> from %src; the result is stored to %dest and %src
; advanced by 16 bytes is returned. Expected codegen: two all-lanes vld4.32
; loads from r1. The first fills d16/d18/d20/d22 without writeback; the second
; fills d17/d19/d21/d23 and post-increments r1 ("!"). The second load may
; appear on any later line; it is not required to follow immediately.
define ptr @test_vld4q_dup_u32_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld4q_dup_u32_update:
; CHECK:         vld4.32 {d16[], d18[], d20[], d22[]}, [r1]
; CHECK:         vld4.32 {d17[], d19[], d21[], d23[]}, [r1]!
entry:
  %tmp = tail call %struct.uint32x4x4_t @llvm.arm.neon.vld4dup.v4i32.p0(ptr %src, i32 4)
  store %struct.uint32x4x4_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 16
  ret ptr %updated_src
}
463
; vld4dup of <4 x i32> from %src; the result is stored to %dest and %src
; advanced by %inc bytes is returned. Expected codegen: two all-lanes vld4.32
; loads from r1 on consecutive lines. The first fills d16/d18/d20/d22 without
; writeback; the second fills d17/d19/d21/d23 and post-increments r1 by r2.
define ptr @test_vld4q_dup_u32_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld4q_dup_u32_update_reg:
; CHECK:         vld4.32 {d16[], d18[], d20[], d22[]}, [r1]
; CHECK-NEXT:    vld4.32 {d17[], d19[], d21[], d23[]}, [r1], r2
entry:
  %tmp = tail call %struct.uint32x4x4_t @llvm.arm.neon.vld4dup.v4i32.p0(ptr %src, i32 4)
  store %struct.uint32x4x4_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %updated_src
}
474
; vld4dup of <16 x i8> from %src; the result is stored to %dest and %src
; advanced by 4 bytes is returned. Expected codegen: two all-lanes vld4.8
; loads from r1. The first fills d16/d18/d20/d22 without writeback; the second
; fills d17/d19/d21/d23 and post-increments r1 ("!"). The second load may
; appear on any later line; it is not required to follow immediately.
define ptr @test_vld4q_dup_u8_update(ptr %dest, ptr %src) {
; CHECK-LABEL: test_vld4q_dup_u8_update:
; CHECK:         vld4.8 {d16[], d18[], d20[], d22[]}, [r1]
; CHECK:         vld4.8 {d17[], d19[], d21[], d23[]}, [r1]!
entry:
  %tmp = tail call %struct.uint8x16x4_t @llvm.arm.neon.vld4dup.v16i8.p0(ptr %src, i32 1)
  store %struct.uint8x16x4_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 4
  ret ptr %updated_src
}
485
; vld4dup of <16 x i8> from %src; the result is stored to %dest and %src
; advanced by %inc bytes is returned. Expected codegen: two all-lanes vld4.8
; loads from r1 on consecutive lines. The first fills d16/d18/d20/d22 without
; writeback; the second fills d17/d19/d21/d23 and post-increments r1 by r2.
define ptr @test_vld4q_dup_u8_update_reg(ptr %dest, ptr %src, i32 %inc) {
; CHECK-LABEL: test_vld4q_dup_u8_update_reg:
; CHECK:         vld4.8 {d16[], d18[], d20[], d22[]}, [r1]
; CHECK-NEXT:    vld4.8 {d17[], d19[], d21[], d23[]}, [r1], r2
entry:
  %tmp = tail call %struct.uint8x16x4_t @llvm.arm.neon.vld4dup.v16i8.p0(ptr %src, i32 1)
  store %struct.uint8x16x4_t %tmp, ptr %dest, align 8
  %updated_src = getelementptr inbounds i8, ptr %src, i32 %inc
  ret ptr %updated_src
}
496