; xref: /llvm-project/llvm/test/CodeGen/ARM/arm-vlddup.ll (revision de75e5079ae1d4894c918fd452e468fb6a888be1)
; RUN: llc < %s -mtriple=armv8-linux-gnueabi --float-abi=hard -verify-machineinstrs \
; RUN:     -asm-verbose=false | FileCheck %s

; Aggregate return types used by the vldNdup tests in this file. Each struct
; holds 2, 3 or 4 identical vectors.

; 64-bit vectors of i16.
%struct.uint16x4x2_t = type { <4 x i16>, <4 x i16> }
%struct.uint16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
%struct.uint16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }

; 64-bit vectors of i32.
%struct.uint32x2x2_t = type { <2 x i32>, <2 x i32> }
%struct.uint32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
%struct.uint32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }

; 64-bit vectors holding a single i64.
%struct.uint64x1x2_t = type { <1 x i64>, <1 x i64> }
%struct.uint64x1x3_t = type { <1 x i64>, <1 x i64>, <1 x i64> }
%struct.uint64x1x4_t = type { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }

; 64-bit vectors of i8.
%struct.uint8x8x2_t = type { <8 x i8>, <8 x i8> }
%struct.uint8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
%struct.uint8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }

; 128-bit vectors of i16.
%struct.uint16x8x2_t = type { <8 x i16>, <8 x i16> }
%struct.uint16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
%struct.uint16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }

; 128-bit vectors of i32.
%struct.uint32x4x2_t = type { <4 x i32>, <4 x i32> }
%struct.uint32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
%struct.uint32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }

; 128-bit vectors of i8.
%struct.uint8x16x2_t = type { <16 x i8>, <16 x i8> }
%struct.uint8x16x3_t = type { <16 x i8>, <16 x i8>, <16 x i8> }
%struct.uint8x16x4_t = type { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }

; NEON "load N-element structure to all lanes" intrinsics exercised below.
; Every declaration takes the source pointer and a trailing i32, which is the
; intrinsic's alignment operand.

; vld2dup, 64-bit result vectors.
declare %struct.uint8x8x2_t @llvm.arm.neon.vld2dup.v8i8.p0(ptr, i32)
declare %struct.uint16x4x2_t @llvm.arm.neon.vld2dup.v4i16.p0(ptr, i32)
declare %struct.uint32x2x2_t @llvm.arm.neon.vld2dup.v2i32.p0(ptr, i32)
declare %struct.uint64x1x2_t @llvm.arm.neon.vld2dup.v1i64.p0(ptr, i32)

; vld3dup, 64-bit result vectors.
declare %struct.uint8x8x3_t @llvm.arm.neon.vld3dup.v8i8.p0(ptr, i32)
declare %struct.uint16x4x3_t @llvm.arm.neon.vld3dup.v4i16.p0(ptr, i32)
declare %struct.uint32x2x3_t @llvm.arm.neon.vld3dup.v2i32.p0(ptr, i32)
declare %struct.uint64x1x3_t @llvm.arm.neon.vld3dup.v1i64.p0(ptr, i32)

; vld4dup, 64-bit result vectors.
declare %struct.uint8x8x4_t @llvm.arm.neon.vld4dup.v8i8.p0(ptr, i32)
declare %struct.uint16x4x4_t @llvm.arm.neon.vld4dup.v4i16.p0(ptr, i32)
declare %struct.uint32x2x4_t @llvm.arm.neon.vld4dup.v2i32.p0(ptr, i32)
declare %struct.uint64x1x4_t @llvm.arm.neon.vld4dup.v1i64.p0(ptr, i32)

; vld2dup, 128-bit result vectors.
declare %struct.uint8x16x2_t @llvm.arm.neon.vld2dup.v16i8.p0(ptr, i32)
declare %struct.uint16x8x2_t @llvm.arm.neon.vld2dup.v8i16.p0(ptr, i32)
declare %struct.uint32x4x2_t @llvm.arm.neon.vld2dup.v4i32.p0(ptr, i32)

; vld3dup, 128-bit result vectors.
declare %struct.uint8x16x3_t @llvm.arm.neon.vld3dup.v16i8.p0(ptr, i32)
declare %struct.uint16x8x3_t @llvm.arm.neon.vld3dup.v8i16.p0(ptr, i32)
declare %struct.uint32x4x3_t @llvm.arm.neon.vld3dup.v4i32.p0(ptr, i32)

; vld4dup, 128-bit result vectors.
declare %struct.uint8x16x4_t @llvm.arm.neon.vld4dup.v16i8.p0(ptr, i32)
declare %struct.uint16x8x4_t @llvm.arm.neon.vld4dup.v8i16.p0(ptr, i32)
declare %struct.uint32x4x4_t @llvm.arm.neon.vld4dup.v4i32.p0(ptr, i32)

; vld2dup of <4 x i16> (alignment operand 2): the expected output is a single
; all-lanes vld2.16 into d0/d1 from [r0], immediately followed by the return.
define %struct.uint16x4x2_t @test_vld2_dup_u16(ptr %src) {
; CHECK-LABEL: test_vld2_dup_u16:
; CHECK:         vld2.16 {d0[], d1[]}, [r0]
; CHECK-NEXT:    bx lr
entry:
  %tmp = tail call %struct.uint16x4x2_t @llvm.arm.neon.vld2dup.v4i16.p0(ptr %src, i32 2)
  ret %struct.uint16x4x2_t %tmp
}

; vld2dup of <2 x i32> (alignment operand 4): the expected output is a single
; all-lanes vld2.32 into d0/d1 from [r0], immediately followed by the return.
define %struct.uint32x2x2_t @test_vld2_dup_u32(ptr %src) {
; CHECK-LABEL: test_vld2_dup_u32:
; CHECK:         vld2.32 {d0[], d1[]}, [r0]
; CHECK-NEXT:    bx lr
entry:
  %tmp = tail call %struct.uint32x2x2_t @llvm.arm.neon.vld2dup.v2i32.p0(ptr %src, i32 4)
  ret %struct.uint32x2x2_t %tmp
}

; vld2dup of <1 x i64> (alignment operand 8): the expected output is a plain
; two-register vld1.64 into d0/d1 rather than an all-lanes form, with a :64
; alignment qualifier on the address, immediately followed by the return.
define %struct.uint64x1x2_t @test_vld2_dup_u64(ptr %src) {
; CHECK-LABEL: test_vld2_dup_u64:
; CHECK:         vld1.64 {d0, d1}, [r0:64]
; CHECK-NEXT:    bx lr
entry:
  %tmp = tail call %struct.uint64x1x2_t @llvm.arm.neon.vld2dup.v1i64.p0(ptr %src, i32 8)
  ret %struct.uint64x1x2_t %tmp
}

; vld2dup of <8 x i8> (alignment operand 1): the expected output is a single
; all-lanes vld2.8 into d0/d1 from [r0], immediately followed by the return.
define %struct.uint8x8x2_t @test_vld2_dup_u8(ptr %src) {
; CHECK-LABEL: test_vld2_dup_u8:
; CHECK:         vld2.8 {d0[], d1[]}, [r0]
; CHECK-NEXT:    bx lr
entry:
  %tmp = tail call %struct.uint8x8x2_t @llvm.arm.neon.vld2dup.v8i8.p0(ptr %src, i32 1)
  ret %struct.uint8x8x2_t %tmp
}

; vld3dup of <4 x i16> (alignment operand 2): the expected output is a single
; all-lanes vld3.16 into d0-d2 from [r0], immediately followed by the return.
define %struct.uint16x4x3_t @test_vld3_dup_u16(ptr %src) {
; CHECK-LABEL: test_vld3_dup_u16:
; CHECK:         vld3.16 {d0[], d1[], d2[]}, [r0]
; CHECK-NEXT:    bx lr
entry:
  %tmp = tail call %struct.uint16x4x3_t @llvm.arm.neon.vld3dup.v4i16.p0(ptr %src, i32 2)
  ret %struct.uint16x4x3_t %tmp
}

; vld3dup of <2 x i32> (alignment operand 4): the expected output is a single
; all-lanes vld3.32 into d0-d2 from [r0], immediately followed by the return.
define %struct.uint32x2x3_t @test_vld3_dup_u32(ptr %src) {
; CHECK-LABEL: test_vld3_dup_u32:
; CHECK:         vld3.32 {d0[], d1[], d2[]}, [r0]
; CHECK-NEXT:    bx lr
entry:
  %tmp = tail call %struct.uint32x2x3_t @llvm.arm.neon.vld3dup.v2i32.p0(ptr %src, i32 4)
  ret %struct.uint32x2x3_t %tmp
}

; vld3dup of <1 x i64> (alignment operand 8): the expected output is a plain
; three-register vld1.64 into d0-d2 from [r0] rather than an all-lanes form,
; immediately followed by the return.
; NOTE(review): unlike the two- and four-register u64 tests in this file, the
; expected address carries no :64 qualifier even though the same alignment
; operand is passed - presumably intentional; confirm against the ARM backend.
define %struct.uint64x1x3_t @test_vld3_dup_u64(ptr %src) {
; CHECK-LABEL: test_vld3_dup_u64:
; CHECK:         vld1.64 {d0, d1, d2}, [r0]
; CHECK-NEXT:    bx lr
entry:
  %tmp = tail call %struct.uint64x1x3_t @llvm.arm.neon.vld3dup.v1i64.p0(ptr %src, i32 8)
  ret %struct.uint64x1x3_t %tmp
}

; vld3dup of <8 x i8> (alignment operand 1): the expected output is a single
; all-lanes vld3.8 into d0-d2 from [r0], immediately followed by the return.
define %struct.uint8x8x3_t @test_vld3_dup_u8(ptr %src) {
; CHECK-LABEL: test_vld3_dup_u8:
; CHECK:         vld3.8 {d0[], d1[], d2[]}, [r0]
; CHECK-NEXT:    bx lr
entry:
  %tmp = tail call %struct.uint8x8x3_t @llvm.arm.neon.vld3dup.v8i8.p0(ptr %src, i32 1)
  ret %struct.uint8x8x3_t %tmp
}

; vld4dup of <4 x i16> (alignment operand 2): the expected output is a single
; all-lanes vld4.16 into d0-d3 from [r0], immediately followed by the return.
define %struct.uint16x4x4_t @test_vld4_dup_u16(ptr %src) {
; CHECK-LABEL: test_vld4_dup_u16:
; CHECK:         vld4.16 {d0[], d1[], d2[], d3[]}, [r0]
; CHECK-NEXT:    bx lr
entry:
  %tmp = tail call %struct.uint16x4x4_t @llvm.arm.neon.vld4dup.v4i16.p0(ptr %src, i32 2)
  ret %struct.uint16x4x4_t %tmp
}

; vld4dup of <2 x i32> (alignment operand 4): the expected output is a single
; all-lanes vld4.32 into d0-d3 from [r0], immediately followed by the return.
define %struct.uint32x2x4_t @test_vld4_dup_u32(ptr %src) {
; CHECK-LABEL: test_vld4_dup_u32:
; CHECK:         vld4.32 {d0[], d1[], d2[], d3[]}, [r0]
; CHECK-NEXT:    bx lr
entry:
  %tmp = tail call %struct.uint32x2x4_t @llvm.arm.neon.vld4dup.v2i32.p0(ptr %src, i32 4)
  ret %struct.uint32x2x4_t %tmp
}

; vld4dup of <1 x i64> (alignment operand 8): the expected output is a plain
; four-register vld1.64 into d0-d3 rather than an all-lanes form, with a :64
; alignment qualifier on the address, immediately followed by the return.
define %struct.uint64x1x4_t @test_vld4_dup_u64(ptr %src) {
; CHECK-LABEL: test_vld4_dup_u64:
; CHECK:         vld1.64 {d0, d1, d2, d3}, [r0:64]
; CHECK-NEXT:    bx lr
entry:
  %tmp = tail call %struct.uint64x1x4_t @llvm.arm.neon.vld4dup.v1i64.p0(ptr %src, i32 8)
  ret %struct.uint64x1x4_t %tmp
}

; vld4dup of <8 x i8> (alignment operand 1): the expected output is a single
; all-lanes vld4.8 into d0-d3 from [r0], immediately followed by the return.
define %struct.uint8x8x4_t @test_vld4_dup_u8(ptr %src) {
; CHECK-LABEL: test_vld4_dup_u8:
; CHECK:         vld4.8 {d0[], d1[], d2[], d3[]}, [r0]
; CHECK-NEXT:    bx lr
entry:
  %tmp = tail call %struct.uint8x8x4_t @llvm.arm.neon.vld4dup.v8i8.p0(ptr %src, i32 1)
  ret %struct.uint8x8x4_t %tmp
}

; vld2dup of <8 x i16> (alignment operand 2): the expected output is two
; all-lanes vld2.16 loads from [r0] - first into the even D registers
; (d0, d2), then into the odd ones (d1, d3) - followed by the return.
define %struct.uint16x8x2_t @test_vld2q_dup_u16(ptr %src) {
; CHECK-LABEL: test_vld2q_dup_u16:
; CHECK:         vld2.16 {d0[], d2[]}, [r0]
; CHECK-NEXT:    vld2.16 {d1[], d3[]}, [r0]
; CHECK-NEXT:    bx lr
entry:
  %tmp = tail call %struct.uint16x8x2_t @llvm.arm.neon.vld2dup.v8i16.p0(ptr %src, i32 2)
  ret %struct.uint16x8x2_t %tmp
}

; vld2dup of <4 x i32> (alignment operand 4): the expected output is two
; all-lanes vld2.32 loads from [r0] - first into the even D registers
; (d0, d2), then into the odd ones (d1, d3) - followed by the return.
define %struct.uint32x4x2_t @test_vld2q_dup_u32(ptr %src) {
; CHECK-LABEL: test_vld2q_dup_u32:
; CHECK:         vld2.32 {d0[], d2[]}, [r0]
; CHECK-NEXT:    vld2.32 {d1[], d3[]}, [r0]
; CHECK-NEXT:    bx lr
entry:
  %tmp = tail call %struct.uint32x4x2_t @llvm.arm.neon.vld2dup.v4i32.p0(ptr %src, i32 4)
  ret %struct.uint32x4x2_t %tmp
}

; vld2dup of <16 x i8> (alignment operand 1): the expected output is two
; all-lanes vld2.8 loads from [r0] - first into the even D registers
; (d0, d2), then into the odd ones (d1, d3) - followed by the return.
define %struct.uint8x16x2_t @test_vld2q_dup_u8(ptr %src) {
; CHECK-LABEL: test_vld2q_dup_u8:
; CHECK:         vld2.8 {d0[], d2[]}, [r0]
; CHECK-NEXT:    vld2.8 {d1[], d3[]}, [r0]
; CHECK-NEXT:    bx lr
entry:
  %tmp = tail call %struct.uint8x16x2_t @llvm.arm.neon.vld2dup.v16i8.p0(ptr %src, i32 1)
  ret %struct.uint8x16x2_t %tmp
}

; vld3dup of <8 x i16> (alignment operand 2): the expected output is two
; all-lanes vld3.16 loads from [r0] - first into the even D registers
; (d0, d2, d4), then into the odd ones (d1, d3, d5) - followed by the return.
define %struct.uint16x8x3_t @test_vld3q_dup_u16(ptr %src) {
; CHECK-LABEL: test_vld3q_dup_u16:
; CHECK:         vld3.16 {d0[], d2[], d4[]}, [r0]
; CHECK-NEXT:    vld3.16 {d1[], d3[], d5[]}, [r0]
; CHECK-NEXT:    bx lr
entry:
  %tmp = tail call %struct.uint16x8x3_t @llvm.arm.neon.vld3dup.v8i16.p0(ptr %src, i32 2)
  ret %struct.uint16x8x3_t %tmp
}

; vld3dup of <4 x i32> (alignment operand 4): the expected output is two
; all-lanes vld3.32 loads from [r0] - first into the even D registers
; (d0, d2, d4), then into the odd ones (d1, d3, d5) - followed by the return.
define %struct.uint32x4x3_t @test_vld3q_dup_u32(ptr %src) {
; CHECK-LABEL: test_vld3q_dup_u32:
; CHECK:         vld3.32 {d0[], d2[], d4[]}, [r0]
; CHECK-NEXT:    vld3.32 {d1[], d3[], d5[]}, [r0]
; CHECK-NEXT:    bx lr
entry:
  %tmp = tail call %struct.uint32x4x3_t @llvm.arm.neon.vld3dup.v4i32.p0(ptr %src, i32 4)
  ret %struct.uint32x4x3_t %tmp
}

; vld3dup of <16 x i8> (alignment operand 1): the expected output is two
; all-lanes vld3.8 loads from [r0] - first into the even D registers
; (d0, d2, d4), then into the odd ones (d1, d3, d5) - followed by the return.
define %struct.uint8x16x3_t @test_vld3q_dup_u8(ptr %src) {
; CHECK-LABEL: test_vld3q_dup_u8:
; CHECK:         vld3.8 {d0[], d2[], d4[]}, [r0]
; CHECK-NEXT:    vld3.8 {d1[], d3[], d5[]}, [r0]
; CHECK-NEXT:    bx lr
entry:
  %tmp = tail call %struct.uint8x16x3_t @llvm.arm.neon.vld3dup.v16i8.p0(ptr %src, i32 1)
  ret %struct.uint8x16x3_t %tmp
}

; vld4dup of <8 x i16> (alignment operand 2): the expected output is two
; all-lanes vld4.16 loads from [r0] - first into the even D registers
; (d0, d2, d4, d6), then into the odd ones (d1, d3, d5, d7) - followed by the
; return.
define %struct.uint16x8x4_t @test_vld4q_dup_u16(ptr %src) {
; CHECK-LABEL: test_vld4q_dup_u16:
; CHECK:         vld4.16 {d0[], d2[], d4[], d6[]}, [r0]
; CHECK-NEXT:    vld4.16 {d1[], d3[], d5[], d7[]}, [r0]
; CHECK-NEXT:    bx lr
entry:
  %tmp = tail call %struct.uint16x8x4_t @llvm.arm.neon.vld4dup.v8i16.p0(ptr %src, i32 2)
  ret %struct.uint16x8x4_t %tmp
}

; vld4dup of <4 x i32> (alignment operand 4): the expected output is two
; all-lanes vld4.32 loads from [r0] - first into the even D registers
; (d0, d2, d4, d6), then into the odd ones (d1, d3, d5, d7) - followed by the
; return.
define %struct.uint32x4x4_t @test_vld4q_dup_u32(ptr %src) {
; CHECK-LABEL: test_vld4q_dup_u32:
; CHECK:         vld4.32 {d0[], d2[], d4[], d6[]}, [r0]
; CHECK-NEXT:    vld4.32 {d1[], d3[], d5[], d7[]}, [r0]
; CHECK-NEXT:    bx lr
entry:
  %tmp = tail call %struct.uint32x4x4_t @llvm.arm.neon.vld4dup.v4i32.p0(ptr %src, i32 4)
  ret %struct.uint32x4x4_t %tmp
}

; vld4dup of <16 x i8> (alignment operand 1): the expected output is two
; all-lanes vld4.8 loads from [r0] - first into the even D registers
; (d0, d2, d4, d6), then into the odd ones (d1, d3, d5, d7) - followed by the
; return.
define %struct.uint8x16x4_t @test_vld4q_dup_u8(ptr %src) {
; CHECK-LABEL: test_vld4q_dup_u8:
; CHECK:         vld4.8 {d0[], d2[], d4[], d6[]}, [r0]
; CHECK-NEXT:    vld4.8 {d1[], d3[], d5[], d7[]}, [r0]
; CHECK-NEXT:    bx lr
entry:
  %tmp = tail call %struct.uint8x16x4_t @llvm.arm.neon.vld4dup.v16i8.p0(ptr %src, i32 1)
  ret %struct.uint8x16x4_t %tmp
}
