xref: /llvm-project/llvm/test/CodeGen/AArch64/arm64-ld1.ll (revision 61510b51c33464a6bc15e4cf5b1ee07e2e0ec1c9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3; RUN: llc < %s -global-isel=1 -global-isel-abort=1 -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4
5%struct.__neon_int8x8x2_t = type { <8 x i8>,  <8 x i8> }
6%struct.__neon_int8x8x3_t = type { <8 x i8>,  <8 x i8>,  <8 x i8> }
7%struct.__neon_int8x8x4_t = type { <8 x i8>,  <8 x i8>, <8 x i8>,  <8 x i8> }
8
9define %struct.__neon_int8x8x2_t @ld2_8b(ptr %A) nounwind {
10; CHECK-LABEL: ld2_8b:
11; CHECK:       // %bb.0:
12; CHECK-NEXT:    ld2.8b { v0, v1 }, [x0]
13; CHECK-NEXT:    ret
14; The ld2 intrinsic must select one ld2.8b whose destinations are the ABI result
15; registers (v0, v1) and whose address operand is the ABI pointer argument (x0).
16	%tmp2 = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0(ptr %A)
17	ret %struct.__neon_int8x8x2_t  %tmp2
18}
19
20define %struct.__neon_int8x8x3_t @ld3_8b(ptr %A) nounwind {
21; CHECK-LABEL: ld3_8b:
22; CHECK:       // %bb.0:
23; CHECK-NEXT:    ld3.8b { v0, v1, v2 }, [x0]
24; CHECK-NEXT:    ret
25; Expect a single ld3.8b that fills the ABI result registers v0-v2 from the address in the ABI pointer argument x0.
26	%tmp2 = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3.v8i8.p0(ptr %A)
27	ret %struct.__neon_int8x8x3_t  %tmp2
28}
29
30define %struct.__neon_int8x8x4_t @ld4_8b(ptr %A) nounwind {
31; CHECK-LABEL: ld4_8b:
32; CHECK:       // %bb.0:
33; CHECK-NEXT:    ld4.8b { v0, v1, v2, v3 }, [x0]
34; CHECK-NEXT:    ret
35; Expect a single ld4.8b that fills the ABI result registers v0-v3 from the address in the ABI pointer argument x0.
36	%tmp2 = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4.v8i8.p0(ptr %A)
37	ret %struct.__neon_int8x8x4_t  %tmp2
38}
39
40declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0(ptr) nounwind readonly
41declare %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3.v8i8.p0(ptr) nounwind readonly
42declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4.v8i8.p0(ptr) nounwind readonly
43
44%struct.__neon_int8x16x2_t = type { <16 x i8>,  <16 x i8> }
45%struct.__neon_int8x16x3_t = type { <16 x i8>,  <16 x i8>,  <16 x i8> }
46%struct.__neon_int8x16x4_t = type { <16 x i8>,  <16 x i8>, <16 x i8>,  <16 x i8> }
47
48define %struct.__neon_int8x16x2_t @ld2_16b(ptr %A) nounwind {
49; CHECK-LABEL: ld2_16b:
50; CHECK:       // %bb.0:
51; CHECK-NEXT:    ld2.16b { v0, v1 }, [x0]
52; CHECK-NEXT:    ret
53; Expect a single ld2.16b that fills the ABI result registers v0-v1 from the address in the ABI pointer argument x0.
54  %tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2.v16i8.p0(ptr %A)
55  ret %struct.__neon_int8x16x2_t  %tmp2
56}
57
58define %struct.__neon_int8x16x3_t @ld3_16b(ptr %A) nounwind {
59; CHECK-LABEL: ld3_16b:
60; CHECK:       // %bb.0:
61; CHECK-NEXT:    ld3.16b { v0, v1, v2 }, [x0]
62; CHECK-NEXT:    ret
63; Expect a single ld3.16b that fills the ABI result registers v0-v2 from the address in the ABI pointer argument x0.
64  %tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3.v16i8.p0(ptr %A)
65  ret %struct.__neon_int8x16x3_t  %tmp2
66}
67
68define %struct.__neon_int8x16x4_t @ld4_16b(ptr %A) nounwind {
69; CHECK-LABEL: ld4_16b:
70; CHECK:       // %bb.0:
71; CHECK-NEXT:    ld4.16b { v0, v1, v2, v3 }, [x0]
72; CHECK-NEXT:    ret
73; Expect a single ld4.16b that fills the ABI result registers v0-v3 from the address in the ABI pointer argument x0.
74  %tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4.v16i8.p0(ptr %A)
75  ret %struct.__neon_int8x16x4_t  %tmp2
76}
77
78declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2.v16i8.p0(ptr) nounwind readonly
79declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3.v16i8.p0(ptr) nounwind readonly
80declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4.v16i8.p0(ptr) nounwind readonly
81
82%struct.__neon_int16x4x2_t = type { <4 x i16>,  <4 x i16> }
83%struct.__neon_int16x4x3_t = type { <4 x i16>,  <4 x i16>,  <4 x i16> }
84%struct.__neon_int16x4x4_t = type { <4 x i16>,  <4 x i16>, <4 x i16>,  <4 x i16> }
85
86define %struct.__neon_int16x4x2_t @ld2_4h(ptr %A) nounwind {
87; CHECK-LABEL: ld2_4h:
88; CHECK:       // %bb.0:
89; CHECK-NEXT:    ld2.4h { v0, v1 }, [x0]
90; CHECK-NEXT:    ret
91; Expect a single ld2.4h that fills the ABI result registers v0-v1 from the address in the ABI pointer argument x0.
92	%tmp2 = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2.v4i16.p0(ptr %A)
93	ret %struct.__neon_int16x4x2_t  %tmp2
94}
95
96define %struct.__neon_int16x4x3_t @ld3_4h(ptr %A) nounwind {
97; CHECK-LABEL: ld3_4h:
98; CHECK:       // %bb.0:
99; CHECK-NEXT:    ld3.4h { v0, v1, v2 }, [x0]
100; CHECK-NEXT:    ret
101; Expect a single ld3.4h that fills the ABI result registers v0-v2 from the address in the ABI pointer argument x0.
102	%tmp2 = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3.v4i16.p0(ptr %A)
103	ret %struct.__neon_int16x4x3_t  %tmp2
104}
105
106define %struct.__neon_int16x4x4_t @ld4_4h(ptr %A) nounwind {
107; CHECK-LABEL: ld4_4h:
108; CHECK:       // %bb.0:
109; CHECK-NEXT:    ld4.4h { v0, v1, v2, v3 }, [x0]
110; CHECK-NEXT:    ret
111; Expect a single ld4.4h that fills the ABI result registers v0-v3 from the address in the ABI pointer argument x0.
112	%tmp2 = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4.v4i16.p0(ptr %A)
113	ret %struct.__neon_int16x4x4_t  %tmp2
114}
115
116declare %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2.v4i16.p0(ptr) nounwind readonly
117declare %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3.v4i16.p0(ptr) nounwind readonly
118declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4.v4i16.p0(ptr) nounwind readonly
119
120%struct.__neon_int16x8x2_t = type { <8 x i16>,  <8 x i16> }
121%struct.__neon_int16x8x3_t = type { <8 x i16>,  <8 x i16>,  <8 x i16> }
122%struct.__neon_int16x8x4_t = type { <8 x i16>,  <8 x i16>, <8 x i16>,  <8 x i16> }
123
124define %struct.__neon_int16x8x2_t @ld2_8h(ptr %A) nounwind {
125; CHECK-LABEL: ld2_8h:
126; CHECK:       // %bb.0:
127; CHECK-NEXT:    ld2.8h { v0, v1 }, [x0]
128; CHECK-NEXT:    ret
129; Expect a single ld2.8h that fills the ABI result registers v0-v1 from the address in the ABI pointer argument x0.
130  %tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2.v8i16.p0(ptr %A)
131  ret %struct.__neon_int16x8x2_t  %tmp2
132}
133
134define %struct.__neon_int16x8x3_t @ld3_8h(ptr %A) nounwind {
135; CHECK-LABEL: ld3_8h:
136; CHECK:       // %bb.0:
137; CHECK-NEXT:    ld3.8h { v0, v1, v2 }, [x0]
138; CHECK-NEXT:    ret
139; Expect a single ld3.8h that fills the ABI result registers v0-v2 from the address in the ABI pointer argument x0.
140  %tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3.v8i16.p0(ptr %A)
141  ret %struct.__neon_int16x8x3_t %tmp2
142}
143
144define %struct.__neon_int16x8x4_t @ld4_8h(ptr %A) nounwind {
145; CHECK-LABEL: ld4_8h:
146; CHECK:       // %bb.0:
147; CHECK-NEXT:    ld4.8h { v0, v1, v2, v3 }, [x0]
148; CHECK-NEXT:    ret
149; Expect a single ld4.8h that fills the ABI result registers v0-v3 from the address in the ABI pointer argument x0.
150  %tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4.v8i16.p0(ptr %A)
151  ret %struct.__neon_int16x8x4_t  %tmp2
152}
153
154declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2.v8i16.p0(ptr) nounwind readonly
155declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3.v8i16.p0(ptr) nounwind readonly
156declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4.v8i16.p0(ptr) nounwind readonly
157
158%struct.__neon_int32x2x2_t = type { <2 x i32>,  <2 x i32> }
159%struct.__neon_int32x2x3_t = type { <2 x i32>,  <2 x i32>,  <2 x i32> }
160%struct.__neon_int32x2x4_t = type { <2 x i32>,  <2 x i32>, <2 x i32>,  <2 x i32> }
161
162define %struct.__neon_int32x2x2_t @ld2_2s(ptr %A) nounwind {
163; CHECK-LABEL: ld2_2s:
164; CHECK:       // %bb.0:
165; CHECK-NEXT:    ld2.2s { v0, v1 }, [x0]
166; CHECK-NEXT:    ret
167; Expect a single ld2.2s that fills the ABI result registers v0-v1 from the address in the ABI pointer argument x0.
168	%tmp2 = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2.v2i32.p0(ptr %A)
169	ret %struct.__neon_int32x2x2_t  %tmp2
170}
171
172define %struct.__neon_int32x2x3_t @ld3_2s(ptr %A) nounwind {
173; CHECK-LABEL: ld3_2s:
174; CHECK:       // %bb.0:
175; CHECK-NEXT:    ld3.2s { v0, v1, v2 }, [x0]
176; CHECK-NEXT:    ret
177; Expect a single ld3.2s that fills the ABI result registers v0-v2 from the address in the ABI pointer argument x0.
178	%tmp2 = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3.v2i32.p0(ptr %A)
179	ret %struct.__neon_int32x2x3_t  %tmp2
180}
181
182define %struct.__neon_int32x2x4_t @ld4_2s(ptr %A) nounwind {
183; CHECK-LABEL: ld4_2s:
184; CHECK:       // %bb.0:
185; CHECK-NEXT:    ld4.2s { v0, v1, v2, v3 }, [x0]
186; CHECK-NEXT:    ret
187; Expect a single ld4.2s that fills the ABI result registers v0-v3 from the address in the ABI pointer argument x0.
188	%tmp2 = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4.v2i32.p0(ptr %A)
189	ret %struct.__neon_int32x2x4_t  %tmp2
190}
191
192declare %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2.v2i32.p0(ptr) nounwind readonly
193declare %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3.v2i32.p0(ptr) nounwind readonly
194declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4.v2i32.p0(ptr) nounwind readonly
195
196%struct.__neon_int32x4x2_t = type { <4 x i32>,  <4 x i32> }
197%struct.__neon_int32x4x3_t = type { <4 x i32>,  <4 x i32>,  <4 x i32> }
198%struct.__neon_int32x4x4_t = type { <4 x i32>,  <4 x i32>, <4 x i32>,  <4 x i32> }
199
200define %struct.__neon_int32x4x2_t @ld2_4s(ptr %A) nounwind {
201; CHECK-LABEL: ld2_4s:
202; CHECK:       // %bb.0:
203; CHECK-NEXT:    ld2.4s { v0, v1 }, [x0]
204; CHECK-NEXT:    ret
205; Expect a single ld2.4s that fills the ABI result registers v0-v1 from the address in the ABI pointer argument x0.
206	%tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2.v4i32.p0(ptr %A)
207	ret %struct.__neon_int32x4x2_t  %tmp2
208}
209
210define %struct.__neon_int32x4x3_t @ld3_4s(ptr %A) nounwind {
211; CHECK-LABEL: ld3_4s:
212; CHECK:       // %bb.0:
213; CHECK-NEXT:    ld3.4s { v0, v1, v2 }, [x0]
214; CHECK-NEXT:    ret
215; Expect a single ld3.4s that fills the ABI result registers v0-v2 from the address in the ABI pointer argument x0.
216	%tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3.v4i32.p0(ptr %A)
217	ret %struct.__neon_int32x4x3_t  %tmp2
218}
219
220define %struct.__neon_int32x4x4_t @ld4_4s(ptr %A) nounwind {
221; CHECK-LABEL: ld4_4s:
222; CHECK:       // %bb.0:
223; CHECK-NEXT:    ld4.4s { v0, v1, v2, v3 }, [x0]
224; CHECK-NEXT:    ret
225; Expect a single ld4.4s that fills the ABI result registers v0-v3 from the address in the ABI pointer argument x0.
226	%tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4.v4i32.p0(ptr %A)
227	ret %struct.__neon_int32x4x4_t  %tmp2
228}
229
230declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2.v4i32.p0(ptr) nounwind readonly
231declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3.v4i32.p0(ptr) nounwind readonly
232declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4.v4i32.p0(ptr) nounwind readonly
233
234%struct.__neon_int64x2x2_t = type { <2 x i64>,  <2 x i64> }
235%struct.__neon_int64x2x3_t = type { <2 x i64>,  <2 x i64>,  <2 x i64> }
236%struct.__neon_int64x2x4_t = type { <2 x i64>,  <2 x i64>, <2 x i64>,  <2 x i64> }
237
238define %struct.__neon_int64x2x2_t @ld2_2d(ptr %A) nounwind {
239; CHECK-LABEL: ld2_2d:
240; CHECK:       // %bb.0:
241; CHECK-NEXT:    ld2.2d { v0, v1 }, [x0]
242; CHECK-NEXT:    ret
243; Expect a single ld2.2d that fills the ABI result registers v0-v1 from the address in the ABI pointer argument x0.
244	%tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2.v2i64.p0(ptr %A)
245	ret %struct.__neon_int64x2x2_t  %tmp2
246}
247
248define %struct.__neon_int64x2x3_t @ld3_2d(ptr %A) nounwind {
249; CHECK-LABEL: ld3_2d:
250; CHECK:       // %bb.0:
251; CHECK-NEXT:    ld3.2d { v0, v1, v2 }, [x0]
252; CHECK-NEXT:    ret
253; Expect a single ld3.2d that fills the ABI result registers v0-v2 from the address in the ABI pointer argument x0.
254	%tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3.v2i64.p0(ptr %A)
255	ret %struct.__neon_int64x2x3_t  %tmp2
256}
257
258define %struct.__neon_int64x2x4_t @ld4_2d(ptr %A) nounwind {
259; CHECK-LABEL: ld4_2d:
260; CHECK:       // %bb.0:
261; CHECK-NEXT:    ld4.2d { v0, v1, v2, v3 }, [x0]
262; CHECK-NEXT:    ret
263; Expect a single ld4.2d that fills the ABI result registers v0-v3 from the address in the ABI pointer argument x0.
264	%tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4.v2i64.p0(ptr %A)
265	ret %struct.__neon_int64x2x4_t  %tmp2
266}
267
268declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2.v2i64.p0(ptr) nounwind readonly
269declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3.v2i64.p0(ptr) nounwind readonly
270declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4.v2i64.p0(ptr) nounwind readonly
271
272%struct.__neon_int64x1x2_t = type { <1 x i64>,  <1 x i64> }
273%struct.__neon_int64x1x3_t = type { <1 x i64>,  <1 x i64>, <1 x i64> }
274%struct.__neon_int64x1x4_t = type { <1 x i64>,  <1 x i64>, <1 x i64>, <1 x i64> }
275
276
277define %struct.__neon_int64x1x2_t @ld2_1di64(ptr %A) nounwind {
278; CHECK-LABEL: ld2_1di64:
279; CHECK:       // %bb.0:
280; CHECK-NEXT:    ld1.1d { v0, v1 }, [x0]
281; CHECK-NEXT:    ret
282; For <1 x i64> vectors the ld2 intrinsic is expected to select a two-register ld1.1d (not an ld2): results in v0-v1, address in x0.
283	%tmp2 = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2.v1i64.p0(ptr %A)
284	ret %struct.__neon_int64x1x2_t  %tmp2
285}
286
287define %struct.__neon_int64x1x3_t @ld3_1di64(ptr %A) nounwind {
288; CHECK-LABEL: ld3_1di64:
289; CHECK:       // %bb.0:
290; CHECK-NEXT:    ld1.1d { v0, v1, v2 }, [x0]
291; CHECK-NEXT:    ret
292; For <1 x i64> vectors the ld3 intrinsic is expected to select a three-register ld1.1d (not an ld3): results in v0-v2, address in x0.
293	%tmp2 = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3.v1i64.p0(ptr %A)
294	ret %struct.__neon_int64x1x3_t  %tmp2
295}
296
297define %struct.__neon_int64x1x4_t @ld4_1di64(ptr %A) nounwind {
298; CHECK-LABEL: ld4_1di64:
299; CHECK:       // %bb.0:
300; CHECK-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0]
301; CHECK-NEXT:    ret
302; For <1 x i64> vectors the ld4 intrinsic is expected to select a four-register ld1.1d (not an ld4): results in v0-v3, address in x0.
303	%tmp2 = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4.v1i64.p0(ptr %A)
304	ret %struct.__neon_int64x1x4_t  %tmp2
305}
306
307
308declare %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2.v1i64.p0(ptr) nounwind readonly
309declare %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3.v1i64.p0(ptr) nounwind readonly
310declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4.v1i64.p0(ptr) nounwind readonly
311
312%struct.__neon_float64x1x2_t = type { <1 x double>,  <1 x double> }
313%struct.__neon_float64x1x3_t = type { <1 x double>,  <1 x double>, <1 x double> }
314%struct.__neon_float64x1x4_t = type { <1 x double>,  <1 x double>, <1 x double>, <1 x double> }
315
316
317define %struct.__neon_float64x1x2_t @ld2_1df64(ptr %A) nounwind {
318; CHECK-LABEL: ld2_1df64:
319; CHECK:       // %bb.0:
320; CHECK-NEXT:    ld1.1d { v0, v1 }, [x0]
321; CHECK-NEXT:    ret
322; For <1 x double> vectors the ld2 intrinsic is expected to select a two-register ld1.1d (not an ld2): results in v0-v1, address in x0.
323	%tmp2 = call %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld2.v1f64.p0(ptr %A)
324	ret %struct.__neon_float64x1x2_t  %tmp2
325}
326
327define %struct.__neon_float64x1x3_t @ld3_1df64(ptr %A) nounwind {
328; CHECK-LABEL: ld3_1df64:
329; CHECK:       // %bb.0:
330; CHECK-NEXT:    ld1.1d { v0, v1, v2 }, [x0]
331; CHECK-NEXT:    ret
332; For <1 x double> vectors the ld3 intrinsic is expected to select a three-register ld1.1d (not an ld3): results in v0-v2, address in x0.
333	%tmp2 = call %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld3.v1f64.p0(ptr %A)
334	ret %struct.__neon_float64x1x3_t  %tmp2
335}
336
337define %struct.__neon_float64x1x4_t @ld4_1df64(ptr %A) nounwind {
338; CHECK-LABEL: ld4_1df64:
339; CHECK:       // %bb.0:
340; CHECK-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0]
341; CHECK-NEXT:    ret
342; For <1 x double> vectors the ld4 intrinsic is expected to select a four-register ld1.1d (not an ld4): results in v0-v3, address in x0.
343	%tmp2 = call %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld4.v1f64.p0(ptr %A)
344	ret %struct.__neon_float64x1x4_t  %tmp2
345}
346
347declare %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld2.v1f64.p0(ptr) nounwind readonly
348declare %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld3.v1f64.p0(ptr) nounwind readonly
349declare %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld4.v1f64.p0(ptr) nounwind readonly
350
351
352define %struct.__neon_int8x16x2_t @ld2lane_16b(<16 x i8> %L1, <16 x i8> %L2, ptr %A) nounwind {
353; Expect a single ld2.b into lane 1 of v0 and v1 from [x0], with the vectors passed in and returned through q0-q1. The SelectionDAG and GlobalISel outputs differ only in the order of the kill comments.
354; CHECK-SD-LABEL: ld2lane_16b:
355; CHECK-SD:       // %bb.0:
356; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
357; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
358; CHECK-SD-NEXT:    ld2.b { v0, v1 }[1], [x0]
359; CHECK-SD-NEXT:    ret
360;
361; CHECK-GI-LABEL: ld2lane_16b:
362; CHECK-GI:       // %bb.0:
363; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
364; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
365; CHECK-GI-NEXT:    ld2.b { v0, v1 }[1], [x0]
366; CHECK-GI-NEXT:    ret
367	%tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> %L1, <16 x i8> %L2, i64 1, ptr %A)
368	ret %struct.__neon_int8x16x2_t  %tmp2
369}
370
371define %struct.__neon_int8x16x3_t @ld3lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, ptr %A) nounwind {
372; Expect a single ld3.b into lane 1 of v0-v2 from [x0], with the vectors passed in and returned through q0-q2. The SelectionDAG and GlobalISel outputs differ only in the order of the kill comments.
373; CHECK-SD-LABEL: ld3lane_16b:
374; CHECK-SD:       // %bb.0:
375; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
376; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
377; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
378; CHECK-SD-NEXT:    ld3.b { v0, v1, v2 }[1], [x0]
379; CHECK-SD-NEXT:    ret
380;
381; CHECK-GI-LABEL: ld3lane_16b:
382; CHECK-GI:       // %bb.0:
383; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
384; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
385; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
386; CHECK-GI-NEXT:    ld3.b { v0, v1, v2 }[1], [x0]
387; CHECK-GI-NEXT:    ret
388	%tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, i64 1, ptr %A)
389	ret %struct.__neon_int8x16x3_t  %tmp2
390}
391
392define %struct.__neon_int8x16x4_t @ld4lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, ptr %A) nounwind {
393; Expect a single ld4.b into lane 1 of v0-v3 from [x0], with the vectors passed in and returned through q0-q3. The SelectionDAG and GlobalISel outputs differ only in the order of the kill comments.
394; CHECK-SD-LABEL: ld4lane_16b:
395; CHECK-SD:       // %bb.0:
396; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
397; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
398; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
399; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
400; CHECK-SD-NEXT:    ld4.b { v0, v1, v2, v3 }[1], [x0]
401; CHECK-SD-NEXT:    ret
402;
403; CHECK-GI-LABEL: ld4lane_16b:
404; CHECK-GI:       // %bb.0:
405; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
406; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
407; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
408; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
409; CHECK-GI-NEXT:    ld4.b { v0, v1, v2, v3 }[1], [x0]
410; CHECK-GI-NEXT:    ret
411	%tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, i64 1, ptr %A)
412	ret %struct.__neon_int8x16x4_t  %tmp2
413}
414
415declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8>, <16 x i8>, i64, ptr) nounwind readonly
416declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, i64, ptr) nounwind readonly
417declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, ptr) nounwind readonly
418
419define %struct.__neon_int16x8x2_t @ld2lane_8h(<8 x i16> %L1, <8 x i16> %L2, ptr %A) nounwind {
420; Expect a single ld2.h into lane 1 of v0 and v1 from [x0], with the vectors passed in and returned through q0-q1. The SelectionDAG and GlobalISel outputs differ only in the order of the kill comments.
421; CHECK-SD-LABEL: ld2lane_8h:
422; CHECK-SD:       // %bb.0:
423; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
424; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
425; CHECK-SD-NEXT:    ld2.h { v0, v1 }[1], [x0]
426; CHECK-SD-NEXT:    ret
427;
428; CHECK-GI-LABEL: ld2lane_8h:
429; CHECK-GI:       // %bb.0:
430; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
431; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
432; CHECK-GI-NEXT:    ld2.h { v0, v1 }[1], [x0]
433; CHECK-GI-NEXT:    ret
434	%tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> %L1, <8 x i16> %L2, i64 1, ptr %A)
435	ret %struct.__neon_int16x8x2_t  %tmp2
436}
437
438define %struct.__neon_int16x8x3_t @ld3lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, ptr %A) nounwind {
439; Expect a single ld3.h into lane 1 of v0-v2 from [x0], with the vectors passed in and returned through q0-q2. The SelectionDAG and GlobalISel outputs differ only in the order of the kill comments.
440; CHECK-SD-LABEL: ld3lane_8h:
441; CHECK-SD:       // %bb.0:
442; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
443; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
444; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
445; CHECK-SD-NEXT:    ld3.h { v0, v1, v2 }[1], [x0]
446; CHECK-SD-NEXT:    ret
447;
448; CHECK-GI-LABEL: ld3lane_8h:
449; CHECK-GI:       // %bb.0:
450; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
451; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
452; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
453; CHECK-GI-NEXT:    ld3.h { v0, v1, v2 }[1], [x0]
454; CHECK-GI-NEXT:    ret
455	%tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, i64 1, ptr %A)
456	ret %struct.__neon_int16x8x3_t  %tmp2
457}
458
459define %struct.__neon_int16x8x4_t @ld4lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, ptr %A) nounwind {
460; Expect a single ld4.h into lane 1 of v0-v3 from [x0], with the vectors passed in and returned through q0-q3. The SelectionDAG and GlobalISel outputs differ only in the order of the kill comments.
461; CHECK-SD-LABEL: ld4lane_8h:
462; CHECK-SD:       // %bb.0:
463; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
464; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
465; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
466; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
467; CHECK-SD-NEXT:    ld4.h { v0, v1, v2, v3 }[1], [x0]
468; CHECK-SD-NEXT:    ret
469;
470; CHECK-GI-LABEL: ld4lane_8h:
471; CHECK-GI:       // %bb.0:
472; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
473; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
474; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
475; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
476; CHECK-GI-NEXT:    ld4.h { v0, v1, v2, v3 }[1], [x0]
477; CHECK-GI-NEXT:    ret
478	%tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, i64 1, ptr %A)
479	ret %struct.__neon_int16x8x4_t  %tmp2
480}
481
482declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16>, <8 x i16>, i64, ptr) nounwind readonly
483declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, i64, ptr) nounwind readonly
484declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, ptr) nounwind readonly
485
486define %struct.__neon_int32x4x2_t @ld2lane_4s(<4 x i32> %L1, <4 x i32> %L2, ptr %A) nounwind {
487; Expect a single ld2.s into lane 1 of v0 and v1 from [x0], with the vectors passed in and returned through q0-q1. The SelectionDAG and GlobalISel outputs differ only in the order of the kill comments.
488; CHECK-SD-LABEL: ld2lane_4s:
489; CHECK-SD:       // %bb.0:
490; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
491; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
492; CHECK-SD-NEXT:    ld2.s { v0, v1 }[1], [x0]
493; CHECK-SD-NEXT:    ret
494;
495; CHECK-GI-LABEL: ld2lane_4s:
496; CHECK-GI:       // %bb.0:
497; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
498; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
499; CHECK-GI-NEXT:    ld2.s { v0, v1 }[1], [x0]
500; CHECK-GI-NEXT:    ret
501	%tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> %L1, <4 x i32> %L2, i64 1, ptr %A)
502	ret %struct.__neon_int32x4x2_t  %tmp2
503}
504
505define %struct.__neon_int32x4x3_t @ld3lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, ptr %A) nounwind {
506; Expect a single ld3.s into lane 1 of v0-v2 from [x0], with the vectors passed in and returned through q0-q2. The SelectionDAG and GlobalISel outputs differ only in the order of the kill comments.
507; CHECK-SD-LABEL: ld3lane_4s:
508; CHECK-SD:       // %bb.0:
509; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
510; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
511; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
512; CHECK-SD-NEXT:    ld3.s { v0, v1, v2 }[1], [x0]
513; CHECK-SD-NEXT:    ret
514;
515; CHECK-GI-LABEL: ld3lane_4s:
516; CHECK-GI:       // %bb.0:
517; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
518; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
519; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
520; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[1], [x0]
521; CHECK-GI-NEXT:    ret
522	%tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i64 1, ptr %A)
523	ret %struct.__neon_int32x4x3_t  %tmp2
524}
525
526define %struct.__neon_int32x4x4_t @ld4lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, ptr %A) nounwind {
527; Expect a single ld4.s into lane 1 of v0-v3 from [x0], with the vectors passed in and returned through q0-q3. The SelectionDAG and GlobalISel outputs differ only in the order of the kill comments.
528; CHECK-SD-LABEL: ld4lane_4s:
529; CHECK-SD:       // %bb.0:
530; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
531; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
532; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
533; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
534; CHECK-SD-NEXT:    ld4.s { v0, v1, v2, v3 }[1], [x0]
535; CHECK-SD-NEXT:    ret
536;
537; CHECK-GI-LABEL: ld4lane_4s:
538; CHECK-GI:       // %bb.0:
539; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
540; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
541; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
542; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
543; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[1], [x0]
544; CHECK-GI-NEXT:    ret
545	%tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, i64 1, ptr %A)
546	ret %struct.__neon_int32x4x4_t  %tmp2
547}
548
549declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32>, <4 x i32>, i64, ptr) nounwind readonly
550declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, i64, ptr) nounwind readonly
551declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, ptr) nounwind readonly
552
553define %struct.__neon_int64x2x2_t @ld2lane_2d(<2 x i64> %L1, <2 x i64> %L2, ptr %A) nounwind {
554; Expect a single ld2.d into lane 1 of v0 and v1 from [x0], with the vectors passed in and returned through q0-q1. The SelectionDAG and GlobalISel outputs differ only in the order of the kill comments.
555; CHECK-SD-LABEL: ld2lane_2d:
556; CHECK-SD:       // %bb.0:
557; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
558; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
559; CHECK-SD-NEXT:    ld2.d { v0, v1 }[1], [x0]
560; CHECK-SD-NEXT:    ret
561;
562; CHECK-GI-LABEL: ld2lane_2d:
563; CHECK-GI:       // %bb.0:
564; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
565; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
566; CHECK-GI-NEXT:    ld2.d { v0, v1 }[1], [x0]
567; CHECK-GI-NEXT:    ret
568	%tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> %L1, <2 x i64> %L2, i64 1, ptr %A)
569	ret %struct.__neon_int64x2x2_t  %tmp2
570}
571
572define %struct.__neon_int64x2x3_t @ld3lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, ptr %A) nounwind {
573; Expect a single ld3.d into lane 1 of v0-v2 from [x0], with the vectors passed in and returned through q0-q2. The SelectionDAG and GlobalISel outputs differ only in the order of the kill comments.
574; CHECK-SD-LABEL: ld3lane_2d:
575; CHECK-SD:       // %bb.0:
576; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
577; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
578; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
579; CHECK-SD-NEXT:    ld3.d { v0, v1, v2 }[1], [x0]
580; CHECK-SD-NEXT:    ret
581;
582; CHECK-GI-LABEL: ld3lane_2d:
583; CHECK-GI:       // %bb.0:
584; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
585; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
586; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
587; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[1], [x0]
588; CHECK-GI-NEXT:    ret
589	%tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, i64 1, ptr %A)
590	ret %struct.__neon_int64x2x3_t  %tmp2
591}
592
593define %struct.__neon_int64x2x4_t @ld4lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, ptr %A) nounwind {
594; Expect a single ld4.d into lane 1 of v0-v3 from [x0], with the vectors passed in and returned through q0-q3. The SelectionDAG and GlobalISel outputs differ only in the order of the kill comments.
595; CHECK-SD-LABEL: ld4lane_2d:
596; CHECK-SD:       // %bb.0:
597; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
598; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
599; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
600; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
601; CHECK-SD-NEXT:    ld4.d { v0, v1, v2, v3 }[1], [x0]
602; CHECK-SD-NEXT:    ret
603;
604; CHECK-GI-LABEL: ld4lane_2d:
605; CHECK-GI:       // %bb.0:
606; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
607; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
608; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
609; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
610; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[1], [x0]
611; CHECK-GI-NEXT:    ret
612	%tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, i64 1, ptr %A)
613	ret %struct.__neon_int64x2x4_t  %tmp2
614}
615
616declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64>, <2 x i64>, i64, ptr) nounwind readonly
617declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, i64, ptr) nounwind readonly
618declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, ptr) nounwind readonly
619
620define <8 x i8> @ld1r_8b(ptr %bar) {
621; CHECK-LABEL: ld1r_8b:
622; CHECK:       // %bb.0:
623; CHECK-NEXT:    ld1r.8b { v0 }, [x0]
624; CHECK-NEXT:    ret
625; A scalar i8 load inserted into all 8 lanes must fold into one ld1r.8b that reads the ABI pointer argument (x0) and writes the ABI result register (v0).
626  %tmp1 = load i8, ptr %bar
627  %tmp2 = insertelement <8 x i8> poison, i8 %tmp1, i32 0 ; seed is poison (undef is deprecated); every lane is overwritten below
628  %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1
629  %tmp4 = insertelement <8 x i8> %tmp3, i8 %tmp1, i32 2
630  %tmp5 = insertelement <8 x i8> %tmp4, i8 %tmp1, i32 3
631  %tmp6 = insertelement <8 x i8> %tmp5, i8 %tmp1, i32 4
632  %tmp7 = insertelement <8 x i8> %tmp6, i8 %tmp1, i32 5
633  %tmp8 = insertelement <8 x i8> %tmp7, i8 %tmp1, i32 6
634  %tmp9 = insertelement <8 x i8> %tmp8, i8 %tmp1, i32 7
635  ret <8 x i8> %tmp9
636}
637
638define <16 x i8> @ld1r_16b(ptr %bar) {
639; CHECK-LABEL: ld1r_16b:
640; CHECK:       // %bb.0:
641; CHECK-NEXT:    ld1r.16b { v0 }, [x0]
642; CHECK-NEXT:    ret
643; A scalar i8 load inserted into all 16 lanes must fold into one ld1r.16b that reads the ABI pointer argument (x0) and writes the ABI result register (v0).
644  %tmp1 = load i8, ptr %bar
645  %tmp2 = insertelement <16 x i8> poison, i8 %tmp1, i32 0 ; seed is poison (undef is deprecated); every lane is overwritten below
646  %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1
647  %tmp4 = insertelement <16 x i8> %tmp3, i8 %tmp1, i32 2
648  %tmp5 = insertelement <16 x i8> %tmp4, i8 %tmp1, i32 3
649  %tmp6 = insertelement <16 x i8> %tmp5, i8 %tmp1, i32 4
650  %tmp7 = insertelement <16 x i8> %tmp6, i8 %tmp1, i32 5
651  %tmp8 = insertelement <16 x i8> %tmp7, i8 %tmp1, i32 6
652  %tmp9 = insertelement <16 x i8> %tmp8, i8 %tmp1, i32 7
653  %tmp10 = insertelement <16 x i8> %tmp9, i8 %tmp1, i32 8
654  %tmp11 = insertelement <16 x i8> %tmp10, i8 %tmp1, i32 9
655  %tmp12 = insertelement <16 x i8> %tmp11, i8 %tmp1, i32 10
656  %tmp13 = insertelement <16 x i8> %tmp12, i8 %tmp1, i32 11
657  %tmp14 = insertelement <16 x i8> %tmp13, i8 %tmp1, i32 12
658  %tmp15 = insertelement <16 x i8> %tmp14, i8 %tmp1, i32 13
659  %tmp16 = insertelement <16 x i8> %tmp15, i8 %tmp1, i32 14
660  %tmp17 = insertelement <16 x i8> %tmp16, i8 %tmp1, i32 15
661  ret <16 x i8> %tmp17
662}
663
; Splat one loaded i16 across four lanes with an insertelement chain.
; Expected: a single ld1r.4h from [x0] into v0 (the ABI-assigned registers).
define <4 x i16> @ld1r_4h(ptr %bar) {
; CHECK-LABEL: ld1r_4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1r.4h { v0 }, [x0]
; CHECK-NEXT:    ret
  %elt = load i16, ptr %bar
  %lane0 = insertelement <4 x i16> undef, i16 %elt, i32 0
  %lane1 = insertelement <4 x i16> %lane0, i16 %elt, i32 1
  %lane2 = insertelement <4 x i16> %lane1, i16 %elt, i32 2
  %splat = insertelement <4 x i16> %lane2, i16 %elt, i32 3
  ret <4 x i16> %splat
}
677
; Splat one loaded i16 across eight lanes with an insertelement chain.
; Expected: a single ld1r.8h from [x0] into v0 (the ABI-assigned registers).
define <8 x i16> @ld1r_8h(ptr %bar) {
; CHECK-LABEL: ld1r_8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1r.8h { v0 }, [x0]
; CHECK-NEXT:    ret
  %elt = load i16, ptr %bar
  %lane0 = insertelement <8 x i16> undef, i16 %elt, i32 0
  %lane1 = insertelement <8 x i16> %lane0, i16 %elt, i32 1
  %lane2 = insertelement <8 x i16> %lane1, i16 %elt, i32 2
  %lane3 = insertelement <8 x i16> %lane2, i16 %elt, i32 3
  %lane4 = insertelement <8 x i16> %lane3, i16 %elt, i32 4
  %lane5 = insertelement <8 x i16> %lane4, i16 %elt, i32 5
  %lane6 = insertelement <8 x i16> %lane5, i16 %elt, i32 6
  %splat = insertelement <8 x i16> %lane6, i16 %elt, i32 7
  ret <8 x i16> %splat
}
695
; Splat one loaded i32 into both lanes of a <2 x i32>.
; Expected: a single ld1r.2s from [x0] into v0 (the ABI-assigned registers).
define <2 x i32> @ld1r_2s(ptr %bar) {
; CHECK-LABEL: ld1r_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1r.2s { v0 }, [x0]
; CHECK-NEXT:    ret
  %elt = load i32, ptr %bar
  %lane0 = insertelement <2 x i32> undef, i32 %elt, i32 0
  %splat = insertelement <2 x i32> %lane0, i32 %elt, i32 1
  ret <2 x i32> %splat
}
707
; Splat one loaded i32 across four lanes with an insertelement chain.
; Expected: a single ld1r.4s from [x0] into v0 (the ABI-assigned registers).
define <4 x i32> @ld1r_4s(ptr %bar) {
; CHECK-LABEL: ld1r_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1r.4s { v0 }, [x0]
; CHECK-NEXT:    ret
  %elt = load i32, ptr %bar
  %lane0 = insertelement <4 x i32> undef, i32 %elt, i32 0
  %lane1 = insertelement <4 x i32> %lane0, i32 %elt, i32 1
  %lane2 = insertelement <4 x i32> %lane1, i32 %elt, i32 2
  %splat = insertelement <4 x i32> %lane2, i32 %elt, i32 3
  ret <4 x i32> %splat
}
721
; Splat one loaded i64 into both lanes of a <2 x i64>.
; Expected: a single ld1r.2d from [x0] into v0 (the ABI-assigned registers).
define <2 x i64> @ld1r_2d(ptr %bar) {
; CHECK-LABEL: ld1r_2d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1r.2d { v0 }, [x0]
; CHECK-NEXT:    ret
  %elt = load i64, ptr %bar
  %lane0 = insertelement <2 x i64> undef, i64 %elt, i32 0
  %splat = insertelement <2 x i64> %lane0, i64 %elt, i32 1
  ret <2 x i64> %splat
}
733
; ld2r on <8 x i8>: the intrinsic should lower to one ld2r.8b that fills
; v0-v1 from [x0], the registers the ABI assigns to the result and argument.
define %struct.__neon_int8x8x2_t @ld2r_8b(ptr %A) nounwind {
; CHECK-LABEL: ld2r_8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2r.8b { v0, v1 }, [x0]
; CHECK-NEXT:    ret
  %pair = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2r.v8i8.p0(ptr %A)
  ret %struct.__neon_int8x8x2_t %pair
}
743
; ld3r on <8 x i8>: the intrinsic should lower to one ld3r.8b that fills
; v0-v2 from [x0], the registers the ABI assigns to the result and argument.
define %struct.__neon_int8x8x3_t @ld3r_8b(ptr %A) nounwind {
; CHECK-LABEL: ld3r_8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3r.8b { v0, v1, v2 }, [x0]
; CHECK-NEXT:    ret
  %triple = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3r.v8i8.p0(ptr %A)
  ret %struct.__neon_int8x8x3_t %triple
}
753
; ld4r on <8 x i8>: the intrinsic should lower to one ld4r.8b that fills
; v0-v3 from [x0], the registers the ABI assigns to the result and argument.
define %struct.__neon_int8x8x4_t @ld4r_8b(ptr %A) nounwind {
; CHECK-LABEL: ld4r_8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4r.8b { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT:    ret
  %quad = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4r.v8i8.p0(ptr %A)
  ret %struct.__neon_int8x8x4_t %quad
}
763
764declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2r.v8i8.p0(ptr) nounwind readonly
765declare %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3r.v8i8.p0(ptr) nounwind readonly
766declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4r.v8i8.p0(ptr) nounwind readonly
767
; ld2r on <16 x i8>: expected to lower to one ld2r.16b that fills v0-v1 from
; [x0], the registers the ABI assigns to the result and argument.
define %struct.__neon_int8x16x2_t @ld2r_16b(ptr %A) nounwind {
; CHECK-LABEL: ld2r_16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2r.16b { v0, v1 }, [x0]
; CHECK-NEXT:    ret
  %pair = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2r.v16i8.p0(ptr %A)
  ret %struct.__neon_int8x16x2_t %pair
}
777
; ld3r on <16 x i8>: expected to lower to one ld3r.16b that fills v0-v2 from
; [x0], the registers the ABI assigns to the result and argument.
define %struct.__neon_int8x16x3_t @ld3r_16b(ptr %A) nounwind {
; CHECK-LABEL: ld3r_16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3r.16b { v0, v1, v2 }, [x0]
; CHECK-NEXT:    ret
  %triple = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3r.v16i8.p0(ptr %A)
  ret %struct.__neon_int8x16x3_t %triple
}
787
; ld4r on <16 x i8>: expected to lower to one ld4r.16b that fills v0-v3 from
; [x0], the registers the ABI assigns to the result and argument.
define %struct.__neon_int8x16x4_t @ld4r_16b(ptr %A) nounwind {
; CHECK-LABEL: ld4r_16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4r.16b { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT:    ret
  %quad = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4r.v16i8.p0(ptr %A)
  ret %struct.__neon_int8x16x4_t %quad
}
797
798declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2r.v16i8.p0(ptr) nounwind readonly
799declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3r.v16i8.p0(ptr) nounwind readonly
800declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4r.v16i8.p0(ptr) nounwind readonly
801
; ld2r on <4 x i16>: expected to lower to one ld2r.4h that fills v0-v1 from
; [x0], the registers the ABI assigns to the result and argument.
define %struct.__neon_int16x4x2_t @ld2r_4h(ptr %A) nounwind {
; CHECK-LABEL: ld2r_4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2r.4h { v0, v1 }, [x0]
; CHECK-NEXT:    ret
  %pair = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2r.v4i16.p0(ptr %A)
  ret %struct.__neon_int16x4x2_t %pair
}
811
; ld3r on <4 x i16>: expected to lower to one ld3r.4h that fills v0-v2 from
; [x0], the registers the ABI assigns to the result and argument.
define %struct.__neon_int16x4x3_t @ld3r_4h(ptr %A) nounwind {
; CHECK-LABEL: ld3r_4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3r.4h { v0, v1, v2 }, [x0]
; CHECK-NEXT:    ret
  %triple = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3r.v4i16.p0(ptr %A)
  ret %struct.__neon_int16x4x3_t %triple
}
821
; ld4r on <4 x i16>: expected to lower to one ld4r.4h that fills v0-v3 from
; [x0], the registers the ABI assigns to the result and argument.
define %struct.__neon_int16x4x4_t @ld4r_4h(ptr %A) nounwind {
; CHECK-LABEL: ld4r_4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4r.4h { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT:    ret
  %quad = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4r.v4i16.p0(ptr %A)
  ret %struct.__neon_int16x4x4_t %quad
}
831
832declare %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2r.v4i16.p0(ptr) nounwind readonly
833declare %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3r.v4i16.p0(ptr) nounwind readonly
834declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4r.v4i16.p0(ptr) nounwind readonly
835
; ld2r on <8 x i16>: expected to lower to one ld2r.8h that fills v0-v1 from
; [x0], the registers the ABI assigns to the result and argument.
define %struct.__neon_int16x8x2_t @ld2r_8h(ptr %A) nounwind {
; CHECK-LABEL: ld2r_8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2r.8h { v0, v1 }, [x0]
; CHECK-NEXT:    ret
  %pair = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2r.v8i16.p0(ptr %A)
  ret %struct.__neon_int16x8x2_t %pair
}
845
; ld3r on <8 x i16>: expected to lower to one ld3r.8h that fills v0-v2 from
; [x0], the registers the ABI assigns to the result and argument.
define %struct.__neon_int16x8x3_t @ld3r_8h(ptr %A) nounwind {
; CHECK-LABEL: ld3r_8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3r.8h { v0, v1, v2 }, [x0]
; CHECK-NEXT:    ret
  %triple = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3r.v8i16.p0(ptr %A)
  ret %struct.__neon_int16x8x3_t %triple
}
855
; ld4r on <8 x i16>: expected to lower to one ld4r.8h that fills v0-v3 from
; [x0], the registers the ABI assigns to the result and argument.
define %struct.__neon_int16x8x4_t @ld4r_8h(ptr %A) nounwind {
; CHECK-LABEL: ld4r_8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4r.8h { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT:    ret
  %quad = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4r.v8i16.p0(ptr %A)
  ret %struct.__neon_int16x8x4_t %quad
}
865
866declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2r.v8i16.p0(ptr) nounwind readonly
867declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3r.v8i16.p0(ptr) nounwind readonly
868declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4r.v8i16.p0(ptr) nounwind readonly
869
; ld2r on <2 x i32>: expected to lower to one ld2r.2s that fills v0-v1 from
; [x0], the registers the ABI assigns to the result and argument.
define %struct.__neon_int32x2x2_t @ld2r_2s(ptr %A) nounwind {
; CHECK-LABEL: ld2r_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2r.2s { v0, v1 }, [x0]
; CHECK-NEXT:    ret
  %pair = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2r.v2i32.p0(ptr %A)
  ret %struct.__neon_int32x2x2_t %pair
}
879
; ld3r on <2 x i32>: expected to lower to one ld3r.2s that fills v0-v2 from
; [x0], the registers the ABI assigns to the result and argument.
define %struct.__neon_int32x2x3_t @ld3r_2s(ptr %A) nounwind {
; CHECK-LABEL: ld3r_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3r.2s { v0, v1, v2 }, [x0]
; CHECK-NEXT:    ret
  %triple = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3r.v2i32.p0(ptr %A)
  ret %struct.__neon_int32x2x3_t %triple
}
889
; ld4r on <2 x i32>: expected to lower to one ld4r.2s that fills v0-v3 from
; [x0], the registers the ABI assigns to the result and argument.
define %struct.__neon_int32x2x4_t @ld4r_2s(ptr %A) nounwind {
; CHECK-LABEL: ld4r_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4r.2s { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT:    ret
  %quad = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4r.v2i32.p0(ptr %A)
  ret %struct.__neon_int32x2x4_t %quad
}
899
900declare %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2r.v2i32.p0(ptr) nounwind readonly
901declare %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3r.v2i32.p0(ptr) nounwind readonly
902declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4r.v2i32.p0(ptr) nounwind readonly
903
; ld2r on <4 x i32>: expected to lower to one ld2r.4s that fills v0-v1 from
; [x0], the registers the ABI assigns to the result and argument.
define %struct.__neon_int32x4x2_t @ld2r_4s(ptr %A) nounwind {
; CHECK-LABEL: ld2r_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2r.4s { v0, v1 }, [x0]
; CHECK-NEXT:    ret
  %pair = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2r.v4i32.p0(ptr %A)
  ret %struct.__neon_int32x4x2_t %pair
}
913
; ld3r on <4 x i32>: expected to lower to one ld3r.4s that fills v0-v2 from
; [x0], the registers the ABI assigns to the result and argument.
define %struct.__neon_int32x4x3_t @ld3r_4s(ptr %A) nounwind {
; CHECK-LABEL: ld3r_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3r.4s { v0, v1, v2 }, [x0]
; CHECK-NEXT:    ret
  %triple = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3r.v4i32.p0(ptr %A)
  ret %struct.__neon_int32x4x3_t %triple
}
923
; ld4r on <4 x i32>: expected to lower to one ld4r.4s that fills v0-v3 from
; [x0], the registers the ABI assigns to the result and argument.
define %struct.__neon_int32x4x4_t @ld4r_4s(ptr %A) nounwind {
; CHECK-LABEL: ld4r_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4r.4s { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT:    ret
  %quad = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4r.v4i32.p0(ptr %A)
  ret %struct.__neon_int32x4x4_t %quad
}
933
934declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2r.v4i32.p0(ptr) nounwind readonly
935declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3r.v4i32.p0(ptr) nounwind readonly
936declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4r.v4i32.p0(ptr) nounwind readonly
937
; ld2r on <1 x i64>: expected to lower to one ld2r.1d that fills v0-v1 from
; [x0], the registers the ABI assigns to the result and argument.
define %struct.__neon_int64x1x2_t @ld2r_1d(ptr %A) nounwind {
; CHECK-LABEL: ld2r_1d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2r.1d { v0, v1 }, [x0]
; CHECK-NEXT:    ret
  %pair = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2r.v1i64.p0(ptr %A)
  ret %struct.__neon_int64x1x2_t %pair
}
947
; ld3r on <1 x i64>: expected to lower to one ld3r.1d that fills v0-v2 from
; [x0], the registers the ABI assigns to the result and argument.
define %struct.__neon_int64x1x3_t @ld3r_1d(ptr %A) nounwind {
; CHECK-LABEL: ld3r_1d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3r.1d { v0, v1, v2 }, [x0]
; CHECK-NEXT:    ret
  %triple = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3r.v1i64.p0(ptr %A)
  ret %struct.__neon_int64x1x3_t %triple
}
957
; ld4r on <1 x i64>: expected to lower to one ld4r.1d that fills v0-v3 from
; [x0], the registers the ABI assigns to the result and argument.
define %struct.__neon_int64x1x4_t @ld4r_1d(ptr %A) nounwind {
; CHECK-LABEL: ld4r_1d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4r.1d { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT:    ret
  %quad = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4r.v1i64.p0(ptr %A)
  ret %struct.__neon_int64x1x4_t %quad
}
967
968declare %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2r.v1i64.p0(ptr) nounwind readonly
969declare %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3r.v1i64.p0(ptr) nounwind readonly
970declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4r.v1i64.p0(ptr) nounwind readonly
971
; ld2r on <2 x i64>: expected to lower to one ld2r.2d that fills v0-v1 from
; [x0], the registers the ABI assigns to the result and argument.
define %struct.__neon_int64x2x2_t @ld2r_2d(ptr %A) nounwind {
; CHECK-LABEL: ld2r_2d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2r.2d { v0, v1 }, [x0]
; CHECK-NEXT:    ret
  %pair = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2r.v2i64.p0(ptr %A)
  ret %struct.__neon_int64x2x2_t %pair
}
981
; ld3r on <2 x i64>: expected to lower to one ld3r.2d that fills v0-v2 from
; [x0], the registers the ABI assigns to the result and argument.
define %struct.__neon_int64x2x3_t @ld3r_2d(ptr %A) nounwind {
; CHECK-LABEL: ld3r_2d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3r.2d { v0, v1, v2 }, [x0]
; CHECK-NEXT:    ret
  %triple = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3r.v2i64.p0(ptr %A)
  ret %struct.__neon_int64x2x3_t %triple
}
991
; ld4r on <2 x i64>: expected to lower to one ld4r.2d that fills v0-v3 from
; [x0], the registers the ABI assigns to the result and argument.
define %struct.__neon_int64x2x4_t @ld4r_2d(ptr %A) nounwind {
; CHECK-LABEL: ld4r_2d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4r.2d { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT:    ret
  %quad = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4r.v2i64.p0(ptr %A)
  ret %struct.__neon_int64x2x4_t %quad
}
1001
1002declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2r.v2i64.p0(ptr) nounwind readonly
1003declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3r.v2i64.p0(ptr) nounwind readonly
1004declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4r.v2i64.p0(ptr) nounwind readonly
1005
; Overwrite lane 0 of %V with an i8 loaded from %bar. SelectionDAG is expected
; to fold the load into a lane ld1.b on v0; GlobalISel is expected to load the
; byte into b1 and move it into v0[0]. Either way the ABI registers are used.
define <16 x i8> @ld1_16b(<16 x i8> %V, ptr %bar) {
; CHECK-SD-LABEL: ld1_16b:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    ld1.b { v0 }[0], [x0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: ld1_16b:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldr b1, [x0]
; CHECK-GI-NEXT:    mov.b v0[0], v1[0]
; CHECK-GI-NEXT:    ret
  %scalar = load i8, ptr %bar
  %updated = insertelement <16 x i8> %V, i8 %scalar, i32 0
  ret <16 x i8> %updated
}
1022
; Overwrite lane 0 of %V with an i16 loaded from %bar. Expected: one lane
; ld1.h on v0 addressed by x0 (the ABI-assigned registers).
define <8 x i16> @ld1_8h(<8 x i16> %V, ptr %bar) {
; CHECK-LABEL: ld1_8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1.h { v0 }[0], [x0]
; CHECK-NEXT:    ret
  %scalar = load i16, ptr %bar
  %updated = insertelement <8 x i16> %V, i16 %scalar, i32 0
  ret <8 x i16> %updated
}
1033
; Overwrite lane 0 of %V with an i32 loaded from %bar. Expected: one lane
; ld1.s on v0 addressed by x0 (the ABI-assigned registers).
define <4 x i32> @ld1_4s(<4 x i32> %V, ptr %bar) {
; CHECK-LABEL: ld1_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1.s { v0 }[0], [x0]
; CHECK-NEXT:    ret
  %scalar = load i32, ptr %bar
  %updated = insertelement <4 x i32> %V, i32 %scalar, i32 0
  ret <4 x i32> %updated
}
1044
; Overwrite lane 0 of %V with a float loaded from %bar. Expected: one lane
; ld1.s on v0 addressed by x0 (the ABI-assigned registers).
define <4 x float> @ld1_4s_float(<4 x float> %V, ptr %bar) {
; CHECK-LABEL: ld1_4s_float:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1.s { v0 }[0], [x0]
; CHECK-NEXT:    ret
  %scalar = load float, ptr %bar
  %updated = insertelement <4 x float> %V, float %scalar, i32 0
  ret <4 x float> %updated
}
1055
; Overwrite lane 0 of %V with an i64 loaded from %bar. Expected: one lane
; ld1.d on v0 addressed by x0 (the ABI-assigned registers).
define <2 x i64> @ld1_2d(<2 x i64> %V, ptr %bar) {
; CHECK-LABEL: ld1_2d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1.d { v0 }[0], [x0]
; CHECK-NEXT:    ret
  %scalar = load i64, ptr %bar
  %updated = insertelement <2 x i64> %V, i64 %scalar, i32 0
  ret <2 x i64> %updated
}
1066
; Overwrite lane 0 of %V with a double loaded from %bar. Expected: one lane
; ld1.d on v0 addressed by x0 (the ABI-assigned registers).
define <2 x double> @ld1_2d_double(<2 x double> %V, ptr %bar) {
; CHECK-LABEL: ld1_2d_double:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1.d { v0 }[0], [x0]
; CHECK-NEXT:    ret
  %scalar = load double, ptr %bar
  %updated = insertelement <2 x double> %V, double %scalar, i32 0
  ret <2 x double> %updated
}
1077
; Load a whole <1 x i64> from %p. Expected: a plain ldr of d0 from [x0],
; using the ABI-assigned result and argument registers.
define <1 x i64> @ld1_1d(ptr %p) {
; CHECK-LABEL: ld1_1d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ret
  %vec = load <1 x i64>, ptr %p, align 8
  ret <1 x i64> %vec
}
1087
; Overwrite lane 0 of a 64-bit %V with an i8 loaded from %bar. SelectionDAG is
; expected to use a lane ld1.b on v0; GlobalISel is expected to load into b1
; and move it into v0[0]. Both operate on the ABI-assigned registers.
define <8 x i8> @ld1_8b(<8 x i8> %V, ptr %bar) {
; CHECK-SD-LABEL: ld1_8b:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    ld1.b { v0 }[0], [x0]
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: ld1_8b:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldr b1, [x0]
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    mov.b v0[0], v1[0]
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    ret
  %scalar = load i8, ptr %bar
  %updated = insertelement <8 x i8> %V, i8 %scalar, i32 0
  ret <8 x i8> %updated
}
1108
; Overwrite lane 0 of a 64-bit %V with an i16 loaded from %bar. Expected: one
; lane ld1.h on v0 addressed by x0 (the ABI-assigned registers).
define <4 x i16> @ld1_4h(<4 x i16> %V, ptr %bar) {
; CHECK-LABEL: ld1_4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    ld1.h { v0 }[0], [x0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %scalar = load i16, ptr %bar
  %updated = insertelement <4 x i16> %V, i16 %scalar, i32 0
  ret <4 x i16> %updated
}
1121
; Overwrite lane 0 of a 64-bit %V with an i32 loaded from %bar. Expected: one
; lane ld1.s on v0 addressed by x0 (the ABI-assigned registers).
define <2 x i32> @ld1_2s(<2 x i32> %V, ptr %bar) {
; CHECK-LABEL: ld1_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    ld1.s { v0 }[0], [x0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %scalar = load i32, ptr %bar
  %updated = insertelement <2 x i32> %V, i32 %scalar, i32 0
  ret <2 x i32> %updated
}
1134
; Overwrite lane 0 of a 64-bit %V with a float loaded from %bar. Expected: one
; lane ld1.s on v0 addressed by x0 (the ABI-assigned registers).
define <2 x float> @ld1_2s_float(<2 x float> %V, ptr %bar) {
; CHECK-LABEL: ld1_2s_float:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    ld1.s { v0 }[0], [x0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %scalar = load float, ptr %bar
  %updated = insertelement <2 x float> %V, float %scalar, i32 0
  ret <2 x float> %updated
}
1147
1148
; Test case for rdar://13098923: vld1_dup_u32 doesn't generate ld1r.2s.
; An i32 is loaded from each of %a and %b and duplicated into a <2 x i32>.
; Both vectors are reinterpreted as bytes, zero-extended to i16 and subtracted;
; element 0 of the difference (viewed as <2 x i64>) is stored to %diff.
; GlobalISel is expected to use ld1r.2s for both loads, while SelectionDAG is
; expected to use scalar ldr loads of s0 and s1.
define void @ld1r_2s_from_dup(ptr nocapture %a, ptr nocapture %b, ptr nocapture %diff) nounwind ssp {
; CHECK-SD-LABEL: ld1r_2s_from_dup:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    ldr s0, [x0]
; CHECK-SD-NEXT:    ldr s1, [x1]
; CHECK-SD-NEXT:    usubl.8h v0, v0, v1
; CHECK-SD-NEXT:    str d0, [x2]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: ld1r_2s_from_dup:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    ld1r.2s { v0 }, [x0]
; CHECK-GI-NEXT:    ld1r.2s { v1 }, [x1]
; CHECK-GI-NEXT:    usubl.8h v0, v0, v1
; CHECK-GI-NEXT:    str d0, [x2]
; CHECK-GI-NEXT:    ret
entry:
  %a.val = load i32, ptr %a, align 4
  %a.ins = insertelement <2 x i32> undef, i32 %a.val, i32 0
  %a.dup = shufflevector <2 x i32> %a.ins, <2 x i32> undef, <2 x i32> zeroinitializer
  %a.bytes = bitcast <2 x i32> %a.dup to <8 x i8>
  %b.val = load i32, ptr %b, align 4
  %b.ins = insertelement <2 x i32> undef, i32 %b.val, i32 0
  %b.dup = shufflevector <2 x i32> %b.ins, <2 x i32> undef, <2 x i32> zeroinitializer
  %b.bytes = bitcast <2 x i32> %b.dup to <8 x i8>
  %a.wide = zext <8 x i8> %a.bytes to <8 x i16>
  %b.wide = zext <8 x i8> %b.bytes to <8 x i16>
  %delta = sub <8 x i16> %a.wide, %b.wide
  %delta.q = bitcast <8 x i16> %delta to <2 x i64>
  %delta.lo = shufflevector <2 x i64> %delta.q, <2 x i64> undef, <1 x i32> zeroinitializer
  %delta.lo.h = bitcast <1 x i64> %delta.lo to <4 x i16>
  store <4 x i16> %delta.lo.h, ptr %diff, align 8
  ret void
}
1184
1185; Tests for rdar://11947069: vld1_dup_* and vld1q_dup_* code gen is suboptimal
; Splat one loaded float across four lanes with an insertelement chain.
; Expected: a single ld1r.4s from [x0] into v0 (the ABI-assigned registers).
define <4 x float> @ld1r_4s_float(ptr nocapture %x) {
; CHECK-LABEL: ld1r_4s_float:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ld1r.4s { v0 }, [x0]
; CHECK-NEXT:    ret
entry:
  %scalar = load float, ptr %x, align 4
  %lane0 = insertelement <4 x float> undef, float %scalar, i32 0
  %lane1 = insertelement <4 x float> %lane0, float %scalar, i32 1
  %lane2 = insertelement <4 x float> %lane1, float %scalar, i32 2
  %splat = insertelement <4 x float> %lane2, float %scalar, i32 3
  ret <4 x float> %splat
}
1200
; Splat one loaded float into both lanes of a <2 x float>.
; Expected: a single ld1r.2s from [x0] into v0 (the ABI-assigned registers).
define <2 x float> @ld1r_2s_float(ptr nocapture %x) {
; CHECK-LABEL: ld1r_2s_float:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ld1r.2s { v0 }, [x0]
; CHECK-NEXT:    ret
entry:
  %scalar = load float, ptr %x, align 4
  %lane0 = insertelement <2 x float> undef, float %scalar, i32 0
  %splat = insertelement <2 x float> %lane0, float %scalar, i32 1
  ret <2 x float> %splat
}
1213
; Splat one loaded double (the load is only 4-byte aligned) into both lanes of
; a <2 x double>. Expected: a single ld1r.2d from [x0] into v0.
define <2 x double> @ld1r_2d_double(ptr nocapture %x) {
; CHECK-LABEL: ld1r_2d_double:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ld1r.2d { v0 }, [x0]
; CHECK-NEXT:    ret
entry:
  %scalar = load double, ptr %x, align 4
  %lane0 = insertelement <2 x double> undef, double %scalar, i32 0
  %splat = insertelement <2 x double> %lane0, double %scalar, i32 1
  ret <2 x double> %splat
}
1226
; Single-lane case: a loaded double placed in a <1 x double>. Expected: a
; plain ldr of d0 from [x0] rather than a replicating load.
define <1 x double> @ld1r_1d_double(ptr nocapture %x) {
; CHECK-LABEL: ld1r_1d_double:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ret
entry:
  %scalar = load double, ptr %x, align 4
  %vec = insertelement <1 x double> undef, double %scalar, i32 0
  ret <1 x double> %vec
}
1238
; Splat one loaded float across four lanes, expressed as an insert into lane 0
; followed by a zero-mask shufflevector. Expected: a single ld1r.4s from [x0]
; into v0 (the ABI-assigned registers).
define <4 x float> @ld1r_4s_float_shuff(ptr nocapture %x) {
; CHECK-LABEL: ld1r_4s_float_shuff:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ld1r.4s { v0 }, [x0]
; CHECK-NEXT:    ret
entry:
  %scalar = load float, ptr %x, align 4
  %seed = insertelement <4 x float> undef, float %scalar, i32 0
  %splat = shufflevector <4 x float> %seed, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %splat
}
1251
; Splat one loaded float into both lanes, expressed as an insert into lane 0
; followed by a zero-mask shufflevector. Expected: a single ld1r.2s from [x0]
; into v0 (the ABI-assigned registers).
define <2 x float> @ld1r_2s_float_shuff(ptr nocapture %x) {
; CHECK-LABEL: ld1r_2s_float_shuff:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ld1r.2s { v0 }, [x0]
; CHECK-NEXT:    ret
entry:
  %scalar = load float, ptr %x, align 4
  %seed = insertelement <2 x float> undef, float %scalar, i32 0
  %splat = shufflevector <2 x float> %seed, <2 x float> undef, <2 x i32> zeroinitializer
  ret <2 x float> %splat
}
1264
; Splat one loaded double into both lanes, expressed as an insert into lane 0
; followed by a zero-mask shufflevector. Expected: a single ld1r.2d from [x0]
; into v0 (the ABI-assigned registers).
define <2 x double> @ld1r_2d_double_shuff(ptr nocapture %x) {
; CHECK-LABEL: ld1r_2d_double_shuff:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ld1r.2d { v0 }, [x0]
; CHECK-NEXT:    ret
entry:
  %scalar = load double, ptr %x, align 4
  %seed = insertelement <2 x double> undef, double %scalar, i32 0
  %splat = shufflevector <2 x double> %seed, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %splat
}
1277
; Single-lane case of the insert + zero-mask shufflevector form. Expected: a
; plain ldr of d0 from [x0] rather than a replicating load.
define <1 x double> @ld1r_1d_double_shuff(ptr nocapture %x) {
; CHECK-LABEL: ld1r_1d_double_shuff:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ret
entry:
  %scalar = load double, ptr %x, align 4
  %seed = insertelement <1 x double> undef, double %scalar, i32 0
  %splat = shufflevector <1 x double> %seed, <1 x double> undef, <1 x i32> zeroinitializer
  ret <1 x double> %splat
}
1290
1291%struct.__neon_float32x2x2_t = type { <2 x float>,  <2 x float> }
1292%struct.__neon_float32x2x3_t = type { <2 x float>,  <2 x float>,  <2 x float> }
1293%struct.__neon_float32x2x4_t = type { <2 x float>,  <2 x float>, <2 x float>,  <2 x float> }
1294
1295declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld1x2.v8i8.p0(ptr) nounwind readonly
1296declare %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld1x2.v4i16.p0(ptr) nounwind readonly
1297declare %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld1x2.v2i32.p0(ptr) nounwind readonly
1298declare %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld1x2.v2f32.p0(ptr) nounwind readonly
1299declare %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld1x2.v1i64.p0(ptr) nounwind readonly
1300declare %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld1x2.v1f64.p0(ptr) nounwind readonly
1301
; ld1x2 on <8 x i8>: expected to lower to one two-register ld1.8b that fills
; v0-v1 from [x0].
define %struct.__neon_int8x8x2_t @ld1_x2_v8i8(ptr %addr) {
; CHECK-LABEL: ld1_x2_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1.8b { v0, v1 }, [x0]
; CHECK-NEXT:    ret
  %pair = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld1x2.v8i8.p0(ptr %addr)
  ret %struct.__neon_int8x8x2_t %pair
}
1310
; ld1x2 on <4 x i16>: expected to lower to one two-register ld1.4h that fills
; v0-v1 from [x0].
define %struct.__neon_int16x4x2_t @ld1_x2_v4i16(ptr %addr) {
; CHECK-LABEL: ld1_x2_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1.4h { v0, v1 }, [x0]
; CHECK-NEXT:    ret
  %pair = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld1x2.v4i16.p0(ptr %addr)
  ret %struct.__neon_int16x4x2_t %pair
}
1319
; ld1x2 on <2 x i32>: expected to lower to one two-register ld1.2s that fills
; v0-v1 from [x0].
define %struct.__neon_int32x2x2_t @ld1_x2_v2i32(ptr %addr) {
; CHECK-LABEL: ld1_x2_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1.2s { v0, v1 }, [x0]
; CHECK-NEXT:    ret
  %pair = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld1x2.v2i32.p0(ptr %addr)
  ret %struct.__neon_int32x2x2_t %pair
}
1328
; ld1x2 on <2 x float>: expected to lower to one two-register ld1.2s that
; fills v0-v1 from [x0].
define %struct.__neon_float32x2x2_t @ld1_x2_v2f32(ptr %addr) {
; CHECK-LABEL: ld1_x2_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1.2s { v0, v1 }, [x0]
; CHECK-NEXT:    ret
  %pair = call %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld1x2.v2f32.p0(ptr %addr)
  ret %struct.__neon_float32x2x2_t %pair
}
1337
; ld1x2 on <1 x i64>: expected to lower to one two-register ld1.1d that fills
; v0-v1 from [x0].
define %struct.__neon_int64x1x2_t @ld1_x2_v1i64(ptr %addr) {
; CHECK-LABEL: ld1_x2_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1.1d { v0, v1 }, [x0]
; CHECK-NEXT:    ret
  %pair = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld1x2.v1i64.p0(ptr %addr)
  ret %struct.__neon_int64x1x2_t %pair
}
1346
; ld1x2 on <1 x double>: expected to lower to one two-register ld1.1d that
; fills v0-v1 from [x0].
define %struct.__neon_float64x1x2_t @ld1_x2_v1f64(ptr %addr) {
; CHECK-LABEL: ld1_x2_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1.1d { v0, v1 }, [x0]
; CHECK-NEXT:    ret
  %pair = call %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld1x2.v1f64.p0(ptr %addr)
  ret %struct.__neon_float64x1x2_t %pair
}
1355
1356
1357%struct.__neon_float32x4x2_t = type { <4 x float>,  <4 x float> }
1358%struct.__neon_float32x4x3_t = type { <4 x float>,  <4 x float>,  <4 x float> }
1359%struct.__neon_float32x4x4_t = type { <4 x float>,  <4 x float>, <4 x float>,  <4 x float> }
1360
1361%struct.__neon_float64x2x2_t = type { <2 x double>,  <2 x double> }
1362%struct.__neon_float64x2x3_t = type { <2 x double>,  <2 x double>,  <2 x double> }
1363%struct.__neon_float64x2x4_t = type { <2 x double>,  <2 x double>, <2 x double>,  <2 x double> }
1364
1365declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld1x2.v16i8.p0(ptr) nounwind readonly
1366declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld1x2.v8i16.p0(ptr) nounwind readonly
1367declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld1x2.v4i32.p0(ptr) nounwind readonly
1368declare %struct.__neon_float32x4x2_t @llvm.aarch64.neon.ld1x2.v4f32.p0(ptr) nounwind readonly
1369declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld1x2.v2i64.p0(ptr) nounwind readonly
1370declare %struct.__neon_float64x2x2_t @llvm.aarch64.neon.ld1x2.v2f64.p0(ptr) nounwind readonly
1371
; ld1x2 on <16 x i8>: expected to lower to one two-register ld1.16b that fills
; v0-v1 from [x0].
define %struct.__neon_int8x16x2_t @ld1_x2_v16i8(ptr %addr) {
; CHECK-LABEL: ld1_x2_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1.16b { v0, v1 }, [x0]
; CHECK-NEXT:    ret
  %pair = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld1x2.v16i8.p0(ptr %addr)
  ret %struct.__neon_int8x16x2_t %pair
}
1380
; ld1x2 on <8 x i16>: expected to lower to one two-register ld1.8h that fills
; v0-v1 from [x0].
define %struct.__neon_int16x8x2_t @ld1_x2_v8i16(ptr %addr) {
; CHECK-LABEL: ld1_x2_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1.8h { v0, v1 }, [x0]
; CHECK-NEXT:    ret
  %pair = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld1x2.v8i16.p0(ptr %addr)
  ret %struct.__neon_int16x8x2_t %pair
}
1389
; ld1x2 on <4 x i32>: expected to lower to one two-register ld1.4s that fills
; v0-v1 from [x0].
define %struct.__neon_int32x4x2_t @ld1_x2_v4i32(ptr %addr) {
; CHECK-LABEL: ld1_x2_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1.4s { v0, v1 }, [x0]
; CHECK-NEXT:    ret
  %pair = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld1x2.v4i32.p0(ptr %addr)
  ret %struct.__neon_int32x4x2_t %pair
}
1398
; ld1x2 on <4 x float>: expected to lower to one two-register ld1.4s that
; fills v0-v1 from [x0].
define %struct.__neon_float32x4x2_t @ld1_x2_v4f32(ptr %addr) {
; CHECK-LABEL: ld1_x2_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1.4s { v0, v1 }, [x0]
; CHECK-NEXT:    ret
  %pair = call %struct.__neon_float32x4x2_t @llvm.aarch64.neon.ld1x2.v4f32.p0(ptr %addr)
  ret %struct.__neon_float32x4x2_t %pair
}
1407
1408define %struct.__neon_int64x2x2_t @ld1_x2_v2i64(ptr %addr) {
1409; CHECK-LABEL: ld1_x2_v2i64:
1410; CHECK:       // %bb.0:
1411; CHECK-NEXT:    ld1.2d { v0, v1 }, [x0]
1412; CHECK-NEXT:    ret
; The v2i64 overload of ld1x2 on %addr must become a single two-register
; ld1.2d that loads v0-v1 straight from x0, immediately followed by ret.
1413  %val = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld1x2.v2i64.p0(ptr %addr)
1414  ret %struct.__neon_int64x2x2_t %val
1415}
1416
1417define %struct.__neon_float64x2x2_t @ld1_x2_v2f64(ptr %addr) {
1418; CHECK-LABEL: ld1_x2_v2f64:
1419; CHECK:       // %bb.0:
1420; CHECK-NEXT:    ld1.2d { v0, v1 }, [x0]
1421; CHECK-NEXT:    ret
; Floating-point counterpart of the v2i64 case: the v2f64 overload of ld1x2 is
; expected to use the same two-register ld1.2d form, loading v0-v1 from x0.
1422  %val = call %struct.__neon_float64x2x2_t @llvm.aarch64.neon.ld1x2.v2f64.p0(ptr %addr)
1423  ret %struct.__neon_float64x2x2_t %val
1424}
1425
; Declarations of the ld1x3 intrinsic overloads (v8i8, v4i16, v2i32, v2f32,
; v1i64, v1f64) called by the ld1_x3_* tests that follow. Each takes a single
; ptr and is marked nounwind readonly.
1426declare %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld1x3.v8i8.p0(ptr) nounwind readonly
1427declare %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld1x3.v4i16.p0(ptr) nounwind readonly
1428declare %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld1x3.v2i32.p0(ptr) nounwind readonly
1429declare %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld1x3.v2f32.p0(ptr) nounwind readonly
1430declare %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld1x3.v1i64.p0(ptr) nounwind readonly
1431declare %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld1x3.v1f64.p0(ptr) nounwind readonly
1432
1433define %struct.__neon_int8x8x3_t @ld1_x3_v8i8(ptr %addr) {
1434; CHECK-LABEL: ld1_x3_v8i8:
1435; CHECK:       // %bb.0:
1436; CHECK-NEXT:    ld1.8b { v0, v1, v2 }, [x0]
1437; CHECK-NEXT:    ret
; The v8i8 overload of ld1x3 on %addr must become a single three-register
; ld1.8b that loads v0-v2 straight from x0, immediately followed by ret.
; The common CHECK prefix applies to both RUN lines (SelectionDAG and GlobalISel).
1438  %val = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld1x3.v8i8.p0(ptr %addr)
1439  ret %struct.__neon_int8x8x3_t %val
1440}
1441
1442define %struct.__neon_int16x4x3_t @ld1_x3_v4i16(ptr %addr) {
1443; CHECK-LABEL: ld1_x3_v4i16:
1444; CHECK:       // %bb.0:
1445; CHECK-NEXT:    ld1.4h { v0, v1, v2 }, [x0]
1446; CHECK-NEXT:    ret
; The v4i16 overload of ld1x3 on %addr must become a single three-register
; ld1.4h that loads v0-v2 straight from x0, immediately followed by ret.
1447  %val = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld1x3.v4i16.p0(ptr %addr)
1448  ret %struct.__neon_int16x4x3_t %val
1449}
1450
1451define %struct.__neon_int32x2x3_t @ld1_x3_v2i32(ptr %addr) {
1452; CHECK-LABEL: ld1_x3_v2i32:
1453; CHECK:       // %bb.0:
1454; CHECK-NEXT:    ld1.2s { v0, v1, v2 }, [x0]
1455; CHECK-NEXT:    ret
; The v2i32 overload of ld1x3 on %addr must become a single three-register
; ld1.2s that loads v0-v2 straight from x0, immediately followed by ret.
1456  %val = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld1x3.v2i32.p0(ptr %addr)
1457  ret %struct.__neon_int32x2x3_t %val
1458}
1459
1460define %struct.__neon_float32x2x3_t @ld1_x3_v2f32(ptr %addr) {
1461; CHECK-LABEL: ld1_x3_v2f32:
1462; CHECK:       // %bb.0:
1463; CHECK-NEXT:    ld1.2s { v0, v1, v2 }, [x0]
1464; CHECK-NEXT:    ret
; Floating-point counterpart of the v2i32 case: the v2f32 overload of ld1x3 is
; expected to use the same three-register ld1.2s form, loading v0-v2 from x0.
1465  %val = call %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld1x3.v2f32.p0(ptr %addr)
1466  ret %struct.__neon_float32x2x3_t %val
1467}
1468
1469define %struct.__neon_int64x1x3_t @ld1_x3_v1i64(ptr %addr) {
1470; CHECK-LABEL: ld1_x3_v1i64:
1471; CHECK:       // %bb.0:
1472; CHECK-NEXT:    ld1.1d { v0, v1, v2 }, [x0]
1473; CHECK-NEXT:    ret
; The v1i64 overload of ld1x3 on %addr must become a single three-register
; ld1.1d that loads v0-v2 straight from x0, immediately followed by ret.
1474  %val = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld1x3.v1i64.p0(ptr %addr)
1475  ret %struct.__neon_int64x1x3_t %val
1476}
1477
1478define %struct.__neon_float64x1x3_t @ld1_x3_v1f64(ptr %addr) {
1479; CHECK-LABEL: ld1_x3_v1f64:
1480; CHECK:       // %bb.0:
1481; CHECK-NEXT:    ld1.1d { v0, v1, v2 }, [x0]
1482; CHECK-NEXT:    ret
; Floating-point counterpart of the v1i64 case: the v1f64 overload of ld1x3 is
; expected to use the same three-register ld1.1d form, loading v0-v2 from x0.
1483  %val = call %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld1x3.v1f64.p0(ptr %addr)
1484  ret %struct.__neon_float64x1x3_t %val
1485}
1486
; Declarations of the ld1x3 intrinsic overloads (v16i8, v8i16, v4i32, v4f32,
; v2i64, v2f64) called by the ld1_x3_* tests that follow. Each takes a single
; ptr and is marked nounwind readonly.
1487declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld1x3.v16i8.p0(ptr) nounwind readonly
1488declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld1x3.v8i16.p0(ptr) nounwind readonly
1489declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld1x3.v4i32.p0(ptr) nounwind readonly
1490declare %struct.__neon_float32x4x3_t @llvm.aarch64.neon.ld1x3.v4f32.p0(ptr) nounwind readonly
1491declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld1x3.v2i64.p0(ptr) nounwind readonly
1492declare %struct.__neon_float64x2x3_t @llvm.aarch64.neon.ld1x3.v2f64.p0(ptr) nounwind readonly
1493
1494define %struct.__neon_int8x16x3_t @ld1_x3_v16i8(ptr %addr) {
1495; CHECK-LABEL: ld1_x3_v16i8:
1496; CHECK:       // %bb.0:
1497; CHECK-NEXT:    ld1.16b { v0, v1, v2 }, [x0]
1498; CHECK-NEXT:    ret
; The v16i8 overload of ld1x3 on %addr must become a single three-register
; ld1.16b that loads v0-v2 straight from x0, immediately followed by ret.
; The common CHECK prefix applies to both RUN lines (SelectionDAG and GlobalISel).
1499  %val = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld1x3.v16i8.p0(ptr %addr)
1500  ret %struct.__neon_int8x16x3_t %val
1501}
1502
1503define %struct.__neon_int16x8x3_t @ld1_x3_v8i16(ptr %addr) {
1504; CHECK-LABEL: ld1_x3_v8i16:
1505; CHECK:       // %bb.0:
1506; CHECK-NEXT:    ld1.8h { v0, v1, v2 }, [x0]
1507; CHECK-NEXT:    ret
; The v8i16 overload of ld1x3 on %addr must become a single three-register
; ld1.8h that loads v0-v2 straight from x0, immediately followed by ret.
1508  %val = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld1x3.v8i16.p0(ptr %addr)
1509  ret %struct.__neon_int16x8x3_t %val
1510}
1511
1512define %struct.__neon_int32x4x3_t @ld1_x3_v4i32(ptr %addr) {
1513; CHECK-LABEL: ld1_x3_v4i32:
1514; CHECK:       // %bb.0:
1515; CHECK-NEXT:    ld1.4s { v0, v1, v2 }, [x0]
1516; CHECK-NEXT:    ret
; The v4i32 overload of ld1x3 on %addr must become a single three-register
; ld1.4s that loads v0-v2 straight from x0, immediately followed by ret.
1517  %val = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld1x3.v4i32.p0(ptr %addr)
1518  ret %struct.__neon_int32x4x3_t %val
1519}
1520
1521define %struct.__neon_float32x4x3_t @ld1_x3_v4f32(ptr %addr) {
1522; CHECK-LABEL: ld1_x3_v4f32:
1523; CHECK:       // %bb.0:
1524; CHECK-NEXT:    ld1.4s { v0, v1, v2 }, [x0]
1525; CHECK-NEXT:    ret
; Floating-point counterpart of the v4i32 case: the v4f32 overload of ld1x3 is
; expected to use the same three-register ld1.4s form, loading v0-v2 from x0.
1526  %val = call %struct.__neon_float32x4x3_t @llvm.aarch64.neon.ld1x3.v4f32.p0(ptr %addr)
1527  ret %struct.__neon_float32x4x3_t %val
1528}
1529
1530define %struct.__neon_int64x2x3_t @ld1_x3_v2i64(ptr %addr) {
1531; CHECK-LABEL: ld1_x3_v2i64:
1532; CHECK:       // %bb.0:
1533; CHECK-NEXT:    ld1.2d { v0, v1, v2 }, [x0]
1534; CHECK-NEXT:    ret
; The v2i64 overload of ld1x3 on %addr must become a single three-register
; ld1.2d that loads v0-v2 straight from x0, immediately followed by ret.
1535  %val = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld1x3.v2i64.p0(ptr %addr)
1536  ret %struct.__neon_int64x2x3_t %val
1537}
1538
1539define %struct.__neon_float64x2x3_t @ld1_x3_v2f64(ptr %addr) {
1540; CHECK-LABEL: ld1_x3_v2f64:
1541; CHECK:       // %bb.0:
1542; CHECK-NEXT:    ld1.2d { v0, v1, v2 }, [x0]
1543; CHECK-NEXT:    ret
; Floating-point counterpart of the v2i64 case: the v2f64 overload of ld1x3 is
; expected to use the same three-register ld1.2d form, loading v0-v2 from x0.
1544  %val = call %struct.__neon_float64x2x3_t @llvm.aarch64.neon.ld1x3.v2f64.p0(ptr %addr)
1545  ret %struct.__neon_float64x2x3_t %val
1546}
1547
; Declarations of the ld1x4 intrinsic overloads (v8i8, v4i16, v2i32, v2f32,
; v1i64, v1f64) called by the ld1_x4_* tests that follow. Each takes a single
; ptr and is marked nounwind readonly.
1548declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld1x4.v8i8.p0(ptr) nounwind readonly
1549declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld1x4.v4i16.p0(ptr) nounwind readonly
1550declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld1x4.v2i32.p0(ptr) nounwind readonly
1551declare %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld1x4.v2f32.p0(ptr) nounwind readonly
1552declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld1x4.v1i64.p0(ptr) nounwind readonly
1553declare %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld1x4.v1f64.p0(ptr) nounwind readonly
1554
1555define %struct.__neon_int8x8x4_t @ld1_x4_v8i8(ptr %addr) {
1556; CHECK-LABEL: ld1_x4_v8i8:
1557; CHECK:       // %bb.0:
1558; CHECK-NEXT:    ld1.8b { v0, v1, v2, v3 }, [x0]
1559; CHECK-NEXT:    ret
; The v8i8 overload of ld1x4 on %addr must become a single four-register
; ld1.8b that loads v0-v3 straight from x0, immediately followed by ret.
; The common CHECK prefix applies to both RUN lines (SelectionDAG and GlobalISel).
1560  %val = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld1x4.v8i8.p0(ptr %addr)
1561  ret %struct.__neon_int8x8x4_t %val
1562}
1563
1564define %struct.__neon_int16x4x4_t @ld1_x4_v4i16(ptr %addr) {
1565; CHECK-LABEL: ld1_x4_v4i16:
1566; CHECK:       // %bb.0:
1567; CHECK-NEXT:    ld1.4h { v0, v1, v2, v3 }, [x0]
1568; CHECK-NEXT:    ret
; The v4i16 overload of ld1x4 on %addr must become a single four-register
; ld1.4h that loads v0-v3 straight from x0, immediately followed by ret.
1569  %val = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld1x4.v4i16.p0(ptr %addr)
1570  ret %struct.__neon_int16x4x4_t %val
1571}
1572
1573define %struct.__neon_int32x2x4_t @ld1_x4_v2i32(ptr %addr) {
1574; CHECK-LABEL: ld1_x4_v2i32:
1575; CHECK:       // %bb.0:
1576; CHECK-NEXT:    ld1.2s { v0, v1, v2, v3 }, [x0]
1577; CHECK-NEXT:    ret
; The v2i32 overload of ld1x4 on %addr must become a single four-register
; ld1.2s that loads v0-v3 straight from x0, immediately followed by ret.
1578  %val = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld1x4.v2i32.p0(ptr %addr)
1579  ret %struct.__neon_int32x2x4_t %val
1580}
1581
1582define %struct.__neon_float32x2x4_t @ld1_x4_v2f32(ptr %addr) {
1583; CHECK-LABEL: ld1_x4_v2f32:
1584; CHECK:       // %bb.0:
1585; CHECK-NEXT:    ld1.2s { v0, v1, v2, v3 }, [x0]
1586; CHECK-NEXT:    ret
; Floating-point counterpart of the v2i32 case: the v2f32 overload of ld1x4 is
; expected to use the same four-register ld1.2s form, loading v0-v3 from x0.
1587  %val = call %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld1x4.v2f32.p0(ptr %addr)
1588  ret %struct.__neon_float32x2x4_t %val
1589}
1590
1591define %struct.__neon_int64x1x4_t @ld1_x4_v1i64(ptr %addr) {
1592; CHECK-LABEL: ld1_x4_v1i64:
1593; CHECK:       // %bb.0:
1594; CHECK-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0]
1595; CHECK-NEXT:    ret
; The v1i64 overload of ld1x4 on %addr must become a single four-register
; ld1.1d that loads v0-v3 straight from x0, immediately followed by ret.
1596  %val = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld1x4.v1i64.p0(ptr %addr)
1597  ret %struct.__neon_int64x1x4_t %val
1598}
1599
1600define %struct.__neon_float64x1x4_t @ld1_x4_v1f64(ptr %addr) {
1601; CHECK-LABEL: ld1_x4_v1f64:
1602; CHECK:       // %bb.0:
1603; CHECK-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0]
1604; CHECK-NEXT:    ret
; Floating-point counterpart of the v1i64 case: the v1f64 overload of ld1x4 is
; expected to use the same four-register ld1.1d form, loading v0-v3 from x0.
1605  %val = call %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld1x4.v1f64.p0(ptr %addr)
1606  ret %struct.__neon_float64x1x4_t %val
1607}
1608
; Declarations of the ld1x4 intrinsic overloads (v16i8, v8i16, v4i32, v4f32,
; v2i64, v2f64) called by the ld1_x4_* tests that follow. Each takes a single
; ptr and is marked nounwind readonly.
1609declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld1x4.v16i8.p0(ptr) nounwind readonly
1610declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld1x4.v8i16.p0(ptr) nounwind readonly
1611declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld1x4.v4i32.p0(ptr) nounwind readonly
1612declare %struct.__neon_float32x4x4_t @llvm.aarch64.neon.ld1x4.v4f32.p0(ptr) nounwind readonly
1613declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld1x4.v2i64.p0(ptr) nounwind readonly
1614declare %struct.__neon_float64x2x4_t @llvm.aarch64.neon.ld1x4.v2f64.p0(ptr) nounwind readonly
1615
1616define %struct.__neon_int8x16x4_t @ld1_x4_v16i8(ptr %addr) {
1617; CHECK-LABEL: ld1_x4_v16i8:
1618; CHECK:       // %bb.0:
1619; CHECK-NEXT:    ld1.16b { v0, v1, v2, v3 }, [x0]
1620; CHECK-NEXT:    ret
; The v16i8 overload of ld1x4 on %addr must become a single four-register
; ld1.16b that loads v0-v3 straight from x0, immediately followed by ret.
; The common CHECK prefix applies to both RUN lines (SelectionDAG and GlobalISel).
1621  %val = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld1x4.v16i8.p0(ptr %addr)
1622  ret %struct.__neon_int8x16x4_t %val
1623}
1624
1625define %struct.__neon_int16x8x4_t @ld1_x4_v8i16(ptr %addr) {
1626; CHECK-LABEL: ld1_x4_v8i16:
1627; CHECK:       // %bb.0:
1628; CHECK-NEXT:    ld1.8h { v0, v1, v2, v3 }, [x0]
1629; CHECK-NEXT:    ret
; The v8i16 overload of ld1x4 on %addr must become a single four-register
; ld1.8h that loads v0-v3 straight from x0, immediately followed by ret.
1630  %val = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld1x4.v8i16.p0(ptr %addr)
1631  ret %struct.__neon_int16x8x4_t %val
1632}
1633
1634define %struct.__neon_int32x4x4_t @ld1_x4_v4i32(ptr %addr) {
1635; CHECK-LABEL: ld1_x4_v4i32:
1636; CHECK:       // %bb.0:
1637; CHECK-NEXT:    ld1.4s { v0, v1, v2, v3 }, [x0]
1638; CHECK-NEXT:    ret
; The v4i32 overload of ld1x4 on %addr must become a single four-register
; ld1.4s that loads v0-v3 straight from x0, immediately followed by ret.
1639  %val = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld1x4.v4i32.p0(ptr %addr)
1640  ret %struct.__neon_int32x4x4_t %val
1641}
1642
1643define %struct.__neon_float32x4x4_t @ld1_x4_v4f32(ptr %addr) {
1644; CHECK-LABEL: ld1_x4_v4f32:
1645; CHECK:       // %bb.0:
1646; CHECK-NEXT:    ld1.4s { v0, v1, v2, v3 }, [x0]
1647; CHECK-NEXT:    ret
; Floating-point counterpart of the v4i32 case: the v4f32 overload of ld1x4 is
; expected to use the same four-register ld1.4s form, loading v0-v3 from x0.
1648  %val = call %struct.__neon_float32x4x4_t @llvm.aarch64.neon.ld1x4.v4f32.p0(ptr %addr)
1649  ret %struct.__neon_float32x4x4_t %val
1650}
1651
1652define %struct.__neon_int64x2x4_t @ld1_x4_v2i64(ptr %addr) {
1653; CHECK-LABEL: ld1_x4_v2i64:
1654; CHECK:       // %bb.0:
1655; CHECK-NEXT:    ld1.2d { v0, v1, v2, v3 }, [x0]
1656; CHECK-NEXT:    ret
; The v2i64 overload of ld1x4 on %addr must become a single four-register
; ld1.2d that loads v0-v3 straight from x0, immediately followed by ret.
1657  %val = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld1x4.v2i64.p0(ptr %addr)
1658  ret %struct.__neon_int64x2x4_t %val
1659}
1660
1661define %struct.__neon_float64x2x4_t @ld1_x4_v2f64(ptr %addr) {
1662; CHECK-LABEL: ld1_x4_v2f64:
1663; CHECK:       // %bb.0:
1664; CHECK-NEXT:    ld1.2d { v0, v1, v2, v3 }, [x0]
1665; CHECK-NEXT:    ret
; Floating-point counterpart of the v2i64 case: the v2f64 overload of ld1x4 is
; expected to use the same four-register ld1.2d form, loading v0-v3 from x0.
1666  %val = call %struct.__neon_float64x2x4_t @llvm.aarch64.neon.ld1x4.v2f64.p0(ptr %addr)
1667  ret %struct.__neon_float64x2x4_t %val
1668}
1669
1670define <8 x i8> @dup_ld1_from_stack(ptr %__ret) {
1671; CHECK-SD-LABEL: dup_ld1_from_stack:
1672; CHECK-SD:       // %bb.0: // %entry
1673; CHECK-SD-NEXT:    sub sp, sp, #16
1674; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
1675; CHECK-SD-NEXT:    add x8, sp, #15
1676; CHECK-SD-NEXT:    ld1r.8b { v0 }, [x8]
1677; CHECK-SD-NEXT:    add sp, sp, #16
1678; CHECK-SD-NEXT:    ret
1679;
1680; CHECK-GI-LABEL: dup_ld1_from_stack:
1681; CHECK-GI:       // %bb.0: // %entry
1682; CHECK-GI-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
1683; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
1684; CHECK-GI-NEXT:    .cfi_offset w29, -16
1685; CHECK-GI-NEXT:    add x8, sp, #15
1686; CHECK-GI-NEXT:    ld1r.8b { v0 }, [x8]
1687; CHECK-GI-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
1688; CHECK-GI-NEXT:    ret
; A byte loaded from the stack slot %item and broadcast to all eight lanes
; (insertelement into lane 0, then shufflevector with an all-zero mask) is
; expected to fold into one ld1r.8b whose address is materialised as sp + 15.
; The expectations are split between the CHECK-SD and CHECK-GI prefixes because
; the two selectors set up the 16-byte frame differently: SelectionDAG adjusts
; sp with sub/add, while GlobalISel spills and reloads x29 with pre/post-indexed
; str/ldr.
; NOTE(review): %item is loaded without a prior store and %__ret is never
; referenced; presumably only the selected instructions matter to this test.
1689entry:
1690  %item = alloca i8, align 1
1691  %0 = load i8, ptr %item, align 1
1692  %1 = insertelement <8 x i8> poison, i8 %0, i32 0
1693  %lane = shufflevector <8 x i8> %1, <8 x i8> %1, <8 x i32> zeroinitializer
1694  ret <8 x i8> %lane
1695}
1696